1 /* PSPP - EXAMINE data for normality . -*-c-*-
3 Copyright (C) 2004 Free Software Foundation, Inc.
4 Author: John Darrington 2004
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 #include <gsl/gsl_cdf.h>
30 #include "dictionary.h"
38 #include "value-labels.h"
43 #include "factor_stats.h"
52 +missing=miss:pairwise/!listwise,
54 incl:include/!exclude;
55 +compare=cmp:variables/!groups;
56 +plot[plt_]=stemleaf,boxplot,npplot,:spreadlevel(*d:n),histogram,all,none;
58 +statistics[st_]=descriptives,:extreme(*d:n),all,none.
67 static struct cmd_examine cmd;
69 static struct variable **dependent_vars;
71 static int n_dependent_vars;
73 static struct hsh_table *hash_table_factors=0;
80 /* The independent variable for this factor */
81 struct variable *indep_var;
83 /* The factor statistics for each value of the independent variable */
84 struct hsh_table *hash_table_val;
86 /* The subfactor (if any) */
87 struct factor *subfactor;
94 /* Parse the clause specifying the factors */
95 static int examine_parse_independent_vars(struct cmd_examine *cmd,
96 struct hsh_table *hash_factors );
101 /* Functions to support hashes of factors */
102 int compare_factors(const struct factor *f1, const struct factor *f2,
105 unsigned hash_factor(const struct factor *f, void *aux);
107 void free_factor(struct factor *f, void *aux UNUSED);
110 /* Output functions */
111 static void show_summary(struct variable **dependent_var, int n_dep_var,
114 static void show_descriptives(struct variable **dependent_var,
116 struct factor *factor);
119 static void show_extremes(struct variable **dependent_var,
121 struct factor *factor,
125 void np_plot(const struct metrics *m, const char *varname);
129 /* Per Split function */
130 static void run_examine(const struct casefile *cf, void *);
132 static void output_examine(void);
135 static struct factor_statistics *totals = 0;
143 if ( !parse_examine(&cmd) )
146 if ( cmd.st_n == SYSMIS )
149 if ( ! cmd.sbc_cinterval)
150 cmd.n_cinterval[0] = 95.0;
153 totals = xmalloc ( sizeof (struct factor_statistics *) );
155 totals->stats = xmalloc(sizeof ( struct metrics ) * n_dependent_vars);
157 multipass_procedure_with_splits (run_examine, NULL);
160 hsh_destroy(hash_table_factors);
169 /* Show all the appropriate tables */
174 /* Show totals if appropriate */
175 if ( ! cmd.sbc_nototal ||
176 ! hash_table_factors || 0 == hsh_count (hash_table_factors))
178 show_summary(dependent_vars, n_dependent_vars,0);
180 if ( cmd.sbc_statistics )
182 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
183 show_descriptives(dependent_vars, n_dependent_vars, 0);
185 if ( cmd.a_statistics[XMN_ST_EXTREME])
186 show_extremes(dependent_vars, n_dependent_vars, 0, cmd.st_n);
191 if ( cmd.a_plot[XMN_PLT_NPPLOT] )
195 for ( v = 0 ; v < n_dependent_vars; ++v )
197 np_plot(&totals->stats[v], var_to_string(dependent_vars[v]));
206 /* Show grouped statistics if appropriate */
207 if ( hash_table_factors && 0 != hsh_count (hash_table_factors))
209 struct hsh_iterator hi;
212 for(f = hsh_first(hash_table_factors,&hi);
214 f = hsh_next(hash_table_factors,&hi))
216 show_summary(dependent_vars, n_dependent_vars,f);
218 if ( cmd.sbc_statistics )
220 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
221 show_descriptives(dependent_vars, n_dependent_vars, f);
223 if ( cmd.a_statistics[XMN_ST_EXTREME])
224 show_extremes(dependent_vars, n_dependent_vars, f, cmd.st_n);
230 if ( cmd.a_plot[XMN_PLT_NPPLOT] )
232 struct hsh_iterator h2;
233 struct factor_statistics *foo ;
234 for (foo = hsh_first(f->hash_table_val,&h2);
236 foo = hsh_next(f->hash_table_val,&h2))
239 for ( v = 0 ; v < n_dependent_vars; ++ v)
242 sprintf(buf, "%s (%s = %s)",
243 var_to_string(dependent_vars[v]),
244 var_to_string(f->indep_var),
245 value_to_string(foo->id,f->indep_var));
246 np_plot(&foo->stats[v], buf);
258 /* TOTAL and NOTOTAL are simple, mutually exclusive flags */
260 xmn_custom_total(struct cmd_examine *p)
262 if ( p->sbc_nototal )
264 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
272 xmn_custom_nototal(struct cmd_examine *p)
276 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
284 /* Compare two factors */
286 compare_factors (const struct factor *f1,
287 const struct factor *f2,
290 int indep_var_cmp = strcmp(f1->indep_var->name, f2->indep_var->name);
292 if ( 0 != indep_var_cmp )
293 return indep_var_cmp;
295 /* If the names are identical, and there are no subfactors then
296 the factors are identical */
297 if ( ! f1->subfactor && ! f2->subfactor )
300 /* ... otherwise we must compare the subfactors */
302 return compare_factors(f1->subfactor, f2->subfactor, aux);
306 /* Create a hash of a factor */
308 hash_factor( const struct factor *f, void *aux)
311 h = hsh_hash_string(f->indep_var->name);
314 h += hash_factor(f->subfactor, aux);
320 /* Free up a factor */
322 free_factor(struct factor *f, void *aux)
324 hsh_destroy(f->hash_table_val);
327 free_factor(f->subfactor, aux);
333 /* Parser for the variables sub command */
335 xmn_custom_variables(struct cmd_examine *cmd )
340 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
344 if (!parse_variables (default_dict, &dependent_vars, &n_dependent_vars,
345 PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
347 free (dependent_vars);
351 assert(n_dependent_vars);
353 if ( lex_match(T_BY))
355 hash_table_factors = hsh_create(4,
356 (hsh_compare_func *) compare_factors,
357 (hsh_hash_func *) hash_factor,
358 (hsh_free_func *) free_factor, 0);
360 return examine_parse_independent_vars(cmd, hash_table_factors);
369 /* Parse the clause specifying the factors */
371 examine_parse_independent_vars(struct cmd_examine *cmd,
372 struct hsh_table *hash_table_factors )
374 struct factor *f = 0;
376 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
382 f = xmalloc(sizeof(struct factor));
384 f->hash_table_val = 0;
388 f->indep_var = parse_variable();
390 if ( ! f->hash_table_val )
391 f->hash_table_val = hsh_create(4,(hsh_compare_func *) compare_indep_values,
392 (hsh_hash_func *) hash_indep_value,
393 (hsh_free_func *) free_factor_stats,
394 (void *) f->indep_var->width);
400 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
404 f->subfactor = xmalloc(sizeof(struct factor));
406 f->subfactor->indep_var = parse_variable();
408 f->subfactor->subfactor = 0;
410 f->subfactor->hash_table_val =
412 (hsh_compare_func *) compare_indep_values,
413 (hsh_hash_func *) hash_indep_value,
414 (hsh_free_func *) free_factor_stats,
415 (void *) f->subfactor->indep_var->width);
418 hsh_insert(hash_table_factors, f);
422 if ( token == '.' || token == '/' )
425 return examine_parse_independent_vars(cmd, hash_table_factors);
429 void populate_descriptives(struct tab_table *t, int col, int row,
430 const struct metrics *fs);
433 void populate_extremities(struct tab_table *t, int col, int row, int n);
436 /* Show the descriptives table */
438 show_descriptives(struct variable **dependent_var,
440 struct factor *factor)
443 int heading_columns ;
445 const int n_stat_rows = 13;
447 const int heading_rows = 1;
448 int n_rows = heading_rows ;
456 n_rows += n_dep_var * n_stat_rows;
460 assert(factor->indep_var);
461 if ( factor->subfactor == 0 )
464 n_rows += n_dep_var * hsh_count(factor->hash_table_val) * n_stat_rows;
469 n_rows += n_dep_var * hsh_count(factor->hash_table_val) *
470 hsh_count(factor->subfactor->hash_table_val) * n_stat_rows ;
474 n_cols = heading_columns + 4;
476 t = tab_create (n_cols, n_rows, 0);
478 tab_headers (t, heading_columns + 1, 0, heading_rows, 0);
480 tab_dim (t, tab_natural_dimensions);
482 /* Outline the box and have no internal lines*/
487 n_cols - 1, n_rows - 1);
489 tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows );
491 tab_vline (t, TAL_1, heading_columns, 0, n_rows - 1);
492 tab_vline (t, TAL_2, n_cols - 2, 0, n_rows - 1);
493 tab_vline (t, TAL_1, n_cols - 1, 0, n_rows - 1);
495 tab_text (t, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Statistic"));
496 tab_text (t, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Std. Error"));
499 for ( i = 0 ; i < n_dep_var ; ++i )
502 int n_subfactors = 1;
507 n_factors = hsh_count(factor->hash_table_val);
508 if ( factor->subfactor )
509 n_subfactors = hsh_count(factor->subfactor->hash_table_val);
513 row = heading_rows + i * n_stat_rows * n_factors * n_subfactors;
516 tab_hline(t, TAL_1, 0, n_cols - 1, row );
520 struct hsh_iterator hi;
521 const struct factor_statistics *fs;
524 tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
525 var_to_string(factor->indep_var));
529 for (fs = hsh_first(factor->hash_table_val, &hi);
531 fs = hsh_next(factor->hash_table_val, &hi))
534 row + count * n_subfactors * n_stat_rows,
535 TAB_RIGHT | TAT_TITLE,
536 value_to_string(fs->id, factor->indep_var)
540 tab_hline (t, TAL_1, 1, n_cols - 1,
541 row + count * n_subfactors * n_stat_rows);
543 if ( factor->subfactor )
546 struct hsh_iterator h2;
547 const struct factor_statistics *sub_fs;
549 tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
550 var_to_string(factor->subfactor->indep_var));
552 for ( sub_fs = hsh_first(factor->subfactor->hash_table_val,
555 sub_fs = hsh_next(factor->subfactor->hash_table_val,
561 + count * n_subfactors * n_stat_rows
562 + count2 * n_stat_rows,
563 TAB_RIGHT | TAT_TITLE ,
564 value_to_string(sub_fs->id, factor->subfactor->indep_var)
568 tab_hline (t, TAL_1, 2, n_cols - 1,
570 + count * n_subfactors * n_stat_rows
571 + count2 * n_stat_rows);
573 populate_descriptives(t, heading_columns,
575 + count * n_subfactors
577 + count2 * n_stat_rows,
587 populate_descriptives(t, heading_columns,
589 + count * n_subfactors * n_stat_rows,
598 populate_descriptives(t, heading_columns,
599 row, &totals->stats[i]);
604 TAB_LEFT | TAT_TITLE,
605 var_to_string(dependent_var[i])
610 tab_title (t, 0, _("Descriptives"));
618 /* Fill in the descriptives data */
620 populate_descriptives(struct tab_table *tbl, int col, int row,
621 const struct metrics *m)
624 const double t = gsl_cdf_tdist_Qinv(1 - cmd.n_cinterval[0]/100.0/2.0, \
630 TAB_LEFT | TAT_TITLE,
633 tab_float (tbl, col + 2,
639 tab_float (tbl, col + 3,
648 TAB_LEFT | TAT_TITLE | TAT_PRINTF,
649 _("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]);
652 tab_text (tbl, col + 1,
654 TAB_LEFT | TAT_TITLE,
657 tab_float (tbl, col + 2,
660 m->mean - t * m->stderr,
663 tab_text (tbl, col + 1,
665 TAB_LEFT | TAT_TITLE,
669 tab_float (tbl, col + 2,
672 m->mean + t * m->stderr,
677 TAB_LEFT | TAT_TITLE,
678 _("5% Trimmed Mean"));
680 tab_float (tbl, col + 2,
688 TAB_LEFT | TAT_TITLE,
693 TAB_LEFT | TAT_TITLE,
696 tab_float (tbl, col + 2,
705 TAB_LEFT | TAT_TITLE,
706 _("Std. Deviation"));
709 tab_float (tbl, col + 2,
718 TAB_LEFT | TAT_TITLE,
721 tab_float (tbl, col + 2,
729 TAB_LEFT | TAT_TITLE,
732 tab_float (tbl, col + 2,
741 TAB_LEFT | TAT_TITLE,
745 tab_float (tbl, col + 2,
753 TAB_LEFT | TAT_TITLE,
754 _("Interquartile Range"));
758 TAB_LEFT | TAT_TITLE,
763 TAB_LEFT | TAT_TITLE,
769 show_summary(struct variable **dependent_var,
771 struct factor *factor)
773 static const char *subtitle[]=
781 int heading_columns ;
783 const int heading_rows = 3;
784 struct tab_table *tbl;
786 int n_rows = heading_rows;
795 assert(factor->indep_var);
796 if ( factor->subfactor == 0 )
799 n_rows += n_dep_var * hsh_count(factor->hash_table_val);
804 n_rows += n_dep_var * hsh_count(factor->hash_table_val) *
805 hsh_count(factor->subfactor->hash_table_val) ;
810 n_cols = heading_columns + 6;
812 tbl = tab_create (n_cols,n_rows,0);
813 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
815 tab_dim (tbl, tab_natural_dimensions);
817 /* Outline the box and have vertical internal lines*/
822 n_cols - 1, n_rows - 1);
824 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
825 tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, 1 );
826 tab_hline (tbl, TAL_1, 0, n_cols - 1, heading_rows -1 );
828 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
831 tab_title (tbl, 0, _("Case Processing Summary"));
834 tab_joint_text(tbl, heading_columns, 0,
836 TAB_CENTER | TAT_TITLE,
839 /* Remove lines ... */
848 tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
849 var_to_string(factor->indep_var));
851 if ( factor->subfactor )
852 tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
853 var_to_string(factor->subfactor->indep_var));
856 for ( i = 0 ; i < 3 ; ++i )
858 tab_text (tbl, heading_columns + i*2 , 2, TAB_CENTER | TAT_TITLE, _("N"));
859 tab_text (tbl, heading_columns + i*2 + 1, 2, TAB_CENTER | TAT_TITLE,
862 tab_joint_text(tbl, heading_columns + i*2 , 1,
863 heading_columns + i*2 + 1, 1,
864 TAB_CENTER | TAT_TITLE,
867 tab_box (tbl, -1, -1,
869 heading_columns + i*2, 1,
870 heading_columns + i*2 + 1, 1);
875 for ( i = 0 ; i < n_dep_var ; ++i )
877 int n_subfactors = 1;
882 n_factors = hsh_count(factor->hash_table_val);
883 if ( factor->subfactor )
884 n_subfactors = hsh_count(factor->subfactor->hash_table_val);
888 0, i * n_factors * n_subfactors + heading_rows,
889 TAB_LEFT | TAT_TITLE,
890 var_to_string(dependent_var[i])
895 struct hsh_iterator hi;
896 const struct factor_statistics *fs;
899 for (fs = hsh_first(factor->hash_table_val, &hi);
901 fs = hsh_next(factor->hash_table_val, &hi))
904 i * n_factors * n_subfactors + heading_rows
905 + count * n_subfactors,
906 TAB_RIGHT | TAT_TITLE,
907 value_to_string(fs->id, factor->indep_var)
910 if ( factor->subfactor )
913 struct hsh_iterator h2;
914 const struct factor_statistics *sub_fs;
916 for ( sub_fs = hsh_first(factor->subfactor->hash_table_val,
919 sub_fs = hsh_next(factor->subfactor->hash_table_val,
924 i * n_factors * n_subfactors + heading_rows
925 + count * n_subfactors + count2,
926 TAB_RIGHT | TAT_TITLE ,
927 value_to_string(sub_fs->id, factor->subfactor->indep_var)
943 static int bad_weight_warn = 1;
947 run_examine(const struct casefile *cf, void *aux UNUSED)
949 struct hsh_iterator hi;
952 struct casereader *r;
956 /* Make sure we haven't got rubbish left over from a
958 if ( hash_table_factors )
960 for ( fctr = hsh_first(hash_table_factors, &hi);
962 fctr = hsh_next (hash_table_factors, &hi) )
964 hsh_clear(fctr->hash_table_val);
966 while ( (fctr = fctr->subfactor) )
967 hsh_clear(fctr->hash_table_val);
971 for ( v = 0 ; v < n_dependent_vars ; ++v )
972 metrics_precalc(&totals->stats[v]);
974 for(r = casefile_get_reader (cf);
975 casereader_read (r, &c) ;
978 const double weight =
979 dict_get_case_weight(default_dict, &c, &bad_weight_warn);
981 for ( v = 0 ; v < n_dependent_vars ; ++v )
983 const struct variable *var = dependent_vars[v];
984 const union value *val = case_data (&c, var->fv);
986 metrics_calc(&totals->stats[v], val, weight);
989 if ( hash_table_factors )
991 for ( fctr = hsh_first(hash_table_factors, &hi);
993 fctr = hsh_next (hash_table_factors, &hi) )
995 const union value *indep_val =
996 case_data(&c, fctr->indep_var->fv);
998 struct factor_statistics **foo = ( struct factor_statistics ** )
999 hsh_probe(fctr->hash_table_val, (void *) &indep_val);
1003 *foo = xmalloc ( sizeof ( struct factor_statistics));
1004 (*foo)->id = indep_val;
1005 (*foo)->stats = xmalloc ( sizeof ( struct metrics )
1006 * n_dependent_vars);
1008 for ( v = 0 ; v < n_dependent_vars ; ++v )
1009 metrics_precalc( &(*foo)->stats[v] );
1011 hsh_insert(fctr->hash_table_val, (void *) *foo);
1014 for ( v = 0 ; v < n_dependent_vars ; ++v )
1016 const struct variable *var = dependent_vars[v];
1017 const union value *val = case_data (&c, var->fv);
1019 metrics_calc( &(*foo)->stats[v], val, weight );
1022 if ( fctr->subfactor )
1024 struct factor *sfctr = fctr->subfactor;
1026 const union value *ii_val =
1027 case_data (&c, sfctr->indep_var->fv);
1029 struct factor_statistics **bar =
1030 (struct factor_statistics **)
1031 hsh_probe(sfctr->hash_table_val, (void *) &ii_val);
1035 *bar = xmalloc ( sizeof ( struct factor_statistics));
1036 (*bar)->id = ii_val;
1037 (*bar)->stats = xmalloc ( sizeof ( struct metrics )
1038 * n_dependent_vars);
1040 for ( v = 0 ; v < n_dependent_vars ; ++v )
1041 metrics_precalc( &(*bar)->stats[v] );
1043 hsh_insert(sfctr->hash_table_val,
1047 for ( v = 0 ; v < n_dependent_vars ; ++v )
1049 const struct variable *var = dependent_vars[v];
1050 const union value *val = case_data (&c, var->fv);
1052 metrics_calc( &(*bar)->stats[v], val, weight );
1060 for ( v = 0 ; v < n_dependent_vars ; ++v)
1062 if ( hash_table_factors )
1064 for ( fctr = hsh_first(hash_table_factors, &hi);
1066 fctr = hsh_next (hash_table_factors, &hi) )
1068 struct hsh_iterator h2;
1069 struct factor_statistics *fs;
1071 for ( fs = hsh_first(fctr->hash_table_val,&h2);
1073 fs = hsh_next(fctr->hash_table_val,&h2))
1075 metrics_postcalc( &fs->stats[v] );
1078 if ( fctr->subfactor)
1080 struct hsh_iterator hsf;
1081 struct factor_statistics *fss;
1083 for ( fss = hsh_first(fctr->subfactor->hash_table_val,&hsf);
1085 fss = hsh_next(fctr->subfactor->hash_table_val,&hsf))
1087 metrics_postcalc( &fss->stats[v] );
1093 metrics_postcalc(&totals->stats[v]);
1102 show_extremes(struct variable **dependent_var,
1104 struct factor *factor,
1108 int heading_columns ;
1110 const int heading_rows = 1;
1111 struct tab_table *t;
1113 int n_rows = heading_rows;
1117 heading_columns = 1 + 1;
1118 n_rows += n_dep_var * 2 * n_extremities;
1122 assert(factor->indep_var);
1123 if ( factor->subfactor == 0 )
1125 heading_columns = 2 + 1;
1126 n_rows += n_dep_var * 2 * n_extremities
1127 * hsh_count(factor->hash_table_val);
1131 heading_columns = 3 + 1;
1132 n_rows += n_dep_var * 2 * n_extremities
1133 * hsh_count(factor->hash_table_val)
1134 * hsh_count(factor->subfactor->hash_table_val) ;
1139 n_cols = heading_columns + 3;
1141 t = tab_create (n_cols,n_rows,0);
1142 tab_headers (t, heading_columns, 0, heading_rows, 0);
1144 tab_dim (t, tab_natural_dimensions);
1146 /* Outline the box and have vertical internal lines*/
1151 n_cols - 1, n_rows - 1);
1155 tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows );
1157 tab_title (t, 0, _("Extreme Values"));
1162 /* Remove lines ... */
1171 tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1172 var_to_string(factor->indep_var));
1174 if ( factor->subfactor )
1175 tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1176 var_to_string(factor->subfactor->indep_var));
1179 tab_text (t, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value"));
1180 tab_text (t, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number"));
1183 for ( i = 0 ; i < n_dep_var ; ++i )
1185 int n_subfactors = 1;
1190 n_factors = hsh_count(factor->hash_table_val);
1191 if ( factor->subfactor )
1192 n_subfactors = hsh_count(factor->subfactor->hash_table_val);
1196 0, i * 2 * n_extremities * n_factors *
1197 n_subfactors + heading_rows,
1198 TAB_LEFT | TAT_TITLE,
1199 var_to_string(dependent_var[i])
1205 TAL_1, 0, n_cols - 1,
1206 heading_rows + 2 * n_extremities *
1207 (i * n_factors * n_subfactors )
1212 struct hsh_iterator hi;
1213 const struct factor_statistics *fs;
1216 for ( fs = hsh_first(factor->hash_table_val, &hi);
1218 fs = hsh_next(factor->hash_table_val, &hi))
1220 tab_text (t, 1, heading_rows + 2 * n_extremities *
1221 (i * n_factors * n_subfactors
1222 + count * n_subfactors),
1223 TAB_RIGHT | TAT_TITLE,
1224 value_to_string(fs->id, factor->indep_var)
1228 tab_hline (t, TAL_1, 1, n_cols - 1,
1229 heading_rows + 2 * n_extremities *
1230 (i * n_factors * n_subfactors
1231 + count * n_subfactors));
1234 if ( factor->subfactor )
1236 struct hsh_iterator h2;
1237 const struct factor_statistics *sub_fs;
1240 for ( sub_fs = hsh_first(factor->subfactor->hash_table_val,
1243 sub_fs = hsh_next(factor->subfactor->hash_table_val,
1247 tab_text(t, 2, heading_rows + 2 * n_extremities *
1248 (i * n_factors * n_subfactors
1249 + count * n_subfactors + count2 ),
1250 TAB_RIGHT | TAT_TITLE ,
1251 value_to_string(sub_fs->id,
1252 factor->subfactor->indep_var)
1257 tab_hline (t, TAL_1, 2, n_cols - 1,
1258 heading_rows + 2 * n_extremities *
1259 (i * n_factors * n_subfactors
1260 + count * n_subfactors + count2 ));
1262 populate_extremities(t,3,
1263 heading_rows + 2 * n_extremities *
1264 (i * n_factors * n_subfactors
1265 + count * n_subfactors + count2),
1273 populate_extremities(t,2,
1274 heading_rows + 2 * n_extremities *
1275 (i * n_factors * n_subfactors
1276 + count * n_subfactors),
1285 populate_extremities(t, 1,
1286 heading_rows + 2 * n_extremities *
1287 (i * n_factors * n_subfactors ),
1298 /* Fill in the extremities table */
1300 populate_extremities(struct tab_table *t, int col, int row, int n)
1304 tab_text(t, col, row,
1305 TAB_RIGHT | TAT_TITLE ,
1309 tab_text(t, col, row + n ,
1310 TAB_RIGHT | TAT_TITLE ,
1314 for (i = 0; i < n ; ++i )
1316 tab_float(t, col + 1, row + i,
1320 tab_float(t, col + 1, row + i + n,
1328 /* Plot the normal and detrended normal plots for m
1329 Label the plots with factorname */
1331 np_plot(const struct metrics *m, const char *factorname)
1334 double yfirst=0, ylast=0;
1337 struct chart np_chart;
1339 /* Detrended Normal Plot */
1340 struct chart dnp_chart;
1342 const struct weighted_value *wv = m->wv;
1343 const int n_data = hsh_count(m->ordered_data) ;
1345 /* The slope and intercept of the ideal normal probability line */
1346 const double slope = 1.0 / m->stddev;
1347 const double intercept = - m->mean / m->stddev;
1349 chart_initialise(&np_chart);
1350 chart_write_title(&np_chart, _("Normal Q-Q Plot of %s"), factorname);
1351 chart_write_xlabel(&np_chart, _("Observed Value"));
1352 chart_write_ylabel(&np_chart, _("Expected Normal"));
1354 chart_initialise(&dnp_chart);
1355 chart_write_title(&dnp_chart, _("Detrended Normal Q-Q Plot of %s"),
1357 chart_write_xlabel(&dnp_chart, _("Observed Value"));
1358 chart_write_ylabel(&dnp_chart, _("Dev from Normal"));
1360 yfirst = gsl_cdf_ugaussian_Pinv (wv[0].rank / ( m->n + 1));
1361 ylast = gsl_cdf_ugaussian_Pinv (wv[n_data-1].rank / ( m->n + 1));
1364 /* Need to make sure that both the scatter plot and the ideal fit into the
1366 double x_lower = min(m->min, (yfirst - intercept) / slope) ;
1367 double x_upper = max(m->max, (ylast - intercept) / slope) ;
1368 double slack = (x_upper - x_lower) * 0.05 ;
1370 chart_write_xscale(&np_chart, x_lower - slack, x_upper + slack,
1371 chart_rounded_tick((m->max - m->min) / 5.0));
1374 chart_write_xscale(&dnp_chart, m->min, m->max,
1375 chart_rounded_tick((m->max - m->min) / 5.0));
1379 chart_write_yscale(&np_chart, yfirst, ylast,
1380 chart_rounded_tick((ylast - yfirst)/5.0) );
1383 /* We have to cache the detrended data, beacause we need to
1384 find its limits before we can plot it */
1386 d_data = xmalloc (n_data * sizeof(double));
1387 double d_max = -DBL_MAX;
1388 double d_min = DBL_MAX;
1389 for ( i = 0 ; i < n_data; ++i )
1391 const double ns = gsl_cdf_ugaussian_Pinv (wv[i].rank / ( m->n + 1));
1393 chart_datum(&np_chart, 0, wv[i].v.f, ns);
1395 d_data[i] = (wv[i].v.f - m->mean) / m->stddev - ns;
1397 if ( d_data[i] < d_min ) d_min = d_data[i];
1398 if ( d_data[i] > d_max ) d_max = d_data[i];
1401 chart_write_yscale(&dnp_chart, d_min, d_max,
1402 chart_rounded_tick((d_max - d_min) / 5.0));
1404 for ( i = 0 ; i < n_data; ++i )
1405 chart_datum(&dnp_chart, 0, wv[i].v.f, d_data[i]);
1410 chart_line(&np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y);
1411 chart_line(&dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X);
1413 chart_finalise(&np_chart);
1414 chart_finalise(&dnp_chart);