1 /* PSPP - EXAMINE data for normality . -*-c-*-
3 Copyright (C) 2004 Free Software Foundation, Inc.
4 Author: John Darrington 2004
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 #include <gsl/gsl_cdf.h>
30 #include "dictionary.h"
38 #include "value-labels.h"
43 #include "factor_stats.h"
54 +missing=miss:pairwise/!listwise,
56 incl:include/!exclude;
57 +compare=cmp:variables/!groups;
58 +plot[plt_]=stemleaf,boxplot,npplot,:spreadlevel(*d:n),histogram,all,none;
60 +statistics[st_]=descriptives,:extreme(*d:n),all,none.
69 static struct cmd_examine cmd;
71 static struct variable **dependent_vars;
73 static int n_dependent_vars;
78 /* The independent variable */
79 struct variable *indep_var[2];
82 /* Hash table of factor stats indexed by 2 values */
83 struct hsh_table *fstats;
85 /* The hash table after it has been crunched */
86 struct factor_statistics **fs;
92 /* Linked list of factors */
93 static struct factor *factors=0;
95 static struct metrics *totals=0;
97 /* Parse the clause specifying the factors */
98 static int examine_parse_independent_vars(struct cmd_examine *cmd);
102 /* Output functions */
103 static void show_summary(struct variable **dependent_var, int n_dep_var,
104 const struct factor *f);
106 static void show_extremes(struct variable **dependent_var,
108 const struct factor *factor,
111 static void show_descriptives(struct variable **dependent_var,
113 struct factor *factor);
116 void np_plot(const struct metrics *m, const char *factorname);
120 /* Per Split function */
121 static void run_examine(const struct casefile *cf, void *cmd_);
123 static void output_examine(void);
126 void factor_calc(struct ccase *c, int case_no,
127 double weight, int case_missing);
130 /* Function to use for testing for missing values */
131 static is_missing_func value_is_missing;
138 if ( !parse_examine(&cmd) )
141 /* If /MISSING=INCLUDE is set, then user missing values are ignored */
142 if (cmd.incl == XMN_INCLUDE )
143 value_is_missing = is_system_missing;
145 value_is_missing = is_missing;
147 if ( cmd.st_n == SYSMIS )
150 if ( ! cmd.sbc_cinterval)
151 cmd.n_cinterval[0] = 95.0;
153 multipass_procedure_with_splits (run_examine, &cmd);
163 /* Show all the appropriate tables */
169 /* Show totals if appropriate */
170 if ( ! cmd.sbc_nototal || factors == 0 )
172 show_summary(dependent_vars, n_dependent_vars, 0);
174 if ( cmd.sbc_statistics )
176 if ( cmd.a_statistics[XMN_ST_EXTREME])
177 show_extremes(dependent_vars, n_dependent_vars, 0, cmd.st_n);
179 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
180 show_descriptives(dependent_vars, n_dependent_vars, 0);
187 if ( cmd.a_plot[XMN_PLT_NPPLOT] )
189 for ( v = 0 ; v < n_dependent_vars; ++v )
190 np_plot(&totals[v], var_to_string(dependent_vars[v]));
193 if ( cmd.a_plot[XMN_PLT_HISTOGRAM] )
195 for ( v = 0 ; v < n_dependent_vars; ++v )
197 struct normal_curve normal;
199 normal.N = totals[v].n;
200 normal.mean = totals[v].mean;
201 normal.stddev = totals[v].stddev;
203 histogram_plot(totals[v].histogram,
204 var_to_string(dependent_vars[v]),
215 /* Show grouped statistics as appropriate */
219 show_summary(dependent_vars, n_dependent_vars, fctr);
221 if ( cmd.sbc_statistics )
223 if ( cmd.a_statistics[XMN_ST_EXTREME])
224 show_extremes(dependent_vars, n_dependent_vars, fctr, cmd.st_n);
226 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
227 show_descriptives(dependent_vars, n_dependent_vars, fctr);
234 struct factor_statistics **fs = fctr->fs ;
236 for ( v = 0 ; v < n_dependent_vars; ++v )
239 for ( fs = fctr->fs ; *fs ; ++fs )
243 sprintf(buf1, "%s (",
244 var_to_string(dependent_vars[v]));
246 snprintf(buf2, 100, "%s = %s",
247 var_to_string(fctr->indep_var[0]),
248 value_to_string(&(*fs)->id[0],fctr->indep_var[0]));
252 if ( fctr->indep_var[1] )
254 sprintf(buf2, "; %s = %s)",
255 var_to_string(fctr->indep_var[1]),
256 value_to_string(&(*fs)->id[1],
257 fctr->indep_var[1]));
265 if ( cmd.a_plot[XMN_PLT_NPPLOT] )
266 np_plot(&(*fs)->m[v],buf1);
269 if ( cmd.a_plot[XMN_PLT_HISTOGRAM] )
271 struct normal_curve normal;
273 normal.N = (*fs)->m[v].n;
274 normal.mean = (*fs)->m[v].mean;
275 normal.stddev = (*fs)->m[v].stddev;
277 histogram_plot((*fs)->m[v].histogram,
281 } /* for ( fs .... */
283 } /* for ( v = 0 ..... */
294 /* TOTAL and NOTOTAL are simple, mutually exclusive flags */
296 xmn_custom_total(struct cmd_examine *p)
298 if ( p->sbc_nototal )
300 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
308 xmn_custom_nototal(struct cmd_examine *p)
312 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
321 /* Parser for the variables sub command */
323 xmn_custom_variables(struct cmd_examine *cmd )
328 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
332 if (!parse_variables (default_dict, &dependent_vars, &n_dependent_vars,
333 PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
335 free (dependent_vars);
339 assert(n_dependent_vars);
341 totals = xmalloc( sizeof(struct metrics) * n_dependent_vars);
343 if ( lex_match(T_BY))
345 return examine_parse_independent_vars(cmd);
353 /* Parse the clause specifying the factors */
355 examine_parse_independent_vars(struct cmd_examine *cmd)
358 struct factor *sf = xmalloc(sizeof(struct factor));
360 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
365 sf->indep_var[0] = parse_variable();
366 sf->indep_var[1] = 0;
373 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
377 sf->indep_var[1] = parse_variable();
382 sf->fstats = hsh_create(4,
383 (hsh_compare_func *) factor_statistics_compare,
384 (hsh_hash_func *) factor_statistics_hash,
385 (hsh_free_func *) factor_statistics_free,
393 if ( token == '.' || token == '/' )
396 return examine_parse_independent_vars(cmd);
402 void populate_descriptives(struct tab_table *t, int col, int row,
403 const struct metrics *fs);
405 void populate_extremes(struct tab_table *t, int col, int row, int n,
406 const struct metrics *m);
408 void populate_summary(struct tab_table *t, int col, int row,
409 const struct metrics *m);
414 static int bad_weight_warn = 1;
417 /* Perform calculations for the sub factors */
419 factor_calc(struct ccase *c, int case_no, double weight, int case_missing)
422 struct factor *fctr = factors;
426 union value indep_vals[2] ;
428 indep_vals[0] = * case_data(c, fctr->indep_var[0]->fv);
430 if ( fctr->indep_var[1] )
431 indep_vals[1] = * case_data(c, fctr->indep_var[1]->fv);
433 indep_vals[1].f = SYSMIS;
435 assert(fctr->fstats);
437 struct factor_statistics **foo = ( struct factor_statistics ** )
438 hsh_probe(fctr->fstats, (void *) &indep_vals);
443 *foo = create_factor_statistics(n_dependent_vars,
447 for ( v = 0 ; v < n_dependent_vars ; ++v )
449 metrics_precalc( &(*foo)->m[v] );
454 for ( v = 0 ; v < n_dependent_vars ; ++v )
456 const struct variable *var = dependent_vars[v];
457 const union value *val = case_data (c, var->fv);
459 if ( value_is_missing(val,var) || case_missing )
462 metrics_calc( &(*foo)->m[v], val, weight, case_no );
475 run_examine(const struct casefile *cf, void *cmd_ )
477 struct casereader *r;
481 const struct cmd_examine *cmd = (struct cmd_examine *) cmd_;
483 /* Make sure we haven't got rubbish left over from a
486 struct factor *fctr = factors;
489 struct factor *next = fctr->next;
491 hsh_clear(fctr->fstats);
500 for ( v = 0 ; v < n_dependent_vars ; ++v )
501 metrics_precalc(&totals[v]);
503 for(r = casefile_get_reader (cf);
504 casereader_read (r, &c) ;
508 const int case_no = casereader_cnum(r);
510 const double weight =
511 dict_get_case_weight(default_dict, &c, &bad_weight_warn);
513 if ( cmd->miss == XMN_LISTWISE )
515 for ( v = 0 ; v < n_dependent_vars ; ++v )
517 const struct variable *var = dependent_vars[v];
518 const union value *val = case_data (&c, var->fv);
520 if ( value_is_missing(val,var))
526 for ( v = 0 ; v < n_dependent_vars ; ++v )
528 const struct variable *var = dependent_vars[v];
529 const union value *val = case_data (&c, var->fv);
531 if ( value_is_missing(val,var) || case_missing )
534 metrics_calc(&totals[v], val, weight, case_no );
538 factor_calc(&c, case_no, weight, case_missing);
543 for ( v = 0 ; v < n_dependent_vars ; ++v)
548 struct hsh_iterator hi;
549 struct factor_statistics *fs;
551 for ( fs = hsh_first(fctr->fstats, &hi);
553 fs = hsh_next(fctr->fstats, &hi))
555 metrics_postcalc(&fs->m[v]);
560 metrics_postcalc(&totals[v]);
564 /* Make sure that the combination of factors are complete */
569 struct hsh_iterator hi;
570 struct hsh_iterator hi0;
571 struct hsh_iterator hi1;
572 struct factor_statistics *fs;
574 struct hsh_table *idh0=0;
575 struct hsh_table *idh1=0;
579 idh0 = hsh_create(4, (hsh_compare_func *) compare_values,
580 (hsh_hash_func *) hash_value,
583 idh1 = hsh_create(4, (hsh_compare_func *) compare_values,
584 (hsh_hash_func *) hash_value,
588 for ( fs = hsh_first(fctr->fstats, &hi);
590 fs = hsh_next(fctr->fstats, &hi))
592 hsh_insert(idh0,(void *) &fs->id[0]);
593 hsh_insert(idh1,(void *) &fs->id[1]);
596 /* Ensure that the factors combination is complete */
597 for ( val0 = hsh_first(idh0, &hi0);
599 val0 = hsh_next(idh0, &hi0))
601 for ( val1 = hsh_first(idh1, &hi1);
603 val1 = hsh_next(idh1, &hi1))
605 struct factor_statistics **ffs;
610 ffs = (struct factor_statistics **)
611 hsh_probe(fctr->fstats, (void *) &key );
615 (*ffs) = create_factor_statistics (n_dependent_vars,
617 for ( i = 0 ; i < n_dependent_vars ; ++i )
618 metrics_precalc( &(*ffs)->m[i]);
626 fctr->fs = (struct factor_statistics **) hsh_sort_copy(fctr->fstats);
633 for ( v = 0 ; v < n_dependent_vars ; ++v )
634 hsh_destroy(totals[v].ordered_data);
640 show_summary(struct variable **dependent_var, int n_dep_var,
641 const struct factor *fctr)
643 static const char *subtitle[]=
651 int heading_columns ;
653 const int heading_rows = 3;
654 struct tab_table *tbl;
662 n_factors = hsh_count(fctr->fstats);
663 n_rows = n_dep_var * n_factors ;
665 if ( fctr->indep_var[1] )
674 n_rows += heading_rows;
676 n_cols = heading_columns + 6;
678 tbl = tab_create (n_cols,n_rows,0);
679 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
681 tab_dim (tbl, tab_natural_dimensions);
683 /* Outline the box */
688 n_cols - 1, n_rows - 1);
690 /* Vertical lines for the data only */
695 n_cols - 1, n_rows - 1);
698 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
699 tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, 1 );
700 tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, heading_rows -1 );
702 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
705 tab_title (tbl, 0, _("Case Processing Summary"));
708 tab_joint_text(tbl, heading_columns, 0,
710 TAB_CENTER | TAT_TITLE,
713 /* Remove lines ... */
720 for ( i = 0 ; i < 3 ; ++i )
722 tab_text (tbl, heading_columns + i*2 , 2, TAB_CENTER | TAT_TITLE,
725 tab_text (tbl, heading_columns + i*2 + 1, 2, TAB_CENTER | TAT_TITLE,
728 tab_joint_text(tbl, heading_columns + i*2 , 1,
729 heading_columns + i*2 + 1, 1,
730 TAB_CENTER | TAT_TITLE,
733 tab_box (tbl, -1, -1,
735 heading_columns + i*2, 1,
736 heading_columns + i*2 + 1, 1);
741 /* Titles for the independent variables */
744 tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
745 var_to_string(fctr->indep_var[0]));
747 if ( fctr->indep_var[1] )
749 tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
750 var_to_string(fctr->indep_var[1]));
756 for ( i = 0 ; i < n_dep_var ; ++i )
760 n_factors = hsh_count(fctr->fstats);
764 tab_hline(tbl, TAL_1, 0, n_cols -1 , i * n_factors + heading_rows);
767 0, i * n_factors + heading_rows,
768 TAB_LEFT | TAT_TITLE,
769 var_to_string(dependent_var[i])
774 populate_summary(tbl, heading_columns,
775 (i * n_factors) + heading_rows,
781 struct factor_statistics **fs = fctr->fs;
786 static union value prev;
788 if ( 0 != compare_values(&prev, &(*fs)->id[0],
789 fctr->indep_var[0]->width))
793 (i * n_factors ) + count +
795 TAB_LEFT | TAT_TITLE,
796 value_to_string(&(*fs)->id[0], fctr->indep_var[0])
799 if (fctr->indep_var[1] && count > 0 )
800 tab_hline(tbl, TAL_1, 1, n_cols - 1,
801 (i * n_factors ) + count + heading_rows);
808 if ( fctr->indep_var[1])
811 (i * n_factors ) + count +
813 TAB_LEFT | TAT_TITLE,
814 value_to_string(&(*fs)->id[1], fctr->indep_var[1])
817 populate_summary(tbl, heading_columns,
818 (i * n_factors) + count
833 populate_summary(struct tab_table *t, int col, int row,
834 const struct metrics *m)
837 const double total = m->n + m->n_missing ;
839 tab_float(t, col + 0, row + 0, TAB_RIGHT, m->n, 8, 0);
840 tab_float(t, col + 2, row + 0, TAB_RIGHT, m->n_missing, 8, 0);
841 tab_float(t, col + 4, row + 0, TAB_RIGHT, total, 8, 0);
845 tab_text (t, col + 1, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%",
846 100.0 * m->n / total );
848 tab_text (t, col + 3, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%",
849 100.0 * m->n_missing / total );
851 /* This seems a bit pointless !!! */
852 tab_text (t, col + 5, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%",
853 100.0 * total / total );
864 show_extremes(struct variable **dependent_var, int n_dep_var,
865 const struct factor *fctr, int n_extremities)
868 int heading_columns ;
870 const int heading_rows = 1;
871 struct tab_table *tbl;
879 n_factors = hsh_count(fctr->fstats);
881 n_rows = n_dep_var * 2 * n_extremities * n_factors;
883 if ( fctr->indep_var[1] )
889 n_rows = n_dep_var * 2 * n_extremities;
892 n_rows += heading_rows;
894 heading_columns += 2;
895 n_cols = heading_columns + 2;
897 tbl = tab_create (n_cols,n_rows,0);
898 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
900 tab_dim (tbl, tab_natural_dimensions);
902 /* Outline the box, No internal lines*/
907 n_cols - 1, n_rows - 1);
909 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
911 tab_title (tbl, 0, _("Extreme Values"));
914 tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows -1);
915 tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows -1);
919 tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
920 var_to_string(fctr->indep_var[0]));
922 if ( fctr->indep_var[1] )
923 tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
924 var_to_string(fctr->indep_var[1]));
927 tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value"));
928 tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number"));
933 for ( i = 0 ; i < n_dep_var ; ++i )
937 tab_hline(tbl, TAL_1, 0, n_cols -1 ,
938 i * 2 * n_extremities * n_factors + heading_rows);
941 i * 2 * n_extremities * n_factors + heading_rows,
942 TAB_LEFT | TAT_TITLE,
943 var_to_string(dependent_var[i])
948 populate_extremes(tbl, heading_columns - 2,
949 i * 2 * n_extremities * n_factors + heading_rows,
950 n_extremities, &totals[i]);
954 struct factor_statistics **fs = fctr->fs;
959 static union value prev ;
961 const int row = heading_rows + ( 2 * n_extremities ) *
962 ( ( i * n_factors ) + count );
965 if ( 0 != compare_values(&prev, &(*fs)->id[0],
966 fctr->indep_var[0]->width))
970 tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
974 TAB_LEFT | TAT_TITLE,
975 value_to_string(&(*fs)->id[0], fctr->indep_var[0])
981 if (fctr->indep_var[1] && count > 0 )
982 tab_hline(tbl, TAL_1, 2, n_cols - 1, row);
984 if ( fctr->indep_var[1])
985 tab_text (tbl, 2, row,
986 TAB_LEFT | TAT_TITLE,
987 value_to_string(&(*fs)->id[1], fctr->indep_var[1])
990 populate_extremes(tbl, heading_columns - 2,
1005 /* Fill in the extremities table */
1007 populate_extremes(struct tab_table *t,
1008 int col, int row, int n, const struct metrics *m)
1014 tab_text(t, col, row,
1015 TAB_RIGHT | TAT_TITLE ,
1019 tab_text(t, col, row + n ,
1020 TAB_RIGHT | TAT_TITLE ,
1025 tab_hline(t, TAL_1, col, col + 3, row + n );
1027 for (extremity = 0; extremity < n ; ++extremity )
1030 tab_float(t, col + 1, row + extremity,
1032 extremity + 1, 8, 0);
1036 tab_float(t, col + 1, row + extremity + n,
1038 extremity + 1, 8, 0);
1044 for (idx = 0, extremity = 0; extremity < n && idx < m->n_data ; ++idx )
1047 const struct weighted_value *wv = m->wvp[idx];
1048 struct case_node *cn = wv->case_nos;
1051 for (j = 0 ; j < wv->w ; ++j )
1053 if ( extremity + j >= n )
1056 tab_float(t, col + 3, row + extremity + j + n,
1060 tab_float(t, col + 2, row + extremity + j + n,
1069 extremity += wv->w ;
1074 for (idx = m->n_data - 1, extremity = 0; extremity < n && idx >= 0; --idx )
1077 const struct weighted_value *wv = m->wvp[idx];
1078 struct case_node *cn = wv->case_nos;
1080 for (j = 0 ; j < wv->w ; ++j )
1082 if ( extremity + j >= n )
1085 tab_float(t, col + 3, row + extremity + j,
1089 tab_float(t, col + 2, row + extremity + j,
1098 extremity += wv->w ;
1103 /* Show the descriptives table */
1105 show_descriptives(struct variable **dependent_var,
1107 struct factor *fctr)
1110 int heading_columns ;
1112 const int n_stat_rows = 13;
1114 const int heading_rows = 1;
1116 struct tab_table *tbl;
1123 heading_columns = 4;
1124 n_factors = hsh_count(fctr->fstats);
1126 n_rows = n_dep_var * n_stat_rows * n_factors;
1128 if ( fctr->indep_var[1] )
1129 heading_columns = 5;
1133 heading_columns = 3;
1134 n_rows = n_dep_var * n_stat_rows;
1137 n_rows += heading_rows;
1139 n_cols = heading_columns + 2;
1142 tbl = tab_create (n_cols, n_rows, 0);
1144 tab_headers (tbl, heading_columns + 1, 0, heading_rows, 0);
1146 tab_dim (tbl, tab_natural_dimensions);
1148 /* Outline the box and have no internal lines*/
1153 n_cols - 1, n_rows - 1);
1155 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1157 tab_vline (tbl, TAL_1, heading_columns, 0, n_rows - 1);
1158 tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows - 1);
1159 tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1);
1161 tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Statistic"));
1162 tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Std. Error"));
1164 tab_title (tbl, 0, _("Descriptives"));
1167 for ( i = 0 ; i < n_dep_var ; ++i )
1169 const int row = heading_rows + i * n_stat_rows * n_factors ;
1172 tab_hline(tbl, TAL_1, 0, n_cols - 1, row );
1175 i * n_stat_rows * n_factors + heading_rows,
1176 TAB_LEFT | TAT_TITLE,
1177 var_to_string(dependent_var[i])
1183 struct factor_statistics **fs = fctr->fs;
1186 tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1187 var_to_string(fctr->indep_var[0]));
1190 if ( fctr->indep_var[1])
1191 tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1192 var_to_string(fctr->indep_var[1]));
1197 static union value prev ;
1199 const int row = heading_rows + n_stat_rows *
1200 ( ( i * n_factors ) + count );
1203 if ( 0 != compare_values(&prev, &(*fs)->id[0],
1204 fctr->indep_var[0]->width))
1208 tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
1212 TAB_LEFT | TAT_TITLE,
1213 value_to_string(&(*fs)->id[0], fctr->indep_var[0])
1217 prev = (*fs)->id[0];
1219 if (fctr->indep_var[1] && count > 0 )
1220 tab_hline(tbl, TAL_1, 2, n_cols - 1, row);
1222 if ( fctr->indep_var[1])
1223 tab_text (tbl, 2, row,
1224 TAB_LEFT | TAT_TITLE,
1225 value_to_string(&(*fs)->id[1], fctr->indep_var[1])
1228 populate_descriptives(tbl, heading_columns - 2,
1240 populate_descriptives(tbl, heading_columns - 2,
1241 i * n_stat_rows * n_factors + heading_rows,
1255 /* Fill in the descriptives data */
1257 populate_descriptives(struct tab_table *tbl, int col, int row,
1258 const struct metrics *m)
1261 const double t = gsl_cdf_tdist_Qinv(1 - cmd.n_cinterval[0]/100.0/2.0, \
1267 TAB_LEFT | TAT_TITLE,
1270 tab_float (tbl, col + 2,
1276 tab_float (tbl, col + 3,
1285 TAB_LEFT | TAT_TITLE | TAT_PRINTF,
1286 _("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]);
1289 tab_text (tbl, col + 1,
1291 TAB_LEFT | TAT_TITLE,
1294 tab_float (tbl, col + 2,
1297 m->mean - t * m->se_mean,
1300 tab_text (tbl, col + 1,
1302 TAB_LEFT | TAT_TITLE,
1306 tab_float (tbl, col + 2,
1309 m->mean + t * m->se_mean,
1314 TAB_LEFT | TAT_TITLE,
1315 _("5% Trimmed Mean"));
1317 tab_float (tbl, col + 2,
1325 TAB_LEFT | TAT_TITLE,
1330 TAB_LEFT | TAT_TITLE,
1333 tab_float (tbl, col + 2,
1342 TAB_LEFT | TAT_TITLE,
1343 _("Std. Deviation"));
1346 tab_float (tbl, col + 2,
1355 TAB_LEFT | TAT_TITLE,
1358 tab_float (tbl, col + 2,
1366 TAB_LEFT | TAT_TITLE,
1369 tab_float (tbl, col + 2,
1378 TAB_LEFT | TAT_TITLE,
1382 tab_float (tbl, col + 2,
1390 TAB_LEFT | TAT_TITLE,
1391 _("Interquartile Range"));
1395 TAB_LEFT | TAT_TITLE,
1399 tab_float (tbl, col + 2,
1405 /* stderr of skewness */
1406 tab_float (tbl, col + 3,
1415 TAB_LEFT | TAT_TITLE,
1419 tab_float (tbl, col + 2,
1425 /* stderr of kurtosis */
1426 tab_float (tbl, col + 3,
1436 /* Plot the normal and detrended normal plots for m
1437 Label the plots with factorname */
1439 np_plot(const struct metrics *m, const char *factorname)
1442 double yfirst=0, ylast=0;
1445 struct chart np_chart;
1447 /* Detrended Normal Plot */
1448 struct chart dnp_chart;
1450 /* The slope and intercept of the ideal normal probability line */
1451 const double slope = 1.0 / m->stddev;
1452 const double intercept = - m->mean / m->stddev;
1454 /* Cowardly refuse to plot an empty data set */
1455 if ( m->n_data == 0 )
1458 chart_initialise(&np_chart);
1459 chart_write_title(&np_chart, _("Normal Q-Q Plot of %s"), factorname);
1460 chart_write_xlabel(&np_chart, _("Observed Value"));
1461 chart_write_ylabel(&np_chart, _("Expected Normal"));
1463 chart_initialise(&dnp_chart);
1464 chart_write_title(&dnp_chart, _("Detrended Normal Q-Q Plot of %s"),
1466 chart_write_xlabel(&dnp_chart, _("Observed Value"));
1467 chart_write_ylabel(&dnp_chart, _("Dev from Normal"));
1469 yfirst = gsl_cdf_ugaussian_Pinv (m->wvp[0]->rank / ( m->n + 1));
1470 ylast = gsl_cdf_ugaussian_Pinv (m->wvp[m->n_data-1]->rank / ( m->n + 1));
1474 /* Need to make sure that both the scatter plot and the ideal fit into the
1476 double x_lower = min(m->min, (yfirst - intercept) / slope) ;
1477 double x_upper = max(m->max, (ylast - intercept) / slope) ;
1478 double slack = (x_upper - x_lower) * 0.05 ;
1480 chart_write_xscale(&np_chart, x_lower - slack, x_upper + slack, 5);
1482 chart_write_xscale(&dnp_chart, m->min, m->max, 5);
1486 chart_write_yscale(&np_chart, yfirst, ylast, 5);
1489 /* We have to cache the detrended data, beacause we need to
1490 find its limits before we can plot it */
1492 d_data = xmalloc (m->n_data * sizeof(double));
1493 double d_max = -DBL_MAX;
1494 double d_min = DBL_MAX;
1495 for ( i = 0 ; i < m->n_data; ++i )
1497 const double ns = gsl_cdf_ugaussian_Pinv (m->wvp[i]->rank / ( m->n + 1));
1499 chart_datum(&np_chart, 0, m->wvp[i]->v.f, ns);
1501 d_data[i] = (m->wvp[i]->v.f - m->mean) / m->stddev - ns;
1503 if ( d_data[i] < d_min ) d_min = d_data[i];
1504 if ( d_data[i] > d_max ) d_max = d_data[i];
1506 chart_write_yscale(&dnp_chart, d_min, d_max, 5);
1508 for ( i = 0 ; i < m->n_data; ++i )
1509 chart_datum(&dnp_chart, 0, m->wvp[i]->v.f, d_data[i]);
1514 chart_line(&np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y);
1515 chart_line(&dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X);
1517 chart_finalise(&np_chart);
1518 chart_finalise(&dnp_chart);