1 /* PSPP - EXAMINE data for normality . -*-c-*-
3 Copyright (C) 2004 Free Software Foundation, Inc.
4 Author: John Darrington 2004
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 #include <gsl/gsl_cdf.h>
30 #include "dictionary.h"
38 #include "value-labels.h"
43 #include "factor_stats.h"
52 +missing=miss:pairwise/!listwise,
54 incl:include/!exclude;
55 +compare=cmp:variables/!groups;
56 +plot[plt_]=stemleaf,boxplot,npplot,:spreadlevel(*d:n),histogram,all,none;
58 +statistics[st_]=descriptives,:extreme(*d:n),all,none.
67 static struct cmd_examine cmd;
69 static struct variable **dependent_vars;
71 static int n_dependent_vars;
76 /* The independent variable */
77 struct variable *indep_var[2];
80 /* Hash table of factor stats indexed by 2 values */
81 struct hsh_table *fstats;
83 /* The hash table after it's been crunched */
84 struct factor_statistics **fs;
90 /* Linked list of factors */
91 static struct factor *factors=0;
93 static struct metrics *totals=0;
98 struct factor *f = factors;
102 struct factor_statistics **fs = f->fs;
104 printf("Factor: %s BY %s\n",
105 var_to_string(f->indep_var[0]),
106 var_to_string(f->indep_var[1]) );
109 printf("Contains %d entries\n", hsh_count(f->fstats));
114 printf("Factor %g; %g\n", (*fs)->id[0].f, (*fs)->id[1].f);
117 printf("Factor %s; %s\n",
118 value_to_string(&(*fs)->id[0], f->indep_var[0]),
119 value_to_string(&(*fs)->id[1], f->indep_var[1]));
123 printf("Sum is %g; ",(*fs)->m[0].sum);
124 printf("N is %g; ",(*fs)->m[0].n);
125 printf("Mean is %g\n",(*fs)->m[0].mean);
137 /* Parse the clause specifying the factors */
138 static int examine_parse_independent_vars(struct cmd_examine *cmd);
142 /* Output functions */
143 static void show_summary(struct variable **dependent_var, int n_dep_var,
144 const struct factor *f);
146 static void show_extremes(struct variable **dependent_var,
148 const struct factor *factor,
151 static void show_descriptives(struct variable **dependent_var,
153 struct factor *factor);
156 void np_plot(const struct metrics *m, const char *factorname);
161 /* Per Split function */
162 static void run_examine(const struct casefile *cf, void *cmd_);
164 static void output_examine(void);
167 void factor_calc(struct ccase *c, int case_no,
168 double weight, int case_missing);
171 /* Function to use for testing for missing values */
172 static is_missing_func value_is_missing;
179 if ( !parse_examine(&cmd) )
182 /* If /MISSING=INCLUDE is set, then user missing values are ignored */
183 if (cmd.incl == XMN_INCLUDE )
184 value_is_missing = is_system_missing;
186 value_is_missing = is_missing;
188 if ( cmd.st_n == SYSMIS )
191 if ( ! cmd.sbc_cinterval)
192 cmd.n_cinterval[0] = 95.0;
194 multipass_procedure_with_splits (run_examine, &cmd);
201 /* Show all the appropriate tables */
207 /* Show totals if appropriate */
208 if ( ! cmd.sbc_nototal )
210 show_summary(dependent_vars, n_dependent_vars, 0);
212 if ( cmd.sbc_statistics )
214 if ( cmd.a_statistics[XMN_ST_EXTREME])
215 show_extremes(dependent_vars, n_dependent_vars, 0, cmd.st_n);
217 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
218 show_descriptives(dependent_vars, n_dependent_vars, 0);
224 if ( cmd.a_plot[XMN_PLT_NPPLOT] )
228 for ( v = 0 ; v < n_dependent_vars; ++v )
229 np_plot(&totals[v], var_to_string(dependent_vars[v]));
237 /* Show grouped statistics as appropriate */
241 show_summary(dependent_vars, n_dependent_vars, fctr);
243 if ( cmd.sbc_statistics )
245 if ( cmd.a_statistics[XMN_ST_EXTREME])
246 show_extremes(dependent_vars, n_dependent_vars, fctr, cmd.st_n);
248 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
249 show_descriptives(dependent_vars, n_dependent_vars, fctr);
254 if ( cmd.a_plot[XMN_PLT_NPPLOT] )
257 for ( v = 0 ; v < n_dependent_vars; ++ v)
260 struct factor_statistics **fs = fctr->fs ;
261 for ( fs = fctr->fs ; *fs ; ++fs )
265 sprintf(buf1, "%s (",
266 var_to_string(dependent_vars[v]));
268 sprintf(buf2, "%s = %s",
269 var_to_string(fctr->indep_var[0]),
270 value_to_string(&(*fs)->id[0],fctr->indep_var[0]));
275 if ( fctr->indep_var[1] )
277 sprintf(buf2, "; %s = %s)",
278 var_to_string(fctr->indep_var[1]),
279 value_to_string(&(*fs)->id[1],
280 fctr->indep_var[1]));
288 np_plot(&(*fs)->m[v],buf1);
304 /* TOTAL and NOTOTAL are simple, mutually exclusive flags */
306 xmn_custom_total(struct cmd_examine *p)
308 if ( p->sbc_nototal )
310 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
318 xmn_custom_nototal(struct cmd_examine *p)
322 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
331 /* Parser for the variables sub command */
333 xmn_custom_variables(struct cmd_examine *cmd )
338 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
342 if (!parse_variables (default_dict, &dependent_vars, &n_dependent_vars,
343 PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
345 free (dependent_vars);
349 assert(n_dependent_vars);
351 totals = xmalloc( sizeof(struct metrics) * n_dependent_vars);
353 if ( lex_match(T_BY))
355 return examine_parse_independent_vars(cmd);
363 /* Parse the clause specifying the factors */
365 examine_parse_independent_vars(struct cmd_examine *cmd)
368 struct factor *sf = xmalloc(sizeof(struct factor));
370 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
375 sf->indep_var[0] = parse_variable();
376 sf->indep_var[1] = 0;
383 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
387 sf->indep_var[1] = parse_variable();
392 sf->fstats = hsh_create(4,
393 (hsh_compare_func *) factor_statistics_compare,
394 (hsh_hash_func *) factor_statistics_hash,
395 (hsh_free_func *) factor_statistics_free,
403 if ( token == '.' || token == '/' )
406 return examine_parse_independent_vars(cmd);
412 void populate_descriptives(struct tab_table *t, int col, int row,
413 const struct metrics *fs);
415 void populate_extremes(struct tab_table *t, int col, int row, int n,
416 const struct metrics *m);
418 void populate_summary(struct tab_table *t, int col, int row,
419 const struct metrics *m);
424 static int bad_weight_warn = 1;
427 /* Perform calculations for the sub factors */
429 factor_calc(struct ccase *c, int case_no, double weight, int case_missing)
432 struct factor *fctr = factors;
436 union value indep_vals[2] ;
438 indep_vals[0] = * case_data(c, fctr->indep_var[0]->fv);
440 if ( fctr->indep_var[1] )
441 indep_vals[1] = * case_data(c, fctr->indep_var[1]->fv);
443 indep_vals[1].f = SYSMIS;
445 assert(fctr->fstats);
447 struct factor_statistics **foo = ( struct factor_statistics ** )
448 hsh_probe(fctr->fstats, (void *) &indep_vals);
453 *foo = create_factor_statistics(n_dependent_vars,
457 for ( v = 0 ; v < n_dependent_vars ; ++v )
459 metrics_precalc( &(*foo)->m[v] );
464 for ( v = 0 ; v < n_dependent_vars ; ++v )
466 const struct variable *var = dependent_vars[v];
467 const union value *val = case_data (c, var->fv);
469 if ( value_is_missing(val,var) || case_missing )
472 metrics_calc( &(*foo)->m[v], val, weight, case_no );
485 run_examine(const struct casefile *cf, void *cmd_ )
487 struct casereader *r;
491 const struct cmd_examine *cmd = (struct cmd_examine *) cmd_;
493 /* Make sure we haven't got rubbish left over from a
496 struct factor *fctr = factors;
499 struct factor *next = fctr->next;
501 hsh_clear(fctr->fstats);
510 for ( v = 0 ; v < n_dependent_vars ; ++v )
511 metrics_precalc(&totals[v]);
513 for(r = casefile_get_reader (cf);
514 casereader_read (r, &c) ;
518 const int case_no = casereader_cnum(r);
520 const double weight =
521 dict_get_case_weight(default_dict, &c, &bad_weight_warn);
523 if ( cmd->miss == XMN_LISTWISE )
525 for ( v = 0 ; v < n_dependent_vars ; ++v )
527 const struct variable *var = dependent_vars[v];
528 const union value *val = case_data (&c, var->fv);
530 if ( value_is_missing(val,var))
536 for ( v = 0 ; v < n_dependent_vars ; ++v )
538 const struct variable *var = dependent_vars[v];
539 const union value *val = case_data (&c, var->fv);
541 if ( value_is_missing(val,var) || case_missing )
544 metrics_calc(&totals[v], val, weight, case_no );
548 factor_calc(&c, case_no, weight, case_missing);
553 for ( v = 0 ; v < n_dependent_vars ; ++v)
558 struct hsh_iterator hi;
559 struct factor_statistics *fs;
561 for ( fs = hsh_first(fctr->fstats, &hi);
563 fs = hsh_next(fctr->fstats, &hi))
565 metrics_postcalc(&fs->m[v]);
570 metrics_postcalc(&totals[v]);
574 /* Make sure that the combination of factors are complete */
579 struct hsh_iterator hi;
580 struct hsh_iterator hi0;
581 struct hsh_iterator hi1;
582 struct factor_statistics *fs;
584 struct hsh_table *idh0=0;
585 struct hsh_table *idh1=0;
589 idh0 = hsh_create(4, (hsh_compare_func *) compare_values,
590 (hsh_hash_func *) hash_value,
593 idh1 = hsh_create(4, (hsh_compare_func *) compare_values,
594 (hsh_hash_func *) hash_value,
598 for ( fs = hsh_first(fctr->fstats, &hi);
600 fs = hsh_next(fctr->fstats, &hi))
602 hsh_insert(idh0,(void *) &fs->id[0]);
603 hsh_insert(idh1,(void *) &fs->id[1]);
606 /* Ensure that the factors combination is complete */
607 for ( val0 = hsh_first(idh0, &hi0);
609 val0 = hsh_next(idh0, &hi0))
611 for ( val1 = hsh_first(idh1, &hi1);
613 val1 = hsh_next(idh1, &hi1))
615 struct factor_statistics **ffs;
620 ffs = (struct factor_statistics **)
621 hsh_probe(fctr->fstats, (void *) &key );
625 (*ffs) = create_factor_statistics (n_dependent_vars,
627 for ( i = 0 ; i < n_dependent_vars ; ++i )
628 metrics_precalc( &(*ffs)->m[i]);
636 fctr->fs = (struct factor_statistics **) hsh_sort_copy(fctr->fstats);
651 show_summary(struct variable **dependent_var, int n_dep_var,
652 const struct factor *fctr)
654 static const char *subtitle[]=
662 int heading_columns ;
664 const int heading_rows = 3;
665 struct tab_table *tbl;
673 n_factors = hsh_count(fctr->fstats);
674 n_rows = n_dep_var * n_factors ;
676 if ( fctr->indep_var[1] )
685 n_rows += heading_rows;
687 n_cols = heading_columns + 6;
689 tbl = tab_create (n_cols,n_rows,0);
690 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
692 tab_dim (tbl, tab_natural_dimensions);
694 /* Outline the box */
699 n_cols - 1, n_rows - 1);
701 /* Vertical lines for the data only */
706 n_cols - 1, n_rows - 1);
709 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
710 tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, 1 );
711 tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, heading_rows -1 );
713 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
716 tab_title (tbl, 0, _("Case Processing Summary"));
719 tab_joint_text(tbl, heading_columns, 0,
721 TAB_CENTER | TAT_TITLE,
724 /* Remove lines ... */
731 for ( i = 0 ; i < 3 ; ++i )
733 tab_text (tbl, heading_columns + i*2 , 2, TAB_CENTER | TAT_TITLE,
736 tab_text (tbl, heading_columns + i*2 + 1, 2, TAB_CENTER | TAT_TITLE,
739 tab_joint_text(tbl, heading_columns + i*2 , 1,
740 heading_columns + i*2 + 1, 1,
741 TAB_CENTER | TAT_TITLE,
744 tab_box (tbl, -1, -1,
746 heading_columns + i*2, 1,
747 heading_columns + i*2 + 1, 1);
752 /* Titles for the independent variables */
755 tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
756 var_to_string(fctr->indep_var[0]));
758 if ( fctr->indep_var[1] )
760 tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
761 var_to_string(fctr->indep_var[1]));
767 for ( i = 0 ; i < n_dep_var ; ++i )
771 n_factors = hsh_count(fctr->fstats);
775 tab_hline(tbl, TAL_1, 0, n_cols -1 , i * n_factors + heading_rows);
778 0, i * n_factors + heading_rows,
779 TAB_LEFT | TAT_TITLE,
780 var_to_string(dependent_var[i])
785 populate_summary(tbl, heading_columns,
786 (i * n_factors) + heading_rows,
792 struct factor_statistics **fs = fctr->fs;
797 static union value prev;
799 if ( 0 != compare_values(&prev, &(*fs)->id[0],
800 fctr->indep_var[0]->width))
804 (i * n_factors ) + count +
806 TAB_LEFT | TAT_TITLE,
807 value_to_string(&(*fs)->id[0], fctr->indep_var[0])
810 if (fctr->indep_var[1] && count > 0 )
811 tab_hline(tbl, TAL_1, 1, n_cols - 1,
812 (i * n_factors ) + count + heading_rows);
819 if ( fctr->indep_var[1])
822 (i * n_factors ) + count +
824 TAB_LEFT | TAT_TITLE,
825 value_to_string(&(*fs)->id[1], fctr->indep_var[1])
828 populate_summary(tbl, heading_columns,
829 (i * n_factors) + count
844 populate_summary(struct tab_table *t, int col, int row,
845 const struct metrics *m)
848 const double total = m->n + m->n_missing ;
850 tab_float(t, col + 0, row + 0, TAB_RIGHT, m->n, 8, 0);
851 tab_float(t, col + 2, row + 0, TAB_RIGHT, m->n_missing, 8, 0);
852 tab_float(t, col + 4, row + 0, TAB_RIGHT, total, 8, 0);
856 tab_text (t, col + 1, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%",
857 100.0 * m->n / total );
859 tab_text (t, col + 3, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%",
860 100.0 * m->n_missing / total );
862 /* This seems a bit pointless !!! */
863 tab_text (t, col + 5, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%",
864 100.0 * total / total );
875 show_extremes(struct variable **dependent_var, int n_dep_var,
876 const struct factor *fctr, int n_extremities)
879 int heading_columns ;
881 const int heading_rows = 1;
882 struct tab_table *tbl;
890 n_factors = hsh_count(fctr->fstats);
892 n_rows = n_dep_var * 2 * n_extremities * n_factors;
894 if ( fctr->indep_var[1] )
900 n_rows = n_dep_var * 2 * n_extremities;
903 n_rows += heading_rows;
905 heading_columns += 2;
906 n_cols = heading_columns + 2;
908 tbl = tab_create (n_cols,n_rows,0);
909 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
911 tab_dim (tbl, tab_natural_dimensions);
913 /* Outline the box, No internal lines*/
918 n_cols - 1, n_rows - 1);
920 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
922 tab_title (tbl, 0, _("Extreme Values"));
925 tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows -1);
926 tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows -1);
930 tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
931 var_to_string(fctr->indep_var[0]));
933 if ( fctr->indep_var[1] )
934 tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
935 var_to_string(fctr->indep_var[1]));
938 tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value"));
939 tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number"));
944 for ( i = 0 ; i < n_dep_var ; ++i )
948 tab_hline(tbl, TAL_1, 0, n_cols -1 ,
949 i * 2 * n_extremities * n_factors + heading_rows);
952 i * 2 * n_extremities * n_factors + heading_rows,
953 TAB_LEFT | TAT_TITLE,
954 var_to_string(dependent_var[i])
959 populate_extremes(tbl, heading_columns - 2,
960 i * 2 * n_extremities * n_factors + heading_rows,
961 n_extremities, &totals[i]);
965 struct factor_statistics **fs = fctr->fs;
970 static union value prev ;
972 const int row = heading_rows + ( 2 * n_extremities ) *
973 ( ( i * n_factors ) + count );
976 if ( 0 != compare_values(&prev, &(*fs)->id[0],
977 fctr->indep_var[0]->width))
981 tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
985 TAB_LEFT | TAT_TITLE,
986 value_to_string(&(*fs)->id[0], fctr->indep_var[0])
992 if (fctr->indep_var[1] && count > 0 )
993 tab_hline(tbl, TAL_1, 2, n_cols - 1, row);
995 if ( fctr->indep_var[1])
996 tab_text (tbl, 2, row,
997 TAB_LEFT | TAT_TITLE,
998 value_to_string(&(*fs)->id[1], fctr->indep_var[1])
1001 populate_extremes(tbl, heading_columns - 2,
1016 /* Fill in the extremities table */
1018 populate_extremes(struct tab_table *t,
1019 int col, int row, int n, const struct metrics *m)
1024 const int n_data = hsh_count(m->ordered_data);
1026 tab_text(t, col, row,
1027 TAB_RIGHT | TAT_TITLE ,
1031 tab_text(t, col, row + n ,
1032 TAB_RIGHT | TAT_TITLE ,
1037 tab_hline(t, TAL_1, col, col + 3, row + n );
1039 for (extremity = 0; extremity < n ; ++extremity )
1042 tab_float(t, col + 1, row + extremity,
1044 extremity + 1, 8, 0);
1048 tab_float(t, col + 1, row + extremity + n,
1050 extremity + 1, 8, 0);
1056 for (idx = 0, extremity = 0; extremity < n && idx < n_data ; ++idx )
1059 const struct weighted_value *wv = &m->wv[idx];
1060 struct case_node *cn = wv->case_nos;
1063 for (j = 0 ; j < wv->w ; ++j )
1065 if ( extremity + j >= n )
1068 tab_float(t, col + 3, row + extremity + j + n,
1072 tab_float(t, col + 2, row + extremity + j + n,
1081 extremity += wv->w ;
1086 for (idx = n_data - 1, extremity = 0; extremity < n && idx >= 0; --idx )
1089 const struct weighted_value *wv = &m->wv[idx];
1090 struct case_node *cn = wv->case_nos;
1092 for (j = 0 ; j < wv->w ; ++j )
1094 if ( extremity + j >= n )
1097 tab_float(t, col + 3, row + extremity + j,
1101 tab_float(t, col + 2, row + extremity + j,
1110 extremity += wv->w ;
1115 /* Show the descriptives table */
1117 show_descriptives(struct variable **dependent_var,
1119 struct factor *fctr)
1122 int heading_columns ;
1124 const int n_stat_rows = 13;
1126 const int heading_rows = 1;
1128 struct tab_table *tbl;
1135 heading_columns = 4;
1136 n_factors = hsh_count(fctr->fstats);
1138 n_rows = n_dep_var * n_stat_rows * n_factors;
1140 if ( fctr->indep_var[1] )
1141 heading_columns = 5;
1145 heading_columns = 3;
1146 n_rows = n_dep_var * n_stat_rows;
1149 n_rows += heading_rows;
1151 n_cols = heading_columns + 2;
1154 tbl = tab_create (n_cols, n_rows, 0);
1156 tab_headers (tbl, heading_columns + 1, 0, heading_rows, 0);
1158 tab_dim (tbl, tab_natural_dimensions);
1160 /* Outline the box and have no internal lines*/
1165 n_cols - 1, n_rows - 1);
1167 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1169 tab_vline (tbl, TAL_1, heading_columns, 0, n_rows - 1);
1170 tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows - 1);
1171 tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1);
1173 tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Statistic"));
1174 tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Std. Error"));
1176 tab_title (tbl, 0, _("Descriptives"));
1179 for ( i = 0 ; i < n_dep_var ; ++i )
1181 const int row = heading_rows + i * n_stat_rows * n_factors ;
1184 tab_hline(tbl, TAL_1, 0, n_cols - 1, row );
1187 i * n_stat_rows * n_factors + heading_rows,
1188 TAB_LEFT | TAT_TITLE,
1189 var_to_string(dependent_var[i])
1195 struct factor_statistics **fs = fctr->fs;
1198 tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1199 var_to_string(fctr->indep_var[0]));
1202 if ( fctr->indep_var[1])
1203 tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1204 var_to_string(fctr->indep_var[1]));
1209 static union value prev ;
1211 const int row = heading_rows + n_stat_rows *
1212 ( ( i * n_factors ) + count );
1215 if ( 0 != compare_values(&prev, &(*fs)->id[0],
1216 fctr->indep_var[0]->width))
1220 tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
1224 TAB_LEFT | TAT_TITLE,
1225 value_to_string(&(*fs)->id[0], fctr->indep_var[0])
1229 prev = (*fs)->id[0];
1231 if (fctr->indep_var[1] && count > 0 )
1232 tab_hline(tbl, TAL_1, 2, n_cols - 1, row);
1234 if ( fctr->indep_var[1])
1235 tab_text (tbl, 2, row,
1236 TAB_LEFT | TAT_TITLE,
1237 value_to_string(&(*fs)->id[1], fctr->indep_var[1])
1240 populate_descriptives(tbl, heading_columns - 2,
1252 populate_descriptives(tbl, heading_columns - 2,
1253 i * n_stat_rows * n_factors + heading_rows,
1267 /* Fill in the descriptives data */
1269 populate_descriptives(struct tab_table *tbl, int col, int row,
1270 const struct metrics *m)
1273 const double t = gsl_cdf_tdist_Qinv(1 - cmd.n_cinterval[0]/100.0/2.0, \
1279 TAB_LEFT | TAT_TITLE,
1282 tab_float (tbl, col + 2,
1288 tab_float (tbl, col + 3,
1297 TAB_LEFT | TAT_TITLE | TAT_PRINTF,
1298 _("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]);
1301 tab_text (tbl, col + 1,
1303 TAB_LEFT | TAT_TITLE,
1306 tab_float (tbl, col + 2,
1309 m->mean - t * m->stderr,
1312 tab_text (tbl, col + 1,
1314 TAB_LEFT | TAT_TITLE,
1318 tab_float (tbl, col + 2,
1321 m->mean + t * m->stderr,
1326 TAB_LEFT | TAT_TITLE,
1327 _("5% Trimmed Mean"));
1329 tab_float (tbl, col + 2,
1337 TAB_LEFT | TAT_TITLE,
1342 TAB_LEFT | TAT_TITLE,
1345 tab_float (tbl, col + 2,
1354 TAB_LEFT | TAT_TITLE,
1355 _("Std. Deviation"));
1358 tab_float (tbl, col + 2,
1367 TAB_LEFT | TAT_TITLE,
1370 tab_float (tbl, col + 2,
1378 TAB_LEFT | TAT_TITLE,
1381 tab_float (tbl, col + 2,
1390 TAB_LEFT | TAT_TITLE,
1394 tab_float (tbl, col + 2,
1402 TAB_LEFT | TAT_TITLE,
1403 _("Interquartile Range"));
1407 TAB_LEFT | TAT_TITLE,
1412 TAB_LEFT | TAT_TITLE,
1420 /* Plot the normal and detrended normal plots for m
1421 Label the plots with factorname */
1423 np_plot(const struct metrics *m, const char *factorname)
1426 double yfirst=0, ylast=0;
1429 struct chart np_chart;
1431 /* Detrended Normal Plot */
1432 struct chart dnp_chart;
1434 const struct weighted_value *wv = m->wv;
1435 const int n_data = hsh_count(m->ordered_data) ;
1437 /* The slope and intercept of the ideal normal probability line */
1438 const double slope = 1.0 / m->stddev;
1439 const double intercept = - m->mean / m->stddev;
1441 /* Cowardly refuse to plot an empty data set */
1445 chart_initialise(&np_chart);
1446 chart_write_title(&np_chart, _("Normal Q-Q Plot of %s"), factorname);
1447 chart_write_xlabel(&np_chart, _("Observed Value"));
1448 chart_write_ylabel(&np_chart, _("Expected Normal"));
1450 chart_initialise(&dnp_chart);
1451 chart_write_title(&dnp_chart, _("Detrended Normal Q-Q Plot of %s"),
1453 chart_write_xlabel(&dnp_chart, _("Observed Value"));
1454 chart_write_ylabel(&dnp_chart, _("Dev from Normal"));
1456 yfirst = gsl_cdf_ugaussian_Pinv (wv[0].rank / ( m->n + 1));
1457 ylast = gsl_cdf_ugaussian_Pinv (wv[n_data-1].rank / ( m->n + 1));
1460 /* Need to make sure that both the scatter plot and the ideal fit into the
1462 double x_lower = min(m->min, (yfirst - intercept) / slope) ;
1463 double x_upper = max(m->max, (ylast - intercept) / slope) ;
1464 double slack = (x_upper - x_lower) * 0.05 ;
1466 chart_write_xscale(&np_chart, x_lower - slack, x_upper + slack,
1467 chart_rounded_tick((m->max - m->min) / 5.0));
1470 chart_write_xscale(&dnp_chart, m->min, m->max,
1471 chart_rounded_tick((m->max - m->min) / 5.0));
1475 chart_write_yscale(&np_chart, yfirst, ylast,
1476 chart_rounded_tick((ylast - yfirst)/5.0) );
1479 /* We have to cache the detrended data, beacause we need to
1480 find its limits before we can plot it */
1482 d_data = xmalloc (n_data * sizeof(double));
1483 double d_max = -DBL_MAX;
1484 double d_min = DBL_MAX;
1485 for ( i = 0 ; i < n_data; ++i )
1487 const double ns = gsl_cdf_ugaussian_Pinv (wv[i].rank / ( m->n + 1));
1489 chart_datum(&np_chart, 0, wv[i].v.f, ns);
1491 d_data[i] = (wv[i].v.f - m->mean) / m->stddev - ns;
1493 if ( d_data[i] < d_min ) d_min = d_data[i];
1494 if ( d_data[i] > d_max ) d_max = d_data[i];
1497 chart_write_yscale(&dnp_chart, d_min, d_max,
1498 chart_rounded_tick((d_max - d_min) / 5.0));
1500 for ( i = 0 ; i < n_data; ++i )
1501 chart_datum(&dnp_chart, 0, wv[i].v.f, d_data[i]);
1506 chart_line(&np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y);
1507 chart_line(&dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X);
1509 chart_finalise(&np_chart);
1510 chart_finalise(&dnp_chart);