1 /* PSPP - EXAMINE data for normality . -*-c-*-
3 Copyright (C) 2004 Free Software Foundation, Inc.
4 Author: John Darrington 2004
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 #include <gsl/gsl_cdf.h>
30 #include "dictionary.h"
38 #include "value-labels.h"
43 #include "factor_stats.h"
52 +missing=miss:pairwise/!listwise,
54 incl:include/!exclude;
55 +compare=cmp:variables/!groups;
56 +plot[plt_]=stemleaf,boxplot,npplot,:spreadlevel(*d:n),histogram,all,none;
58 +statistics[st_]=descriptives,:extreme(*d:n),all,none.
67 static struct cmd_examine cmd;
69 static struct variable **dependent_vars;
71 static int n_dependent_vars;
76 /* The independent variable */
77 struct variable *indep_var[2];
80 /* Hash table of factor stats indexed by 2 values */
81 struct hsh_table *fstats;
83 /* The hash table after it has been crunched */
84 struct factor_statistics **fs;
90 /* Linked list of factors */
91 static struct factor *factors=0;
93 static struct metrics *totals=0;
98 struct factor *f = factors;
102 struct factor_statistics **fs = f->fs;
104 printf("Factor: %s BY %s\n",
105 var_to_string(f->indep_var[0]),
106 var_to_string(f->indep_var[1]) );
109 printf("Contains %d entries\n", hsh_count(f->fstats));
114 printf("Factor %g; %g\n", (*fs)->id[0].f, (*fs)->id[1].f);
117 printf("Factor %s; %s\n",
118 value_to_string(&(*fs)->id[0], f->indep_var[0]),
119 value_to_string(&(*fs)->id[1], f->indep_var[1]));
123 printf("Sum is %g; ",(*fs)->m[0].sum);
124 printf("N is %g; ",(*fs)->m[0].n);
125 printf("Mean is %g\n",(*fs)->m[0].mean);
137 /* Parse the clause specifying the factors */
138 static int examine_parse_independent_vars(struct cmd_examine *cmd);
142 /* Output functions */
143 static void show_summary(struct variable **dependent_var, int n_dep_var,
144 const struct factor *f);
146 static void show_extremes(struct variable **dependent_var,
148 const struct factor *factor,
151 static void show_descriptives(struct variable **dependent_var,
153 struct factor *factor);
156 void np_plot(const struct metrics *m, const char *factorname);
161 /* Per Split function */
162 static void run_examine(const struct casefile *cf, void *cmd_);
164 static void output_examine(void);
167 void factor_calc(struct ccase *c, int case_no,
168 double weight, int case_missing);
171 /* Function to use for testing for missing values */
172 static is_missing_func value_is_missing;
179 if ( !parse_examine(&cmd) )
182 /* If /MISSING=INCLUDE is set, then user missing values are ignored */
183 if (cmd.incl == XMN_INCLUDE )
184 value_is_missing = is_system_missing;
186 value_is_missing = is_missing;
188 if ( cmd.st_n == SYSMIS )
191 if ( ! cmd.sbc_cinterval)
192 cmd.n_cinterval[0] = 95.0;
194 multipass_procedure_with_splits (run_examine, &cmd);
204 /* Show all the appropriate tables */
210 /* Show totals if appropriate */
211 if ( ! cmd.sbc_nototal || factors == 0 )
213 show_summary(dependent_vars, n_dependent_vars, 0);
215 if ( cmd.sbc_statistics )
217 if ( cmd.a_statistics[XMN_ST_EXTREME])
218 show_extremes(dependent_vars, n_dependent_vars, 0, cmd.st_n);
220 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
221 show_descriptives(dependent_vars, n_dependent_vars, 0);
227 if ( cmd.a_plot[XMN_PLT_NPPLOT] )
231 for ( v = 0 ; v < n_dependent_vars; ++v )
232 np_plot(&totals[v], var_to_string(dependent_vars[v]));
240 /* Show grouped statistics as appropriate */
244 show_summary(dependent_vars, n_dependent_vars, fctr);
246 if ( cmd.sbc_statistics )
248 if ( cmd.a_statistics[XMN_ST_EXTREME])
249 show_extremes(dependent_vars, n_dependent_vars, fctr, cmd.st_n);
251 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
252 show_descriptives(dependent_vars, n_dependent_vars, fctr);
257 if ( cmd.a_plot[XMN_PLT_NPPLOT] )
260 for ( v = 0 ; v < n_dependent_vars; ++ v)
263 struct factor_statistics **fs = fctr->fs ;
264 for ( fs = fctr->fs ; *fs ; ++fs )
268 sprintf(buf1, "%s (",
269 var_to_string(dependent_vars[v]));
271 sprintf(buf2, "%s = %s",
272 var_to_string(fctr->indep_var[0]),
273 value_to_string(&(*fs)->id[0],fctr->indep_var[0]));
278 if ( fctr->indep_var[1] )
280 sprintf(buf2, "; %s = %s)",
281 var_to_string(fctr->indep_var[1]),
282 value_to_string(&(*fs)->id[1],
283 fctr->indep_var[1]));
291 np_plot(&(*fs)->m[v],buf1);
307 /* TOTAL and NOTOTAL are simple, mutually exclusive flags */
309 xmn_custom_total(struct cmd_examine *p)
311 if ( p->sbc_nototal )
313 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
321 xmn_custom_nototal(struct cmd_examine *p)
325 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
334 /* Parser for the variables sub command */
336 xmn_custom_variables(struct cmd_examine *cmd )
341 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
345 if (!parse_variables (default_dict, &dependent_vars, &n_dependent_vars,
346 PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
348 free (dependent_vars);
352 assert(n_dependent_vars);
354 totals = xmalloc( sizeof(struct metrics) * n_dependent_vars);
356 if ( lex_match(T_BY))
358 return examine_parse_independent_vars(cmd);
366 /* Parse the clause specifying the factors */
368 examine_parse_independent_vars(struct cmd_examine *cmd)
371 struct factor *sf = xmalloc(sizeof(struct factor));
373 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
378 sf->indep_var[0] = parse_variable();
379 sf->indep_var[1] = 0;
386 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
390 sf->indep_var[1] = parse_variable();
395 sf->fstats = hsh_create(4,
396 (hsh_compare_func *) factor_statistics_compare,
397 (hsh_hash_func *) factor_statistics_hash,
398 (hsh_free_func *) factor_statistics_free,
406 if ( token == '.' || token == '/' )
409 return examine_parse_independent_vars(cmd);
415 void populate_descriptives(struct tab_table *t, int col, int row,
416 const struct metrics *fs);
418 void populate_extremes(struct tab_table *t, int col, int row, int n,
419 const struct metrics *m);
421 void populate_summary(struct tab_table *t, int col, int row,
422 const struct metrics *m);
427 static int bad_weight_warn = 1;
430 /* Perform calculations for the sub factors */
432 factor_calc(struct ccase *c, int case_no, double weight, int case_missing)
435 struct factor *fctr = factors;
439 union value indep_vals[2] ;
441 indep_vals[0] = * case_data(c, fctr->indep_var[0]->fv);
443 if ( fctr->indep_var[1] )
444 indep_vals[1] = * case_data(c, fctr->indep_var[1]->fv);
446 indep_vals[1].f = SYSMIS;
448 assert(fctr->fstats);
450 struct factor_statistics **foo = ( struct factor_statistics ** )
451 hsh_probe(fctr->fstats, (void *) &indep_vals);
456 *foo = create_factor_statistics(n_dependent_vars,
460 for ( v = 0 ; v < n_dependent_vars ; ++v )
462 metrics_precalc( &(*foo)->m[v] );
467 for ( v = 0 ; v < n_dependent_vars ; ++v )
469 const struct variable *var = dependent_vars[v];
470 const union value *val = case_data (c, var->fv);
472 if ( value_is_missing(val,var) || case_missing )
475 metrics_calc( &(*foo)->m[v], val, weight, case_no );
488 run_examine(const struct casefile *cf, void *cmd_ )
490 struct casereader *r;
494 const struct cmd_examine *cmd = (struct cmd_examine *) cmd_;
496 /* Make sure we haven't got rubbish left over from a
499 struct factor *fctr = factors;
502 struct factor *next = fctr->next;
504 hsh_clear(fctr->fstats);
513 for ( v = 0 ; v < n_dependent_vars ; ++v )
514 metrics_precalc(&totals[v]);
516 for(r = casefile_get_reader (cf);
517 casereader_read (r, &c) ;
521 const int case_no = casereader_cnum(r);
523 const double weight =
524 dict_get_case_weight(default_dict, &c, &bad_weight_warn);
526 if ( cmd->miss == XMN_LISTWISE )
528 for ( v = 0 ; v < n_dependent_vars ; ++v )
530 const struct variable *var = dependent_vars[v];
531 const union value *val = case_data (&c, var->fv);
533 if ( value_is_missing(val,var))
539 for ( v = 0 ; v < n_dependent_vars ; ++v )
541 const struct variable *var = dependent_vars[v];
542 const union value *val = case_data (&c, var->fv);
544 if ( value_is_missing(val,var) || case_missing )
547 metrics_calc(&totals[v], val, weight, case_no );
551 factor_calc(&c, case_no, weight, case_missing);
556 for ( v = 0 ; v < n_dependent_vars ; ++v)
561 struct hsh_iterator hi;
562 struct factor_statistics *fs;
564 for ( fs = hsh_first(fctr->fstats, &hi);
566 fs = hsh_next(fctr->fstats, &hi))
568 metrics_postcalc(&fs->m[v]);
573 metrics_postcalc(&totals[v]);
577 /* Make sure that the combination of factors are complete */
582 struct hsh_iterator hi;
583 struct hsh_iterator hi0;
584 struct hsh_iterator hi1;
585 struct factor_statistics *fs;
587 struct hsh_table *idh0=0;
588 struct hsh_table *idh1=0;
592 idh0 = hsh_create(4, (hsh_compare_func *) compare_values,
593 (hsh_hash_func *) hash_value,
596 idh1 = hsh_create(4, (hsh_compare_func *) compare_values,
597 (hsh_hash_func *) hash_value,
601 for ( fs = hsh_first(fctr->fstats, &hi);
603 fs = hsh_next(fctr->fstats, &hi))
605 hsh_insert(idh0,(void *) &fs->id[0]);
606 hsh_insert(idh1,(void *) &fs->id[1]);
609 /* Ensure that the factors combination is complete */
610 for ( val0 = hsh_first(idh0, &hi0);
612 val0 = hsh_next(idh0, &hi0))
614 for ( val1 = hsh_first(idh1, &hi1);
616 val1 = hsh_next(idh1, &hi1))
618 struct factor_statistics **ffs;
623 ffs = (struct factor_statistics **)
624 hsh_probe(fctr->fstats, (void *) &key );
628 (*ffs) = create_factor_statistics (n_dependent_vars,
630 for ( i = 0 ; i < n_dependent_vars ; ++i )
631 metrics_precalc( &(*ffs)->m[i]);
639 fctr->fs = (struct factor_statistics **) hsh_sort_copy(fctr->fstats);
646 for ( v = 0 ; v < n_dependent_vars ; ++v )
647 hsh_destroy(totals[v].ordered_data);
653 show_summary(struct variable **dependent_var, int n_dep_var,
654 const struct factor *fctr)
656 static const char *subtitle[]=
664 int heading_columns ;
666 const int heading_rows = 3;
667 struct tab_table *tbl;
675 n_factors = hsh_count(fctr->fstats);
676 n_rows = n_dep_var * n_factors ;
678 if ( fctr->indep_var[1] )
687 n_rows += heading_rows;
689 n_cols = heading_columns + 6;
691 tbl = tab_create (n_cols,n_rows,0);
692 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
694 tab_dim (tbl, tab_natural_dimensions);
696 /* Outline the box */
701 n_cols - 1, n_rows - 1);
703 /* Vertical lines for the data only */
708 n_cols - 1, n_rows - 1);
711 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
712 tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, 1 );
713 tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, heading_rows -1 );
715 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
718 tab_title (tbl, 0, _("Case Processing Summary"));
721 tab_joint_text(tbl, heading_columns, 0,
723 TAB_CENTER | TAT_TITLE,
726 /* Remove lines ... */
733 for ( i = 0 ; i < 3 ; ++i )
735 tab_text (tbl, heading_columns + i*2 , 2, TAB_CENTER | TAT_TITLE,
738 tab_text (tbl, heading_columns + i*2 + 1, 2, TAB_CENTER | TAT_TITLE,
741 tab_joint_text(tbl, heading_columns + i*2 , 1,
742 heading_columns + i*2 + 1, 1,
743 TAB_CENTER | TAT_TITLE,
746 tab_box (tbl, -1, -1,
748 heading_columns + i*2, 1,
749 heading_columns + i*2 + 1, 1);
754 /* Titles for the independent variables */
757 tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
758 var_to_string(fctr->indep_var[0]));
760 if ( fctr->indep_var[1] )
762 tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
763 var_to_string(fctr->indep_var[1]));
769 for ( i = 0 ; i < n_dep_var ; ++i )
773 n_factors = hsh_count(fctr->fstats);
777 tab_hline(tbl, TAL_1, 0, n_cols -1 , i * n_factors + heading_rows);
780 0, i * n_factors + heading_rows,
781 TAB_LEFT | TAT_TITLE,
782 var_to_string(dependent_var[i])
787 populate_summary(tbl, heading_columns,
788 (i * n_factors) + heading_rows,
794 struct factor_statistics **fs = fctr->fs;
799 static union value prev;
801 if ( 0 != compare_values(&prev, &(*fs)->id[0],
802 fctr->indep_var[0]->width))
806 (i * n_factors ) + count +
808 TAB_LEFT | TAT_TITLE,
809 value_to_string(&(*fs)->id[0], fctr->indep_var[0])
812 if (fctr->indep_var[1] && count > 0 )
813 tab_hline(tbl, TAL_1, 1, n_cols - 1,
814 (i * n_factors ) + count + heading_rows);
821 if ( fctr->indep_var[1])
824 (i * n_factors ) + count +
826 TAB_LEFT | TAT_TITLE,
827 value_to_string(&(*fs)->id[1], fctr->indep_var[1])
830 populate_summary(tbl, heading_columns,
831 (i * n_factors) + count
846 populate_summary(struct tab_table *t, int col, int row,
847 const struct metrics *m)
850 const double total = m->n + m->n_missing ;
852 tab_float(t, col + 0, row + 0, TAB_RIGHT, m->n, 8, 0);
853 tab_float(t, col + 2, row + 0, TAB_RIGHT, m->n_missing, 8, 0);
854 tab_float(t, col + 4, row + 0, TAB_RIGHT, total, 8, 0);
858 tab_text (t, col + 1, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%",
859 100.0 * m->n / total );
861 tab_text (t, col + 3, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%",
862 100.0 * m->n_missing / total );
864 /* This seems a bit pointless !!! */
865 tab_text (t, col + 5, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%",
866 100.0 * total / total );
877 show_extremes(struct variable **dependent_var, int n_dep_var,
878 const struct factor *fctr, int n_extremities)
881 int heading_columns ;
883 const int heading_rows = 1;
884 struct tab_table *tbl;
892 n_factors = hsh_count(fctr->fstats);
894 n_rows = n_dep_var * 2 * n_extremities * n_factors;
896 if ( fctr->indep_var[1] )
902 n_rows = n_dep_var * 2 * n_extremities;
905 n_rows += heading_rows;
907 heading_columns += 2;
908 n_cols = heading_columns + 2;
910 tbl = tab_create (n_cols,n_rows,0);
911 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
913 tab_dim (tbl, tab_natural_dimensions);
915 /* Outline the box, No internal lines*/
920 n_cols - 1, n_rows - 1);
922 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
924 tab_title (tbl, 0, _("Extreme Values"));
927 tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows -1);
928 tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows -1);
932 tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
933 var_to_string(fctr->indep_var[0]));
935 if ( fctr->indep_var[1] )
936 tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
937 var_to_string(fctr->indep_var[1]));
940 tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value"));
941 tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number"));
946 for ( i = 0 ; i < n_dep_var ; ++i )
950 tab_hline(tbl, TAL_1, 0, n_cols -1 ,
951 i * 2 * n_extremities * n_factors + heading_rows);
954 i * 2 * n_extremities * n_factors + heading_rows,
955 TAB_LEFT | TAT_TITLE,
956 var_to_string(dependent_var[i])
961 populate_extremes(tbl, heading_columns - 2,
962 i * 2 * n_extremities * n_factors + heading_rows,
963 n_extremities, &totals[i]);
967 struct factor_statistics **fs = fctr->fs;
972 static union value prev ;
974 const int row = heading_rows + ( 2 * n_extremities ) *
975 ( ( i * n_factors ) + count );
978 if ( 0 != compare_values(&prev, &(*fs)->id[0],
979 fctr->indep_var[0]->width))
983 tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
987 TAB_LEFT | TAT_TITLE,
988 value_to_string(&(*fs)->id[0], fctr->indep_var[0])
994 if (fctr->indep_var[1] && count > 0 )
995 tab_hline(tbl, TAL_1, 2, n_cols - 1, row);
997 if ( fctr->indep_var[1])
998 tab_text (tbl, 2, row,
999 TAB_LEFT | TAT_TITLE,
1000 value_to_string(&(*fs)->id[1], fctr->indep_var[1])
1003 populate_extremes(tbl, heading_columns - 2,
1018 /* Fill in the extremities table */
1020 populate_extremes(struct tab_table *t,
1021 int col, int row, int n, const struct metrics *m)
1027 tab_text(t, col, row,
1028 TAB_RIGHT | TAT_TITLE ,
1032 tab_text(t, col, row + n ,
1033 TAB_RIGHT | TAT_TITLE ,
1038 tab_hline(t, TAL_1, col, col + 3, row + n );
1040 for (extremity = 0; extremity < n ; ++extremity )
1043 tab_float(t, col + 1, row + extremity,
1045 extremity + 1, 8, 0);
1049 tab_float(t, col + 1, row + extremity + n,
1051 extremity + 1, 8, 0);
1057 for (idx = 0, extremity = 0; extremity < n && idx < m->n_data ; ++idx )
1060 const struct weighted_value *wv = m->wvp[idx];
1061 struct case_node *cn = wv->case_nos;
1064 for (j = 0 ; j < wv->w ; ++j )
1066 if ( extremity + j >= n )
1069 tab_float(t, col + 3, row + extremity + j + n,
1073 tab_float(t, col + 2, row + extremity + j + n,
1082 extremity += wv->w ;
1087 for (idx = m->n_data - 1, extremity = 0; extremity < n && idx >= 0; --idx )
1090 const struct weighted_value *wv = m->wvp[idx];
1091 struct case_node *cn = wv->case_nos;
1093 for (j = 0 ; j < wv->w ; ++j )
1095 if ( extremity + j >= n )
1098 tab_float(t, col + 3, row + extremity + j,
1102 tab_float(t, col + 2, row + extremity + j,
1111 extremity += wv->w ;
1116 /* Show the descriptives table */
1118 show_descriptives(struct variable **dependent_var,
1120 struct factor *fctr)
1123 int heading_columns ;
1125 const int n_stat_rows = 13;
1127 const int heading_rows = 1;
1129 struct tab_table *tbl;
1136 heading_columns = 4;
1137 n_factors = hsh_count(fctr->fstats);
1139 n_rows = n_dep_var * n_stat_rows * n_factors;
1141 if ( fctr->indep_var[1] )
1142 heading_columns = 5;
1146 heading_columns = 3;
1147 n_rows = n_dep_var * n_stat_rows;
1150 n_rows += heading_rows;
1152 n_cols = heading_columns + 2;
1155 tbl = tab_create (n_cols, n_rows, 0);
1157 tab_headers (tbl, heading_columns + 1, 0, heading_rows, 0);
1159 tab_dim (tbl, tab_natural_dimensions);
1161 /* Outline the box and have no internal lines*/
1166 n_cols - 1, n_rows - 1);
1168 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1170 tab_vline (tbl, TAL_1, heading_columns, 0, n_rows - 1);
1171 tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows - 1);
1172 tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1);
1174 tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Statistic"));
1175 tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Std. Error"));
1177 tab_title (tbl, 0, _("Descriptives"));
1180 for ( i = 0 ; i < n_dep_var ; ++i )
1182 const int row = heading_rows + i * n_stat_rows * n_factors ;
1185 tab_hline(tbl, TAL_1, 0, n_cols - 1, row );
1188 i * n_stat_rows * n_factors + heading_rows,
1189 TAB_LEFT | TAT_TITLE,
1190 var_to_string(dependent_var[i])
1196 struct factor_statistics **fs = fctr->fs;
1199 tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1200 var_to_string(fctr->indep_var[0]));
1203 if ( fctr->indep_var[1])
1204 tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1205 var_to_string(fctr->indep_var[1]));
1210 static union value prev ;
1212 const int row = heading_rows + n_stat_rows *
1213 ( ( i * n_factors ) + count );
1216 if ( 0 != compare_values(&prev, &(*fs)->id[0],
1217 fctr->indep_var[0]->width))
1221 tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
1225 TAB_LEFT | TAT_TITLE,
1226 value_to_string(&(*fs)->id[0], fctr->indep_var[0])
1230 prev = (*fs)->id[0];
1232 if (fctr->indep_var[1] && count > 0 )
1233 tab_hline(tbl, TAL_1, 2, n_cols - 1, row);
1235 if ( fctr->indep_var[1])
1236 tab_text (tbl, 2, row,
1237 TAB_LEFT | TAT_TITLE,
1238 value_to_string(&(*fs)->id[1], fctr->indep_var[1])
1241 populate_descriptives(tbl, heading_columns - 2,
1253 populate_descriptives(tbl, heading_columns - 2,
1254 i * n_stat_rows * n_factors + heading_rows,
1268 /* Fill in the descriptives data */
1270 populate_descriptives(struct tab_table *tbl, int col, int row,
1271 const struct metrics *m)
1274 const double t = gsl_cdf_tdist_Qinv(1 - cmd.n_cinterval[0]/100.0/2.0, \
1280 TAB_LEFT | TAT_TITLE,
1283 tab_float (tbl, col + 2,
1289 tab_float (tbl, col + 3,
1298 TAB_LEFT | TAT_TITLE | TAT_PRINTF,
1299 _("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]);
1302 tab_text (tbl, col + 1,
1304 TAB_LEFT | TAT_TITLE,
1307 tab_float (tbl, col + 2,
1310 m->mean - t * m->stderr,
1313 tab_text (tbl, col + 1,
1315 TAB_LEFT | TAT_TITLE,
1319 tab_float (tbl, col + 2,
1322 m->mean + t * m->stderr,
1327 TAB_LEFT | TAT_TITLE,
1328 _("5% Trimmed Mean"));
1330 tab_float (tbl, col + 2,
1338 TAB_LEFT | TAT_TITLE,
1343 TAB_LEFT | TAT_TITLE,
1346 tab_float (tbl, col + 2,
1355 TAB_LEFT | TAT_TITLE,
1356 _("Std. Deviation"));
1359 tab_float (tbl, col + 2,
1368 TAB_LEFT | TAT_TITLE,
1371 tab_float (tbl, col + 2,
1379 TAB_LEFT | TAT_TITLE,
1382 tab_float (tbl, col + 2,
1391 TAB_LEFT | TAT_TITLE,
1395 tab_float (tbl, col + 2,
1403 TAB_LEFT | TAT_TITLE,
1404 _("Interquartile Range"));
1408 TAB_LEFT | TAT_TITLE,
1413 TAB_LEFT | TAT_TITLE,
1421 /* Plot the normal and detrended normal plots for m
1422 Label the plots with factorname */
1424 np_plot(const struct metrics *m, const char *factorname)
1427 double yfirst=0, ylast=0;
1430 struct chart np_chart;
1432 /* Detrended Normal Plot */
1433 struct chart dnp_chart;
1435 const struct weighted_value *wv = *(m->wvp);
1438 /* The slope and intercept of the ideal normal probability line */
1439 const double slope = 1.0 / m->stddev;
1440 const double intercept = - m->mean / m->stddev;
1442 /* Cowardly refuse to plot an empty data set */
1443 if ( m->n_data == 0 )
1446 chart_initialise(&np_chart);
1447 chart_write_title(&np_chart, _("Normal Q-Q Plot of %s"), factorname);
1448 chart_write_xlabel(&np_chart, _("Observed Value"));
1449 chart_write_ylabel(&np_chart, _("Expected Normal"));
1451 chart_initialise(&dnp_chart);
1452 chart_write_title(&dnp_chart, _("Detrended Normal Q-Q Plot of %s"),
1454 chart_write_xlabel(&dnp_chart, _("Observed Value"));
1455 chart_write_ylabel(&dnp_chart, _("Dev from Normal"));
1457 yfirst = gsl_cdf_ugaussian_Pinv (wv[0].rank / ( m->n + 1));
1458 ylast = gsl_cdf_ugaussian_Pinv (wv[m->n_data-1].rank / ( m->n + 1));
1461 /* Need to make sure that both the scatter plot and the ideal fit into the
1463 double x_lower = min(m->min, (yfirst - intercept) / slope) ;
1464 double x_upper = max(m->max, (ylast - intercept) / slope) ;
1465 double slack = (x_upper - x_lower) * 0.05 ;
1467 chart_write_xscale(&np_chart, x_lower - slack, x_upper + slack,
1468 chart_rounded_tick((m->max - m->min) / 5.0));
1471 chart_write_xscale(&dnp_chart, m->min, m->max,
1472 chart_rounded_tick((m->max - m->min) / 5.0));
1476 chart_write_yscale(&np_chart, yfirst, ylast,
1477 chart_rounded_tick((ylast - yfirst)/5.0) );
1480 /* We have to cache the detrended data, beacause we need to
1481 find its limits before we can plot it */
1483 d_data = xmalloc (m->n_data * sizeof(double));
1484 double d_max = -DBL_MAX;
1485 double d_min = DBL_MAX;
1486 for ( i = 0 ; i < m->n_data; ++i )
1488 const double ns = gsl_cdf_ugaussian_Pinv (wv[i].rank / ( m->n + 1));
1490 chart_datum(&np_chart, 0, wv[i].v.f, ns);
1492 d_data[i] = (wv[i].v.f - m->mean) / m->stddev - ns;
1494 if ( d_data[i] < d_min ) d_min = d_data[i];
1495 if ( d_data[i] > d_max ) d_max = d_data[i];
1498 chart_write_yscale(&dnp_chart, d_min, d_max,
1499 chart_rounded_tick((d_max - d_min) / 5.0));
1501 for ( i = 0 ; i < m->n_data; ++i )
1502 chart_datum(&dnp_chart, 0, wv[i].v.f, d_data[i]);
1507 chart_line(&np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y);
1508 chart_line(&dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X);
1510 chart_finalise(&np_chart);
1511 chart_finalise(&dnp_chart);