1 /* PSPP - EXAMINE data for normality . -*-c-*-
3 Copyright (C) 2004 Free Software Foundation, Inc.
4 Author: John Darrington 2004
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 #include <gsl/gsl_cdf.h>
30 #include "dictionary.h"
38 #include "value-labels.h"
43 #include "factor_stats.h"
45 #include "percentiles.h"
55 +missing=miss:pairwise/!listwise,
57 incl:include/!exclude;
58 +compare=cmp:variables/!groups;
60 +plot[plt_]=stemleaf,boxplot,npplot,:spreadlevel(*d:n),histogram,all,none;
62 +statistics[st_]=descriptives,:extreme(*d:n),all,none.
71 static struct cmd_examine cmd;
73 static struct variable **dependent_vars;
75 static int n_dependent_vars;
80 /* The independent variable */
81 struct variable *indep_var[2];
84 /* Hash table of factor stats indexed by 2 values */
85 struct hsh_table *fstats;
87 /* The hash table after it has been crunched */
88 struct factor_statistics **fs;
94 /* Linked list of factors */
95 static struct factor *factors=0;
97 static struct metrics *totals=0;
99 /* Parse the clause specifying the factors */
100 static int examine_parse_independent_vars(struct cmd_examine *cmd);
104 /* Output functions */
105 static void show_summary(struct variable **dependent_var, int n_dep_var,
106 const struct factor *f);
108 static void show_extremes(struct variable **dependent_var,
110 const struct factor *factor,
113 static void show_descriptives(struct variable **dependent_var,
115 struct factor *factor);
117 static void show_percentiles(struct variable **dependent_var,
119 struct factor *factor);
123 void np_plot(const struct metrics *m, const char *factorname);
127 /* Per Split function */
128 static void run_examine(const struct casefile *cf, void *cmd_);
130 static void output_examine(void);
133 void factor_calc(struct ccase *c, int case_no,
134 double weight, int case_missing);
137 /* Function to use for testing for missing values */
138 static is_missing_func value_is_missing;
143 static subc_list_double percentile_list;
145 static enum pc_alg percentile_algorithm;
147 static short sbc_percentile;
154 subc_list_double_create(&percentile_list);
155 percentile_algorithm = PC_HAVERAGE;
157 if ( !parse_examine(&cmd) )
160 /* If /MISSING=INCLUDE is set, then user missing values are ignored */
161 if (cmd.incl == XMN_INCLUDE )
162 value_is_missing = is_system_missing;
164 value_is_missing = is_missing;
166 if ( cmd.st_n == SYSMIS )
169 if ( ! cmd.sbc_cinterval)
170 cmd.n_cinterval[0] = 95.0;
173 /* If descriptives have been requested, make sure the
174 quartiles are calculated */
175 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
177 subc_list_double_push(&percentile_list, 25);
178 subc_list_double_push(&percentile_list, 50);
179 subc_list_double_push(&percentile_list, 75);
182 multipass_procedure_with_splits (run_examine, &cmd);
187 subc_list_double_destroy(&percentile_list);
194 /* Show all the appropriate tables */
200 /* Show totals if appropriate */
201 if ( ! cmd.sbc_nototal || factors == 0 )
203 show_summary(dependent_vars, n_dependent_vars, 0);
205 if ( cmd.sbc_statistics )
207 if ( cmd.a_statistics[XMN_ST_EXTREME])
208 show_extremes(dependent_vars, n_dependent_vars, 0, cmd.st_n);
210 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
211 show_descriptives(dependent_vars, n_dependent_vars, 0);
214 if ( sbc_percentile )
215 show_percentiles(dependent_vars, n_dependent_vars, 0);
220 if ( cmd.a_plot[XMN_PLT_NPPLOT] )
222 for ( v = 0 ; v < n_dependent_vars; ++v )
223 np_plot(&totals[v], var_to_string(dependent_vars[v]));
226 if ( cmd.a_plot[XMN_PLT_HISTOGRAM] )
228 for ( v = 0 ; v < n_dependent_vars; ++v )
230 struct normal_curve normal;
232 normal.N = totals[v].n;
233 normal.mean = totals[v].mean;
234 normal.stddev = totals[v].stddev;
236 histogram_plot(totals[v].histogram,
237 var_to_string(dependent_vars[v]),
247 /* Show grouped statistics as appropriate */
251 show_summary(dependent_vars, n_dependent_vars, fctr);
253 if ( cmd.sbc_statistics )
255 if ( cmd.a_statistics[XMN_ST_EXTREME])
256 show_extremes(dependent_vars, n_dependent_vars, fctr, cmd.st_n);
258 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
259 show_descriptives(dependent_vars, n_dependent_vars, fctr);
262 if ( sbc_percentile )
263 show_percentiles(dependent_vars, n_dependent_vars, fctr);
270 struct factor_statistics **fs = fctr->fs ;
272 for ( v = 0 ; v < n_dependent_vars; ++v )
275 for ( fs = fctr->fs ; *fs ; ++fs )
279 sprintf(buf1, "%s (",
280 var_to_string(dependent_vars[v]));
282 snprintf(buf2, 100, "%s = %s",
283 var_to_string(fctr->indep_var[0]),
284 value_to_string(&(*fs)->id[0],fctr->indep_var[0]));
288 if ( fctr->indep_var[1] )
290 sprintf(buf2, "; %s = %s)",
291 var_to_string(fctr->indep_var[1]),
292 value_to_string(&(*fs)->id[1],
293 fctr->indep_var[1]));
301 if ( cmd.a_plot[XMN_PLT_NPPLOT] )
302 np_plot(&(*fs)->m[v],buf1);
305 if ( cmd.a_plot[XMN_PLT_HISTOGRAM] )
307 struct normal_curve normal;
309 normal.N = (*fs)->m[v].n;
310 normal.mean = (*fs)->m[v].mean;
311 normal.stddev = (*fs)->m[v].stddev;
313 histogram_plot((*fs)->m[v].histogram,
317 } /* for ( fs .... */
319 } /* for ( v = 0 ..... */
329 static struct hsh_table *
330 list_to_ptile_hash(const subc_list_double *l)
334 struct hsh_table *h ;
336 h = hsh_create(subc_list_double_count(l),
337 (hsh_compare_func *) ptile_compare,
338 (hsh_hash_func *) ptile_hash,
339 (hsh_free_func *) free,
343 for ( i = 0 ; i < subc_list_double_count(l) ; ++i )
345 struct percentile *p = xmalloc (sizeof (struct percentile));
347 p->p = subc_list_double_at(l,i);
357 /* Parse the PERCENTILES subcommand */
359 xmn_custom_percentiles(struct cmd_examine *p UNUSED)
367 while ( lex_double_p() )
369 subc_list_double_push(&percentile_list,lex_double());
379 if ( lex_match_id("HAVERAGE"))
380 percentile_algorithm = PC_HAVERAGE;
382 else if ( lex_match_id("WAVERAGE"))
383 percentile_algorithm = PC_WAVERAGE;
385 else if ( lex_match_id("ROUND"))
386 percentile_algorithm = PC_ROUND;
388 else if ( lex_match_id("EMPIRICAL"))
389 percentile_algorithm = PC_EMPIRICAL;
391 else if ( lex_match_id("AEMPIRICAL"))
392 percentile_algorithm = PC_AEMPIRICAL;
394 else if ( lex_match_id("NONE"))
395 percentile_algorithm = PC_NONE;
398 if ( 0 == subc_list_double_count(&percentile_list))
400 subc_list_double_push(&percentile_list, 5);
401 subc_list_double_push(&percentile_list, 10);
402 subc_list_double_push(&percentile_list, 25);
403 subc_list_double_push(&percentile_list, 50);
404 subc_list_double_push(&percentile_list, 75);
405 subc_list_double_push(&percentile_list, 90);
406 subc_list_double_push(&percentile_list, 95);
412 /* TOTAL and NOTOTAL are simple, mutually exclusive flags */
414 xmn_custom_total(struct cmd_examine *p)
416 if ( p->sbc_nototal )
418 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
426 xmn_custom_nototal(struct cmd_examine *p)
430 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
439 /* Parser for the variables sub command */
441 xmn_custom_variables(struct cmd_examine *cmd )
446 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
450 if (!parse_variables (default_dict, &dependent_vars, &n_dependent_vars,
451 PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
453 free (dependent_vars);
457 assert(n_dependent_vars);
459 totals = xmalloc( sizeof(struct metrics) * n_dependent_vars);
461 if ( lex_match(T_BY))
463 return examine_parse_independent_vars(cmd);
471 /* Parse the clause specifying the factors */
473 examine_parse_independent_vars(struct cmd_examine *cmd)
476 struct factor *sf = xmalloc(sizeof(struct factor));
478 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
483 sf->indep_var[0] = parse_variable();
484 sf->indep_var[1] = 0;
491 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
495 sf->indep_var[1] = parse_variable();
500 sf->fstats = hsh_create(4,
501 (hsh_compare_func *) factor_statistics_compare,
502 (hsh_hash_func *) factor_statistics_hash,
503 (hsh_free_func *) factor_statistics_free,
511 if ( token == '.' || token == '/' )
514 return examine_parse_independent_vars(cmd);
520 void populate_percentiles(struct tab_table *tbl, int col, int row,
521 const struct metrics *m);
523 void populate_descriptives(struct tab_table *t, int col, int row,
524 const struct metrics *fs);
526 void populate_extremes(struct tab_table *t, int col, int row, int n,
527 const struct metrics *m);
529 void populate_summary(struct tab_table *t, int col, int row,
530 const struct metrics *m);
535 static int bad_weight_warn = 1;
538 /* Perform calculations for the sub factors */
540 factor_calc(struct ccase *c, int case_no, double weight, int case_missing)
543 struct factor *fctr = factors;
547 union value indep_vals[2] ;
549 indep_vals[0] = * case_data(c, fctr->indep_var[0]->fv);
551 if ( fctr->indep_var[1] )
552 indep_vals[1] = * case_data(c, fctr->indep_var[1]->fv);
554 indep_vals[1].f = SYSMIS;
556 assert(fctr->fstats);
558 struct factor_statistics **foo = ( struct factor_statistics ** )
559 hsh_probe(fctr->fstats, (void *) &indep_vals);
564 *foo = create_factor_statistics(n_dependent_vars,
568 for ( v = 0 ; v < n_dependent_vars ; ++v )
570 metrics_precalc( &(*foo)->m[v] );
575 for ( v = 0 ; v < n_dependent_vars ; ++v )
577 const struct variable *var = dependent_vars[v];
578 const union value *val = case_data (c, var->fv);
580 if ( value_is_missing(val,var) || case_missing )
583 metrics_calc( &(*foo)->m[v], val, weight, case_no );
596 run_examine(const struct casefile *cf, void *cmd_ )
598 struct casereader *r;
602 const struct cmd_examine *cmd = (struct cmd_examine *) cmd_;
604 /* Make sure we haven't got rubbish left over from a
607 struct factor *fctr = factors;
610 struct factor *next = fctr->next;
612 hsh_clear(fctr->fstats);
621 for ( v = 0 ; v < n_dependent_vars ; ++v )
622 metrics_precalc(&totals[v]);
624 for(r = casefile_get_reader (cf);
625 casereader_read (r, &c) ;
629 const int case_no = casereader_cnum(r);
631 const double weight =
632 dict_get_case_weight(default_dict, &c, &bad_weight_warn);
634 if ( cmd->miss == XMN_LISTWISE )
636 for ( v = 0 ; v < n_dependent_vars ; ++v )
638 const struct variable *var = dependent_vars[v];
639 const union value *val = case_data (&c, var->fv);
641 if ( value_is_missing(val,var))
647 for ( v = 0 ; v < n_dependent_vars ; ++v )
649 const struct variable *var = dependent_vars[v];
650 const union value *val = case_data (&c, var->fv);
652 if ( value_is_missing(val,var) || case_missing )
655 metrics_calc(&totals[v], val, weight, case_no );
659 factor_calc(&c, case_no, weight, case_missing);
664 for ( v = 0 ; v < n_dependent_vars ; ++v)
669 struct hsh_iterator hi;
670 struct factor_statistics *fs;
672 for ( fs = hsh_first(fctr->fstats, &hi);
674 fs = hsh_next(fctr->fstats, &hi))
677 fs->m[v].ptile_hash = list_to_ptile_hash(&percentile_list);
678 fs->m[v].ptile_alg = percentile_algorithm;
679 metrics_postcalc(&fs->m[v]);
685 totals[v].ptile_hash = list_to_ptile_hash(&percentile_list);
686 totals[v].ptile_alg = percentile_algorithm;
687 metrics_postcalc(&totals[v]);
691 /* Make sure that the combination of factors are complete */
696 struct hsh_iterator hi;
697 struct hsh_iterator hi0;
698 struct hsh_iterator hi1;
699 struct factor_statistics *fs;
701 struct hsh_table *idh0=0;
702 struct hsh_table *idh1=0;
706 idh0 = hsh_create(4, (hsh_compare_func *) compare_values,
707 (hsh_hash_func *) hash_value,
710 idh1 = hsh_create(4, (hsh_compare_func *) compare_values,
711 (hsh_hash_func *) hash_value,
715 for ( fs = hsh_first(fctr->fstats, &hi);
717 fs = hsh_next(fctr->fstats, &hi))
719 hsh_insert(idh0,(void *) &fs->id[0]);
720 hsh_insert(idh1,(void *) &fs->id[1]);
723 /* Ensure that the factors combination is complete */
724 for ( val0 = hsh_first(idh0, &hi0);
726 val0 = hsh_next(idh0, &hi0))
728 for ( val1 = hsh_first(idh1, &hi1);
730 val1 = hsh_next(idh1, &hi1))
732 struct factor_statistics **ffs;
737 ffs = (struct factor_statistics **)
738 hsh_probe(fctr->fstats, (void *) &key );
742 (*ffs) = create_factor_statistics (n_dependent_vars,
744 for ( i = 0 ; i < n_dependent_vars ; ++i )
745 metrics_precalc( &(*ffs)->m[i]);
753 fctr->fs = (struct factor_statistics **) hsh_sort_copy(fctr->fstats);
760 for ( v = 0 ; v < n_dependent_vars ; ++v )
761 hsh_destroy(totals[v].ordered_data);
767 show_summary(struct variable **dependent_var, int n_dep_var,
768 const struct factor *fctr)
770 static const char *subtitle[]=
778 int heading_columns ;
780 const int heading_rows = 3;
781 struct tab_table *tbl;
789 n_factors = hsh_count(fctr->fstats);
790 n_rows = n_dep_var * n_factors ;
792 if ( fctr->indep_var[1] )
801 n_rows += heading_rows;
803 n_cols = heading_columns + 6;
805 tbl = tab_create (n_cols,n_rows,0);
806 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
808 tab_dim (tbl, tab_natural_dimensions);
810 /* Outline the box */
815 n_cols - 1, n_rows - 1);
817 /* Vertical lines for the data only */
822 n_cols - 1, n_rows - 1);
825 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
826 tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, 1 );
827 tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, heading_rows -1 );
829 tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
832 tab_title (tbl, 0, _("Case Processing Summary"));
835 tab_joint_text(tbl, heading_columns, 0,
837 TAB_CENTER | TAT_TITLE,
840 /* Remove lines ... */
847 for ( i = 0 ; i < 3 ; ++i )
849 tab_text (tbl, heading_columns + i*2 , 2, TAB_CENTER | TAT_TITLE,
852 tab_text (tbl, heading_columns + i*2 + 1, 2, TAB_CENTER | TAT_TITLE,
855 tab_joint_text(tbl, heading_columns + i*2 , 1,
856 heading_columns + i*2 + 1, 1,
857 TAB_CENTER | TAT_TITLE,
860 tab_box (tbl, -1, -1,
862 heading_columns + i*2, 1,
863 heading_columns + i*2 + 1, 1);
868 /* Titles for the independent variables */
871 tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
872 var_to_string(fctr->indep_var[0]));
874 if ( fctr->indep_var[1] )
876 tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
877 var_to_string(fctr->indep_var[1]));
883 for ( i = 0 ; i < n_dep_var ; ++i )
887 n_factors = hsh_count(fctr->fstats);
891 tab_hline(tbl, TAL_1, 0, n_cols -1 , i * n_factors + heading_rows);
894 0, i * n_factors + heading_rows,
895 TAB_LEFT | TAT_TITLE,
896 var_to_string(dependent_var[i])
901 populate_summary(tbl, heading_columns,
902 (i * n_factors) + heading_rows,
908 struct factor_statistics **fs = fctr->fs;
913 static union value prev;
915 if ( 0 != compare_values(&prev, &(*fs)->id[0],
916 fctr->indep_var[0]->width))
920 (i * n_factors ) + count +
922 TAB_LEFT | TAT_TITLE,
923 value_to_string(&(*fs)->id[0], fctr->indep_var[0])
926 if (fctr->indep_var[1] && count > 0 )
927 tab_hline(tbl, TAL_1, 1, n_cols - 1,
928 (i * n_factors ) + count + heading_rows);
935 if ( fctr->indep_var[1])
938 (i * n_factors ) + count +
940 TAB_LEFT | TAT_TITLE,
941 value_to_string(&(*fs)->id[1], fctr->indep_var[1])
944 populate_summary(tbl, heading_columns,
945 (i * n_factors) + count
960 populate_summary(struct tab_table *t, int col, int row,
961 const struct metrics *m)
964 const double total = m->n + m->n_missing ;
966 tab_float(t, col + 0, row + 0, TAB_RIGHT, m->n, 8, 0);
967 tab_float(t, col + 2, row + 0, TAB_RIGHT, m->n_missing, 8, 0);
968 tab_float(t, col + 4, row + 0, TAB_RIGHT, total, 8, 0);
972 tab_text (t, col + 1, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%",
973 100.0 * m->n / total );
975 tab_text (t, col + 3, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%",
976 100.0 * m->n_missing / total );
978 /* This seems a bit pointless !!! */
979 tab_text (t, col + 5, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%",
980 100.0 * total / total );
991 show_extremes(struct variable **dependent_var, int n_dep_var,
992 const struct factor *fctr, int n_extremities)
995 int heading_columns ;
997 const int heading_rows = 1;
998 struct tab_table *tbl;
1005 heading_columns = 2;
1006 n_factors = hsh_count(fctr->fstats);
1008 n_rows = n_dep_var * 2 * n_extremities * n_factors;
1010 if ( fctr->indep_var[1] )
1011 heading_columns = 3;
1015 heading_columns = 1;
1016 n_rows = n_dep_var * 2 * n_extremities;
1019 n_rows += heading_rows;
1021 heading_columns += 2;
1022 n_cols = heading_columns + 2;
1024 tbl = tab_create (n_cols,n_rows,0);
1025 tab_headers (tbl, heading_columns, 0, heading_rows, 0);
1027 tab_dim (tbl, tab_natural_dimensions);
1029 /* Outline the box, No internal lines*/
1034 n_cols - 1, n_rows - 1);
1036 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1038 tab_title (tbl, 0, _("Extreme Values"));
1041 tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows -1);
1042 tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows -1);
1046 tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1047 var_to_string(fctr->indep_var[0]));
1049 if ( fctr->indep_var[1] )
1050 tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1051 var_to_string(fctr->indep_var[1]));
1054 tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value"));
1055 tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number"));
1060 for ( i = 0 ; i < n_dep_var ; ++i )
1064 tab_hline(tbl, TAL_1, 0, n_cols -1 ,
1065 i * 2 * n_extremities * n_factors + heading_rows);
1068 i * 2 * n_extremities * n_factors + heading_rows,
1069 TAB_LEFT | TAT_TITLE,
1070 var_to_string(dependent_var[i])
1075 populate_extremes(tbl, heading_columns - 2,
1076 i * 2 * n_extremities * n_factors + heading_rows,
1077 n_extremities, &totals[i]);
1081 struct factor_statistics **fs = fctr->fs;
1086 static union value prev ;
1088 const int row = heading_rows + ( 2 * n_extremities ) *
1089 ( ( i * n_factors ) + count );
1092 if ( 0 != compare_values(&prev, &(*fs)->id[0],
1093 fctr->indep_var[0]->width))
1097 tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
1101 TAB_LEFT | TAT_TITLE,
1102 value_to_string(&(*fs)->id[0], fctr->indep_var[0])
1106 prev = (*fs)->id[0];
1108 if (fctr->indep_var[1] && count > 0 )
1109 tab_hline(tbl, TAL_1, 2, n_cols - 1, row);
1111 if ( fctr->indep_var[1])
1112 tab_text (tbl, 2, row,
1113 TAB_LEFT | TAT_TITLE,
1114 value_to_string(&(*fs)->id[1], fctr->indep_var[1])
1117 populate_extremes(tbl, heading_columns - 2,
1132 /* Fill in the extremities table */
1134 populate_extremes(struct tab_table *t,
1135 int col, int row, int n, const struct metrics *m)
1141 tab_text(t, col, row,
1142 TAB_RIGHT | TAT_TITLE ,
1146 tab_text(t, col, row + n ,
1147 TAB_RIGHT | TAT_TITLE ,
1152 tab_hline(t, TAL_1, col, col + 3, row + n );
1154 for (extremity = 0; extremity < n ; ++extremity )
1157 tab_float(t, col + 1, row + extremity,
1159 extremity + 1, 8, 0);
1163 tab_float(t, col + 1, row + extremity + n,
1165 extremity + 1, 8, 0);
1171 for (idx = 0, extremity = 0; extremity < n && idx < m->n_data ; ++idx )
1174 const struct weighted_value *wv = m->wvp[idx];
1175 struct case_node *cn = wv->case_nos;
1178 for (j = 0 ; j < wv->w ; ++j )
1180 if ( extremity + j >= n )
1183 tab_float(t, col + 3, row + extremity + j + n,
1187 tab_float(t, col + 2, row + extremity + j + n,
1196 extremity += wv->w ;
1201 for (idx = m->n_data - 1, extremity = 0; extremity < n && idx >= 0; --idx )
1204 const struct weighted_value *wv = m->wvp[idx];
1205 struct case_node *cn = wv->case_nos;
1207 for (j = 0 ; j < wv->w ; ++j )
1209 if ( extremity + j >= n )
1212 tab_float(t, col + 3, row + extremity + j,
1216 tab_float(t, col + 2, row + extremity + j,
1225 extremity += wv->w ;
1230 /* Show the descriptives table */
1232 show_descriptives(struct variable **dependent_var,
1234 struct factor *fctr)
1237 int heading_columns ;
1239 const int n_stat_rows = 13;
1241 const int heading_rows = 1;
1243 struct tab_table *tbl;
1250 heading_columns = 4;
1251 n_factors = hsh_count(fctr->fstats);
1253 n_rows = n_dep_var * n_stat_rows * n_factors;
1255 if ( fctr->indep_var[1] )
1256 heading_columns = 5;
1260 heading_columns = 3;
1261 n_rows = n_dep_var * n_stat_rows;
1264 n_rows += heading_rows;
1266 n_cols = heading_columns + 2;
1269 tbl = tab_create (n_cols, n_rows, 0);
1271 tab_headers (tbl, heading_columns + 1, 0, heading_rows, 0);
1273 tab_dim (tbl, tab_natural_dimensions);
1275 /* Outline the box and have no internal lines*/
1280 n_cols - 1, n_rows - 1);
1282 tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
1284 tab_vline (tbl, TAL_1, heading_columns, 0, n_rows - 1);
1285 tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows - 1);
1286 tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1);
1288 tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Statistic"));
1289 tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Std. Error"));
1291 tab_title (tbl, 0, _("Descriptives"));
1294 for ( i = 0 ; i < n_dep_var ; ++i )
1296 const int row = heading_rows + i * n_stat_rows * n_factors ;
1299 tab_hline(tbl, TAL_1, 0, n_cols - 1, row );
1302 i * n_stat_rows * n_factors + heading_rows,
1303 TAB_LEFT | TAT_TITLE,
1304 var_to_string(dependent_var[i])
1310 struct factor_statistics **fs = fctr->fs;
1313 tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1314 var_to_string(fctr->indep_var[0]));
1317 if ( fctr->indep_var[1])
1318 tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
1319 var_to_string(fctr->indep_var[1]));
1324 static union value prev ;
1326 const int row = heading_rows + n_stat_rows *
1327 ( ( i * n_factors ) + count );
1330 if ( 0 != compare_values(&prev, &(*fs)->id[0],
1331 fctr->indep_var[0]->width))
1335 tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
1339 TAB_LEFT | TAT_TITLE,
1340 value_to_string(&(*fs)->id[0], fctr->indep_var[0])
1344 prev = (*fs)->id[0];
1346 if (fctr->indep_var[1] && count > 0 )
1347 tab_hline(tbl, TAL_1, 2, n_cols - 1, row);
1349 if ( fctr->indep_var[1])
1350 tab_text (tbl, 2, row,
1351 TAB_LEFT | TAT_TITLE,
1352 value_to_string(&(*fs)->id[1], fctr->indep_var[1])
1355 populate_descriptives(tbl, heading_columns - 2,
1367 populate_descriptives(tbl, heading_columns - 2,
1368 i * n_stat_rows * n_factors + heading_rows,
1387 /* Fill in the descriptives data */
1389 populate_descriptives(struct tab_table *tbl, int col, int row,
1390 const struct metrics *m)
1393 const double t = gsl_cdf_tdist_Qinv(1 - cmd.n_cinterval[0]/100.0/2.0, \
1399 TAB_LEFT | TAT_TITLE,
1402 tab_float (tbl, col + 2,
1408 tab_float (tbl, col + 3,
1417 TAB_LEFT | TAT_TITLE | TAT_PRINTF,
1418 _("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]);
1421 tab_text (tbl, col + 1,
1423 TAB_LEFT | TAT_TITLE,
1426 tab_float (tbl, col + 2,
1429 m->mean - t * m->se_mean,
1432 tab_text (tbl, col + 1,
1434 TAB_LEFT | TAT_TITLE,
1438 tab_float (tbl, col + 2,
1441 m->mean + t * m->se_mean,
1446 TAB_LEFT | TAT_TITLE,
1447 _("5% Trimmed Mean"));
1449 tab_float (tbl, col + 2,
1457 TAB_LEFT | TAT_TITLE,
1461 struct percentile *p;
1464 p = hsh_find(m->ptile_hash, &d);
1468 tab_float (tbl, col + 2,
1477 TAB_LEFT | TAT_TITLE,
1480 tab_float (tbl, col + 2,
1489 TAB_LEFT | TAT_TITLE,
1490 _("Std. Deviation"));
1493 tab_float (tbl, col + 2,
1502 TAB_LEFT | TAT_TITLE,
1505 tab_float (tbl, col + 2,
1513 TAB_LEFT | TAT_TITLE,
1516 tab_float (tbl, col + 2,
1525 TAB_LEFT | TAT_TITLE,
1529 tab_float (tbl, col + 2,
1537 TAB_LEFT | TAT_TITLE,
1538 _("Interquartile Range"));
1541 struct percentile *p1;
1542 struct percentile *p2;
1545 p1 = hsh_find(m->ptile_hash, &d);
1548 p2 = hsh_find(m->ptile_hash, &d);
1553 tab_float (tbl, col + 2,
1564 TAB_LEFT | TAT_TITLE,
1568 tab_float (tbl, col + 2,
1574 /* stderr of skewness */
1575 tab_float (tbl, col + 3,
1584 TAB_LEFT | TAT_TITLE,
1588 tab_float (tbl, col + 2,
1594 /* stderr of kurtosis */
1595 tab_float (tbl, col + 3,
1605 /* Plot the normal and detrended normal plots for m
1606 Label the plots with factorname */
1608 np_plot(const struct metrics *m, const char *factorname)
1611 double yfirst=0, ylast=0;
1614 struct chart np_chart;
1616 /* Detrended Normal Plot */
1617 struct chart dnp_chart;
1619 /* The slope and intercept of the ideal normal probability line */
1620 const double slope = 1.0 / m->stddev;
1621 const double intercept = - m->mean / m->stddev;
1623 /* Cowardly refuse to plot an empty data set */
1624 if ( m->n_data == 0 )
1627 chart_initialise(&np_chart);
1628 chart_write_title(&np_chart, _("Normal Q-Q Plot of %s"), factorname);
1629 chart_write_xlabel(&np_chart, _("Observed Value"));
1630 chart_write_ylabel(&np_chart, _("Expected Normal"));
1632 chart_initialise(&dnp_chart);
1633 chart_write_title(&dnp_chart, _("Detrended Normal Q-Q Plot of %s"),
1635 chart_write_xlabel(&dnp_chart, _("Observed Value"));
1636 chart_write_ylabel(&dnp_chart, _("Dev from Normal"));
1638 yfirst = gsl_cdf_ugaussian_Pinv (m->wvp[0]->rank / ( m->n + 1));
1639 ylast = gsl_cdf_ugaussian_Pinv (m->wvp[m->n_data-1]->rank / ( m->n + 1));
1643 /* Need to make sure that both the scatter plot and the ideal fit into the
1645 double x_lower = min(m->min, (yfirst - intercept) / slope) ;
1646 double x_upper = max(m->max, (ylast - intercept) / slope) ;
1647 double slack = (x_upper - x_lower) * 0.05 ;
1649 chart_write_xscale(&np_chart, x_lower - slack, x_upper + slack, 5);
1651 chart_write_xscale(&dnp_chart, m->min, m->max, 5);
1655 chart_write_yscale(&np_chart, yfirst, ylast, 5);
1658 /* We have to cache the detrended data, beacause we need to
1659 find its limits before we can plot it */
1661 d_data = xmalloc (m->n_data * sizeof(double));
1662 double d_max = -DBL_MAX;
1663 double d_min = DBL_MAX;
1664 for ( i = 0 ; i < m->n_data; ++i )
1666 const double ns = gsl_cdf_ugaussian_Pinv (m->wvp[i]->rank / ( m->n + 1));
1668 chart_datum(&np_chart, 0, m->wvp[i]->v.f, ns);
1670 d_data[i] = (m->wvp[i]->v.f - m->mean) / m->stddev - ns;
1672 if ( d_data[i] < d_min ) d_min = d_data[i];
1673 if ( d_data[i] > d_max ) d_max = d_data[i];
1675 chart_write_yscale(&dnp_chart, d_min, d_max, 5);
1677 for ( i = 0 ; i < m->n_data; ++i )
1678 chart_datum(&dnp_chart, 0, m->wvp[i]->v.f, d_data[i]);
1683 chart_line(&np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y);
1684 chart_line(&dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X);
1686 chart_finalise(&np_chart);
1687 chart_finalise(&dnp_chart);
1694 /* Show the percentiles */
1696 show_percentiles(struct variable **dependent_var,
1698 struct factor *fctr)
1700 struct tab_table *tbl;
1706 struct hsh_table *ptiles ;
1708 int n_heading_columns;
1709 const int n_heading_rows = 2;
1710 const int n_stat_rows = 2;
1716 struct factor_statistics **fs = fctr->fs ;
1717 n_heading_columns = 3;
1718 n_factors = hsh_count(fctr->fstats);
1720 ptiles = (*fs)->m[0].ptile_hash;
1722 if ( fctr->indep_var[1] )
1723 n_heading_columns = 4;
1728 n_heading_columns = 2;
1730 ptiles = totals[0].ptile_hash;
1733 n_ptiles = hsh_count(ptiles);
1735 n_rows = n_heading_rows + n_dep_var * n_stat_rows * n_factors;
1737 n_cols = n_heading_columns + n_ptiles ;
1739 tbl = tab_create (n_cols, n_rows, 0);
1741 tab_headers (tbl, n_heading_columns + 1, 0, n_heading_rows, 0);
1743 tab_dim (tbl, tab_natural_dimensions);
1745 /* Outline the box and have no internal lines*/
1750 n_cols - 1, n_rows - 1);
1752 tab_hline (tbl, TAL_2, 0, n_cols - 1, n_heading_rows );
1754 tab_vline (tbl, TAL_2, n_heading_columns, 0, n_rows - 1);
1757 tab_title (tbl, 0, _("Percentiles"));
1760 tab_hline (tbl, TAL_1, n_heading_columns, n_cols - 1, 1 );
1767 n_heading_columns - 1, n_rows - 1);
1773 n_heading_columns, n_heading_rows - 1,
1774 n_cols - 1, n_rows - 1);
1776 tab_joint_text(tbl, n_heading_columns + 1, 0,
1778 TAB_CENTER | TAT_TITLE ,
1783 /* Put in the percentile break points as headings */
1785 struct percentile **p = (struct percentile **) hsh_sort(ptiles);
1790 tab_float(tbl, n_heading_columns + i++ , 1,
1799 for ( i = 0 ; i < n_dep_var ; ++i )
1801 const int n_stat_rows = 2;
1802 const int row = n_heading_rows + i * n_stat_rows * n_factors ;
1805 tab_hline(tbl, TAL_1, 0, n_cols - 1, row );
1808 i * n_stat_rows * n_factors + n_heading_rows,
1809 TAB_LEFT | TAT_TITLE,
1810 var_to_string(dependent_var[i])
1815 struct factor_statistics **fs = fctr->fs;
1818 tab_text (tbl, 1, n_heading_rows - 1,
1819 TAB_CENTER | TAT_TITLE,
1820 var_to_string(fctr->indep_var[0]));
1823 if ( fctr->indep_var[1])
1824 tab_text (tbl, 2, n_heading_rows - 1, TAB_CENTER | TAT_TITLE,
1825 var_to_string(fctr->indep_var[1]));
1830 static union value prev ;
1832 const int row = n_heading_rows + n_stat_rows *
1833 ( ( i * n_factors ) + count );
1836 if ( 0 != compare_values(&prev, &(*fs)->id[0],
1837 fctr->indep_var[0]->width))
1841 tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
1845 TAB_LEFT | TAT_TITLE,
1846 value_to_string(&(*fs)->id[0], fctr->indep_var[0])
1852 prev = (*fs)->id[0];
1854 if (fctr->indep_var[1] && count > 0 )
1855 tab_hline(tbl, TAL_1, 2, n_cols - 1, row);
1857 if ( fctr->indep_var[1])
1858 tab_text (tbl, 2, row,
1859 TAB_LEFT | TAT_TITLE,
1860 value_to_string(&(*fs)->id[1], fctr->indep_var[1])
1864 populate_percentiles(tbl, n_heading_columns - 1,
1876 populate_percentiles(tbl, n_heading_columns - 1,
1877 i * n_stat_rows * n_factors + n_heading_rows,
1894 populate_percentiles(struct tab_table *tbl, int col, int row,
1895 const struct metrics *m)
1899 struct percentile **p = (struct percentile **) hsh_sort(m->ptile_hash);
1903 TAB_LEFT | TAT_TITLE,
1904 _("Tukey\'s Hinges")
1909 TAB_LEFT | TAT_TITLE,
1910 ptile_alg_desc[m->ptile_alg]
1917 tab_float(tbl, col + i + 1 , row,
1920 if ( (*p)->p == 25 )
1921 tab_float(tbl, col + i + 1 , row + 1,
1923 m->hinges[0], 8, 2);
1925 if ( (*p)->p == 50 )
1926 tab_float(tbl, col + i + 1 , row + 1,
1928 m->hinges[1], 8, 2);
1930 if ( (*p)->p == 75 )
1931 tab_float(tbl, col + i + 1 , row + 1,
1933 m->hinges[2], 8, 2);