1 /* PSPP - EXAMINE data for normality. -*-c-*-
3 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 Author: John Darrington 2004
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
36 #include "value-labels.h"
47 +missing=miss:pairwise/!listwise,
49 incl:include/!exclude;
50 +compare=cmp:variables/!groups;
52 +statistics[st_]=descriptives,:extreme(*d:n),all,none.
60 static struct cmd_examine cmd;
62 static struct variable **dependent_vars;
64 static int n_dependent_vars;
66 static struct hsh_table *hash_table_factors;
71 /* The independent variable for this factor */
72 struct variable *indep_var;
74 /* The list of values of the independent variable */
75 struct hsh_table *hash_table_val;
77 /* The subfactor (if any) */
78 struct factor *subfactor;
83 /* Parse the clause specifying the factors */
84 static int examine_parse_independent_vars(struct cmd_examine *cmd,
85 struct hsh_table *hash_factors );
90 /* Functions to support hashes of factors */
91 int compare_factors(const struct factor *f1, const struct factor *f2,
94 unsigned hash_factor(const struct factor *f, void *aux);
96 void free_factor(struct factor *f, void *aux UNUSED);
99 /* Output functions */
100 static void show_summary(struct variable **dependent_var, int n_dep_var,
103 static void show_descriptives(struct variable **dependent_var,
105 struct factor *factor);
108 static void show_extremes(struct variable **dependent_var,
110 struct factor *factor,
114 /* Per Split function */
115 static void run_examine(const struct casefile *cf, void *cmd_);
117 static void output_examine(void);
125 if ( !parse_examine(&cmd) )
128 if ( cmd.st_n == SYSMIS )
131 if ( ! cmd.sbc_cinterval)
132 cmd.n_cinterval[0] = 95.0;
136 multipass_procedure_with_splits (run_examine, &cmd);
139 hsh_destroy(hash_table_factors);
146 /* Show all the appropriate tables */
151 /* Show totals if appropriate */
152 if ( ! cmd.sbc_nototal ||
153 ! hash_table_factors || 0 == hsh_count (hash_table_factors))
155 show_summary(dependent_vars, n_dependent_vars,0);
157 if ( cmd.sbc_statistics )
159 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
160 show_descriptives(dependent_vars, n_dependent_vars, 0);
162 if ( cmd.a_statistics[XMN_ST_EXTREME])
163 show_extremes(dependent_vars, n_dependent_vars, 0, cmd.st_n);
167 /* Show grouped statistics if appropriate */
168 if ( hash_table_factors && 0 != hsh_count (hash_table_factors))
170 struct hsh_iterator hi;
173 for(f = hsh_first(hash_table_factors,&hi);
175 f = hsh_next(hash_table_factors,&hi))
177 show_summary(dependent_vars, n_dependent_vars,f);
179 if ( cmd.sbc_statistics )
181 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
182 show_descriptives(dependent_vars, n_dependent_vars, f);
184 if ( cmd.a_statistics[XMN_ST_EXTREME])
185 show_extremes(dependent_vars, n_dependent_vars, f, cmd.st_n);
195 /* TOTAL and NOTOTAL are simple, mutually exclusive flags */
197 xmn_custom_total(struct cmd_examine *p)
199 if ( p->sbc_nototal )
201 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
209 xmn_custom_nototal(struct cmd_examine *p)
213 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
221 /* Compare two factors */
223 compare_factors (const struct factor *f1,
224 const struct factor *f2,
227 int indep_var_cmp = strcmp(f1->indep_var->name, f2->indep_var->name);
229 if ( 0 != indep_var_cmp )
230 return indep_var_cmp;
232 /* If the names are identical, and there are no subfactors then
233 the factors are identical */
234 if ( ! f1->subfactor && ! f2->subfactor )
237 /* ... otherwise we must compare the subfactors */
239 return compare_factors(f1->subfactor, f2->subfactor, aux);
243 /* Create a hash of a factor */
245 hash_factor( const struct factor *f, void *aux)
248 h = hsh_hash_string(f->indep_var->name);
251 h += hash_factor(f->subfactor, aux);
257 /* Free up a factor */
259 free_factor(struct factor *f, void *aux)
261 hsh_destroy(f->hash_table_val);
264 free_factor(f->subfactor, aux);
270 /* Parser for the VARIABLES subcommand */
272 xmn_custom_variables(struct cmd_examine *cmd )
277 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
281 if (!parse_variables (default_dict, &dependent_vars, &n_dependent_vars,
282 PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
284 free (dependent_vars);
288 assert(n_dependent_vars);
290 if ( lex_match(T_BY))
292 hash_table_factors = hsh_create(4,
293 (hsh_compare_func *) compare_factors,
294 (hsh_hash_func *) hash_factor,
295 (hsh_free_func *) free_factor, 0);
297 return examine_parse_independent_vars(cmd, hash_table_factors);
306 /* Parse the clause specifying the factors */
308 examine_parse_independent_vars(struct cmd_examine *cmd,
309 struct hsh_table *hash_table_factors )
311 struct factor *f = 0;
313 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
319 f = xmalloc(sizeof(struct factor));
321 f->hash_table_val = 0;
325 f->indep_var = parse_variable();
327 if ( ! f->hash_table_val )
328 f->hash_table_val = hsh_create(4,(hsh_compare_func *)compare_values,
329 (hsh_hash_func *)hash_value,
330 0,(void *) f->indep_var->width);
336 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
340 f->subfactor = xmalloc(sizeof(struct factor));
342 f->subfactor->indep_var = parse_variable();
344 f->subfactor->subfactor = 0;
346 f->subfactor->hash_table_val =
348 (hsh_compare_func *) compare_values,
349 (hsh_hash_func *) hash_value,
351 (void *) f->subfactor->indep_var->width);
354 hsh_insert(hash_table_factors, f);
358 if ( token == '.' || token == '/' )
361 return examine_parse_independent_vars(cmd, hash_table_factors);
365 void populate_descriptives(struct tab_table *t, int col, int row);
368 void populate_extremities(struct tab_table *t, int col, int row, int n);
371 /* Show the descriptives table */
373 show_descriptives(struct variable **dependent_var,
375 struct factor *factor)
378 int heading_columns ;
380 const int n_stat_rows = 13;
382 const int heading_rows = 1;
383 int n_rows = heading_rows ;
391 n_rows += n_dep_var * n_stat_rows;
395 assert(factor->indep_var);
396 if ( factor->subfactor == 0 )
399 n_rows += n_dep_var * hsh_count(factor->hash_table_val) * n_stat_rows;
404 n_rows += n_dep_var * hsh_count(factor->hash_table_val) *
405 hsh_count(factor->subfactor->hash_table_val) * n_stat_rows ;
409 n_cols = heading_columns + 4;
411 t = tab_create (n_cols, n_rows, 0);
413 tab_headers (t, heading_columns + 1, 0, heading_rows, 0);
415 tab_dim (t, tab_natural_dimensions);
417 /* Outline the box and have no internal lines*/
422 n_cols - 1, n_rows - 1);
424 tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows );
426 tab_vline (t, TAL_1, heading_columns, 0, n_rows - 1);
427 tab_vline (t, TAL_2, n_cols - 2, 0, n_rows - 1);
428 tab_vline (t, TAL_1, n_cols - 1, 0, n_rows - 1);
430 tab_text (t, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Statistic"));
431 tab_text (t, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Std. Error"));
434 for ( i = 0 ; i < n_dep_var ; ++i )
437 int n_subfactors = 1;
442 n_factors = hsh_count(factor->hash_table_val);
443 if ( factor->subfactor )
444 n_subfactors = hsh_count(factor->subfactor->hash_table_val);
448 row = heading_rows + i * n_stat_rows * n_factors * n_subfactors;
451 tab_hline(t, TAL_1, 0, n_cols - 1, row );
457 struct hsh_iterator hi;
461 tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
462 var_to_string(factor->indep_var));
466 for ( v = hsh_first(factor->hash_table_val, &hi);
468 v = hsh_next(factor->hash_table_val, &hi))
470 struct hsh_iterator h2;
474 row + count * n_subfactors * n_stat_rows,
475 TAB_RIGHT | TAT_TITLE,
476 value_to_string(v, factor->indep_var)
480 tab_hline (t, TAL_1, 1, n_cols - 1,
481 row + count * n_subfactors * n_stat_rows);
483 if ( factor->subfactor )
487 tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
488 var_to_string(factor->subfactor->indep_var));
490 for ( vv = hsh_first(factor->subfactor->hash_table_val, &h2);
492 vv = hsh_next(factor->subfactor->hash_table_val, &h2))
497 + count * n_subfactors * n_stat_rows
498 + count2 * n_stat_rows,
499 TAB_RIGHT | TAT_TITLE ,
500 value_to_string(vv, factor->subfactor->indep_var)
504 tab_hline (t, TAL_1, 2, n_cols - 1,
506 + count * n_subfactors * n_stat_rows
507 + count2 * n_stat_rows);
509 populate_descriptives(t, heading_columns,
511 + count * n_subfactors
513 + count2 * n_stat_rows);
521 populate_descriptives(t, heading_columns,
523 + count * n_subfactors * n_stat_rows);
531 populate_descriptives(t, heading_columns,
537 TAB_LEFT | TAT_TITLE,
538 var_to_string(dependent_var[i])
543 tab_title (t, 0, _("Descriptives"));
550 /* Fill in the descriptives data */
552 populate_descriptives(struct tab_table *t, int col, int row)
557 TAB_LEFT | TAT_TITLE,
563 TAB_LEFT | TAT_TITLE | TAT_PRINTF,
564 _("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]);
566 tab_text (t, col + 1,
568 TAB_LEFT | TAT_TITLE,
571 tab_text (t, col + 1,
573 TAB_LEFT | TAT_TITLE,
579 TAB_LEFT | TAT_TITLE,
580 _("5% Trimmed Mean"));
584 TAB_LEFT | TAT_TITLE,
589 TAB_LEFT | TAT_TITLE,
594 TAB_LEFT | TAT_TITLE,
595 _("Std. Deviation"));
599 TAB_LEFT | TAT_TITLE,
604 TAB_LEFT | TAT_TITLE,
609 TAB_LEFT | TAT_TITLE,
614 TAB_LEFT | TAT_TITLE,
615 _("Interquartile Range"));
619 TAB_LEFT | TAT_TITLE,
624 TAB_LEFT | TAT_TITLE,
630 show_summary(struct variable **dependent_var,
632 struct factor *factor)
634 static const char *subtitle[]=
642 int heading_columns ;
644 const int heading_rows = 3;
647 int n_rows = heading_rows;
656 assert(factor->indep_var);
657 if ( factor->subfactor == 0 )
660 n_rows += n_dep_var * hsh_count(factor->hash_table_val);
665 n_rows += n_dep_var * hsh_count(factor->hash_table_val) *
666 hsh_count(factor->subfactor->hash_table_val) ;
671 n_cols = heading_columns + 6;
673 t = tab_create (n_cols,n_rows,0);
674 tab_headers (t, heading_columns, 0, heading_rows, 0);
676 tab_dim (t, tab_natural_dimensions);
678 /* Outline the box and have vertical internal lines*/
683 n_cols - 1, n_rows - 1);
685 tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows );
686 tab_hline (t, TAL_1, heading_columns, n_cols - 1, 1 );
687 tab_hline (t, TAL_1, 0, n_cols - 1, heading_rows -1 );
689 tab_vline (t, TAL_2, heading_columns, 0, n_rows - 1);
692 tab_title (t, 0, _("Case Processing Summary"));
695 tab_joint_text(t, heading_columns, 0,
697 TAB_CENTER | TAT_TITLE,
700 /* Remove lines ... */
709 tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
710 var_to_string(factor->indep_var));
712 if ( factor->subfactor )
713 tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
714 var_to_string(factor->subfactor->indep_var));
717 for ( i = 0 ; i < 3 ; ++i )
719 tab_text (t, heading_columns + i*2 , 2, TAB_CENTER | TAT_TITLE, _("N"));
720 tab_text (t, heading_columns + i*2 + 1, 2, TAB_CENTER | TAT_TITLE,
723 tab_joint_text(t, heading_columns + i*2 , 1,
724 heading_columns + i*2 + 1, 1,
725 TAB_CENTER | TAT_TITLE,
730 heading_columns + i*2, 1,
731 heading_columns + i*2 + 1, 1);
736 for ( i = 0 ; i < n_dep_var ; ++i )
738 int n_subfactors = 1;
743 n_factors = hsh_count(factor->hash_table_val);
744 if ( factor->subfactor )
745 n_subfactors = hsh_count(factor->subfactor->hash_table_val);
749 0, i * n_factors * n_subfactors + heading_rows,
750 TAB_LEFT | TAT_TITLE,
751 var_to_string(dependent_var[i])
756 struct hsh_iterator hi;
760 for ( v = hsh_first(factor->hash_table_val, &hi);
762 v = hsh_next(factor->hash_table_val, &hi))
764 struct hsh_iterator h2;
768 i * n_factors * n_subfactors + heading_rows
769 + count * n_subfactors,
770 TAB_RIGHT | TAT_TITLE,
771 value_to_string(v, factor->indep_var)
774 if ( factor->subfactor )
777 for ( vv = hsh_first(factor->subfactor->hash_table_val, &h2);
779 vv = hsh_next(factor->subfactor->hash_table_val, &h2))
783 i * n_factors * n_subfactors + heading_rows
784 + count * n_subfactors + count2,
785 TAB_RIGHT | TAT_TITLE ,
786 value_to_string(vv, factor->subfactor->indep_var)
804 static int bad_weight_warn = 1;
807 run_examine(const struct casefile *cf, void *cmd_)
809 struct hsh_iterator hi;
812 struct casereader *r;
815 const struct cmd_examine *cmd = (struct cmd_examine *) cmd_;
817 /* Make sure we haven't got rubbish left over from a
819 if ( hash_table_factors )
821 for ( fctr = hsh_first(hash_table_factors, &hi);
823 fctr = hsh_next (hash_table_factors, &hi) )
825 hsh_clear(fctr->hash_table_val);
827 while ( (fctr = fctr->subfactor) )
828 hsh_clear(fctr->hash_table_val);
833 for(r = casefile_get_reader (cf);
834 casereader_read (r, &c) ;
839 const double weight =
840 dict_get_case_weight(default_dict, &c, &bad_weight_warn);
842 if ( hash_table_factors )
844 for ( fctr = hsh_first(hash_table_factors, &hi);
846 fctr = hsh_next (hash_table_factors, &hi) )
848 const union value *val = case_data (&c, fctr->indep_var->fv);
849 hsh_insert(fctr->hash_table_val, (void *) val);
851 if ( fctr->subfactor )
853 val = case_data (&c, fctr->subfactor->indep_var->fv);
854 hsh_insert(fctr->subfactor->hash_table_val, (void *) val);
866 show_extremes(struct variable **dependent_var,
868 struct factor *factor,
872 int heading_columns ;
874 const int heading_rows = 1;
877 int n_rows = heading_rows;
881 heading_columns = 1 + 1;
882 n_rows += n_dep_var * 2 * n_extremities;
886 assert(factor->indep_var);
887 if ( factor->subfactor == 0 )
889 heading_columns = 2 + 1;
890 n_rows += n_dep_var * 2 * n_extremities
891 * hsh_count(factor->hash_table_val);
895 heading_columns = 3 + 1;
896 n_rows += n_dep_var * 2 * n_extremities
897 * hsh_count(factor->hash_table_val)
898 * hsh_count(factor->subfactor->hash_table_val) ;
903 n_cols = heading_columns + 3;
905 t = tab_create (n_cols,n_rows,0);
906 tab_headers (t, heading_columns, 0, heading_rows, 0);
908 tab_dim (t, tab_natural_dimensions);
910 /* Outline the box and have vertical internal lines*/
915 n_cols - 1, n_rows - 1);
919 tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows );
921 tab_title (t, 0, _("Extreme Values"));
926 /* Remove lines ... */
935 tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
936 var_to_string(factor->indep_var));
938 if ( factor->subfactor )
939 tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
940 var_to_string(factor->subfactor->indep_var));
943 tab_text (t, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value"));
944 tab_text (t, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number"));
947 for ( i = 0 ; i < n_dep_var ; ++i )
949 int n_subfactors = 1;
954 n_factors = hsh_count(factor->hash_table_val);
955 if ( factor->subfactor )
956 n_subfactors = hsh_count(factor->subfactor->hash_table_val);
960 0, i * 2 * n_extremities * n_factors *
961 n_subfactors + heading_rows,
962 TAB_LEFT | TAT_TITLE,
963 var_to_string(dependent_var[i])
969 TAL_1, 0, n_cols - 1,
970 heading_rows + 2 * n_extremities *
971 (i * n_factors * n_subfactors )
976 struct hsh_iterator hi;
980 for ( v = hsh_first(factor->hash_table_val, &hi);
982 v = hsh_next(factor->hash_table_val, &hi))
984 struct hsh_iterator h2;
987 tab_text (t, 1, heading_rows + 2 * n_extremities *
988 (i * n_factors * n_subfactors
989 + count * n_subfactors),
990 TAB_RIGHT | TAT_TITLE,
991 value_to_string(v, factor->indep_var)
995 tab_hline (t, TAL_1, 1, n_cols - 1,
996 heading_rows + 2 * n_extremities *
997 (i * n_factors * n_subfactors
998 + count * n_subfactors));
1001 if ( factor->subfactor )
1004 for ( vv = hsh_first(factor->subfactor->hash_table_val, &h2);
1006 vv = hsh_next(factor->subfactor->hash_table_val, &h2))
1009 tab_text(t, 2, heading_rows + 2 * n_extremities *
1010 (i * n_factors * n_subfactors
1011 + count * n_subfactors + count2 ),
1012 TAB_RIGHT | TAT_TITLE ,
1013 value_to_string(vv, factor->subfactor->indep_var)
1018 tab_hline (t, TAL_1, 2, n_cols - 1,
1019 heading_rows + 2 * n_extremities *
1020 (i * n_factors * n_subfactors
1021 + count * n_subfactors + count2 ));
1023 populate_extremities(t,3,
1024 heading_rows + 2 * n_extremities *
1025 (i * n_factors * n_subfactors
1026 + count * n_subfactors + count2),
1034 populate_extremities(t,2,
1035 heading_rows + 2 * n_extremities *
1036 (i * n_factors * n_subfactors
1037 + count * n_subfactors),
1046 populate_extremities(t, 1,
1047 heading_rows + 2 * n_extremities *
1048 (i * n_factors * n_subfactors ),
1061 /* Fill in the extremities table */
1063 populate_extremities(struct tab_table *t, int col, int row, int n)
1067 tab_text(t, col, row,
1068 TAB_RIGHT | TAT_TITLE ,
1073 tab_text(t, col, row + n ,
1074 TAB_RIGHT | TAT_TITLE ,
1079 for (i = 0; i < n ; ++i )
1081 tab_float(t, col + 1, row + i,
1082 TAB_RIGHT | TAT_TITLE,
1085 tab_float(t, col + 1, row + i + n,
1086 TAB_RIGHT | TAT_TITLE,