1 /* PSPP - EXAMINE data for normality . -*-c-*-
3 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 Author: John Darrington 2004
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
36 #include "value-labels.h"
47 +missing=miss:pairwise/!listwise,
49 incl:include/!exclude;
50 +compare=cmp:variables/!groups;
52 +statistics[st_]=descriptives,:extreme(*d:n),all,none.
/* File-scope state shared by the parser callbacks, the per-split pass and
   the table writers in this file. */

/* Parsed command settings; filled in by parse_examine(&cmd). */
60 static struct cmd_examine cmd;
/* Dependent variables and their count, as returned by parse_variables()
   in xmn_custom_variables(). */
62 static struct variable **dependent_vars;
64 static int n_dependent_vars;
/* Table of `struct factor' entries.  It is only created when a BY clause is
   matched in xmn_custom_variables(), so readers test it for null first. */
66 static struct hsh_table *hash_table_factors;
/* Members of a factor record (the enclosing declaration is not among the
   visible lines).
   NOTE(review): the functions in this file access members named v1, v2,
   hash_table_v1 and hash_table_v2 on `struct factor'; none of those appear
   in the lines shown here -- confirm which declaration is the live one. */
71 /* The independent variable for this factor */
72 struct variable *indep_var;
74 /* The list of values of the independent variable */
75 struct hsh_table *hash_table_val;
77 /* The subfactor (if any) */
78 struct factor *subfactor;
/* Forward declarations for the helpers defined further down this file.
   compare_factors, hash_factor and free_factor are the callbacks handed
   (through casts) to hsh_create() when the factor table is built. */
83 /* Parse the clause specifying the factors */
84 static int examine_parse_independent_vars(struct cmd_examine *cmd,
85 struct hsh_table *hash_factors );
90 /* Functions to support hashes of factors */
91 int compare_factors(const struct factor *f1, const struct factor *f2,
94 unsigned hash_factor(const struct factor *f, void *aux);
96 void free_factor(struct factor *f, void *aux UNUSED);
99 /* Output functions */
100 static void show_summary(struct variable **dependent_var, int n_dep_var,
103 static void show_descriptives(struct variable **dependent_var,
105 struct factor *factor);
108 static void show_extremes(struct variable **dependent_var,
110 struct factor *factor,
114 /* Per Split function */
115 static void run_examine(const struct casefile *cf, void *cmd_);
117 static void output_examine(void);
/* Body of the command driver (its signature is outside the visible lines):
   parse the command, apply defaults, run the per-split pass, then release
   the factor table. */
125 if ( !parse_examine(&cmd) )
/* st_n is later passed to show_extremes() as the number of extreme values;
   the default applied when it is SYSMIS is not visible here. */
128 if ( cmd.st_n == SYSMIS )
/* sbc_cinterval unset (presumably: no CINTERVAL subcommand was given), so
   fall back to a 95% confidence level. */
131 if ( ! cmd.sbc_cinterval)
132 cmd.n_cinterval[0] = 95.0;
/* run_examine is the per-split callback; &cmd is handed to it as its
   second argument. */
136 multipass_procedure_with_splits (run_examine, &cmd);
/* NOTE(review): hash_table_factors is only created when BY is parsed, so it
   may be null here -- confirm hsh_destroy() accepts null.  The pointer is
   also not reset to null in the visible lines; confirm a later run of the
   command without BY cannot observe the destroyed table. */
139 hsh_destroy(hash_table_factors);
/* Table output driver (presumably the body of output_examine(); the
   signature is outside the visible lines).  Emits the ungrouped tables
   first, then one set of tables per factor.  A null factor argument to the
   show_* functions selects the ungrouped layout. */
146 /* Show all the appropriate tables */
151 /* Show totals if appropriate */
/* Totals are shown unless NOTOTAL was given, and always when there are no
   factors at all (no table, or an empty one). */
152 if ( ! cmd.sbc_nototal ||
153 ! hash_table_factors || 0 == hsh_count (hash_table_factors))
155 show_summary(dependent_vars, n_dependent_vars,0);
157 if ( cmd.sbc_statistics )
159 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
160 show_descriptives(dependent_vars, n_dependent_vars, 0);
162 if ( cmd.a_statistics[XMN_ST_EXTREME])
163 show_extremes(dependent_vars, n_dependent_vars, 0, cmd.st_n);
167 /* Show grouped statistics if appropriate */
168 if ( hash_table_factors && 0 != hsh_count (hash_table_factors))
170 struct hsh_iterator hi;
/* Walk every factor in the table and repeat the same three tables for it. */
173 for(f = hsh_first(hash_table_factors,&hi);
175 f = hsh_next(hash_table_factors,&hi))
177 show_summary(dependent_vars, n_dependent_vars,f);
179 if ( cmd.sbc_statistics )
181 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
182 show_descriptives(dependent_vars, n_dependent_vars, f);
184 if ( cmd.a_statistics[XMN_ST_EXTREME])
185 show_extremes(dependent_vars, n_dependent_vars, f, cmd.st_n);
/* Parser callback for TOTAL: reports a syntax error (SE) when NOTOTAL has
   already been seen.  The return statements are outside the visible
   lines. */
195 /* TOTAL and NOTOTAL are simple, mutually exclusive flags */
197 xmn_custom_total(struct cmd_examine *p)
199 if ( p->sbc_nototal )
201 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
/* Parser callback for NOTOTAL, the counterpart of xmn_custom_total(): emits
   the same mutual-exclusion syntax error.  The condition guarding the
   message is not among the visible lines (presumably it tests the TOTAL
   flag -- confirm). */
209 xmn_custom_nototal(struct cmd_examine *p)
213 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
/* Ordering callback for the factor table.  Factors are compared by the name
   of their first variable (strcmp); the presence or absence of a second
   variable is then examined, and when both factors have one, the second
   variables' names decide.  The values returned by the intermediate
   branches are outside the visible lines. */
221 /* Compare two factors */
223 compare_factors (const struct factor *f1,
224 const struct factor *f2,
229 v1_cmp = strcmp(f1->v1->name, f2->v1->name);
234 if ( f1->v2 == 0 && f2->v2 == 0 )
237 if ( f1->v2 == 0 && f2->v2 != 0 )
240 if ( f1->v2 != 0 && f2->v2 == 0 )
/* Reached with both v2 pointers non-null if each test above returns. */
243 return strcmp(f1->v2->name, f2->v2->name);
/* Hash callback for the factor table: starts from the hash of the first
   variable's name and adds the hash of the second variable's name.
   NOTE(review): f->v2 can be null for single-variable factors (see
   compare_factors); the guard around the second line is not visible here --
   confirm it exists. */
247 /* Create a hash of a factor */
249 hash_factor( const struct factor *f,
253 h = hsh_hash_string(f->v1->name);
256 h += hsh_hash_string(f->v2->name);
/* Free callback for the factor table: destroys the two per-variable value
   tables owned by the factor.
   NOTE(review): hash_table_v2 is initialised to 0 and only created when a
   second variable is parsed, so it may be null here -- confirm hsh_destroy()
   accepts null.  Whether `f' itself is released is outside the visible
   lines. */
262 /* Free up a factor */
264 free_factor(struct factor *f, void *aux UNUSED)
266 hsh_destroy(f->hash_table_v1);
267 hsh_destroy(f->hash_table_v2);
/* Parser callback for the VARIABLES subcommand.  Reads the dependent
   variable list into the file-scope dependent_vars / n_dependent_vars and,
   when a BY keyword follows, creates the factor table and hands over to
   examine_parse_independent_vars(). */
273 /* Parser for the variables sub command */
275 xmn_custom_variables(struct cmd_examine *cmd )
/* Looks at the current token: is it an identifier naming a variable in the
   default dictionary?  The rest of this condition is not visible. */
280 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
/* Dependent variables must be numeric, non-scratch and free of
   duplicates. */
284 if (!parse_variables (default_dict, &dependent_vars, &n_dependent_vars,
285 PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
287 free (dependent_vars);
291 assert(n_dependent_vars);
293 if ( lex_match(T_BY))
/* The factor table uses the factor callbacks defined in this file; the
   casts adapt their typed signatures to the generic hsh callback types. */
295 hash_table_factors = hsh_create(4,
296 (hsh_compare_func *) compare_factors,
297 (hsh_hash_func *) hash_factor,
298 (hsh_free_func *) free_factor, 0);
300 return examine_parse_independent_vars(cmd, hash_table_factors);
/* Parses one factor of the BY clause -- a variable, optionally followed by
   a second variable -- allocates a `struct factor' for it with a value hash
   per variable, inserts it into hash_table_factors, and then calls itself
   for the next factor.  Note that the parameter hash_table_factors shadows
   the file-scope variable of the same name.
   NOTE(review): each variable's integer `width' is cast to `void *' to
   serve as the aux argument of hsh_create(); confirm the value callbacks
   convert it back the same way. */
309 /* Parse the clause specifying the factors */
311 examine_parse_independent_vars(struct cmd_examine *cmd,
312 struct hsh_table *hash_table_factors )
314 struct factor *f = 0;
316 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
322 f = xmalloc(sizeof(struct factor));
/* Start with no value tables; free_factor() and run_examine() both read
   these pointers. */
325 f->hash_table_v2 = 0;
326 f->hash_table_v1 = 0;
329 f->v1 = parse_variable();
331 if ( ! f->hash_table_v1 )
332 f->hash_table_v1 = hsh_create(4,(hsh_compare_func *)compare_values,
333 (hsh_hash_func *)hash_value,
334 0,(void *) f->v1->width);
/* Optional second variable; the keyword that introduces it is outside the
   visible lines. */
339 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
343 f->v2 = parse_variable();
345 if ( !f->hash_table_v2 )
347 f->hash_table_v2 = hsh_create(4,
348 (hsh_compare_func *) compare_values,
349 (hsh_hash_func *) hash_value,
351 (void *) f->v2->width);
355 hsh_insert(hash_table_factors, f);
/* A '.' or '/' token is tested here as the end of the factor list;
   otherwise the call below parses the next factor. */
359 if ( token == '.' || token == '/' )
362 return examine_parse_independent_vars(cmd, hash_table_factors);
/* Prototypes for the helpers that fill one cell block of a table starting
   at (col, row); they are defined after the show_* functions that call
   them.  `n' is the number of extreme values per end. */
366 void populate_descriptives(struct tab_table *t, int col, int row);
369 void populate_extremities(struct tab_table *t, int col, int row, int n);
/* Builds the table titled "Descriptives".
   Layout, as established by the visible lines:
     - one heading row, then n_stat_rows (13) rows for every combination of
       dependent variable, first-factor value and second-factor value;
     - n_cols is heading_columns + 4, and the two right-most columns are
       headed "Statistic" and "Std. Error";
     - column 0 carries the dependent variable name, column 1 the value of
       factor->v1, column 2 the value of factor->v2.
   populate_descriptives() is called once per cell block to write the
   statistic labels.  A null `factor' selects the ungrouped layout (see the
   calls in the output driver). */
372 /* Show the descriptives table */
374 show_descriptives(struct variable **dependent_var,
376 struct factor *factor)
379 int heading_columns ;
381 const int n_stat_rows = 13;
383 const int heading_rows = 1;
384 int n_rows = heading_rows ;
392 n_rows += n_dep_var * n_stat_rows;
397 if ( factor->v2 == 0 )
400 n_rows += n_dep_var * hsh_count(factor->hash_table_v1) * n_stat_rows;
405 n_rows += n_dep_var * hsh_count(factor->hash_table_v1) *
406 hsh_count(factor->hash_table_v2) * n_stat_rows ;
410 n_cols = heading_columns + 4;
412 t = tab_create (n_cols, n_rows, 0);
414 tab_headers (t, heading_columns, 0, heading_rows, 0);
416 tab_dim (t, tab_natural_dimensions);
418 /* Outline the box and have no internal lines*/
423 n_cols - 1, n_rows - 1);
425 tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows );
427 tab_vline (t, TAL_1, heading_columns, 0, n_rows - 1);
428 tab_vline (t, TAL_2, n_cols - 2, 0, n_rows - 1);
429 tab_vline (t, TAL_1, n_cols - 1, 0, n_rows - 1);
431 tab_text (t, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Statistic"));
432 tab_text (t, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Std. Error"));
/* One band of rows per dependent variable. */
435 for ( i = 0 ; i < n_dep_var ; ++i )
438 int n_subfactors = 1;
443 n_factors = hsh_count(factor->hash_table_v1);
445 n_subfactors = hsh_count(factor->hash_table_v2);
/* First row of this dependent variable's band. */
449 row = heading_rows + i * n_stat_rows * n_factors * n_subfactors;
452 tab_hline(t, TAL_1, 0, n_cols - 1, row );
458 struct hsh_iterator hi;
462 tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
463 var_to_string(factor->v1));
/* `count' is used below as the index of the current v1 value; its
   declaration and increment are outside the visible lines. */
467 for ( v = hsh_first(factor->hash_table_v1, &hi);
469 v = hsh_next(factor->hash_table_v1, &hi))
471 struct hsh_iterator h2;
475 row + count * n_subfactors * n_stat_rows,
476 TAB_RIGHT | TAT_TITLE,
477 value_to_string(v, factor->v1)
481 tab_hline (t, TAL_1, 1, n_cols - 1,
482 row + count * n_subfactors * n_stat_rows);
488 tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
489 var_to_string(factor->v2));
/* `count2' plays the same role for the v2 values. */
491 for ( vv = hsh_first(factor->hash_table_v2, &h2);
493 vv = hsh_next(factor->hash_table_v2, &h2))
498 + count * n_subfactors * n_stat_rows
499 + count2 * n_stat_rows,
500 TAB_RIGHT | TAT_TITLE ,
501 value_to_string(vv, factor->v2)
505 tab_hline (t, TAL_1, 2, n_cols - 1,
507 + count * n_subfactors * n_stat_rows
508 + count2 * n_stat_rows);
/* NOTE(review): the label and rule just above offset by
   `count * n_subfactors * n_stat_rows'; here only `count * n_subfactors'
   is visible, with a line missing from view before `+ count2' -- confirm
   the `* n_stat_rows' factor is present. */
510 populate_descriptives(t, heading_columns,
512 + count * n_subfactors
514 + count2 * n_stat_rows);
522 populate_descriptives(t, heading_columns,
524 + count * n_subfactors * n_stat_rows);
532 populate_descriptives(t, heading_columns,
538 TAB_LEFT | TAT_TITLE,
539 var_to_string(dependent_var[i])
544 tab_title (t, 0, _("Descriptives"));
/* Writes the left-aligned statistic labels for one block of the
   Descriptives table, starting at cell (col, row).  Labels are placed in
   column `col' and, for two entries, in `col + 1'.  The confidence-interval
   label is formatted with cmd.n_cinterval[0].  Only label text appears in
   the visible lines; no numeric values are written there. */
551 /* Fill in the descriptives data */
553 populate_descriptives(struct tab_table *t, int col, int row)
558 TAB_LEFT | TAT_TITLE,
564 TAB_LEFT | TAT_TITLE | TAT_PRINTF,
565 _("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]);
567 tab_text (t, col + 1,
569 TAB_LEFT | TAT_TITLE,
572 tab_text (t, col + 1,
574 TAB_LEFT | TAT_TITLE,
580 TAB_LEFT | TAT_TITLE,
581 _("5% Trimmed Mean"));
585 TAB_LEFT | TAT_TITLE,
590 TAB_LEFT | TAT_TITLE,
595 TAB_LEFT | TAT_TITLE,
596 _("Std. Deviation"));
600 TAB_LEFT | TAT_TITLE,
605 TAB_LEFT | TAT_TITLE,
610 TAB_LEFT | TAT_TITLE,
615 TAB_LEFT | TAT_TITLE,
616 _("Interquartile Range"));
620 TAB_LEFT | TAT_TITLE,
625 TAB_LEFT | TAT_TITLE,
/* Builds the table titled "Case Processing Summary".
   Layout, as established by the visible lines:
     - three heading rows, then one row for every combination of dependent
       variable, first-factor value and second-factor value;
     - n_cols is heading_columns + 6: three column pairs, each with a joint
       heading on row 1 and, on row 2, "N" plus a second heading whose text
       is outside the visible lines;
     - column 0 carries the dependent variable name, column 1 the value of
       factor->v1, column 2 the value of factor->v2.
   A null `factor' selects the ungrouped layout (see the calls in the output
   driver). */
631 show_summary(struct variable **dependent_var,
633 struct factor *factor)
635 static const char *subtitle[]=
643 int heading_columns ;
645 const int heading_rows = 3;
648 int n_rows = heading_rows;
658 if ( factor->v2 == 0 )
661 n_rows += n_dep_var * hsh_count(factor->hash_table_v1);
666 n_rows += n_dep_var * hsh_count(factor->hash_table_v1) *
667 hsh_count(factor->hash_table_v2) ;
672 n_cols = heading_columns + 6;
674 t = tab_create (n_cols,n_rows,0);
675 tab_headers (t, heading_columns, 0, heading_rows, 0);
677 tab_dim (t, tab_natural_dimensions);
679 /* Outline the box and have vertical internal lines*/
684 n_cols - 1, n_rows - 1);
686 tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows );
687 tab_hline (t, TAL_1, heading_columns, n_cols - 1, 1 );
688 tab_hline (t, TAL_1, 0, n_cols - 1, heading_rows -1 );
690 tab_vline (t, TAL_2, heading_columns, 0, n_rows - 1);
693 tab_title (t, 0, _("Case Processing Summary"));
696 tab_joint_text(t, heading_columns, 0,
698 TAB_CENTER | TAT_TITLE,
701 /* Remove lines ... */
710 tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
711 var_to_string(factor->v1));
714 tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
715 var_to_string(factor->v2));
/* Three column pairs; pair i occupies columns heading_columns + 2*i and
   heading_columns + 2*i + 1. */
718 for ( i = 0 ; i < 3 ; ++i )
720 tab_text (t, heading_columns + i*2 , 2, TAB_CENTER | TAT_TITLE, _("N"));
721 tab_text (t, heading_columns + i*2 + 1, 2, TAB_CENTER | TAT_TITLE,
724 tab_joint_text(t, heading_columns + i*2 , 1,
725 heading_columns + i*2 + 1, 1,
726 TAB_CENTER | TAT_TITLE,
731 heading_columns + i*2, 1,
732 heading_columns + i*2 + 1, 1);
/* Row labels: one band per dependent variable, subdivided by factor
   values. */
737 for ( i = 0 ; i < n_dep_var ; ++i )
739 int n_subfactors = 1;
744 n_factors = hsh_count(factor->hash_table_v1);
746 n_subfactors = hsh_count(factor->hash_table_v2);
750 0, i * n_factors * n_subfactors + heading_rows,
751 TAB_LEFT | TAT_TITLE,
752 var_to_string(dependent_var[i])
757 struct hsh_iterator hi;
761 for ( v = hsh_first(factor->hash_table_v1, &hi);
763 v = hsh_next(factor->hash_table_v1, &hi))
765 struct hsh_iterator h2;
769 i * n_factors * n_subfactors + heading_rows
770 + count * n_subfactors,
771 TAB_RIGHT | TAT_TITLE,
772 value_to_string(v, factor->v1)
778 for ( vv = hsh_first(factor->hash_table_v2, &h2);
780 vv = hsh_next(factor->hash_table_v2, &h2))
784 i * n_factors * n_subfactors + heading_rows
785 + count * n_subfactors + count2,
786 TAB_RIGHT | TAT_TITLE ,
787 value_to_string(vv, factor->v2)
/* Per-split worker, passed to multipass_procedure_with_splits().
   `cmd_' is the command structure handed through as an untyped pointer.
   Steps visible here:
     1. for every factor in hash_table_factors, clear its value table(s) so
        that nothing survives from an earlier invocation;
     2. read every case from the casefile, obtain its weight, and insert
        the case's value for v1 (and for v2 when that table exists) into the
        factor's value table(s).
   bad_weight_warn is a file-scope flag passed by address to
   dict_get_case_weight().
   NOTE(review): the pointers inserted into the value tables are the
   addresses returned by case_data() for the current case; whether that
   storage outlives the read loop is not visible here -- confirm before
   relying on the tables after the pass.  The use made of `weight' is also
   outside the visible lines. */
805 static int bad_weight_warn = 1;
808 run_examine(const struct casefile *cf, void *cmd_)
810 struct hsh_iterator hi;
813 struct casereader *r;
816 const struct cmd_examine *cmd = (struct cmd_examine *) cmd_;
818 /* Make sure we haven't got rubbish left over from a
820 if ( hash_table_factors )
822 for ( fctr = hsh_first(hash_table_factors, &hi);
824 fctr = hsh_next (hash_table_factors, &hi) )
826 hsh_clear(fctr->hash_table_v1);
827 if ( fctr->hash_table_v2 )
828 hsh_clear(fctr->hash_table_v2);
833 for(r = casefile_get_reader (cf);
834 casereader_read (r, &c) ;
839 const double weight =
840 dict_get_case_weight(default_dict, &c, &bad_weight_warn);
842 if ( hash_table_factors )
844 for ( fctr = hsh_first(hash_table_factors, &hi);
846 fctr = hsh_next (hash_table_factors, &hi) )
848 const union value *val = case_data (&c, fctr->v1->fv);
849 hsh_insert(fctr->hash_table_v1, (void *) val);
851 if ( fctr->hash_table_v2 )
853 val = case_data (&c, fctr->v2->fv);
854 hsh_insert(fctr->hash_table_v2, (void *) val);
/* Builds the table titled "Extreme Values".
   Layout, as established by the visible lines:
     - one heading row, then 2 * n_extremities rows for every combination of
       dependent variable, first-factor value and second-factor value;
     - heading_columns is 2 with no factor, 3 with a one-variable factor and
       4 with a two-variable factor; n_cols is heading_columns + 3;
     - the two right-most columns are headed "Case Number" and "Value";
     - populate_extremities() is called per cell block with starting column
       1, 2 or 3 to match the number of factor columns.
   A null `factor' selects the ungrouped layout (see the calls in the output
   driver). */
866 show_extremes(struct variable **dependent_var,
868 struct factor *factor,
872 int heading_columns ;
874 const int heading_rows = 1;
877 int n_rows = heading_rows;
881 heading_columns = 1 + 1;
882 n_rows += n_dep_var * 2 * n_extremities;
887 if ( factor->v2 == 0 )
889 heading_columns = 2 + 1;
890 n_rows += n_dep_var * 2 * n_extremities
891 * hsh_count(factor->hash_table_v1);
895 heading_columns = 3 + 1;
896 n_rows += n_dep_var * 2 * n_extremities
897 * hsh_count(factor->hash_table_v1)
898 * hsh_count(factor->hash_table_v2) ;
903 n_cols = heading_columns + 3;
905 t = tab_create (n_cols,n_rows,0);
906 tab_headers (t, heading_columns, 0, heading_rows, 0);
908 tab_dim (t, tab_natural_dimensions);
910 /* Outline the box and have vertical internal lines*/
915 n_cols - 1, n_rows - 1);
919 tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows );
921 tab_title (t, 0, _("Extreme Values"));
926 /* Remove lines ... */
935 tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
936 var_to_string(factor->v1));
939 tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
940 var_to_string(factor->v2));
943 tab_text (t, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value"));
944 tab_text (t, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number"));
/* One band of rows per dependent variable; every row offset below is a
   cell index multiplied by the 2 * n_extremities rows each cell occupies. */
947 for ( i = 0 ; i < n_dep_var ; ++i )
949 int n_subfactors = 1;
954 n_factors = hsh_count(factor->hash_table_v1);
956 n_subfactors = hsh_count(factor->hash_table_v2);
960 0, i * 2 * n_extremities * n_factors *
961 n_subfactors + heading_rows,
962 TAB_LEFT | TAT_TITLE,
963 var_to_string(dependent_var[i])
969 TAL_1, 0, n_cols - 1,
970 heading_rows + 2 * n_extremities *
971 (i * n_factors * n_subfactors )
976 struct hsh_iterator hi;
980 for ( v = hsh_first(factor->hash_table_v1, &hi);
982 v = hsh_next(factor->hash_table_v1, &hi))
984 struct hsh_iterator h2;
987 tab_text (t, 1, heading_rows + 2 * n_extremities *
988 (i * n_factors * n_subfactors
989 + count * n_subfactors),
990 TAB_RIGHT | TAT_TITLE,
991 value_to_string(v, factor->v1)
995 tab_hline (t, TAL_1, 1, n_cols - 1,
996 heading_rows + 2 * n_extremities *
997 (i * n_factors * n_subfactors
998 + count * n_subfactors));
1004 for ( vv = hsh_first(factor->hash_table_v2, &h2);
1006 vv = hsh_next(factor->hash_table_v2, &h2))
1009 tab_text(t, 2, heading_rows + 2 * n_extremities *
1010 (i * n_factors * n_subfactors
1011 + count * n_subfactors + count2 ),
1012 TAB_RIGHT | TAT_TITLE ,
1013 value_to_string(vv, factor->v2)
1018 tab_hline (t, TAL_1, 2, n_cols - 1,
1019 heading_rows + 2 * n_extremities *
1020 (i * n_factors * n_subfactors
1021 + count * n_subfactors + count2 ));
/* Two-variable factor: the cell block starts in column 3. */
1023 populate_extremities(t,3,
1024 heading_rows + 2 * n_extremities *
1025 (i * n_factors * n_subfactors
1026 + count * n_subfactors + count2),
/* One-variable factor: the cell block starts in column 2. */
1034 populate_extremities(t,2,
1035 heading_rows + 2 * n_extremities *
1036 (i * n_factors * n_subfactors
1037 + count * n_subfactors),
/* No factor: the cell block starts in column 1. */
1046 populate_extremities(t, 1,
1047 heading_rows + 2 * n_extremities *
1048 (i * n_factors * n_subfactors ),
/* Fills one cell block of the Extreme Values table.  The block is 2 * n
   rows tall: a label is written in column `col' at `row' and another at
   `row + n' (the label text is outside the visible lines), and column
   `col + 1' receives one number for each of the n rows of both halves.
   The values written are not visible here. */
1061 /* Fill in the extremities table */
1063 populate_extremities(struct tab_table *t, int col, int row, int n)
1067 tab_text(t, col, row,
1068 TAB_RIGHT | TAT_TITLE ,
1073 tab_text(t, col, row + n ,
1074 TAB_RIGHT | TAT_TITLE ,
/* Row i of the first half and row i of the second half are filled in the
   same iteration. */
1079 for (i = 0; i < n ; ++i )
1081 tab_float(t, col + 1, row + i,
1082 TAB_RIGHT | TAT_TITLE,
1085 tab_float(t, col + 1, row + i + n,
1086 TAB_RIGHT | TAT_TITLE,