1 /* PSPP - EXAMINE data for normality. -*-c-*-
3 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 Author: John Darrington 2004
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
36 #include "value-labels.h"
47 +missing=miss:pairwise/!listwise,
49 incl:include/!exclude;
50 +compare=cmp:variables/!groups;
52 +statistics[st_]=descriptives,:extreme(*d:n),all,none.
/* State for the EXAMINE command; its address is passed to parse_examine()
   and to multipass_procedure_with_splits(). */
60 static struct cmd_examine cmd;
/* Dependent variable list, filled in by parse_variables() inside
   examine_custom_variables(). */
62 static struct variable **dependent_vars;
/* Number of entries in dependent_vars. */
64 static int n_dependent_vars;
/* Table of struct factor entries.  It is created only once a BY keyword
   has been matched, so readers test it for null before using it. */
66 static struct hsh_table *hash_table_factors;
/* Values recorded for a factor's first variable (v1); calculate() inserts
   each case's v1 value here.  Accessed elsewhere in the file as
   f->hash_table_v1 on a struct factor; the enclosing declaration is not
   part of this listing. */
72 struct hsh_table *hash_table_v1;
/* Values recorded for the second variable (v2).  It is initialised to null
   and calculate() tests it before inserting, so it may be absent. */
75 struct hsh_table *hash_table_v2;
79 /* Parse the clause specifying the factors */
80 static int examine_parse_independent_vars(struct cmd_examine *cmd,
81 struct hsh_table *hash_factors );
86 /* Functions to support hashes of factors: the comparison, hashing and
   element-freeing callbacks handed to hsh_create() for the factor table */
87 int compare_factors(const struct factor *f1, const struct factor *f2,
90 unsigned hash_factor(const struct factor *f, void *aux);
92 void free_factor(struct factor *f, void *aux UNUSED);
95 /* Output functions.  Each takes the dependent variable list and a factor;
   the command driver passes a null factor to request the ungrouped
   (total) table */
96 static void show_summary(struct variable **dependent_var, int n_dep_var,
99 static void show_descriptives(struct variable **dependent_var,
101 struct factor *factor);
104 static void show_extremes(struct variable **dependent_var,
106 struct factor *factor,
/* Callback handed to multipass_procedure_with_splits() together with &cmd */
111 static void calculate(const struct casefile *cf, void *cmd_);
/* Body of the EXAMINE command driver (the function header, braces and
   return statements are not part of this listing).  It parses the command,
   runs calculate() over the data through multipass_procedure_with_splits(),
   then emits the ungrouped tables followed by one set of tables per
   factor. */
120 if ( !parse_examine(&cmd) )
/* When sbc_cinterval is unset, use 95 as the confidence level;
   populate_descriptives() prints this value as a percentage. */
123 if ( ! cmd.sbc_cinterval)
124 cmd.n_cinterval[0] = 95.0;
126 if ( cmd.sbc_nototal )
130 multipass_procedure_with_splits (calculate, &cmd);
132 /* Show totals if appropriate: either they were requested, or there is
   no factor table (or an empty one) to group by */
133 if ( total || !hash_table_factors || 0 == hsh_count (hash_table_factors))
/* A null factor argument selects the ungrouped layout. */
135 show_summary(dependent_vars, n_dependent_vars,0);
137 if ( cmd.sbc_statistics )
139 if ( cmd.a_statistics[EXAMINE_ST_DESCRIPTIVES])
140 show_descriptives(dependent_vars, n_dependent_vars, 0);
/* st_n is handed to show_extremes() as the number of extremes to list;
   SYSMIS presumably means EXTREME was not requested - confirm against the
   generated parser. */
142 if ( cmd.st_n != SYSMIS )
143 show_extremes(dependent_vars, n_dependent_vars, 0, cmd.st_n);
147 /* Show grouped statistics if appropriate */
148 if ( hash_table_factors && 0 != hsh_count (hash_table_factors))
150 struct hsh_iterator hi;
/* One summary (plus optional descriptives/extremes) per factor. */
153 for(f = hsh_first(hash_table_factors,&hi);
155 f = hsh_next(hash_table_factors,&hi))
157 show_summary(dependent_vars, n_dependent_vars,f);
159 if ( cmd.sbc_statistics )
161 if ( cmd.a_statistics[EXAMINE_ST_DESCRIPTIVES])
162 show_descriptives(dependent_vars, n_dependent_vars,f);
164 if ( cmd.st_n != SYSMIS )
165 show_extremes(dependent_vars, n_dependent_vars,f,cmd.st_n);
/* NOTE(review): this appears to be reached even when hash_table_factors is
   null (no BY clause) - confirm hsh_destroy() accepts a null table.  The
   static pointer is also not visibly reset afterwards; verify that a later
   EXAMINE run cannot observe the stale value. */
170 hsh_destroy(hash_table_factors);
176 /* TOTAL and NOTOTAL are simple, mutually exclusive flags.
   Custom parser for TOTAL: when NOTOTAL has already been recorded on P
   (sbc_nototal set) it issues an SE message naming both keywords.  What
   is stored or returned afterwards is not visible in this listing. */
178 examine_custom_total(struct cmd_examine *p)
180 if ( p->sbc_nototal )
182 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
/* Custom parser for NOTOTAL.  It issues an SE message stating that TOTAL
   and NOTOTAL are mutually exclusive; the condition guarding that message
   is not visible in this listing (presumably a check of the TOTAL flag on
   P - confirm). */
190 examine_custom_nototal(struct cmd_examine *p)
194 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
202 /* Compare two factors.
   The names of the first variables (v1) are compared with strcmp() first.
   The second variables (v2) may be null, so the three null/non-null
   combinations are tested before the final strcmp() on the v2 names, which
   is only reached when both factors have a v2.  The values returned for
   the null cases are not visible in this listing. */
204 compare_factors (const struct factor *f1,
205 const struct factor *f2,
210 v1_cmp = strcmp(f1->v1->name, f2->v1->name);
/* Both factors have a single variable. */
215 if ( f1->v2 == 0 && f2->v2 == 0 )
/* Only F2 has a second variable. */
218 if ( f1->v2 == 0 && f2->v2 != 0 )
/* Only F1 has a second variable. */
221 if ( f1->v2 != 0 && f2->v2 == 0 )
224 return strcmp(f1->v2->name, f2->v2->name);
228 /* Create a hash of a factor: the string hash of v1's name, to which the
   string hash of v2's name is added.  The condition guarding the v2 term
   is not visible in this listing (presumably a null check on f->v2 -
   confirm). */
230 hash_factor( const struct factor *f,
234 h = hsh_hash_string(f->v1->name);
237 h += hsh_hash_string(f->v2->name);
243 /* Free up a factor: destroys both of its per-variable value tables.
   Registered as the element destructor of hash_table_factors.
   NOTE(review): hash_table_v2 starts out null in
   examine_parse_independent_vars() and calculate() tests it before use,
   so it may still be null here - confirm hsh_destroy() tolerates a null
   table.  Whether F itself is released is not visible in this listing. */
245 free_factor(struct factor *f, void *aux UNUSED)
247 hsh_destroy(f->hash_table_v1);
248 hsh_destroy(f->hash_table_v2);
254 /* Parser for the variables subcommand.
   Reads the dependent variables into dependent_vars / n_dependent_vars
   (numeric only, no duplicates, no scratch variables).  If the list is
   followed by BY, creates hash_table_factors and hands over to
   examine_parse_independent_vars() to read the factors. */
256 examine_custom_variables(struct cmd_examine *cmd )
/* The current token must be an identifier naming a dictionary variable;
   the rest of this condition is not visible in this listing. */
261 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
265 if (!parse_variables (default_dict, &dependent_vars, &n_dependent_vars,
266 PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
/* Parsing failed: release whatever list was allocated. */
268 free (dependent_vars);
272 assert(n_dependent_vars);
274 if ( lex_match(T_BY))
/* Initial size hint of 4; elements are struct factor, compared, hashed
   and freed by the three callbacks below, with no auxiliary data. */
276 hash_table_factors = hsh_create(4,
277 (hsh_compare_func *) compare_factors,
278 (hsh_hash_func *) hash_factor,
279 (hsh_free_func *) free_factor, 0);
281 return examine_parse_independent_vars(cmd, hash_table_factors);
290 /* Parse the clause specifying the factors.
   Each call allocates one struct factor, parses its first variable (v1)
   and, in the lines that follow, a second variable (v2), creating a value
   table for each.  The factor is inserted into HASH_TABLE_FACTORS, and the
   function then calls itself for the next factor.  The '.' / '/' token
   test ahead of that call is presumably the stop condition, and the v2
   part is presumably conditional - confirm, as the guarding lines are not
   visible in this listing. */
292 examine_parse_independent_vars(struct cmd_examine *cmd,
293 struct hsh_table *hash_table_factors )
295 struct factor *f = 0;
297 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
303 f = xmalloc(sizeof(struct factor));
/* Both tables start out null; v2's table is only created further down. */
306 f->hash_table_v2 = 0;
307 f->hash_table_v1 = 0;
310 f->v1 = parse_variable();
/* NOTE(review): hash_table_v1 was set to 0 just above, so on the visible
   lines this test always succeeds. */
312 if ( ! f->hash_table_v1 )
/* Value table for v1: no element destructor; the variable's width is
   passed as the auxiliary argument.
   NOTE(review): the width is cast straight to a pointer - confirm that
   compare_values()/hash_value() decode it the same way. */
313 f->hash_table_v1 = hsh_create(4,(hsh_compare_func *)compare_values,
314 (hsh_hash_func *)hash_value,
315 0,(void *) f->v1->width);
320 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
324 f->v2 = parse_variable();
326 if ( !f->hash_table_v2 )
328 f->hash_table_v2 = hsh_create(4,
329 (hsh_compare_func *) compare_values,
330 (hsh_hash_func *) hash_value,
332 (void *) f->v2->width);
/* The factor is now stored in the table, whose destructor is
   free_factor(). */
336 hsh_insert(hash_table_factors, f);
340 if ( token == '.' || token == '/' )
/* Otherwise parse the next factor. */
343 return examine_parse_independent_vars(cmd, hash_table_factors);
/* Writes the descriptive-statistics row labels into table T starting at
   column COL; called by show_descriptives() once per group of rows. */
347 void populate_descriptives(struct tab_table *t, int col, int row);
/* Writes the extreme-value rows into table T starting at column COL and
   row ROW; N is used as a row offset and loop bound. */
350 void populate_extremities(struct tab_table *t, int col, int row, int n);
353 /* Show the descriptives table.
   One block of n_stat_rows rows is laid out per dependent variable and,
   when FACTOR is given, per value of factor->v1 (and per value of
   factor->v2 when the factor has a second variable).  The command driver
   passes a null FACTOR for the ungrouped table.  The branch conditions
   that choose between these layouts are not visible in this listing. */
355 show_descriptives(struct variable **dependent_var,
357 struct factor *factor)
360 int heading_columns ;
/* Number of table rows reserved for each block of statistics. */
362 const int n_stat_rows = 13;
364 const int heading_rows = 1;
365 int n_rows = heading_rows ;
/* Row count for the ungrouped layout. */
373 n_rows += n_dep_var * n_stat_rows;
/* Row count with one factor variable ... */
378 if ( factor->v2 == 0 )
381 n_rows += n_dep_var * hsh_count(factor->hash_table_v1) * n_stat_rows;
/* ... and with two: one block per (v1 value, v2 value) pair. */
386 n_rows += n_dep_var * hsh_count(factor->hash_table_v1) *
387 hsh_count(factor->hash_table_v2) * n_stat_rows ;
/* Four columns follow the headings; the last two are titled "Statistic"
   and "Std. Error" below. */
391 n_cols = heading_columns + 4;
393 t = tab_create (n_cols, n_rows, 0);
395 tab_headers (t, heading_columns, 0, heading_rows, 0);
397 tab_dim (t, tab_natural_dimensions);
399 /* Outline the box and have no internal lines */
404 n_cols - 1, n_rows - 1);
406 tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows );
408 tab_vline (t, TAL_2, heading_columns, 0, n_rows - 1);
409 tab_vline (t, TAL_1, n_cols - 2, 0, n_rows - 1);
410 tab_vline (t, TAL_1, n_cols - 1, 0, n_rows - 1);
412 tab_text (t, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Statistic"));
413 tab_text (t, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Std. Error"));
/* One section of rows per dependent variable. */
416 for ( i = 0 ; i < n_dep_var ; ++i )
419 int n_subfactors = 1;
424 n_factors = hsh_count(factor->hash_table_v1);
426 n_subfactors = hsh_count(factor->hash_table_v2);
/* First row of this variable's section. */
430 row = heading_rows + i * n_stat_rows * n_factors * n_subfactors;
433 tab_hline(t, TAL_1, 0, n_cols - 1, row );
439 struct hsh_iterator hi;
/* Column 1 heading: the first factor variable. */
443 tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
444 var_to_string(factor->v1));
/* Walk the values recorded for v1. */
448 for ( v = hsh_first(factor->hash_table_v1, &hi);
450 v = hsh_next(factor->hash_table_v1, &hi))
452 struct hsh_iterator h2;
456 row + count * n_subfactors * n_stat_rows,
457 TAB_RIGHT | TAT_TITLE,
458 value_to_string(v, factor->v1)
462 tab_hline (t, TAL_1, 1, n_cols - 1,
463 row + count * n_subfactors * n_stat_rows);
/* Column 2 heading: the second factor variable. */
469 tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
470 var_to_string(factor->v2));
/* Walk the values recorded for v2 within the current v1 value. */
472 for ( vv = hsh_first(factor->hash_table_v2, &h2);
474 vv = hsh_next(factor->hash_table_v2, &h2))
479 + count * n_subfactors * n_stat_rows
480 + count2 * n_stat_rows,
481 TAB_RIGHT | TAT_TITLE ,
482 value_to_string(vv, factor->v2)
486 tab_hline (t, TAL_1, 2, n_cols - 1,
488 + count * n_subfactors * n_stat_rows
489 + count2 * n_stat_rows);
/* Fill the block for this (v1 value, v2 value) pair. */
491 populate_descriptives(t, heading_columns,
493 + count * n_subfactors
495 + count2 * n_stat_rows);
/* Fill the block for this v1 value (no second variable). */
503 populate_descriptives(t, heading_columns,
505 + count * n_subfactors * n_stat_rows);
/* Fill the single block of the ungrouped layout. */
513 populate_descriptives(t, heading_columns,
519 TAB_LEFT | TAT_TITLE,
520 var_to_string(dependent_var[i])
525 tab_title (t, 0, _("Descriptives"));
532 /* Fill in the descriptives data.
   Writes left-aligned title cells into table T at column COL and COL + 1,
   starting from ROW.  Most of the label strings and all of the row offsets
   are missing from this listing; the labels that are visible are the
   confidence interval for the mean, the 5% trimmed mean, the standard
   deviation and the interquartile range. */
534 populate_descriptives(struct tab_table *t, int col, int row)
539 TAB_LEFT | TAT_TITLE,
/* The confidence level comes from the file-scope cmd and is formatted
   printf-style (TAT_PRINTF). */
545 TAB_LEFT | TAT_TITLE | TAT_PRINTF,
546 _("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]);
/* Two labels in the next column over. */
548 tab_text (t, col + 1,
550 TAB_LEFT | TAT_TITLE,
553 tab_text (t, col + 1,
555 TAB_LEFT | TAT_TITLE,
561 TAB_LEFT | TAT_TITLE,
562 _("5% Trimmed Mean"));
566 TAB_LEFT | TAT_TITLE,
571 TAB_LEFT | TAT_TITLE,
576 TAB_LEFT | TAT_TITLE,
577 _("Std. Deviation"));
581 TAB_LEFT | TAT_TITLE,
586 TAB_LEFT | TAT_TITLE,
591 TAB_LEFT | TAT_TITLE,
596 TAB_LEFT | TAT_TITLE,
597 _("Interquartile Range"));
601 TAB_LEFT | TAT_TITLE,
606 TAB_LEFT | TAT_TITLE,
/* Show the "Case Processing Summary" table.
   The table has three heading rows and six data columns arranged as three
   pairs.  It reserves one row per dependent variable and, when FACTOR is
   given, one per value of factor->v1 (and per value of factor->v2 when the
   factor has a second variable).  The command driver passes a null FACTOR
   for the ungrouped table.  Only the row and column labels are visible in
   this listing. */
612 show_summary(struct variable **dependent_var,
614 struct factor *factor)
/* Titles for the three column pairs, indexed by the heading loop below
   (the initialiser is not visible in this listing). */
616 static const char *subtitle[]=
624 int heading_columns ;
626 const int heading_rows = 3;
629 int n_rows = heading_rows;
/* Row count with one factor variable ... */
639 if ( factor->v2 == 0 )
642 n_rows += n_dep_var * hsh_count(factor->hash_table_v1);
/* ... and with two. */
647 n_rows += n_dep_var * hsh_count(factor->hash_table_v1) *
648 hsh_count(factor->hash_table_v2) ;
/* Three pairs of columns follow the headings. */
653 n_cols = heading_columns + 6;
655 t = tab_create (n_cols,n_rows,0);
656 tab_headers (t, heading_columns, 0, heading_rows, 0);
658 tab_dim (t, tab_natural_dimensions);
660 /* Outline the box and have vertical internal lines */
665 n_cols - 1, n_rows - 1);
667 tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows );
668 tab_hline (t, TAL_1, heading_columns, n_cols - 1, 1 );
669 tab_hline (t, TAL_1, 0, n_cols - 1, heading_rows -1 );
671 tab_vline (t, TAL_2, heading_columns, 0, n_rows - 1);
674 tab_title (t, 0, _("Case Processing Summary"));
/* Heading row 0: a single joined cell starting at the first data column. */
677 tab_joint_text(t, heading_columns, 0,
679 TAB_CENTER | TAT_TITLE,
682 /* Remove lines ... */
/* Factor variable names go in the last heading row. */
691 tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
692 var_to_string(factor->v1));
695 tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
696 var_to_string(factor->v2));
/* For each of the three column pairs: "N" and a second label in heading
   row 2, and a joined title across the pair in heading row 1. */
699 for ( i = 0 ; i < 3 ; ++i )
701 tab_text (t, heading_columns + i*2 , 2, TAB_CENTER | TAT_TITLE, _("N"));
702 tab_text (t, heading_columns + i*2 + 1, 2, TAB_CENTER | TAT_TITLE,
705 tab_joint_text(t, heading_columns + i*2 , 1,
706 heading_columns + i*2 + 1, 1,
707 TAB_CENTER | TAT_TITLE,
712 heading_columns + i*2, 1,
713 heading_columns + i*2 + 1, 1);
/* Row labels: one section per dependent variable. */
718 for ( i = 0 ; i < n_dep_var ; ++i )
720 int n_subfactors = 1;
725 n_factors = hsh_count(factor->hash_table_v1);
727 n_subfactors = hsh_count(factor->hash_table_v2);
/* Dependent variable name in column 0 at the section's first row. */
731 0, i * n_factors * n_subfactors + heading_rows,
732 TAB_LEFT | TAT_TITLE,
733 var_to_string(dependent_var[i])
738 struct hsh_iterator hi;
/* One label per value recorded for v1 ... */
742 for ( v = hsh_first(factor->hash_table_v1, &hi);
744 v = hsh_next(factor->hash_table_v1, &hi))
746 struct hsh_iterator h2;
750 i * n_factors * n_subfactors + heading_rows
751 + count * n_subfactors,
752 TAB_RIGHT | TAT_TITLE,
753 value_to_string(v, factor->v1)
/* ... and, beneath it, one per value recorded for v2. */
759 for ( vv = hsh_first(factor->hash_table_v2, &h2);
761 vv = hsh_next(factor->hash_table_v2, &h2))
765 i * n_factors * n_subfactors + heading_rows
766 + count * n_subfactors + count2,
767 TAB_RIGHT | TAT_TITLE ,
768 value_to_string(vv, factor->v2)
/* Passed by address to dict_get_case_weight() on every case in
   calculate(); presumably suppresses repeated bad-weight warnings -
   confirm against dict_get_case_weight(). */
786 static int bad_weight_warn = 1;
/* Callback run by multipass_procedure_with_splits().  It reads every case
   of CF and, for each factor, records the case's value of v1 (and of v2
   when the factor has a v2 table) in the factor's value tables; the output
   functions later size and label their tables from those tables. */
789 calculate(const struct casefile *cf, void *cmd_)
791 struct casereader *r;
794 struct cmd_examine *cmd = (struct cmd_examine *) cmd_;
796 for(r = casefile_get_reader (cf);
797 casereader_read (r, &c) ;
801 struct hsh_iterator hi;
/* The weight is fetched for each case; no use of it is visible in this
   listing. */
804 const double weight =
805 dict_get_case_weight(default_dict,&c,&bad_weight_warn);
/* No factor table exists unless a BY clause was parsed. */
807 if ( hash_table_factors )
809 for ( fctr = hsh_first(hash_table_factors, &hi);
811 fctr = hsh_next (hash_table_factors, &hi) )
/* NOTE(review): val points into the case data of C and the table stores
   that pointer - confirm it stays valid after the reader moves on to the
   next case. */
816 val = case_data (&c, fctr->v1->fv);
817 hsh_insert(fctr->hash_table_v1,val);
819 if ( fctr->hash_table_v2 )
821 val = case_data (&c, fctr->v2->fv);
822 hsh_insert(fctr->hash_table_v2,val);
/* Show the "Extreme Values" table.
   For each dependent variable, and for each factor value (or pair of
   values) when FACTOR is given, the table reserves 2 * n_extremities rows,
   which populate_extremities() fills in.  The command driver passes a null
   FACTOR for the ungrouped table.  The branch conditions that choose
   between the layouts are not visible in this listing. */
832 show_extremes(struct variable **dependent_var,
834 struct factor *factor,
838 int heading_columns ;
840 const int heading_rows = 1;
843 int n_rows = heading_rows;
/* Ungrouped layout: two heading columns. */
847 heading_columns = 1 + 1;
848 n_rows += n_dep_var * 2 * n_extremities;
/* One factor variable: three heading columns ... */
853 if ( factor->v2 == 0 )
855 heading_columns = 2 + 1;
856 n_rows += n_dep_var * 2 * n_extremities
857 * hsh_count(factor->hash_table_v1);
/* ... two factor variables: four heading columns. */
861 heading_columns = 3 + 1;
862 n_rows += n_dep_var * 2 * n_extremities
863 * hsh_count(factor->hash_table_v1)
864 * hsh_count(factor->hash_table_v2) ;
/* Three columns follow the headings; the last two are titled
   "Case Number" and "Value" below. */
869 n_cols = heading_columns + 3;
871 t = tab_create (n_cols,n_rows,0);
872 tab_headers (t, heading_columns, 0, heading_rows, 0);
874 tab_dim (t, tab_natural_dimensions);
876 /* Outline the box and have vertical internal lines */
881 n_cols - 1, n_rows - 1);
885 tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows );
887 tab_title (t, 0, _("Extreme Values"));
892 /* Remove lines ... */
/* Factor variable names in the heading row. */
901 tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
902 var_to_string(factor->v1));
905 tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
906 var_to_string(factor->v2));
909 tab_text (t, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value"));
910 tab_text (t, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number"));
/* One section of rows per dependent variable. */
913 for ( i = 0 ; i < n_dep_var ; ++i )
915 int n_subfactors = 1;
920 n_factors = hsh_count(factor->hash_table_v1);
922 n_subfactors = hsh_count(factor->hash_table_v2);
/* Dependent variable name at the first row of its section. */
926 0, i * 2 * n_extremities * n_factors *
927 n_subfactors + heading_rows,
928 TAB_LEFT | TAT_TITLE,
929 var_to_string(dependent_var[i])
935 TAL_1, 0, n_cols - 1,
936 heading_rows + 2 * n_extremities *
937 (i * n_factors * n_subfactors )
942 struct hsh_iterator hi;
/* Walk the values recorded for v1. */
946 for ( v = hsh_first(factor->hash_table_v1, &hi);
948 v = hsh_next(factor->hash_table_v1, &hi))
950 struct hsh_iterator h2;
953 tab_text (t, 1, heading_rows + 2 * n_extremities *
954 (i * n_factors * n_subfactors
955 + count * n_subfactors),
956 TAB_RIGHT | TAT_TITLE,
957 value_to_string(v, factor->v1)
961 tab_hline (t, TAL_1, 1, n_cols - 1,
962 heading_rows + 2 * n_extremities *
963 (i * n_factors * n_subfactors
964 + count * n_subfactors));
/* Walk the values recorded for v2 within the current v1 value. */
970 for ( vv = hsh_first(factor->hash_table_v2, &h2);
972 vv = hsh_next(factor->hash_table_v2, &h2))
975 tab_text(t, 2, heading_rows + 2 * n_extremities *
976 (i * n_factors * n_subfactors
977 + count * n_subfactors + count2 ),
978 TAB_RIGHT | TAT_TITLE ,
979 value_to_string(vv, factor->v2)
984 tab_hline (t, TAL_1, 2, n_cols - 1,
985 heading_rows + 2 * n_extremities *
986 (i * n_factors * n_subfactors
987 + count * n_subfactors + count2 ));
/* Two factor variables: the extremes start at column 3. */
989 populate_extremities(t,3,
990 heading_rows + 2 * n_extremities *
991 (i * n_factors * n_subfactors
992 + count * n_subfactors + count2),
/* One factor variable: the extremes start at column 2. */
1000 populate_extremities(t,2,
1001 heading_rows + 2 * n_extremities *
1002 (i * n_factors * n_subfactors
1003 + count * n_subfactors),
/* Ungrouped layout: the extremes start at column 1. */
1012 populate_extremities(t, 1,
1013 heading_rows + 2 * n_extremities *
1014 (i * n_factors * n_subfactors ),
1027 /* Fill in the extremities table.
   Starting at (COL, ROW) in table T, it writes one title cell at ROW and
   another at ROW + N, then N float cells in column COL + 1 for each of the
   two groups (rows ROW .. ROW + N - 1 and ROW + N .. ROW + 2N - 1).  The
   label strings and the values written are not visible in this listing. */
1029 populate_extremities(struct tab_table *t, int col, int row, int n)
/* Label for the first group of N rows. */
1033 tab_text(t, col, row,
1034 TAB_RIGHT | TAT_TITLE ,
/* Label for the second group, N rows further down. */
1039 tab_text(t, col, row + n ,
1040 TAB_RIGHT | TAT_TITLE ,
1045 for (i = 0; i < n ; ++i )
1047 tab_float(t, col + 1, row + i,
1048 TAB_RIGHT | TAT_TITLE,
1051 tab_float(t, col + 1, row + i + n,
1052 TAB_RIGHT | TAT_TITLE,