1 /* PSPP - EXAMINE data for normality. -*-c-*-
3 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
4 Author: John Darrington 2004
6 This program is free software; you can redistribute it and/or
7 modify it under the terms of the GNU General Public License as
8 published by the Free Software Foundation; either version 2 of the
9 License, or (at your option) any later version.
11 This program is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
36 #include "value-labels.h"
47 +missing=miss:pairwise/!listwise,
49 incl:include/!exclude;
50 +compare=cmp:variables/!groups;
52 +statistics[st_]=descriptives,:extreme(*d:n),all,none.
60 static struct cmd_examine cmd;
62 static struct variable **dependent_vars;
64 static int n_dependent_vars;
66 static struct hsh_table *hash_table_factors;
72 struct hsh_table *hash_table_v1;
75 struct hsh_table *hash_table_v2;
79 /* Parse the clause specifying the factors */
80 static int examine_parse_independent_vars(struct cmd_examine *cmd,
81 struct hsh_table *hash_factors );
86 /* Functions to support hashes of factors */
87 int compare_factors(const struct factor *f1, const struct factor *f2,
90 unsigned hash_factor(const struct factor *f, void *aux);
92 void free_factor(struct factor *f, void *aux UNUSED);
95 /* Output functions */
96 static void show_summary(struct variable **dependent_var, int n_dep_var,
99 static void show_descriptives(struct variable **dependent_var,
101 struct factor *factor);
104 static void show_extremes(struct variable **dependent_var,
106 struct factor *factor,
111 static void calculate(const struct casefile *cf, void *cmd_);
122 if ( !parse_examine(&cmd) )
125 if ( cmd.st_n == SYSMIS )
128 if ( ! cmd.sbc_cinterval)
129 cmd.n_cinterval[0] = 95.0;
131 if ( cmd.sbc_nototal )
135 multipass_procedure_with_splits (calculate, &cmd);
137 /* Show totals if appropriate */
138 if ( total || !hash_table_factors || 0 == hsh_count (hash_table_factors))
140 show_summary(dependent_vars, n_dependent_vars,0);
142 if ( cmd.sbc_statistics )
144 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
145 show_descriptives(dependent_vars, n_dependent_vars, 0);
147 if ( cmd.a_statistics[XMN_ST_EXTREME])
148 show_extremes(dependent_vars, n_dependent_vars, 0, cmd.st_n);
152 /* Show grouped statistics if appropriate */
153 if ( hash_table_factors && 0 != hsh_count (hash_table_factors))
155 struct hsh_iterator hi;
158 for(f = hsh_first(hash_table_factors,&hi);
160 f = hsh_next(hash_table_factors,&hi))
162 show_summary(dependent_vars, n_dependent_vars,f);
164 if ( cmd.sbc_statistics )
166 if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
167 show_descriptives(dependent_vars, n_dependent_vars,f);
169 if ( cmd.a_statistics[XMN_ST_EXTREME])
170 show_extremes(dependent_vars, n_dependent_vars,f,cmd.st_n);
175 hsh_destroy(hash_table_factors);
181 /* TOTAL and NOTOTAL are simple, mutually exclusive flags */
183 xmn_custom_total(struct cmd_examine *p)
185 if ( p->sbc_nototal )
187 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
195 xmn_custom_nototal(struct cmd_examine *p)
199 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
207 /* Compare two factors */
209 compare_factors (const struct factor *f1,
210 const struct factor *f2,
215 v1_cmp = strcmp(f1->v1->name, f2->v1->name);
220 if ( f1->v2 == 0 && f2->v2 == 0 )
223 if ( f1->v2 == 0 && f2->v2 != 0 )
226 if ( f1->v2 != 0 && f2->v2 == 0 )
229 return strcmp(f1->v2->name, f2->v2->name);
233 /* Create a hash of a factor */
235 hash_factor( const struct factor *f,
239 h = hsh_hash_string(f->v1->name);
242 h += hsh_hash_string(f->v2->name);
248 /* Free up a factor */
250 free_factor(struct factor *f, void *aux UNUSED)
252 hsh_destroy(f->hash_table_v1);
253 hsh_destroy(f->hash_table_v2);
259 /* Parser for the VARIABLES subcommand */
261 xmn_custom_variables(struct cmd_examine *cmd )
266 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
270 if (!parse_variables (default_dict, &dependent_vars, &n_dependent_vars,
271 PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
273 free (dependent_vars);
277 assert(n_dependent_vars);
279 if ( lex_match(T_BY))
281 hash_table_factors = hsh_create(4,
282 (hsh_compare_func *) compare_factors,
283 (hsh_hash_func *) hash_factor,
284 (hsh_free_func *) free_factor, 0);
286 return examine_parse_independent_vars(cmd, hash_table_factors);
295 /* Parse the clause specifying the factors */
297 examine_parse_independent_vars(struct cmd_examine *cmd,
298 struct hsh_table *hash_table_factors )
300 struct factor *f = 0;
302 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
308 f = xmalloc(sizeof(struct factor));
311 f->hash_table_v2 = 0;
312 f->hash_table_v1 = 0;
315 f->v1 = parse_variable();
317 if ( ! f->hash_table_v1 )
318 f->hash_table_v1 = hsh_create(4,(hsh_compare_func *)compare_values,
319 (hsh_hash_func *)hash_value,
320 0,(void *) f->v1->width);
325 if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
329 f->v2 = parse_variable();
331 if ( !f->hash_table_v2 )
333 f->hash_table_v2 = hsh_create(4,
334 (hsh_compare_func *) compare_values,
335 (hsh_hash_func *) hash_value,
337 (void *) f->v2->width);
341 hsh_insert(hash_table_factors, f);
345 if ( token == '.' || token == '/' )
348 return examine_parse_independent_vars(cmd, hash_table_factors);
352 void populate_descriptives(struct tab_table *t, int col, int row);
355 void populate_extremities(struct tab_table *t, int col, int row, int n);
358 /* Show the descriptives table */
360 show_descriptives(struct variable **dependent_var,
362 struct factor *factor)
365 int heading_columns ;
367 const int n_stat_rows = 13;
369 const int heading_rows = 1;
370 int n_rows = heading_rows ;
378 n_rows += n_dep_var * n_stat_rows;
383 if ( factor->v2 == 0 )
386 n_rows += n_dep_var * hsh_count(factor->hash_table_v1) * n_stat_rows;
391 n_rows += n_dep_var * hsh_count(factor->hash_table_v1) *
392 hsh_count(factor->hash_table_v2) * n_stat_rows ;
396 n_cols = heading_columns + 4;
398 t = tab_create (n_cols, n_rows, 0);
400 tab_headers (t, heading_columns, 0, heading_rows, 0);
402 tab_dim (t, tab_natural_dimensions);
404 /* Outline the box and have no internal lines*/
409 n_cols - 1, n_rows - 1);
411 tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows );
413 tab_vline (t, TAL_2, heading_columns, 0, n_rows - 1);
414 tab_vline (t, TAL_1, n_cols - 2, 0, n_rows - 1);
415 tab_vline (t, TAL_1, n_cols - 1, 0, n_rows - 1);
417 tab_text (t, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Statistic"));
418 tab_text (t, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Std. Error"));
421 for ( i = 0 ; i < n_dep_var ; ++i )
424 int n_subfactors = 1;
429 n_factors = hsh_count(factor->hash_table_v1);
431 n_subfactors = hsh_count(factor->hash_table_v2);
435 row = heading_rows + i * n_stat_rows * n_factors * n_subfactors;
438 tab_hline(t, TAL_1, 0, n_cols - 1, row );
444 struct hsh_iterator hi;
448 tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
449 var_to_string(factor->v1));
453 for ( v = hsh_first(factor->hash_table_v1, &hi);
455 v = hsh_next(factor->hash_table_v1, &hi))
457 struct hsh_iterator h2;
461 row + count * n_subfactors * n_stat_rows,
462 TAB_RIGHT | TAT_TITLE,
463 value_to_string(v, factor->v1)
467 tab_hline (t, TAL_1, 1, n_cols - 1,
468 row + count * n_subfactors * n_stat_rows);
474 tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
475 var_to_string(factor->v2));
477 for ( vv = hsh_first(factor->hash_table_v2, &h2);
479 vv = hsh_next(factor->hash_table_v2, &h2))
484 + count * n_subfactors * n_stat_rows
485 + count2 * n_stat_rows,
486 TAB_RIGHT | TAT_TITLE ,
487 value_to_string(vv, factor->v2)
491 tab_hline (t, TAL_1, 2, n_cols - 1,
493 + count * n_subfactors * n_stat_rows
494 + count2 * n_stat_rows);
496 populate_descriptives(t, heading_columns,
498 + count * n_subfactors
500 + count2 * n_stat_rows);
508 populate_descriptives(t, heading_columns,
510 + count * n_subfactors * n_stat_rows);
518 populate_descriptives(t, heading_columns,
524 TAB_LEFT | TAT_TITLE,
525 var_to_string(dependent_var[i])
530 tab_title (t, 0, _("Descriptives"));
537 /* Fill in the descriptives data */
539 populate_descriptives(struct tab_table *t, int col, int row)
544 TAB_LEFT | TAT_TITLE,
550 TAB_LEFT | TAT_TITLE | TAT_PRINTF,
551 _("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]);
553 tab_text (t, col + 1,
555 TAB_LEFT | TAT_TITLE,
558 tab_text (t, col + 1,
560 TAB_LEFT | TAT_TITLE,
566 TAB_LEFT | TAT_TITLE,
567 _("5% Trimmed Mean"));
571 TAB_LEFT | TAT_TITLE,
576 TAB_LEFT | TAT_TITLE,
581 TAB_LEFT | TAT_TITLE,
582 _("Std. Deviation"));
586 TAB_LEFT | TAT_TITLE,
591 TAB_LEFT | TAT_TITLE,
596 TAB_LEFT | TAT_TITLE,
601 TAB_LEFT | TAT_TITLE,
602 _("Interquartile Range"));
606 TAB_LEFT | TAT_TITLE,
611 TAB_LEFT | TAT_TITLE,
617 show_summary(struct variable **dependent_var,
619 struct factor *factor)
621 static const char *subtitle[]=
629 int heading_columns ;
631 const int heading_rows = 3;
634 int n_rows = heading_rows;
644 if ( factor->v2 == 0 )
647 n_rows += n_dep_var * hsh_count(factor->hash_table_v1);
652 n_rows += n_dep_var * hsh_count(factor->hash_table_v1) *
653 hsh_count(factor->hash_table_v2) ;
658 n_cols = heading_columns + 6;
660 t = tab_create (n_cols,n_rows,0);
661 tab_headers (t, heading_columns, 0, heading_rows, 0);
663 tab_dim (t, tab_natural_dimensions);
665 /* Outline the box and have vertical internal lines*/
670 n_cols - 1, n_rows - 1);
672 tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows );
673 tab_hline (t, TAL_1, heading_columns, n_cols - 1, 1 );
674 tab_hline (t, TAL_1, 0, n_cols - 1, heading_rows -1 );
676 tab_vline (t, TAL_2, heading_columns, 0, n_rows - 1);
679 tab_title (t, 0, _("Case Processing Summary"));
682 tab_joint_text(t, heading_columns, 0,
684 TAB_CENTER | TAT_TITLE,
687 /* Remove lines ... */
696 tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
697 var_to_string(factor->v1));
700 tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
701 var_to_string(factor->v2));
704 for ( i = 0 ; i < 3 ; ++i )
706 tab_text (t, heading_columns + i*2 , 2, TAB_CENTER | TAT_TITLE, _("N"));
707 tab_text (t, heading_columns + i*2 + 1, 2, TAB_CENTER | TAT_TITLE,
710 tab_joint_text(t, heading_columns + i*2 , 1,
711 heading_columns + i*2 + 1, 1,
712 TAB_CENTER | TAT_TITLE,
717 heading_columns + i*2, 1,
718 heading_columns + i*2 + 1, 1);
723 for ( i = 0 ; i < n_dep_var ; ++i )
725 int n_subfactors = 1;
730 n_factors = hsh_count(factor->hash_table_v1);
732 n_subfactors = hsh_count(factor->hash_table_v2);
736 0, i * n_factors * n_subfactors + heading_rows,
737 TAB_LEFT | TAT_TITLE,
738 var_to_string(dependent_var[i])
743 struct hsh_iterator hi;
747 for ( v = hsh_first(factor->hash_table_v1, &hi);
749 v = hsh_next(factor->hash_table_v1, &hi))
751 struct hsh_iterator h2;
755 i * n_factors * n_subfactors + heading_rows
756 + count * n_subfactors,
757 TAB_RIGHT | TAT_TITLE,
758 value_to_string(v, factor->v1)
764 for ( vv = hsh_first(factor->hash_table_v2, &h2);
766 vv = hsh_next(factor->hash_table_v2, &h2))
770 i * n_factors * n_subfactors + heading_rows
771 + count * n_subfactors + count2,
772 TAB_RIGHT | TAT_TITLE ,
773 value_to_string(vv, factor->v2)
791 static int bad_weight_warn = 1;
794 calculate(const struct casefile *cf, void *cmd_)
796 struct casereader *r;
799 struct cmd_examine *cmd = (struct cmd_examine *) cmd_;
801 for(r = casefile_get_reader (cf);
802 casereader_read (r, &c) ;
806 struct hsh_iterator hi;
809 const double weight =
810 dict_get_case_weight(default_dict,&c,&bad_weight_warn);
812 if ( hash_table_factors )
814 for ( fctr = hsh_first(hash_table_factors, &hi);
816 fctr = hsh_next (hash_table_factors, &hi) )
821 val = case_data (&c, fctr->v1->fv);
822 hsh_insert(fctr->hash_table_v1,val);
824 if ( fctr->hash_table_v2 )
826 val = case_data (&c, fctr->v2->fv);
827 hsh_insert(fctr->hash_table_v2,val);
837 show_extremes(struct variable **dependent_var,
839 struct factor *factor,
843 int heading_columns ;
845 const int heading_rows = 1;
848 int n_rows = heading_rows;
852 heading_columns = 1 + 1;
853 n_rows += n_dep_var * 2 * n_extremities;
858 if ( factor->v2 == 0 )
860 heading_columns = 2 + 1;
861 n_rows += n_dep_var * 2 * n_extremities
862 * hsh_count(factor->hash_table_v1);
866 heading_columns = 3 + 1;
867 n_rows += n_dep_var * 2 * n_extremities
868 * hsh_count(factor->hash_table_v1)
869 * hsh_count(factor->hash_table_v2) ;
874 n_cols = heading_columns + 3;
876 t = tab_create (n_cols,n_rows,0);
877 tab_headers (t, heading_columns, 0, heading_rows, 0);
879 tab_dim (t, tab_natural_dimensions);
881 /* Outline the box and have vertical internal lines*/
886 n_cols - 1, n_rows - 1);
890 tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows );
892 tab_title (t, 0, _("Extreme Values"));
897 /* Remove lines ... */
906 tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
907 var_to_string(factor->v1));
910 tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
911 var_to_string(factor->v2));
914 tab_text (t, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value"));
915 tab_text (t, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number"));
918 for ( i = 0 ; i < n_dep_var ; ++i )
920 int n_subfactors = 1;
925 n_factors = hsh_count(factor->hash_table_v1);
927 n_subfactors = hsh_count(factor->hash_table_v2);
931 0, i * 2 * n_extremities * n_factors *
932 n_subfactors + heading_rows,
933 TAB_LEFT | TAT_TITLE,
934 var_to_string(dependent_var[i])
940 TAL_1, 0, n_cols - 1,
941 heading_rows + 2 * n_extremities *
942 (i * n_factors * n_subfactors )
947 struct hsh_iterator hi;
951 for ( v = hsh_first(factor->hash_table_v1, &hi);
953 v = hsh_next(factor->hash_table_v1, &hi))
955 struct hsh_iterator h2;
958 tab_text (t, 1, heading_rows + 2 * n_extremities *
959 (i * n_factors * n_subfactors
960 + count * n_subfactors),
961 TAB_RIGHT | TAT_TITLE,
962 value_to_string(v, factor->v1)
966 tab_hline (t, TAL_1, 1, n_cols - 1,
967 heading_rows + 2 * n_extremities *
968 (i * n_factors * n_subfactors
969 + count * n_subfactors));
975 for ( vv = hsh_first(factor->hash_table_v2, &h2);
977 vv = hsh_next(factor->hash_table_v2, &h2))
980 tab_text(t, 2, heading_rows + 2 * n_extremities *
981 (i * n_factors * n_subfactors
982 + count * n_subfactors + count2 ),
983 TAB_RIGHT | TAT_TITLE ,
984 value_to_string(vv, factor->v2)
989 tab_hline (t, TAL_1, 2, n_cols - 1,
990 heading_rows + 2 * n_extremities *
991 (i * n_factors * n_subfactors
992 + count * n_subfactors + count2 ));
994 populate_extremities(t,3,
995 heading_rows + 2 * n_extremities *
996 (i * n_factors * n_subfactors
997 + count * n_subfactors + count2),
1005 populate_extremities(t,2,
1006 heading_rows + 2 * n_extremities *
1007 (i * n_factors * n_subfactors
1008 + count * n_subfactors),
1017 populate_extremities(t, 1,
1018 heading_rows + 2 * n_extremities *
1019 (i * n_factors * n_subfactors ),
1032 /* Fill in the extremities table */
1034 populate_extremities(struct tab_table *t, int col, int row, int n)
1038 tab_text(t, col, row,
1039 TAB_RIGHT | TAT_TITLE ,
1044 tab_text(t, col, row + n ,
1045 TAB_RIGHT | TAT_TITLE ,
1050 for (i = 0; i < n ; ++i )
1052 tab_float(t, col + 1, row + i,
1053 TAB_RIGHT | TAT_TITLE,
1056 tab_float(t, col + 1, row + i + n,
1057 TAB_RIGHT | TAT_TITLE,