2 PSPP - a program for statistical analysis.
3 Copyright (C) 2012 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include <gsl/gsl_cdf.h>
24 #include "libpspp/assertion.h"
25 #include "libpspp/message.h"
26 #include "libpspp/pool.h"
29 #include "data/dataset.h"
30 #include "data/dictionary.h"
31 #include "data/casegrouper.h"
32 #include "data/casereader.h"
33 #include "data/casewriter.h"
34 #include "data/caseproto.h"
35 #include "data/subcase.h"
38 #include "data/format.h"
40 #include "math/interaction.h"
41 #include "math/box-whisker.h"
42 #include "math/categoricals.h"
43 #include "math/chart-geometry.h"
44 #include "math/histogram.h"
45 #include "math/moments.h"
47 #include "math/sort.h"
48 #include "math/order-stats.h"
49 #include "math/percentiles.h"
50 #include "math/tukey-hinges.h"
51 #include "math/trimmed-mean.h"
53 #include "output/charts/boxplot.h"
54 #include "output/charts/np-plot.h"
55 #include "output/charts/plot-hist.h"
57 #include "language/command.h"
58 #include "language/lexer/lexer.h"
59 #include "language/lexer/value-parser.h"
60 #include "language/lexer/variable-parser.h"
62 #include "output/tab.h"
/* Translation markers: _() passes MSGID to gettext, whereas N_() expands to
   MSGID unchanged. */
65 #define _(msgid) gettext (msgid)
66 #define N_(msgid) msgid
75 /* Indices for the ex_proto member (below) */
88 /* A caseproto used to contain the data subsets under examination,
90 struct caseproto *ex_proto;
93 const struct variable **dep_vars;
96 struct interaction **iacts;
98 enum mv_class exclude;
100 const struct dictionary *dict;
102 struct categoricals *cats;
104 /* how many extremities to display */
113 /* The case index of the ID value, or -1 if not applicable */
119 size_t n_percentiles;
125 enum bp_mode boxplot_mode;
127 const struct variable *id_var;
129 const struct variable *wv;
134 /* The value of this extremity */
137 /* Either the casenumber or the value of the variable specified
138 by the /ID subcommand which corresponds to this extremity */
139 union value identity;
142 struct exploratory_stats
149 /* Most operations need a sorted reader/writer */
150 struct casewriter *sorted_writer;
151 struct casereader *sorted_reader;
153 struct extremity *minima;
154 struct extremity *maxima;
157 Minimum should always equal minima[0].val.
158 Likewise, maximum should always equal maxima[0].val.
159 This redundancy exists as an optimisation effort.
160 Some statistics (eg histogram) require early calculation
166 struct trimmed_mean *trimmed_mean;
167 struct percentile *quartiles[3];
168 struct percentile **percentiles;
170 struct tukey_hinges *hinges;
172 /* The data for the NP Plots */
175 struct histogram *histogram;
177 /* The data for the box plots */
178 struct box_whisker *box_whisker;
183 /* The minimum weight */
188 /* Returns an array of (iact->n_vars) pointers to union value initialised to NULL.
189 The caller must free this array when no longer required. */
/* The array is obtained from xcalloc with one slot per variable of IACT; the
   loop below then stores NULL in every slot explicitly. */
190 static const union value **
191 previous_value_alloc (const struct interaction *iact)
195 const union value **prev_val = xcalloc (iact->n_vars, sizeof (*prev_val));
197 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
198 prev_val[ivar_idx] = NULL;
203 /* Set the contents of PREV_VAL to the values of C indexed by the variables of IACT */
/* Works in two passes over the variables of IACT.  The first pass compares
   each previously recorded value (when one exists) with the value C holds for
   the same variable, using that variable's width.  The second pass makes every
   slot of PREV_VAL point at C's value for the corresponding variable; pointers
   are stored, the values themselves are not copied.

   NOTE(review): the return statements are not visible in this listing.
   Callers store the result in an int named diff_idx and test it against -1,
   so it is presumably the index of the first variable whose value changed, or
   -1 when nothing changed -- confirm against the full source. */
205 previous_value_record (const struct interaction *iact, const struct ccase *c, const union value **prev_val)
/* Pass 1: look for a variable whose value differs from the recorded one. */
210 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
212 const struct variable *ivar = iact->vars[ivar_idx];
213 const int width = var_get_width (ivar);
214 const union value *val = case_data (c, ivar);
/* A NULL slot means no value has been recorded yet, so it is not compared. */
216 if (prev_val[ivar_idx])
217 if (! value_equal (prev_val[ivar_idx], val, width))
/* Pass 2: remember C's values for the next call. */
224 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
226 const struct variable *ivar = iact->vars[ivar_idx];
227 const union value *val = case_data (c, ivar);
229 prev_val[ivar_idx] = val;
/* Submits one boxplot per dependent variable of CMD for the interaction
   numbered IACT_IDX.  Each plot holds one box for every category of that
   interaction.  The plot's y range is the smallest minimum and the largest
   maximum found over those categories for the variable being plotted. */
236 show_boxplot_grouped (const struct examine *cmd, int iact_idx)
240 const struct interaction *iact = cmd->iacts[iact_idx];
241 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
243 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Start from an empty range so that the first category always narrows it. */
245 double y_min = DBL_MAX;
246 double y_max = -DBL_MAX;
248 struct boxplot *boxplot;
250 ds_init_empty (&title);
/* With factor variables the title names both the dependent variable and the
   interaction. */
252 if (iact->n_vars > 0)
255 ds_init_empty (&istr);
256 interaction_to_string (iact, &istr);
257 ds_put_format (&title, _("Boxplot of %s vs. %s"),
258 var_to_string (cmd->dep_vars[v]),
/* Title naming the dependent variable only (presumably the else branch of the
   test above; the else line itself is not visible here). */
263 ds_put_format (&title, _("Boxplot of %s"), var_to_string (cmd->dep_vars[v]));
/* First sweep over the categories: establish the common y range. */
265 for (grp = 0; grp < n_cats; ++grp)
267 const struct exploratory_stats *es =
268 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
270 if ( y_min > es[v].minimum)
271 y_min = es[v].minimum;
273 if ( y_max < es[v].maximum)
274 y_max = es[v].maximum;
277 boxplot = boxplot_create (y_min, y_max, ds_cstr (&title));
/* Second sweep: add one box per category, labelled "VAR = VALUE; " for each
   variable of the interaction. */
281 for (grp = 0; grp < n_cats; ++grp)
286 const struct ccase *c =
287 categoricals_get_case_by_category_real (cmd->cats, iact_idx, grp);
289 const struct exploratory_stats *es =
290 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
292 ds_init_empty (&label);
293 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
295 const struct variable *ivar = iact->vars[ivar_idx];
296 const union value *val = case_data (c, ivar);
298 ds_put_cstr (&label, var_to_string (ivar));
299 ds_put_cstr (&label, " = ");
300 var_append_value_name (ivar, val, &label);
301 ds_put_cstr (&label, "; ");
304 boxplot_add_box (boxplot, es[v].box_whisker, ds_cstr (&label));
309 boxplot_submit (boxplot);
/* Submits one boxplot per category of the interaction numbered IACT_IDX.
   Each plot holds one box for every dependent variable of CMD, labelled with
   that variable's name.  The plot's y range spans the smallest minimum and
   the largest maximum over the dependent variables within the category. */
314 show_boxplot_variabled (const struct examine *cmd, int iact_idx)
317 const struct interaction *iact = cmd->iacts[iact_idx];
318 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
320 for (grp = 0; grp < n_cats; ++grp)
322 struct boxplot *boxplot;
/* Start from an empty range so that the first variable always narrows it. */
324 double y_min = DBL_MAX;
325 double y_max = -DBL_MAX;
327 const struct ccase *c =
328 categoricals_get_case_by_category_real (cmd->cats, iact_idx, grp);
331 ds_init_empty (&title);
/* First sweep over the dependent variables: establish the common y range. */
333 for (v = 0; v < cmd->n_dep_vars; ++v)
335 const struct exploratory_stats *es =
336 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
338 if ( y_min > es[v].minimum)
339 y_min = es[v].minimum;
341 if ( y_max < es[v].maximum)
342 y_max = es[v].maximum;
/* Without factor variables there is nothing to name in the title. */
345 if ( iact->n_vars == 0)
346 ds_put_format (&title, _("Boxplot"));
/* Otherwise a "VAR = VALUE; " description of the category is built and used
   in the title (the argument that passes it on is not visible here). */
351 ds_init_empty (&label);
352 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
354 const struct variable *ivar = iact->vars[ivar_idx];
355 const union value *val = case_data (c, ivar);
357 ds_put_cstr (&label, var_to_string (ivar));
358 ds_put_cstr (&label, " = ");
359 var_append_value_name (ivar, val, &label);
360 ds_put_cstr (&label, "; ");
363 ds_put_format (&title, _("Boxplot of %s"),
369 boxplot = boxplot_create (y_min, y_max, ds_cstr (&title));
/* Second sweep: add one box per dependent variable. */
373 for (v = 0; v < cmd->n_dep_vars; ++v)
375 const struct exploratory_stats *es =
376 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
378 boxplot_add_box (boxplot, es[v].box_whisker,
379 var_to_string (cmd->dep_vars[v]));
382 boxplot_submit (boxplot);
/* For every dependent variable of CMD and every category of the interaction
   numbered IACT_IDX, creates two charts (via np_plot_create and
   dnp_plot_create) from the same reader and label.  If either creation
   returns NULL a warning is issued and both charts are unreferenced; the two
   chart_item_submit calls are presumably the alternative branch (the else is
   not visible in this listing). */
388 show_npplot (const struct examine *cmd, int iact_idx)
390 const struct interaction *iact = cmd->iacts[iact_idx];
391 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
395 for (v = 0; v < cmd->n_dep_vars; ++v)
398 for (grp = 0; grp < n_cats; ++grp)
400 struct chart_item *npp, *dnpp;
401 struct casereader *reader;
405 const struct ccase *c =
406 categoricals_get_case_by_category_real (cmd->cats,
409 const struct exploratory_stats *es =
410 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
/* The label starts with the dependent variable's name ... */
413 ds_init_cstr (&label,
414 var_to_string (cmd->dep_vars[v]));
/* ... and, when there are factor variables, continues with a parenthesised
   list of "VAR = VALUE; " entries describing the category. */
416 if ( iact->n_vars > 0)
418 ds_put_cstr (&label, " (");
419 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
421 const struct variable *ivar = iact->vars[ivar_idx];
422 const union value *val = case_data (c, ivar);
424 ds_put_cstr (&label, var_to_string (ivar));
425 ds_put_cstr (&label, " = ");
426 var_append_value_name (ivar, val, &label);
427 ds_put_cstr (&label, "; ");
430 ds_put_cstr (&label, ")");
/* The writer held by NP is converted into the reader both charts are built
   from.  (The declaration of np is not visible in this listing.) */
434 reader = casewriter_make_reader (np->writer);
437 npp = np_plot_create (np, reader, ds_cstr (&label));
438 dnpp = dnp_plot_create (np, reader, ds_cstr (&label));
440 if (npp == NULL || dnpp == NULL)
442 msg (MW, _("Not creating NP plot because data set is empty."));
443 chart_item_unref (npp);
444 chart_item_unref (dnpp);
448 chart_item_submit (npp);
449 chart_item_submit (dnpp);
451 casereader_destroy (reader);
/* For every dependent variable of CMD and every category of the interaction
   numbered IACT_IDX, builds a label, computes the count, mean and variance
   from the accumulated moments, and creates a histogram chart from the
   category's histogram. */
460 show_histogram (const struct examine *cmd, int iact_idx)
462 const struct interaction *iact = cmd->iacts[iact_idx];
463 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
467 for (v = 0; v < cmd->n_dep_vars; ++v)
470 for (grp = 0; grp < n_cats; ++grp)
474 const struct ccase *c =
475 categoricals_get_case_by_category_real (cmd->cats,
478 const struct exploratory_stats *es =
479 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
/* NOTE(review): the statement guarded by this test is not visible here;
   presumably a category without a histogram is skipped -- confirm. */
483 if (es[v].histogram == NULL)
/* The label starts with the dependent variable's name ... */
486 ds_init_cstr (&label,
487 var_to_string (cmd->dep_vars[v]));
/* ... and, when there are factor variables, continues with a parenthesised
   list of "VAR = VALUE; " entries describing the category. */
489 if ( iact->n_vars > 0)
491 ds_put_cstr (&label, " (");
492 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
494 const struct variable *ivar = iact->vars[ivar_idx];
495 const union value *val = case_data (c, ivar);
497 ds_put_cstr (&label, var_to_string (ivar));
498 ds_put_cstr (&label, " = ");
499 var_append_value_name (ivar, val, &label);
500 ds_put_cstr (&label, "; ");
503 ds_put_cstr (&label, ")");
/* Only the first three moment outputs are requested; the last two are NULL. */
507 moments_calculate (es[v].mom, &n, &mean, &var, NULL, NULL);
510 ( histogram_chart_create (es[v].histogram->gsl_hist,
511 ds_cstr (&label), n, mean,
/* Builds the table titled "Percentiles" for the interaction numbered
   IACT_IDX.

   Layout, as established by the constants below:
   - heading columns: 1 + one per factor variable + 1;
   - one data column per requested percentile (cmd->n_percentiles);
   - 2 heading rows;
   - 2 rows per category for each dependent variable.  The first row is
     labelled from ptile_alg_desc[cmd->pc_alg] and receives the values of
     percentile_calculate.  The second is labelled "Tukey's Hinges" and is
     written to only in columns whose requested percentile equals 25, 50
     or 75. */
521 percentiles_report (const struct examine *cmd, int iact_idx)
523 const struct interaction *iact = cmd->iacts[iact_idx];
525 const int heading_columns = 1 + iact->n_vars + 1;
526 const int heading_rows = 2;
529 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
531 const int rows_per_cat = 2;
532 const int rows_per_var = n_cats * rows_per_cat;
534 const int nr = heading_rows + cmd->n_dep_vars * rows_per_var;
535 const int nc = heading_columns + cmd->n_percentiles;
537 t = tab_create (nc, nr);
538 tab_title (t, _("Percentiles"));
540 tab_headers (t, heading_columns, 0, heading_rows, 0);
542 /* Internal Vertical lines */
543 tab_box (t, -1, -1, -1, TAL_1,
544 heading_columns, 0, nc - 1, nr - 1);
/* Frame around the whole table. */
547 tab_box (t, TAL_2, TAL_2, -1, -1,
548 0, 0, nc - 1, nr - 1);
550 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
551 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
553 tab_joint_text (t, heading_columns, 0,
555 TAT_TITLE | TAB_CENTER,
559 tab_hline (t, TAL_1, heading_columns, nc - 1, 1);
/* Second heading row: the requested percentile of each data column. */
562 for (i = 0; i < cmd->n_percentiles; ++i)
564 tab_text_format (t, heading_columns + i, 1,
565 TAT_TITLE | TAB_CENTER,
566 _("%g"), cmd->ptiles[i]);
/* Heading text for each factor variable. */
569 for (i = 0; i < iact->n_vars; ++i)
574 var_to_string (iact->vars[i])
582 tab_vline (t, TAL_1, heading_columns - 1, heading_rows, nr - 1);
584 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Tracks the factor values of the previous category so that unchanged values
   need not be printed again. */
586 const union value **prev_vals = previous_value_alloc (iact);
590 tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var);
593 0, heading_rows + v * rows_per_var,
594 TAT_TITLE | TAB_LEFT,
595 var_to_string (cmd->dep_vars[v])
598 for (i = 0; i < n_cats; ++i)
600 const struct ccase *c =
601 categoricals_get_case_by_category_real (cmd->cats,
604 const struct exploratory_stats *ess =
605 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i);
/* The user data is an array with one entry per dependent variable. */
607 const struct exploratory_stats *es = ess + v;
609 int diff_idx = previous_value_record (iact, c, prev_vals);
614 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
616 const struct variable *ivar = iact->vars[ivar_idx];
617 const union value *val = case_data (c, ivar);
/* NOTE(review): the rest of this condition is not visible in this listing. */
619 if (( diff_idx != -1 && diff_idx <= ivar_idx)
623 ds_init_empty (&str);
624 var_append_value_name (ivar, val, &str);
628 heading_rows + v * rows_per_var + i * rows_per_cat,
629 TAT_TITLE | TAB_LEFT,
/* Rule starting at the column of the factor reported by diff_idx. */
637 if ( diff_idx != -1 && diff_idx < iact->n_vars)
639 tab_hline (t, TAL_1, 1 + diff_idx, nc - 1,
640 heading_rows + v * rows_per_var + i * rows_per_cat
/* First row of the category: name of the percentile algorithm in use. */
644 tab_text (t, heading_columns - 1,
645 heading_rows + v * rows_per_var + i * rows_per_cat,
646 TAT_TITLE | TAB_LEFT,
647 gettext (ptile_alg_desc [cmd->pc_alg]));
649 tukey_hinges_calculate (es->hinges, hinges);
651 for (p = 0; p < cmd->n_percentiles; ++p)
653 tab_double (t, heading_columns + p,
654 heading_rows + v * rows_per_var + i * rows_per_cat,
656 percentile_calculate (es->percentiles[p], cmd->pc_alg),
/* The second row gets an entry only for the 25th, 50th and 75th percentiles
   (exact floating-point comparison against the requested value). */
659 if (cmd->ptiles[p] == 25.0)
661 tab_double (t, heading_columns + p,
662 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
667 else if (cmd->ptiles[p] == 50.0)
669 tab_double (t, heading_columns + p,
670 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
675 else if (cmd->ptiles[p] == 75.0)
677 tab_double (t, heading_columns + p,
678 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
/* Second row of the category. */
686 tab_text (t, heading_columns - 1,
687 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
688 TAT_TITLE | TAB_LEFT,
689 _("Tukey's Hinges"));
/* Builds the table titled "Descriptives" for the interaction numbered
   IACT_IDX.

   Layout, as established by the constants below:
   - heading columns: 1 + one per factor variable + 2;
   - 2 data columns;
   - 1 heading row;
   - 13 rows per category for each dependent variable.

   Row offsets within a category whose content is visible in this listing:
     +0   calc_semean (m2, m0) in column 1 + n_vars + 3
     +1   "%g%% Confidence Interval for Mean": m1 - tval * calc_semean (m2, m0)
     +2   m1 + tval * calc_semean (m2, m0)
     +3   trimmed_mean_calculate
     +4   percentile_calculate on quartiles[1]
     +9   maxima[0].val - minima[0].val
     +10  "Interquartile Range": quartiles[2] minus quartiles[0]
     +11  calc_seskew (m0) in column 1 + n_vars + 3
     +12  calc_sekurt (m0) in column 1 + n_vars + 3
   The labels and values of the remaining cells are not visible here. */
700 descriptives_report (const struct examine *cmd, int iact_idx)
702 const struct interaction *iact = cmd->iacts[iact_idx];
704 const int heading_columns = 1 + iact->n_vars + 2;
705 const int heading_rows = 1;
708 size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
710 const int rows_per_cat = 13;
711 const int rows_per_var = n_cats * rows_per_cat;
713 const int nr = heading_rows + cmd->n_dep_vars * rows_per_var;
714 const int nc = 2 + heading_columns;
716 t = tab_create (nc, nr);
717 tab_title (t, _("Descriptives"));
719 tab_headers (t, heading_columns, 0, heading_rows, 0);
721 /* Internal Vertical lines */
722 tab_box (t, -1, -1, -1, TAL_1,
723 heading_columns, 0, nc - 1, nr - 1);
/* Frame around the whole table. */
726 tab_box (t, TAL_2, TAL_2, -1, -1,
727 0, 0, nc - 1, nr - 1);
729 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
730 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
/* Headings of the two data columns. */
733 tab_text (t, heading_columns, 0, TAB_CENTER | TAT_TITLE,
736 tab_text (t, heading_columns + 1, 0, TAB_CENTER | TAT_TITLE,
/* Heading text for each factor variable. */
739 for (i = 0; i < iact->n_vars; ++i)
744 var_to_string (iact->vars[i])
748 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Tracks the factor values of the previous category so that unchanged values
   need not be printed again. */
750 const union value **prev_val = previous_value_alloc (iact);
754 tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var);
757 0, heading_rows + v * rows_per_var,
758 TAT_TITLE | TAB_LEFT,
759 var_to_string (cmd->dep_vars[v])
762 for (i = 0; i < n_cats; ++i)
764 const struct ccase *c =
765 categoricals_get_case_by_category_real (cmd->cats,
768 const struct exploratory_stats *ess =
769 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i);
/* The user data is an array with one entry per dependent variable. */
771 const struct exploratory_stats *es = ess + v;
773 const int diff_idx = previous_value_record (iact, c, prev_val);
775 double m0, m1, m2, m3, m4;
778 moments_calculate (es->mom, &m0, &m1, &m2, &m3, &m4);
/* Quantile of the t distribution with m0 - 1 degrees of freedom at upper-tail
   probability (1 - conf) / 2; it scales the standard error in the confidence
   interval rows below. */
780 tval = gsl_cdf_tdist_Qinv ((1.0 - cmd->conf) / 2.0, m0 - 1.0);
782 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
784 const struct variable *ivar = iact->vars[ivar_idx];
785 const union value *val = case_data (c, ivar);
/* NOTE(review): the rest of this condition is not visible in this listing. */
787 if (( diff_idx != -1 && diff_idx <= ivar_idx)
791 ds_init_empty (&str);
792 var_append_value_name (ivar, val, &str);
796 heading_rows + v * rows_per_var + i * rows_per_cat,
797 TAT_TITLE | TAB_LEFT,
/* Rule starting at the column of the factor reported by diff_idx. */
805 if ( diff_idx != -1 && diff_idx < iact->n_vars)
807 tab_hline (t, TAL_1, 1 + diff_idx, nc - 1,
808 heading_rows + v * rows_per_var + i * rows_per_cat
/* Row +0. */
814 heading_rows + v * rows_per_var + i * rows_per_cat,
820 1 + iact->n_vars + 2,
821 heading_rows + v * rows_per_var + i * rows_per_cat,
825 1 + iact->n_vars + 3,
826 heading_rows + v * rows_per_var + i * rows_per_cat,
827 0, calc_semean (m2, m0), 0);
/* Row +1: lower end of the confidence interval. */
831 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
833 _("%g%% Confidence Interval for Mean"),
838 1 + iact->n_vars + 1,
839 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
845 1 + iact->n_vars + 2,
846 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
847 0, m1 - tval * calc_semean (m2, m0), 0);
/* Row +2: upper end of the confidence interval. */
851 1 + iact->n_vars + 1,
852 heading_rows + v * rows_per_var + i * rows_per_cat + 2,
858 1 + iact->n_vars + 2,
859 heading_rows + v * rows_per_var + i * rows_per_cat + 2,
860 0, m1 + tval * calc_semean (m2, m0), 0);
/* Row +3: trimmed mean. */
865 heading_rows + v * rows_per_var + i * rows_per_cat + 3,
871 1 + iact->n_vars + 2,
872 heading_rows + v * rows_per_var + i * rows_per_cat + 3,
874 trimmed_mean_calculate (es->trimmed_mean),
/* Row +4: the middle quartile. */
879 heading_rows + v * rows_per_var + i * rows_per_cat + 4,
885 1 + iact->n_vars + 2,
886 heading_rows + v * rows_per_var + i * rows_per_cat + 4,
888 percentile_calculate (es->quartiles[1], cmd->pc_alg),
/* Rows +5 to +8: the labels and values are not visible in this listing. */
894 heading_rows + v * rows_per_var + i * rows_per_cat + 5,
900 1 + iact->n_vars + 2,
901 heading_rows + v * rows_per_var + i * rows_per_cat + 5,
906 heading_rows + v * rows_per_var + i * rows_per_cat + 6,
912 1 + iact->n_vars + 2,
913 heading_rows + v * rows_per_var + i * rows_per_cat + 6,
918 heading_rows + v * rows_per_var + i * rows_per_cat + 7,
924 1 + iact->n_vars + 2,
925 heading_rows + v * rows_per_var + i * rows_per_cat + 7,
932 heading_rows + v * rows_per_var + i * rows_per_cat + 8,
938 1 + iact->n_vars + 2,
939 heading_rows + v * rows_per_var + i * rows_per_cat + 8,
/* Row +9: difference between the first recorded maximum and minimum. */
946 heading_rows + v * rows_per_var + i * rows_per_cat + 9,
952 1 + iact->n_vars + 2,
953 heading_rows + v * rows_per_var + i * rows_per_cat + 9,
955 es->maxima[0].val - es->minima[0].val,
/* Row +10: third quartile minus first quartile. */
960 heading_rows + v * rows_per_var + i * rows_per_cat + 10,
962 _("Interquartile Range")
967 1 + iact->n_vars + 2,
968 heading_rows + v * rows_per_var + i * rows_per_cat + 10,
970 percentile_calculate (es->quartiles[2], cmd->pc_alg) -
971 percentile_calculate (es->quartiles[0], cmd->pc_alg),
/* Row +11: the second data column holds calc_seskew (m0). */
979 heading_rows + v * rows_per_var + i * rows_per_cat + 11,
985 1 + iact->n_vars + 2,
986 heading_rows + v * rows_per_var + i * rows_per_cat + 11,
990 1 + iact->n_vars + 3,
991 heading_rows + v * rows_per_var + i * rows_per_cat + 11,
992 0, calc_seskew (m0), 0);
/* Row +12: the second data column holds calc_sekurt (m0). */
996 heading_rows + v * rows_per_var + i * rows_per_cat + 12,
1002 1 + iact->n_vars + 2,
1003 heading_rows + v * rows_per_var + i * rows_per_cat + 12,
1007 1 + iact->n_vars + 3,
1008 heading_rows + v * rows_per_var + i * rows_per_cat + 12,
1009 0, calc_sekurt (m0), 0);
/* Builds the table titled "Extreme Values" for the interaction numbered
   IACT_IDX.

   Layout, as established by the constants below:
   - heading columns: 1 + one per factor variable + 2;
   - 2 data columns;
   - 1 heading row;
   - 2 * cmd->disp_extremes rows per category for each dependent variable.
     The first disp_extremes rows are filled from es->maxima[], the following
     disp_extremes rows from es->minima[]. */
1019 extremes_report (const struct examine *cmd, int iact_idx)
1021 const struct interaction *iact = cmd->iacts[iact_idx];
1023 const int heading_columns = 1 + iact->n_vars + 2;
1024 const int heading_rows = 1;
1025 struct tab_table *t;
1027 size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
1029 const int rows_per_cat = 2 * cmd->disp_extremes;
1030 const int rows_per_var = n_cats * rows_per_cat;
1032 const int nr = heading_rows + cmd->n_dep_vars * rows_per_var;
1033 const int nc = 2 + heading_columns;
1035 t = tab_create (nc, nr);
1036 tab_title (t, _("Extreme Values"));
1038 tab_headers (t, heading_columns, 0, heading_rows, 0);
1040 /* Internal Vertical lines */
1041 tab_box (t, -1, -1, -1, TAL_1,
1042 heading_columns, 0, nc - 1, nr - 1);
1044 /* External Frame */
1045 tab_box (t, TAL_2, TAL_2, -1, -1,
1046 0, 0, nc - 1, nr - 1);
1048 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
1049 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
/* Heading of the first data column: either the name of the ID variable or an
   alternative text.  NOTE(review): both calls target the same cell, so they
   are presumably the two arms of a test on cmd->id_var that is not visible
   in this listing. */
1053 tab_text (t, heading_columns, 0, TAB_CENTER | TAT_TITLE,
1054 var_to_string (cmd->id_var));
1056 tab_text (t, heading_columns, 0, TAB_CENTER | TAT_TITLE,
1059 tab_text (t, heading_columns + 1, 0, TAB_CENTER | TAT_TITLE,
/* Heading text for each factor variable. */
1062 for (i = 0; i < iact->n_vars; ++i)
1067 var_to_string (iact->vars[i])
1071 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Tracks the factor values of the previous category so that unchanged values
   need not be printed again. */
1073 const union value **prev_val = previous_value_alloc (iact);
1077 tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var);
1080 0, heading_rows + v * rows_per_var,
1082 var_to_string (cmd->dep_vars[v])
1085 for (i = 0; i < n_cats; ++i)
1088 const struct ccase *c =
1089 categoricals_get_case_by_category_real (cmd->cats, iact_idx, i);
1091 const struct exploratory_stats *ess =
1092 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i);
/* The user data is an array with one entry per dependent variable. */
1094 const struct exploratory_stats *es = ess + v;
1096 int diff_idx = previous_value_record (iact, c, prev_val);
1098 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
1100 const struct variable *ivar = iact->vars[ivar_idx];
1101 const union value *val = case_data (c, ivar);
/* NOTE(review): the rest of this condition is not visible in this listing. */
1103 if (( diff_idx != -1 && diff_idx <= ivar_idx)
1107 ds_init_empty (&str);
1108 var_append_value_name (ivar, val, &str);
1112 heading_rows + v * rows_per_var + i * rows_per_cat,
1113 TAT_TITLE | TAB_LEFT,
/* Rule starting at the column of the factor reported by diff_idx. */
1121 if ( diff_idx != -1 && diff_idx < iact->n_vars)
1123 tab_hline (t, TAL_1, 1 + diff_idx, nc - 1,
1124 heading_rows + v * rows_per_var + i * rows_per_cat
1129 heading_columns - 2,
1130 heading_rows + v * rows_per_var + i * rows_per_cat,
/* Rule separating the first disp_extremes rows of the category from the
   second disp_extremes rows. */
1135 tab_hline (t, TAL_1, heading_columns - 2, nc - 1,
1136 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes
1140 heading_columns - 2,
1141 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes,
/* One iteration fills row E of the upper half (from maxima[E]) and row E of
   the lower half (from minima[E]). */
1145 for (e = 0 ; e < cmd->disp_extremes; ++e)
1148 heading_columns - 1,
1149 heading_rows + v * rows_per_var + i * rows_per_cat + e,
1154 /* The casenumber */
1158 heading_rows + v * rows_per_var + i * rows_per_cat + e,
1160 &es->maxima[e].identity,
1166 heading_rows + v * rows_per_var + i * rows_per_cat + e,
1168 es->maxima[e].identity.f,
1172 heading_columns + 1,
1173 heading_rows + v * rows_per_var + i * rows_per_cat + e,
1181 heading_columns - 1,
1182 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
1187 /* The casenumber */
1191 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
1193 &es->minima[e].identity,
1199 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
1201 es->minima[e].identity.f,
1205 heading_columns + 1,
1206 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
/* Builds the table titled "Case Processing Summary" for the interaction
   numbered IACT_IDX.

   Layout, as established by the constants below:
   - heading columns: 1 + one per factor variable;
   - 6 data columns under a joint "Cases" heading, grouped in pairs headed
     "Valid", "Missing" and "Total";
   - 3 heading rows;
   - one row per category for each dependent variable.

   The percentage cells are computed as 100 * count / total, where total is
   the sum of the missing and non-missing counts of the category. */
1220 summary_report (const struct examine *cmd, int iact_idx)
1222 const struct interaction *iact = cmd->iacts[iact_idx];
1224 const int heading_columns = 1 + iact->n_vars;
1225 const int heading_rows = 3;
1226 struct tab_table *t;
/* Use the weight variable's print format when there is one, otherwise
   F_8_0. */
1228 const struct fmt_spec *wfmt = cmd->wv ? var_get_print_format (cmd->wv) : &F_8_0;
1230 size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
1232 const int nr = heading_rows + n_cats * cmd->n_dep_vars;
1233 const int nc = 6 + heading_columns;
1235 t = tab_create (nc, nr);
1236 tab_title (t, _("Case Processing Summary"));
1238 tab_headers (t, heading_columns, 0, heading_rows, 0);
1240 /* Internal Vertical lines */
1241 tab_box (t, -1, -1, -1, TAL_1,
1242 heading_columns, 0, nc - 1, nr - 1);
1244 /* External Frame */
1245 tab_box (t, TAL_2, TAL_2, -1, -1,
1246 0, 0, nc - 1, nr - 1);
1248 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
1249 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
1251 tab_joint_text (t, heading_columns, 0,
1252 nc - 1, 0, TAB_CENTER | TAT_TITLE, _("Cases"));
1255 heading_columns + 1, 1,
1256 TAB_CENTER | TAT_TITLE, _("Valid"));
1259 heading_columns + 2, 1,
1260 heading_columns + 3, 1,
1261 TAB_CENTER | TAT_TITLE, _("Missing"));
1264 heading_columns + 4, 1,
1265 heading_columns + 5, 1,
1266 TAB_CENTER | TAT_TITLE, _("Total"));
/* Third heading row: two sub-headings under each of the three groups. */
1268 for (i = 0; i < 3; ++i)
1270 tab_text (t, heading_columns + i * 2, 2, TAB_CENTER | TAT_TITLE,
1272 tab_text (t, heading_columns + i * 2 + 1, 2, TAB_CENTER | TAT_TITLE,
/* Heading text for each factor variable. */
1276 for (i = 0; i < iact->n_vars; ++i)
1281 var_to_string (iact->vars[i])
1286 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Tracks the factor values of the previous category so that unchanged values
   need not be printed again. */
1289 const union value **prev_values = previous_value_alloc (iact);
1292 tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * n_cats);
1295 0, heading_rows + n_cats * v,
1297 var_to_string (cmd->dep_vars[v])
1301 for (i = 0; i < n_cats; ++i)
1304 const struct exploratory_stats *es;
1306 const struct ccase *c =
1307 categoricals_get_case_by_category_real (cmd->cats,
1311 int diff_idx = previous_value_record (iact, c, prev_values);
/* Unlike the other reports, the rule is drawn only when diff_idx is below
   n_vars - 1. */
1313 if ( diff_idx != -1 && diff_idx < iact->n_vars - 1)
1314 tab_hline (t, TAL_1, 1 + diff_idx, nc - 1,
1315 heading_rows + n_cats * v + i );
1317 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
1319 const struct variable *ivar = iact->vars[ivar_idx];
1320 const union value *val = case_data (c, ivar);
/* NOTE(review): the rest of this condition is not visible in this listing. */
1322 if (( diff_idx != -1 && diff_idx <= ivar_idx)
1326 ds_init_empty (&str);
1327 var_append_value_name (ivar, val, &str);
1330 1 + ivar_idx, heading_rows + n_cats * v + i,
1331 TAT_TITLE | TAB_LEFT,
1341 es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i);
/* Denominator of the three percentage cells below. */
1344 total = es[v].missing + es[v].non_missing;
1346 heading_columns + 0,
1347 heading_rows + n_cats * v + i,
1354 heading_columns + 1,
1355 heading_rows + n_cats * v + i,
1358 100.0 * es[v].non_missing / total
1363 heading_columns + 2,
1364 heading_rows + n_cats * v + i,
1370 heading_columns + 3,
1371 heading_rows + n_cats * v + i,
1374 100.0 * es[v].missing / total
1377 heading_columns + 4,
1378 heading_rows + n_cats * v + i,
1383 /* This can only be 100% can't it? */
1385 heading_columns + 5,
1386 heading_rows + n_cats * v + i,
1389 100.0 * (es[v].missing + es[v].non_missing)/ total
/* Rules under the first and second heading rows, across the data columns. */
1395 tab_hline (t, TAL_1, heading_columns, nc - 1, 1);
1396 tab_hline (t, TAL_1, heading_columns, nc - 1, 2);
1402 /* Match a variable.
1403 If the match succeeds, the variable will be placed in VAR.
1404 Returns true if successful */
/* The current token must be an identifier (T_ID) for a match to be attempted;
   the variable itself is then parsed from DICT by parse_variable_const.
   NOTE(review): the return statements are not visible in this listing. */
1406 lex_match_variable (struct lexer *lexer,
1407 const struct dictionary *dict, const struct variable **var)
1409 if (lex_token (lexer) != T_ID)
1413 *var = parse_variable_const (lexer, dict);
1420 /* Attempt to parse an interaction from LEXER */
/* An interaction is a variable followed by any number of "BY variable"
   clauses.  Variables are looked up in EX's dictionary.  If a variable fails
   to match after BY, the partially built interaction is destroyed.
   NOTE(review): the return statements are not visible in this listing. */
1421 static struct interaction *
1422 parse_interaction (struct lexer *lexer, struct examine *ex)
1424 const struct variable *v = NULL;
1425 struct interaction *iact = NULL;
1427 if ( lex_match_variable (lexer, ex->dict, &v))
/* The first variable starts the interaction ... */
1429 iact = interaction_create (v);
/* ... and each subsequent BY adds one more variable to it. */
1431 while (lex_match (lexer, T_BY))
1433 if (!lex_match_variable (lexer, ex->dict, &v))
1435 interaction_destroy (iact);
1438 interaction_add_variable (iact, v);
/* Consume a comma if one follows. */
1440 lex_match (lexer, T_COMMA);
/* Payload "create" callback (installed as payload.create by run_examine).
   AUX1 is the struct examine.  Allocates, from the examine's pool, an array
   with one exploratory_stats entry per dependent variable and initialises
   each entry: a sorting casewriter over ex_proto, no reader yet, a moments
   accumulator created with MOMENT_KURTOSIS, and extreme starting values for
   cmin, maximum and minimum so that the first datum replaces them.
   NOTE(review): the return statement is not visible in this listing. */
1448 create_n (const void *aux1, void *aux2 UNUSED)
1452 const struct examine *examine = aux1;
1453 struct exploratory_stats *es = pool_calloc (examine->pool, examine->n_dep_vars, sizeof (*es));
/* Sort key for the writers: subcase_init is given (0, 0, SC_ASCEND) --
   presumably case index 0 (EX_VAL), width 0 (numeric), ascending; confirm
   against the subcase API. */
1454 struct subcase ordering;
1455 subcase_init (&ordering, 0, 0, SC_ASCEND);
1457 for (v = 0; v < examine->n_dep_vars; v++)
1459 es[v].sorted_writer = sort_create_writer (&ordering, examine->ex_proto);
1460 es[v].sorted_reader = NULL;
1462 es[v].mom = moments_create (MOMENT_KURTOSIS);
1463 es[v].cmin = DBL_MAX;
1465 es[v].maximum = -DBL_MAX;
1466 es[v].minimum = DBL_MAX;
1469 subcase_destroy (&ordering);
/* Payload "update" callback (installed as payload.update by run_examine).
   AUX1 is the struct examine, USER_DATA the exploratory_stats array made by
   create_n, C the incoming case and WEIGHT its weight.

   For each dependent variable: a value that is missing according to
   examine->exclude adds WEIGHT to the missing count.  Otherwise the value
   takes part in the running maximum/minimum tests, WEIGHT is added to the
   non-missing count, the value is fed to the first moments pass, and a new
   case holding the value, the ID and the weight is written to the variable's
   sorted writer.  The smallest weight seen is kept in cmin. */
1474 update_n (const void *aux1, void *aux2 UNUSED, void *user_data,
1475 const struct ccase *c, double weight)
1478 const struct examine *examine = aux1;
1479 struct exploratory_stats *es = user_data;
1481 for (v = 0; v < examine->n_dep_vars; v++)
1483 struct ccase *outcase ;
1484 const struct variable *var = examine->dep_vars[v];
1485 const double x = case_data (c, var)->f;
/* NOTE(review): presumably the loop continues with the next variable after
   counting a missing value; the statement doing so is not visible here. */
1487 if (var_is_value_missing (var, case_data (c, var), examine->exclude))
1489 es[v].missing += weight;
1493 outcase = case_create (examine->ex_proto);
1495 if (x > es[v].maximum)
1498 if (x < es[v].minimum)
1501 es[v].non_missing += weight;
1503 moments_pass_one (es[v].mom, x, weight);
1505 /* Save the value and the ID to the writer */
1506 assert (examine->id_idx != -1);
1507 case_data_rw_idx (outcase, EX_VAL)->f = x;
1508 value_copy (case_data_rw_idx (outcase, EX_ID),
1509 case_data_idx (c, examine->id_idx), examine->id_width);
1511 case_data_rw_idx (outcase, EX_WT)->f = weight;
/* Track the minimum weight. */
1515 if (es[v].cmin > weight)
1516 es[v].cmin = weight;
1518 casewriter_write (es[v].sorted_writer, outcase);
/* Payload callback installed as payload.destroy by run_examine.  AUX1 is the
   struct examine, USER_DATA the exploratory_stats array made by create_n.

   For each dependent variable it:
   - creates the histogram, when examine->histogram is set;
   - turns the sorted writer into the sorted reader;
   - allocates the minima/maxima arrays (calc_extremes entries each) from the
     examine's pool and initialises their identity values;
   - reads a clone of the sorted reader once, feeding the second moments
     pass and the histogram and filling the minima/maxima arrays;
   - accumulates the order statistics (trimmed mean, three quartiles, Tukey
     hinges and the requested percentiles) over another clone;
   - builds the box-whisker and NP statistics when examine->boxplot and
     examine->npplot respectively are set. */
1523 calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data)
1526 const struct examine *examine = aux1;
1527 struct exploratory_stats *es = user_data;
1529 for (v = 0; v < examine->n_dep_vars; v++)
1532 casenumber imin = 0;
1533 double imax = es[v].cc;
1534 struct casereader *reader;
1536 casenumber total_cases;
1538 if (examine->histogram)
/* Initial bin width: the data range divided by (1 + log2 of cc), then
   rounded by chart_rounded_tick.  NOTE(review): the expression continues on
   a line not visible in this listing. */
1541 double bin_width = fabs (es[v].minimum - es[v].maximum)
1542 / (1 + log2 (es[v].cc))
1545 bin_width = chart_rounded_tick (bin_width);
1548 histogram_create (bin_width, es[v].minimum, es[v].maximum);
/* The writer is converted into a reader; the writer pointer is cleared
   afterwards. */
1551 es[v].sorted_reader = casewriter_make_reader (es[v].sorted_writer);
1552 total_cases = casereader_count_cases (es[v].sorted_reader);
1553 es[v].sorted_writer = NULL;
1555 es[v].maxima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].maxima));
1556 es[v].minima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].minima));
1557 for (i = 0; i < examine->calc_extremes; ++i)
1559 value_init_pool (examine->pool, &es[v].maxima[i].identity, examine->id_width) ;
1560 value_init_pool (examine->pool, &es[v].minima[i].identity, examine->id_width) ;
/* Single pass over a clone of the sorted data. */
1563 for (reader = casereader_clone (es[v].sorted_reader);
1564 (c = casereader_read (reader)) != NULL; case_unref (c))
1566 const double val = case_data_idx (c, EX_VAL)->f;
1567 const double wt = case_data_idx (c, EX_WT)->f; /* FIXME: What about fractional weights ??? */
1569 moments_pass_two (es[v].mom, val, wt);
1571 if (es[v].histogram)
1572 histogram_add (es[v].histogram, val, wt);
/* While minima slots remain, record this case's identity in them. */
1574 if (imin < examine->calc_extremes)
1577 for (x = imin; x < examine->calc_extremes; ++x)
1579 struct extremity *min = &es[v].minima[x];
1581 value_copy (&min->identity, case_data_idx (c, EX_ID), examine->id_width);
/* Once imax falls below calc_extremes, record this case in the maxima
   slots it covers.  NOTE(review): the statements updating imin and imax are
   not visible in this listing. */
1587 if (imax < examine->calc_extremes)
1591 for (x = imax; x < imax + wt; ++x)
1593 struct extremity *max;
1595 if (x >= examine->calc_extremes)
1598 max = &es[v].maxima[x];
1600 value_copy (&max->identity, case_data_idx (c, EX_ID), examine->id_width);
1604 casereader_destroy (reader);
/* Consistency check between the extremity arrays and the running
   minimum/maximum kept by update_n. */
1606 if (examine->calc_extremes > 0)
1608 assert (es[v].minima[0].val == es[v].minimum);
1609 assert (es[v].maxima[0].val == es[v].maximum);
/* Order statistics: slot 0 is the trimmed mean, slots 1-3 the quartiles,
   slot 4 the Tukey hinges, and slots 5 onwards the requested percentiles. */
1613 const int n_os = 5 + examine->n_percentiles;
1614 struct order_stats **os ;
1615 es[v].percentiles = pool_calloc (examine->pool, examine->n_percentiles, sizeof (*es[v].percentiles));
1617 es[v].trimmed_mean = trimmed_mean_create (es[v].cc, 0.05);
1619 os = xcalloc (n_os, sizeof *os);
1620 os[0] = &es[v].trimmed_mean->parent;
1622 es[v].quartiles[0] = percentile_create (0.25, es[v].cc);
1623 es[v].quartiles[1] = percentile_create (0.5, es[v].cc);
1624 es[v].quartiles[2] = percentile_create (0.75, es[v].cc);
1626 os[1] = &es[v].quartiles[0]->parent;
1627 os[2] = &es[v].quartiles[1]->parent;
1628 os[3] = &es[v].quartiles[2]->parent;
1630 es[v].hinges = tukey_hinges_create (es[v].cc, es[v].cmin);
1631 os[4] = &es[v].hinges->parent;
/* Requested percentiles are given in percent, so they are divided by 100
   before being passed to percentile_create. */
1633 for (i = 0; i < examine->n_percentiles; ++i)
1635 es[v].percentiles[i] = percentile_create (examine->ptiles[i] / 100.00, es[v].cc);
1636 os[5 + i] = &es[v].percentiles[i]->parent;
1639 order_stats_accumulate_idx (os, n_os,
1640 casereader_clone (es[v].sorted_reader),
/* The box-whisker statistic is built from the hinges computed above. */
1646 if (examine->boxplot)
1648 struct order_stats *os;
1650 es[v].box_whisker = box_whisker_create (es[v].hinges,
1651 EX_ID, examine->id_var);
1653 os = &es[v].box_whisker->parent;
1654 order_stats_accumulate_idx (&os, 1,
1655 casereader_clone (es[v].sorted_reader),
/* The NP statistic is created from the count, mean and variance of the
   accumulated moments. */
1659 if (examine->npplot)
1661 double n, mean, var;
1662 struct order_stats *os;
1664 moments_calculate (es[v].mom, &n, &mean, &var, NULL, NULL);
1666 es[v].np = np_create (n, mean, var);
1668 os = &es[v].np->parent;
1670 order_stats_accumulate_idx (&os, 1,
1671 casereader_clone (es[v].sorted_reader),
/* Destroys the statistics that were accumulated for every interaction
   of CMD.

   For each interaction I, each dependent variable V and each category
   GRP, the exploratory_stats array attached as user data to category
   GRP of interaction I is fetched, and the objects held in its element
   V are released: the Tukey hinges, the three quartiles, the
   CMD->n_percentiles percentiles, the trimmed mean, the np statistic,
   the histogram, the moments and the sorted casereader.

   NOTE(review): this listing omits some lines of the function (braces,
   local declarations and other short statements are not shown), so the
   comments below describe only the statements that are visible. */
1679 cleanup_exploratory_stats (struct examine *cmd)
/* One pass per interaction. */
1682 for (i = 0; i < cmd->n_iacts; ++i)
/* Number of categories recorded for interaction I. */
1685 const size_t n_cats = categoricals_n_count (cmd->cats, i);
/* One pass per dependent variable. */
1687 for (v = 0; v < cmd->n_dep_vars; ++v)
/* One pass per category of the interaction. */
1690 for (grp = 0; grp < n_cats; ++grp)
/* ES is the user data attached to category GRP of interaction I; it is
   indexed by dependent variable below. */
1693 const struct exploratory_stats *es =
1694 categoricals_get_user_data_by_category_real (cmd->cats, i, grp);
/* Tukey hinges: step from the order_stats member to the statistic
   member inside it and invoke that statistic's own destroy callback. */
1696 struct order_stats *os = &es[v].hinges->parent;
1697 struct statistic *stat = &os->parent;
1698 stat->destroy (stat);
/* The three quartiles.
   NOTE(review): OS is reassigned before each of the remaining destroy
   calls, but no matching update of STAT is visible in this listing;
   confirm that STAT is refreshed from OS every time. */
1700 for (q = 0; q < 3 ; q++)
1702 os = &es[v].quartiles[q]->parent;
1704 stat->destroy (stat);
/* The percentiles requested by the user, CMD->n_percentiles of them. */
1707 for (q = 0; q < cmd->n_percentiles ; q++)
1709 os = &es[v].percentiles[q]->parent;
1711 stat->destroy (stat);
/* The trimmed mean. */
1714 os = &es[v].trimmed_mean->parent;
1716 stat->destroy (stat);
/* The np statistic.
   NOTE(review): elsewhere in this file es[v].np appears to be created
   only when an NPPLOT was requested; presumably a guard precedes this
   destroy call, but none is visible here -- confirm. */
1718 os = &es[v].np->parent;
1722 stat->destroy (stat);
/* The histogram, the moments and the sorted reader are released through
   their dedicated destroy functions. */
1725 statistic_destroy (&es[v].histogram->parent);
1726 moments_destroy (es[v].mom);
1728 casereader_destroy (es[v].sorted_reader);
/* Runs the analysis described by CMD over the cases supplied by INPUT
   and emits the requested reports and charts.

   The cases are fed into a categoricals object whose payload callbacks
   (create_n, update_n, calculate_n) build the per-category statistics.
   Once all cases have been consumed, the reports are produced for each
   interaction and the statistics and the categoricals object are
   destroyed.  INPUT is consumed and destroyed here, through READER.

   NOTE(review): this listing omits some lines of the function (braces,
   case labels, short declarations and short statements are not shown),
   so the comments below describe only the statements that are
   visible. */
1736 run_examine (struct examine *cmd, struct casereader *input)
1740 struct casereader *reader;
/* Callbacks handed to the categoricals object; note that the destroy
   slot is filled with calculate_n. */
1742 struct payload payload;
1743 payload.create = create_n;
1744 payload.update = update_n;
1745 payload.destroy = calculate_n;
/* The weight variable is taken from the dictionary. */
1747 cmd->wv = dict_get_weight (cmd->dict);
/* The start of the following assignment is not visible in this listing;
   its result is presumably stored in cmd->cats, which is what the later
   calls use -- confirm. */
1750 = categoricals_create (cmd->iacts, cmd->n_iacts,
1751 cmd->wv, cmd->exclude);
1753 categoricals_set_payload (cmd->cats, &payload, cmd, NULL);
/* An id_idx of -1 means that no ID variable was given (asserted below).
   The id index is then set to the value count of the first case, and
   INPUT is wrapped in an arithmetic-sequence reader with first value
   1.0 and step 1.0, presumably so that the appended value serves as the
   case identifier -- confirm.
   NOTE(review): the case returned by casereader_peek is used without a
   visible null check and no release of it is visible in this listing;
   confirm that empty input is handled and that the reference is
   dropped. */
1755 if (cmd->id_idx == -1)
1757 struct ccase *c = casereader_peek (input, 0);
1759 assert (cmd->id_var == NULL);
1761 cmd->id_idx = case_get_value_cnt (c);
1762 input = casereader_create_arithmetic_sequence (input, 1.0, 1.0);
1767 /* FIXME: Filter out missing factor variables */
1769 /* Remove cases on a listwise basis if requested */
1770 if ( cmd->missing_pw == false)
1771 input = casereader_create_filter_missing (input,
/* The remaining arguments of the call above are not visible in this
   listing.

   Feed every case to the categoricals object; the reference on each
   case is dropped in the loop's increment expression. */
1778 for (reader = input;
1779 (c = casereader_read (reader)) != NULL; case_unref (c))
1781 categoricals_update (cmd->cats, c);
/* All cases have been read: release the reader and tell the
   categoricals object that no more data will follow. */
1783 casereader_destroy (reader);
1784 categoricals_done (cmd->cats);
/* Produce the output for each interaction in turn. */
1786 for (i = 0; i < cmd->n_iacts; ++i)
1788 summary_report (cmd, i);
1790 if (cmd->disp_extremes > 0)
1791 extremes_report (cmd, i);
1793 if (cmd->n_percentiles > 0)
1794 percentiles_report (cmd, i);
/* Boxplots are drawn either grouped or per variable, depending on
   boxplot_mode.  The case labels, and whatever conditions guard the
   chart calls below, are not visible in this listing. */
1798 switch (cmd->boxplot_mode)
1801 show_boxplot_grouped (cmd, i);
1804 show_boxplot_variabled (cmd, i);
1813 show_histogram (cmd, i);
1816 show_npplot (cmd, i);
1818 if (cmd->descriptives)
1819 descriptives_report (cmd, i);
/* Release the per-category statistics, then the categoricals object. */
1822 cleanup_exploratory_stats (cmd);
1823 categoricals_destroy (cmd->cats);
/* Parses the EXAMINE command from LEXER and runs it on the dataset DS.

   The function fills a local struct examine with defaults, parses the
   dependent variable list, the optional BY interactions and the
   subcommands (STATISTICS, PERCENTILES, TOTAL, NOTOTAL, MISSING,
   COMPARE, PLOT, CINTERVAL, ID), derives the remaining settings, and
   then calls run_examine once for every split group of the active
   dataset.  It finishes by releasing the case prototype, the
   interactions, the percentile and variable arrays and the pool.

   NOTE(review): this listing omits some lines of the function (braces,
   else branches, jumps, return statements, short declarations and short
   assignments are not shown), so the comments below describe only the
   statements that are visible. */
1828 cmd_examine (struct lexer *lexer, struct dataset *ds)
/* Flags consulted after parsing to reject TOTAL combined with
   NOTOTAL. */
1831 bool nototals_seen = false;
1832 bool totals_seen = false;
/* Storage for the array of interactions; examine.iacts is pointed into
   it further down. */
1834 struct interaction **iacts_mem = NULL;
1835 struct examine examine;
/* Set as soon as a PERCENTILES subcommand is met, so that the default
   percentile list can be supplied when none was given. */
1836 bool percentiles_seen = false;
/* Settings that apply when the corresponding subcommand is absent.
   NOTE(review): no initialisation of examine.dep_vars or
   examine.n_dep_vars is visible among these defaults, yet both cleanup
   sequences at the end of the function pass examine.dep_vars to free;
   confirm that they are set before any early exit can reach that
   cleanup. */
1838 examine.missing_pw = false;
1839 examine.disp_extremes = 0;
1840 examine.calc_extremes = 0;
1841 examine.descriptives = false;
1842 examine.conf = 0.95;
1843 examine.pc_alg = PC_HAVERAGE;
1844 examine.ptiles = NULL;
1845 examine.n_percentiles = 0;
1846 examine.id_idx = -1;
1847 examine.id_width = 0;
1848 examine.id_var = NULL;
1849 examine.boxplot_mode = BP_GROUPS;
1851 examine.ex_proto = caseproto_create ();
1853 examine.pool = pool_create ();
1855 /* Allocate space for the first interaction.
1856 This interaction is an empty one (for the totals).
1857 If no totals are requested, we will simply ignore this
1860 examine.n_iacts = 1;
1861 examine.iacts = iacts_mem = pool_zalloc (examine.pool, sizeof (struct interaction *));
1862 examine.iacts[0] = interaction_create (NULL);
1864 examine.exclude = MV_ANY;
1865 examine.histogram = false;
1866 examine.npplot = false;
1867 examine.boxplot = false;
1869 examine.dict = dataset_dict (ds);
1871 /* Accept an optional, completely pointless "/VARIABLES=" */
1872 lex_match (lexer, T_SLASH);
1873 if (lex_match_id (lexer, "VARIABLES"))
1875 if (! lex_force_match (lexer, T_EQUALS) )
/* The dependent variables: numeric only, duplicates rejected. */
1879 if (!parse_variables_const (lexer, examine.dict,
1880 &examine.dep_vars, &examine.n_dep_vars,
1881 PV_NO_DUPLICATE | PV_NUMERIC))
/* BY introduces the factor interactions.  Each parsed interaction is
   stored in the last slot of iacts_mem, which is grown from the pool.
   NOTE(review): examine.iacts was pointed at iacts_mem before this
   reallocation and is only visibly reassigned after the subcommand
   loop; confirm that an early exit taken in between cannot reach the
   cleanup code with a stale examine.iacts. */
1884 if (lex_match (lexer, T_BY))
1886 struct interaction *iact = NULL;
1889 iact = parse_interaction (lexer, &examine);
1894 pool_nrealloc (examine.pool, iacts_mem,
1896 sizeof (*iacts_mem));
1898 iacts_mem[examine.n_iacts - 1] = iact;
/* Subcommand loop: runs until the end of the command, tolerating a
   slash before each subcommand. */
1905 while (lex_token (lexer) != T_ENDCMD)
1907 lex_match (lexer, T_SLASH);
/* STATISTICS accepts DESCRIPTIVES, EXTREME with an optional
   parenthesised count, NONE and ALL. */
1909 if (lex_match_id (lexer, "STATISTICS"))
1911 lex_match (lexer, T_EQUALS);
1913 while (lex_token (lexer) != T_ENDCMD
1914 && lex_token (lexer) != T_SLASH)
1916 if (lex_match_id (lexer, "DESCRIPTIVES"))
1918 examine.descriptives = true;
1920 else if (lex_match_id (lexer, "EXTREME"))
/* The count of extreme values to display is read as an integer.  The
   warning below reports a rejected count; the condition guarding it and
   the initial value of extr are not visible in this listing. */
1923 if (lex_match (lexer, T_LPAREN))
1925 extr = lex_integer (lexer);
1929 msg (MW, _("%s may not be negative. Using default value (%g)."), "EXTREME", 5.0);
1934 if (! lex_force_match (lexer, T_RPAREN))
1937 examine.disp_extremes = extr;
1939 else if (lex_match_id (lexer, "NONE"))
/* ALL requests five extremes unless a count has already been set. */
1942 else if (lex_match (lexer, T_ALL))
1944 if (examine.disp_extremes == 0)
1945 examine.disp_extremes = 5;
1949 lex_error (lexer, NULL);
/* PERCENTILES takes an optional parenthesised list of numbers, followed
   by the name of the calculation algorithm. */
1954 else if (lex_match_id (lexer, "PERCENTILES"))
1956 percentiles_seen = true;
1957 if (lex_match (lexer, T_LPAREN))
1959 while (lex_is_number (lexer))
1961 double p = lex_number (lexer);
/* Both 0 and 100 themselves are rejected. */
1963 if ( p <= 0 || p >= 100.0)
1966 _("Percentiles must lie in the range (0, 100)"));
/* Grow the percentile array by one element and append P. */
1970 examine.n_percentiles++;
1972 xrealloc (examine.ptiles,
1973 sizeof (*examine.ptiles) *
1974 examine.n_percentiles);
1976 examine.ptiles[examine.n_percentiles - 1] = p;
1979 lex_match (lexer, T_COMMA);
1981 if (!lex_force_match (lexer, T_RPAREN))
1985 lex_match (lexer, T_EQUALS);
/* Algorithm keywords; when several are given the last one wins, since
   each simply overwrites pc_alg. */
1987 while (lex_token (lexer) != T_ENDCMD
1988 && lex_token (lexer) != T_SLASH)
1990 if (lex_match_id (lexer, "HAVERAGE"))
1992 examine.pc_alg = PC_HAVERAGE;
1994 else if (lex_match_id (lexer, "WAVERAGE"))
1996 examine.pc_alg = PC_WAVERAGE;
1998 else if (lex_match_id (lexer, "ROUND"))
2000 examine.pc_alg = PC_ROUND;
2002 else if (lex_match_id (lexer, "EMPIRICAL"))
2004 examine.pc_alg = PC_EMPIRICAL;
2006 else if (lex_match_id (lexer, "AEMPIRICAL"))
2008 examine.pc_alg = PC_AEMPIRICAL;
2010 else if (lex_match_id (lexer, "NONE"))
2012 examine.pc_alg = PC_NONE;
2016 lex_error (lexer, NULL);
/* TOTAL and NOTOTAL are only recorded here; the conflict check and the
   choice of interactions happen after the loop.  The statement executed
   for TOTAL is not visible in this listing. */
2021 else if (lex_match_id (lexer, "TOTAL"))
2025 else if (lex_match_id (lexer, "NOTOTAL"))
2027 nototals_seen = true;
/* MISSING selects listwise or pairwise deletion and which class of
   missing values is excluded. */
2029 else if (lex_match_id (lexer, "MISSING"))
2031 lex_match (lexer, T_EQUALS);
2033 while (lex_token (lexer) != T_ENDCMD
2034 && lex_token (lexer) != T_SLASH)
2036 if (lex_match_id (lexer, "LISTWISE"))
2038 examine.missing_pw = false;
2040 else if (lex_match_id (lexer, "PAIRWISE"))
2042 examine.missing_pw = true;
2044 else if (lex_match_id (lexer, "EXCLUDE"))
2046 examine.exclude = MV_ANY;
2048 else if (lex_match_id (lexer, "INCLUDE"))
2050 examine.exclude = MV_SYSTEM;
2054 lex_error (lexer, NULL);
/* COMPARE chooses how boxplots are arranged. */
2059 else if (lex_match_id (lexer, "COMPARE"))
2061 lex_match (lexer, T_EQUALS);
2062 if (lex_match_id (lexer, "VARIABLES"))
2064 examine.boxplot_mode = BP_VARIABLES;
2066 else if (lex_match_id (lexer, "GROUPS"))
2068 examine.boxplot_mode = BP_GROUPS;
2072 lex_error (lexer, NULL);
/* PLOT switches the individual charts on; NONE clears all three and ALL
   sets all three.  A comma between keywords is tolerated. */
2076 else if (lex_match_id (lexer, "PLOT"))
2078 lex_match (lexer, T_EQUALS);
2080 while (lex_token (lexer) != T_ENDCMD
2081 && lex_token (lexer) != T_SLASH)
2083 if (lex_match_id (lexer, "BOXPLOT"))
2085 examine.boxplot = true;
2087 else if (lex_match_id (lexer, "NPPLOT"))
2089 examine.npplot = true;
2091 else if (lex_match_id (lexer, "HISTOGRAM"))
2093 examine.histogram = true;
2095 else if (lex_match_id (lexer, "NONE"))
2097 examine.histogram = false;
2098 examine.npplot = false;
2099 examine.boxplot = false;
2101 else if (lex_match (lexer, T_ALL))
2103 examine.histogram = true;
2104 examine.npplot = true;
2105 examine.boxplot = true;
2109 lex_error (lexer, NULL);
2112 lex_match (lexer, T_COMMA);
/* CINTERVAL requires a number, which is stored as given; no range check
   is visible in this listing. */
2115 else if (lex_match_id (lexer, "CINTERVAL"))
2117 if ( !lex_force_num (lexer))
2120 examine.conf = lex_number (lexer);
/* ID names the variable used to identify cases. */
2123 else if (lex_match_id (lexer, "ID"))
2125 lex_match (lexer, T_EQUALS);
2127 examine.id_var = parse_variable_const (lexer, examine.dict);
2131 lex_error (lexer, NULL);
/* Parsing is finished; validate and derive the remaining settings. */
2137 if ( totals_seen && nototals_seen)
2139 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
2143 /* If totals have been requested or if there are no factors
2144 in this analysis, then the totals need to be included. */
2145 if ( !nototals_seen || examine.n_iacts == 1)
2147 examine.iacts = &iacts_mem[0];
/* Otherwise the empty totals interaction in slot 0 is skipped.
   NOTE(review): no visible statement destroys iacts_mem[0] on this
   path, and the cleanup loops below start from examine.iacts; confirm
   that the skipped interaction is released and that n_iacts is adjusted
   to match. */
2152 examine.iacts = &iacts_mem[1];
/* With an ID variable, remember its case index and width. */
2156 if ( examine.id_var )
2158 examine.id_idx = var_get_case_index (examine.id_var);
2159 examine.id_width = var_get_width (examine.id_var);
/* The working case prototype holds three values, added in this order. */
2162 examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* value */
2163 examine.ex_proto = caseproto_add_width (examine.ex_proto, examine.id_width); /* id */
2164 examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* weight */
/* At least as many extremes must be calculated as will be displayed. */
2167 if (examine.disp_extremes > 0)
2169 examine.calc_extremes = examine.disp_extremes;
2172 if (examine.descriptives && examine.calc_extremes == 0)
2174 /* Descriptives always displays the max and min */
2175 examine.calc_extremes = 1;
/* PERCENTILES given without a list: use the seven defaults below. */
2178 if (percentiles_seen && examine.n_percentiles == 0)
2180 examine.n_percentiles = 7;
2181 examine.ptiles = xcalloc (examine.n_percentiles,
2182 sizeof (*examine.ptiles));
2184 examine.ptiles[0] = 5;
2185 examine.ptiles[1] = 10;
2186 examine.ptiles[2] = 25;
2187 examine.ptiles[3] = 50;
2188 examine.ptiles[4] = 75;
2189 examine.ptiles[5] = 90;
2190 examine.ptiles[6] = 95;
2193 assert (examine.calc_extremes >= examine.disp_extremes);
/* Run the analysis once per split group of the active dataset.  The
   result of destroying the grouper and the result of committing the
   procedure are combined into ok. */
2195 struct casegrouper *grouper;
2196 struct casereader *group;
2199 grouper = casegrouper_create_splits (proc_open (ds), examine.dict);
2200 while (casegrouper_get_next_group (grouper, &group))
2201 run_examine (&examine, group);
2202 ok = casegrouper_destroy (grouper);
2203 ok = proc_commit (ds) && ok;
/* Two cleanup sequences follow, releasing the same resources in a
   slightly different order.  Presumably the first belongs to the normal
   exit and the second to the error exit; the return statements and any
   label between them are not visible in this listing -- confirm. */
2206 caseproto_unref (examine.ex_proto);
2208 for (i = 0; i < examine.n_iacts; ++i)
2209 interaction_destroy (examine.iacts[i]);
2211 free (examine.ptiles);
2212 free (examine.dep_vars);
2213 pool_destroy (examine.pool);
2218 caseproto_unref (examine.ex_proto);
2219 for (i = 0; i < examine.n_iacts; ++i)
2220 interaction_destroy (examine.iacts[i]);
2221 free (examine.dep_vars);
2222 free (examine.ptiles);
2223 pool_destroy (examine.pool);