2 PSPP - a program for statistical analysis.
3 Copyright (C) 2012 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include <gsl/gsl_cdf.h>
24 #include "libpspp/assertion.h"
25 #include "libpspp/message.h"
26 #include "libpspp/pool.h"
29 #include "data/dataset.h"
30 #include "data/dictionary.h"
31 #include "data/casegrouper.h"
32 #include "data/casereader.h"
33 #include "data/casewriter.h"
34 #include "data/caseproto.h"
35 #include "data/subcase.h"
38 #include "data/format.h"
40 #include "math/interaction.h"
41 #include "math/box-whisker.h"
42 #include "math/categoricals.h"
43 #include "math/chart-geometry.h"
44 #include "math/histogram.h"
45 #include "math/moments.h"
47 #include "math/sort.h"
48 #include "math/order-stats.h"
49 #include "math/percentiles.h"
50 #include "math/tukey-hinges.h"
51 #include "math/trimmed-mean.h"
53 #include "output/charts/boxplot.h"
54 #include "output/charts/np-plot.h"
55 #include "output/charts/spreadlevel-plot.h"
56 #include "output/charts/plot-hist.h"
58 #include "language/command.h"
59 #include "language/lexer/lexer.h"
60 #include "language/lexer/value-parser.h"
61 #include "language/lexer/variable-parser.h"
63 #include "output/tab.h"
66 #define _(msgid) gettext (msgid)
67 #define N_(msgid) msgid
/* Appends to STR the name of value VAL of variable VAR, as produced by
   var_append_value_name.  When var_is_value_missing reports VAL as missing
   for VAR under the MV_ANY class, the translated suffix " (missing)" is
   appended as well. */
70 append_value_name (const struct variable *var, const union value *val, struct string *str)
72 var_append_value_name (var, val, str);
73 if ( var_is_value_missing (var, val, MV_ANY))
74 ds_put_cstr (str, _(" (missing)"));
84 /* Indices for the ex_proto member (below) */
97 /* A caseproto used to contain the data subsets under examination,
99 struct caseproto *ex_proto;
102 const struct variable **dep_vars;
105 struct interaction **iacts;
107 enum mv_class dep_excl;
108 enum mv_class fctr_excl;
110 const struct dictionary *dict;
112 struct categoricals *cats;
114 /* how many extremities to display */
123 /* The case index of the ID value (or -1 if not applicable) */
129 size_t n_percentiles;
137 enum bp_mode boxplot_mode;
139 const struct variable *id_var;
141 const struct variable *wv;
146 /* The value of this extremity */
149 /* Either the casenumber or the value of the variable specified
150 by the /ID subcommand which corresponds to this extremity */
151 union value identity;
154 struct exploratory_stats
161 /* Most operations need a sorted reader/writer */
162 struct casewriter *sorted_writer;
163 struct casereader *sorted_reader;
165 struct extremity *minima;
166 struct extremity *maxima;
169 Minimum should always equal minima[0].val.
170 Likewise, maximum should always equal maxima[0].val.
171 This redundancy exists as an optimisation effort.
172 Some statistics (e.g. histogram) require early calculation
178 struct trimmed_mean *trimmed_mean;
179 struct percentile *quartiles[3];
180 struct percentile **percentiles;
182 struct tukey_hinges *hinges;
184 /* The data for the NP Plots */
187 struct histogram *histogram;
189 /* The data for the box plots */
190 struct box_whisker *box_whisker;
195 /* The minimum weight */
200 /* Returns an array of (iact->n_vars) pointers to union value initialised to NULL.
201 The caller must free this array when no longer required. */
202 static const union value **
203 previous_value_alloc (const struct interaction *iact)
/* One slot per variable of the interaction. */
207 const union value **prev_val = xcalloc (iact->n_vars, sizeof (*prev_val));
/* Explicitly mark every slot as "no previous value recorded yet";
   previous_value_record tests each slot for non-NULL before comparing. */
209 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
210 prev_val[ivar_idx] = NULL;
215 /* Set the contents of PREV_VAL to the values of C indexed by the variables of IACT */
/* Before overwriting, each recorded value is compared with the corresponding
   value in C.  NOTE(review): the statements executed when a difference is
   found, and the return statement, are not visible here; callers store the
   result in an int named diff_idx and compare it against -1 -- confirm the
   exact contract against the full definition. */
217 previous_value_record (const struct interaction *iact, const struct ccase *c, const union value **prev_val)
/* First pass: look for a variable whose value in C differs from the one
   previously recorded.  Slots that are still NULL are not compared. */
222 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
224 const struct variable *ivar = iact->vars[ivar_idx];
225 const int width = var_get_width (ivar);
226 const union value *val = case_data (c, ivar);
228 if (prev_val[ivar_idx])
229 if (! value_equal (prev_val[ivar_idx], val, width))
/* Second pass: remember the values of C for the next call.  Only the
   pointers returned by case_data are stored, not copies of the values. */
236 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
238 const struct variable *ivar = iact->vars[ivar_idx];
239 const union value *val = case_data (c, ivar);
241 prev_val[ivar_idx] = val;
/* For interaction IACT_IDX of CMD, creates a boxplot for each dependent
   variable and adds to it the box_whisker of every category of that
   interaction, then hands the chart to boxplot_submit. */
248 show_boxplot_grouped (const struct examine *cmd, int iact_idx)
252 const struct interaction *iact = cmd->iacts[iact_idx];
253 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
255 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Start with an inverted range so that the first category examined
   sets both bounds. */
257 double y_min = DBL_MAX;
258 double y_max = -DBL_MAX;
260 struct boxplot *boxplot;
262 ds_init_empty (&title);
/* The title names the factors only when the interaction has variables. */
264 if (iact->n_vars > 0)
267 ds_init_empty (&istr);
268 interaction_to_string (iact, &istr);
269 ds_put_format (&title, _("Boxplot of %s vs. %s"),
270 var_to_string (cmd->dep_vars[v]),
275 ds_put_format (&title, _("Boxplot of %s"), var_to_string (cmd->dep_vars[v]));
/* First pass over the categories: find the overall minimum and maximum
   of this dependent variable, which are passed to boxplot_create. */
277 for (grp = 0; grp < n_cats; ++grp)
279 const struct exploratory_stats *es =
280 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
282 if ( y_min > es[v].minimum)
283 y_min = es[v].minimum;
285 if ( y_max < es[v].maximum)
286 y_max = es[v].maximum;
289 boxplot = boxplot_create (y_min, y_max, ds_cstr (&title));
/* Second pass: add one box per category, labelled with a
   "variable = value; " entry for each variable of the interaction. */
293 for (grp = 0; grp < n_cats; ++grp)
298 const struct ccase *c =
299 categoricals_get_case_by_category_real (cmd->cats, iact_idx, grp);
301 const struct exploratory_stats *es =
302 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
304 ds_init_empty (&label);
305 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
307 const struct variable *ivar = iact->vars[ivar_idx];
308 const union value *val = case_data (c, ivar);
310 ds_put_cstr (&label, var_to_string (ivar));
311 ds_put_cstr (&label, " = ");
312 append_value_name (ivar, val, &label);
313 ds_put_cstr (&label, "; ");
316 boxplot_add_box (boxplot, es[v].box_whisker, ds_cstr (&label));
321 boxplot_submit (boxplot);
/* For interaction IACT_IDX of CMD, creates a boxplot for each category of
   the interaction and adds to it one box per dependent variable (labelled
   with the variable's name), then hands the chart to boxplot_submit. */
326 show_boxplot_variabled (const struct examine *cmd, int iact_idx)
329 const struct interaction *iact = cmd->iacts[iact_idx];
330 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
332 for (grp = 0; grp < n_cats; ++grp)
334 struct boxplot *boxplot;
/* Start with an inverted range so that the first variable examined
   sets both bounds. */
336 double y_min = DBL_MAX;
337 double y_max = -DBL_MAX;
339 const struct ccase *c =
340 categoricals_get_case_by_category_real (cmd->cats, iact_idx, grp);
343 ds_init_empty (&title);
/* Find the range spanned by all dependent variables within this category. */
345 for (v = 0; v < cmd->n_dep_vars; ++v)
347 const struct exploratory_stats *es =
348 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
350 if ( y_min > es[v].minimum)
351 y_min = es[v].minimum;
353 if ( y_max < es[v].maximum)
354 y_max = es[v].maximum;
/* Without factor variables the title is just "Boxplot"; otherwise a label
   of "variable = value; " entries is built from the category's case. */
357 if ( iact->n_vars == 0)
358 ds_put_format (&title, _("Boxplot"));
363 ds_init_empty (&label);
364 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
366 const struct variable *ivar = iact->vars[ivar_idx];
367 const union value *val = case_data (c, ivar);
369 ds_put_cstr (&label, var_to_string (ivar));
370 ds_put_cstr (&label, " = ");
371 append_value_name (ivar, val, &label);
372 ds_put_cstr (&label, "; ");
375 ds_put_format (&title, _("Boxplot of %s"),
381 boxplot = boxplot_create (y_min, y_max, ds_cstr (&title));
/* One box per dependent variable. */
385 for (v = 0; v < cmd->n_dep_vars; ++v)
387 const struct exploratory_stats *es =
388 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
390 boxplot_add_box (boxplot, es[v].box_whisker,
391 var_to_string (cmd->dep_vars[v]));
394 boxplot_submit (boxplot);
/* For every dependent variable of CMD and every category of interaction
   IACT_IDX, creates the two charts returned by np_plot_create and
   dnp_plot_create and submits them.  If either chart could not be created,
   a warning is issued and both are unreferenced instead. */
400 show_npplot (const struct examine *cmd, int iact_idx)
402 const struct interaction *iact = cmd->iacts[iact_idx];
403 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
407 for (v = 0; v < cmd->n_dep_vars; ++v)
410 for (grp = 0; grp < n_cats; ++grp)
412 struct chart_item *npp, *dnpp;
413 struct casereader *reader;
417 const struct ccase *c =
418 categoricals_get_case_by_category_real (cmd->cats,
421 const struct exploratory_stats *es =
422 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
/* The label starts with the dependent variable's name ... */
425 ds_init_cstr (&label,
426 var_to_string (cmd->dep_vars[v]));
/* ... followed, when there are factor variables, by a parenthesised list of
   "variable = value; " entries taken from the category's case. */
428 if ( iact->n_vars > 0)
430 ds_put_cstr (&label, " (");
431 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
433 const struct variable *ivar = iact->vars[ivar_idx];
434 const union value *val = case_data (c, ivar);
436 ds_put_cstr (&label, var_to_string (ivar));
437 ds_put_cstr (&label, " = ");
438 append_value_name (ivar, val, &label);
439 ds_put_cstr (&label, "; ");
442 ds_put_cstr (&label, ")");
/* NOTE(review): the declaration of `np' is not visible here; it is
   presumably the es[v].np object for this cell -- confirm. */
446 reader = casewriter_make_reader (np->writer);
/* Both charts are built from the same reader and share the same label. */
449 npp = np_plot_create (np, reader, ds_cstr (&label));
450 dnpp = dnp_plot_create (np, reader, ds_cstr (&label));
452 if (npp == NULL || dnpp == NULL)
454 msg (MW, _("Not creating NP plot because data set is empty."));
455 chart_item_unref (npp);
456 chart_item_unref (dnpp);
460 chart_item_submit (npp);
461 chart_item_submit (dnpp);
463 casereader_destroy (reader);
/* For each dependent variable of CMD, creates a spread-level plot for
   interaction IACT_IDX with one (interquartile range, median) point per
   category, using cmd->sl_power as the plot's power parameter. */
471 show_spreadlevel (const struct examine *cmd, int iact_idx)
473 const struct interaction *iact = cmd->iacts[iact_idx];
474 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
478 /* Spreadlevel when there are no levels is not useful */
479 if (iact->n_vars == 0)
482 for (v = 0; v < cmd->n_dep_vars; ++v)
485 struct chart_item *sl;
/* Label: the dependent variable's name followed by the interaction in
   parentheses. */
488 ds_init_cstr (&label,
489 var_to_string (cmd->dep_vars[v]));
491 if (iact->n_vars > 0)
493 ds_put_cstr (&label, " (");
494 interaction_to_string (iact, &label);
495 ds_put_cstr (&label, ")");
498 sl = spreadlevel_plot_create (ds_cstr (&label), cmd->sl_power);
500 for (grp = 0; grp < n_cats; ++grp)
502 const struct exploratory_stats *es =
503 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
/* quartiles[1] is the median; the spread is quartiles[2] - quartiles[0],
   all computed with the algorithm selected by cmd->pc_alg. */
505 double median = percentile_calculate (es[v].quartiles[1], cmd->pc_alg);
507 double iqr = percentile_calculate (es[v].quartiles[2], cmd->pc_alg) -
508 percentile_calculate (es[v].quartiles[0], cmd->pc_alg);
510 spreadlevel_plot_add (sl, iqr, median);
/* NOTE(review): the condition selecting between the warning and the
   submission below is not visible in this view. */
514 msg (MW, _("Not creating spreadlevel chart for %s"), ds_cstr (&label));
516 chart_item_submit (sl);
/* For every dependent variable of CMD and every category of interaction
   IACT_IDX, creates a histogram chart from the cell's histogram, labelled
   with the dependent variable and the category's factor values.  The cell's
   moments supply the n, mean and variance passed to the chart. */
524 show_histogram (const struct examine *cmd, int iact_idx)
526 const struct interaction *iact = cmd->iacts[iact_idx];
527 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
531 for (v = 0; v < cmd->n_dep_vars; ++v)
534 for (grp = 0; grp < n_cats; ++grp)
538 const struct ccase *c =
539 categoricals_get_case_by_category_real (cmd->cats,
542 const struct exploratory_stats *es =
543 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
/* Cells without a histogram are handled specially; the statement taken
   in that case is not visible in this view. */
547 if (es[v].histogram == NULL)
550 ds_init_cstr (&label,
551 var_to_string (cmd->dep_vars[v]));
/* Append " (var = value; ...)" describing the category when the
   interaction has factor variables. */
553 if ( iact->n_vars > 0)
555 ds_put_cstr (&label, " (");
556 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
558 const struct variable *ivar = iact->vars[ivar_idx];
559 const union value *val = case_data (c, ivar);
561 ds_put_cstr (&label, var_to_string (ivar));
562 ds_put_cstr (&label, " = ");
563 append_value_name (ivar, val, &label);
564 ds_put_cstr (&label, "; ");
567 ds_put_cstr (&label, ")");
/* Only the first three moments are requested; the last two outputs are
   passed as NULL. */
571 moments_calculate (es[v].mom, &n, &mean, &var, NULL, NULL);
574 ( histogram_chart_create (es[v].histogram->gsl_hist,
575 ds_cstr (&label), n, mean,
/* Builds the "Percentiles" table for interaction IACT_IDX of CMD.

   Columns: one for the dependent variable, one per factor variable of the
   interaction, one for the row description, then one per requested
   percentile (cmd->n_percentiles).

   Rows: two heading rows, then for every dependent variable and category
   two rows: the first holds the values from percentile_calculate, the second
   ("Tukey's Hinges") is filled only in the 25, 50 and 75 columns. */
585 percentiles_report (const struct examine *cmd, int iact_idx)
587 const struct interaction *iact = cmd->iacts[iact_idx];
589 const int heading_columns = 1 + iact->n_vars + 1;
590 const int heading_rows = 2;
593 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
595 const int rows_per_cat = 2;
596 const int rows_per_var = n_cats * rows_per_cat;
598 const int nr = heading_rows + cmd->n_dep_vars * rows_per_var;
599 const int nc = heading_columns + cmd->n_percentiles;
601 t = tab_create (nc, nr);
602 tab_title (t, _("Percentiles"));
604 tab_headers (t, heading_columns, 0, heading_rows, 0);
606 /* Internal Vertical lines */
607 tab_box (t, -1, -1, -1, TAL_1,
608 heading_columns, 0, nc - 1, nr - 1);
/* Frame around the whole table. */
611 tab_box (t, TAL_2, TAL_2, -1, -1,
612 0, 0, nc - 1, nr - 1);
614 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
615 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
617 tab_joint_text (t, heading_columns, 0,
619 TAT_TITLE | TAB_CENTER,
623 tab_hline (t, TAL_1, heading_columns, nc - 1, 1);
/* Second heading row: the requested percentile values themselves. */
626 for (i = 0; i < cmd->n_percentiles; ++i)
628 tab_text_format (t, heading_columns + i, 1,
629 TAT_TITLE | TAB_CENTER,
630 _("%g"), cmd->ptiles[i]);
/* Headings naming the factor variables of the interaction. */
633 for (i = 0; i < iact->n_vars; ++i)
638 var_to_string (iact->vars[i])
646 tab_vline (t, TAL_1, heading_columns - 1, heading_rows, nr - 1);
648 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Tracks the factor values of the previous category so that only
   changed values are written out again. */
650 const union value **prev_vals = previous_value_alloc (iact);
654 tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var);
657 0, heading_rows + v * rows_per_var,
658 TAT_TITLE | TAB_LEFT,
659 var_to_string (cmd->dep_vars[v])
662 for (i = 0; i < n_cats; ++i)
664 const struct ccase *c =
665 categoricals_get_case_by_category_real (cmd->cats,
668 const struct exploratory_stats *ess =
669 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i);
/* The user data is an array with one entry per dependent variable. */
671 const struct exploratory_stats *es = ess + v;
673 int diff_idx = previous_value_record (iact, c, prev_vals);
678 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
680 const struct variable *ivar = iact->vars[ivar_idx];
681 const union value *val = case_data (c, ivar);
683 if (( diff_idx != -1 && diff_idx <= ivar_idx)
687 ds_init_empty (&str);
688 append_value_name (ivar, val, &str);
692 heading_rows + v * rows_per_var + i * rows_per_cat,
693 TAT_TITLE | TAB_LEFT,
/* Rule off the columns from the first changed factor onwards. */
701 if ( diff_idx != -1 && diff_idx < iact->n_vars)
703 tab_hline (t, TAL_1, 1 + diff_idx, nc - 1,
704 heading_rows + v * rows_per_var + i * rows_per_cat
/* First row of the category: description of the percentile algorithm. */
708 tab_text (t, heading_columns - 1,
709 heading_rows + v * rows_per_var + i * rows_per_cat,
710 TAT_TITLE | TAB_LEFT,
711 gettext (ptile_alg_desc [cmd->pc_alg]));
713 tukey_hinges_calculate (es->hinges, hinges);
715 for (p = 0; p < cmd->n_percentiles; ++p)
717 tab_double (t, heading_columns + p,
718 heading_rows + v * rows_per_var + i * rows_per_cat,
720 percentile_calculate (es->percentiles[p], cmd->pc_alg),
/* Second row of the category: a value is written only for the
   25th, 50th and 75th percentile columns. */
723 if (cmd->ptiles[p] == 25.0)
725 tab_double (t, heading_columns + p,
726 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
731 else if (cmd->ptiles[p] == 50.0)
733 tab_double (t, heading_columns + p,
734 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
739 else if (cmd->ptiles[p] == 75.0)
741 tab_double (t, heading_columns + p,
742 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
750 tab_text (t, heading_columns - 1,
751 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
752 TAT_TITLE | TAB_LEFT,
753 _("Tukey's Hinges"));
/* Builds the "Descriptives" table for interaction IACT_IDX of CMD.

   Layout: one heading row; then, for every dependent variable and every
   category of the interaction, a group of rows_per_cat (13) rows.  The
   heading columns hold the dependent variable, one column per factor
   variable and two description columns; two data columns follow.

   The figures come from the cell's moments (moments_calculate), its
   quartiles, trimmed mean and extreme values. */
764 descriptives_report (const struct examine *cmd, int iact_idx)
766 const struct interaction *iact = cmd->iacts[iact_idx];
768 const int heading_columns = 1 + iact->n_vars + 2;
769 const int heading_rows = 1;
772 size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
774 const int rows_per_cat = 13;
775 const int rows_per_var = n_cats * rows_per_cat;
777 const int nr = heading_rows + cmd->n_dep_vars * rows_per_var;
778 const int nc = 2 + heading_columns;
780 t = tab_create (nc, nr);
781 tab_title (t, _("Descriptives"));
783 tab_headers (t, heading_columns, 0, heading_rows, 0);
785 /* Internal Vertical lines */
786 tab_box (t, -1, -1, -1, TAL_1,
787 heading_columns, 0, nc - 1, nr - 1);
/* Frame around the whole table. */
790 tab_box (t, TAL_2, TAL_2, -1, -1,
791 0, 0, nc - 1, nr - 1);
793 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
794 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
/* Headings of the two data columns. */
797 tab_text (t, heading_columns, 0, TAB_CENTER | TAT_TITLE,
800 tab_text (t, heading_columns + 1, 0, TAB_CENTER | TAT_TITLE,
/* Headings naming the factor variables of the interaction. */
803 for (i = 0; i < iact->n_vars; ++i)
808 var_to_string (iact->vars[i])
812 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Tracks the factor values of the previous category so that only
   changed values are written out again. */
814 const union value **prev_val = previous_value_alloc (iact);
818 tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var);
821 0, heading_rows + v * rows_per_var,
822 TAT_TITLE | TAB_LEFT,
823 var_to_string (cmd->dep_vars[v])
826 for (i = 0; i < n_cats; ++i)
828 const struct ccase *c =
829 categoricals_get_case_by_category_real (cmd->cats,
832 const struct exploratory_stats *ess =
833 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i);
/* The user data is an array with one entry per dependent variable. */
835 const struct exploratory_stats *es = ess + v;
837 const int diff_idx = previous_value_record (iact, c, prev_val);
/* By position these match the n, mean and variance outputs used in the
   other moments_calculate calls in this file, followed by two further
   moments. */
839 double m0, m1, m2, m3, m4;
842 moments_calculate (es->mom, &m0, &m1, &m2, &m3, &m4);
/* Critical t value for the confidence level cmd->conf with m0 - 1
   degrees of freedom; used for the confidence interval of the mean. */
844 tval = gsl_cdf_tdist_Qinv ((1.0 - cmd->conf) / 2.0, m0 - 1.0);
846 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
848 const struct variable *ivar = iact->vars[ivar_idx];
849 const union value *val = case_data (c, ivar);
851 if (( diff_idx != -1 && diff_idx <= ivar_idx)
855 ds_init_empty (&str);
856 append_value_name (ivar, val, &str);
860 heading_rows + v * rows_per_var + i * rows_per_cat,
861 TAT_TITLE | TAB_LEFT,
/* Rule off the columns from the first changed factor onwards. */
869 if ( diff_idx != -1 && diff_idx < iact->n_vars)
871 tab_hline (t, TAL_1, 1 + diff_idx, nc - 1,
872 heading_rows + v * rows_per_var + i * rows_per_cat
/* Row +0: the second data column holds calc_semean (m2, m0). */
878 heading_rows + v * rows_per_var + i * rows_per_cat,
884 1 + iact->n_vars + 2,
885 heading_rows + v * rows_per_var + i * rows_per_cat,
889 1 + iact->n_vars + 3,
890 heading_rows + v * rows_per_var + i * rows_per_cat,
891 0, calc_semean (m2, m0), 0);
/* Rows +1 and +2: confidence interval for the mean, m1 -/+ tval * semean. */
895 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
897 _("%g%% Confidence Interval for Mean"),
902 1 + iact->n_vars + 1,
903 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
909 1 + iact->n_vars + 2,
910 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
911 0, m1 - tval * calc_semean (m2, m0), 0);
915 1 + iact->n_vars + 1,
916 heading_rows + v * rows_per_var + i * rows_per_cat + 2,
922 1 + iact->n_vars + 2,
923 heading_rows + v * rows_per_var + i * rows_per_cat + 2,
924 0, m1 + tval * calc_semean (m2, m0), 0);
/* Row +3: trimmed mean. */
929 heading_rows + v * rows_per_var + i * rows_per_cat + 3,
935 1 + iact->n_vars + 2,
936 heading_rows + v * rows_per_var + i * rows_per_cat + 3,
938 trimmed_mean_calculate (es->trimmed_mean),
/* Row +4: the middle quartile (quartiles[1], created with 0.5). */
943 heading_rows + v * rows_per_var + i * rows_per_cat + 4,
949 1 + iact->n_vars + 2,
950 heading_rows + v * rows_per_var + i * rows_per_cat + 4,
952 percentile_calculate (es->quartiles[1], cmd->pc_alg),
/* Rows +5 to +8: the labels and values for these rows are not visible
   in this view. */
958 heading_rows + v * rows_per_var + i * rows_per_cat + 5,
964 1 + iact->n_vars + 2,
965 heading_rows + v * rows_per_var + i * rows_per_cat + 5,
970 heading_rows + v * rows_per_var + i * rows_per_cat + 6,
976 1 + iact->n_vars + 2,
977 heading_rows + v * rows_per_var + i * rows_per_cat + 6,
982 heading_rows + v * rows_per_var + i * rows_per_cat + 7,
988 1 + iact->n_vars + 2,
989 heading_rows + v * rows_per_var + i * rows_per_cat + 7,
996 heading_rows + v * rows_per_var + i * rows_per_cat + 8,
1002 1 + iact->n_vars + 2,
1003 heading_rows + v * rows_per_var + i * rows_per_cat + 8,
/* Row +9: difference between the largest and the smallest value. */
1010 heading_rows + v * rows_per_var + i * rows_per_cat + 9,
1016 1 + iact->n_vars + 2,
1017 heading_rows + v * rows_per_var + i * rows_per_cat + 9,
1019 es->maxima[0].val - es->minima[0].val,
/* Row +10: interquartile range, quartiles[2] - quartiles[0]. */
1024 heading_rows + v * rows_per_var + i * rows_per_cat + 10,
1026 _("Interquartile Range")
1031 1 + iact->n_vars + 2,
1032 heading_rows + v * rows_per_var + i * rows_per_cat + 10,
1034 percentile_calculate (es->quartiles[2], cmd->pc_alg) -
1035 percentile_calculate (es->quartiles[0], cmd->pc_alg),
/* Row +11: the second data column holds calc_seskew (m0). */
1043 heading_rows + v * rows_per_var + i * rows_per_cat + 11,
1049 1 + iact->n_vars + 2,
1050 heading_rows + v * rows_per_var + i * rows_per_cat + 11,
1054 1 + iact->n_vars + 3,
1055 heading_rows + v * rows_per_var + i * rows_per_cat + 11,
1056 0, calc_seskew (m0), 0);
/* Row +12: the second data column holds calc_sekurt (m0). */
1060 heading_rows + v * rows_per_var + i * rows_per_cat + 12,
1066 1 + iact->n_vars + 2,
1067 heading_rows + v * rows_per_var + i * rows_per_cat + 12,
1071 1 + iact->n_vars + 3,
1072 heading_rows + v * rows_per_var + i * rows_per_cat + 12,
1073 0, calc_sekurt (m0), 0);
/* Builds the "Extreme Values" table for interaction IACT_IDX of CMD.

   For every dependent variable and every category of the interaction the
   table has 2 * cmd->disp_extremes rows: first the entries of es->maxima,
   then the entries of es->minima.  Each row shows the identity of the
   extremity and a value printed with the dependent variable's print
   format. */
1083 extremes_report (const struct examine *cmd, int iact_idx)
1085 const struct interaction *iact = cmd->iacts[iact_idx];
1087 const int heading_columns = 1 + iact->n_vars + 2;
1088 const int heading_rows = 1;
1089 struct tab_table *t;
1091 size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
/* Maxima and minima each take cmd->disp_extremes rows. */
1093 const int rows_per_cat = 2 * cmd->disp_extremes;
1094 const int rows_per_var = n_cats * rows_per_cat;
1096 const int nr = heading_rows + cmd->n_dep_vars * rows_per_var;
1097 const int nc = 2 + heading_columns;
1099 t = tab_create (nc, nr);
1100 tab_title (t, _("Extreme Values"));
1102 tab_headers (t, heading_columns, 0, heading_rows, 0);
1104 /* Internal Vertical lines */
1105 tab_box (t, -1, -1, -1, TAL_1,
1106 heading_columns, 0, nc - 1, nr - 1);
1108 /* External Frame */
1109 tab_box (t, TAL_2, TAL_2, -1, -1,
1110 0, 0, nc - 1, nr - 1);
1112 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
1113 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
/* Heading of the identity column: either the name of the ID variable or
   alternative text; the condition choosing between the two calls is not
   visible in this view. */
1117 tab_text (t, heading_columns, 0, TAB_CENTER | TAT_TITLE,
1118 var_to_string (cmd->id_var));
1120 tab_text (t, heading_columns, 0, TAB_CENTER | TAT_TITLE,
1123 tab_text (t, heading_columns + 1, 0, TAB_CENTER | TAT_TITLE,
/* Headings naming the factor variables of the interaction. */
1126 for (i = 0; i < iact->n_vars; ++i)
1131 var_to_string (iact->vars[i])
1135 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Tracks the factor values of the previous category so that only
   changed values are written out again. */
1137 const union value **prev_val = previous_value_alloc (iact);
1141 tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var);
1144 0, heading_rows + v * rows_per_var,
1146 var_to_string (cmd->dep_vars[v])
1149 for (i = 0; i < n_cats; ++i)
1152 const struct ccase *c =
1153 categoricals_get_case_by_category_real (cmd->cats, iact_idx, i);
1155 const struct exploratory_stats *ess =
1156 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i);
/* The user data is an array with one entry per dependent variable. */
1158 const struct exploratory_stats *es = ess + v;
1160 int diff_idx = previous_value_record (iact, c, prev_val);
1162 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
1164 const struct variable *ivar = iact->vars[ivar_idx];
1165 const union value *val = case_data (c, ivar);
1167 if (( diff_idx != -1 && diff_idx <= ivar_idx)
1171 ds_init_empty (&str);
1172 append_value_name (ivar, val, &str);
1176 heading_rows + v * rows_per_var + i * rows_per_cat,
1177 TAT_TITLE | TAB_LEFT,
/* Rule off the columns from the first changed factor onwards. */
1185 if ( diff_idx != -1 && diff_idx < iact->n_vars)
1187 tab_hline (t, TAL_1, 1 + diff_idx, nc - 1,
1188 heading_rows + v * rows_per_var + i * rows_per_cat
1193 heading_columns - 2,
1194 heading_rows + v * rows_per_var + i * rows_per_cat,
/* Separator between the first and second halves of the category's rows. */
1199 tab_hline (t, TAL_1, heading_columns - 2, nc - 1,
1200 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes
1204 heading_columns - 2,
1205 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes,
1209 for (e = 0 ; e < cmd->disp_extremes; ++e)
/* Row e of the first half: entry e of es->maxima. */
1212 heading_columns - 1,
1213 heading_rows + v * rows_per_var + i * rows_per_cat + e,
1218 /* The casenumber */
1222 heading_rows + v * rows_per_var + i * rows_per_cat + e,
1224 &es->maxima[e].identity,
1230 heading_rows + v * rows_per_var + i * rows_per_cat + e,
1232 es->maxima[e].identity.f,
1236 heading_columns + 1,
1237 heading_rows + v * rows_per_var + i * rows_per_cat + e,
1240 var_get_print_format (cmd->dep_vars[v]));
/* Row e of the second half: entry e of es->minima. */
1244 heading_columns - 1,
1245 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
1250 /* The casenumber */
1254 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
1256 &es->minima[e].identity,
1262 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
1264 es->minima[e].identity.f,
1268 heading_columns + 1,
1269 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
1272 var_get_print_format (cmd->dep_vars[v]));
/* Builds the "Case Processing Summary" table for interaction IACT_IDX of
   CMD.

   Three heading rows ("Cases"; "Valid" / "Missing" / "Total"; then a pair of
   sub-headings for each of those three groups) are followed by one row for
   every combination of dependent variable and category.  Each row is based
   on the cell's non_missing and missing totals and their sum. */
1283 summary_report (const struct examine *cmd, int iact_idx)
1285 const struct interaction *iact = cmd->iacts[iact_idx];
1287 const int heading_columns = 1 + iact->n_vars;
1288 const int heading_rows = 3;
1289 struct tab_table *t;
/* Use the weight variable's print format when there is one, else F8.0. */
1291 const struct fmt_spec *wfmt = cmd->wv ? var_get_print_format (cmd->wv) : &F_8_0;
1293 size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
1295 const int nr = heading_rows + n_cats * cmd->n_dep_vars;
1296 const int nc = 6 + heading_columns;
1298 t = tab_create (nc, nr);
1299 tab_title (t, _("Case Processing Summary"));
1301 tab_headers (t, heading_columns, 0, heading_rows, 0);
1303 /* Internal Vertical lines */
1304 tab_box (t, -1, -1, -1, TAL_1,
1305 heading_columns, 0, nc - 1, nr - 1);
1307 /* External Frame */
1308 tab_box (t, TAL_2, TAL_2, -1, -1,
1309 0, 0, nc - 1, nr - 1);
1311 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
1312 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
1314 tab_joint_text (t, heading_columns, 0,
1315 nc - 1, 0, TAB_CENTER | TAT_TITLE, _("Cases"));
1318 heading_columns + 1, 1,
1319 TAB_CENTER | TAT_TITLE, _("Valid"));
1322 heading_columns + 2, 1,
1323 heading_columns + 3, 1,
1324 TAB_CENTER | TAT_TITLE, _("Missing"));
1327 heading_columns + 4, 1,
1328 heading_columns + 5, 1,
1329 TAB_CENTER | TAT_TITLE, _("Total"));
/* Third heading row: two sub-headings under each of the three groups. */
1331 for (i = 0; i < 3; ++i)
1333 tab_text (t, heading_columns + i * 2, 2, TAB_CENTER | TAT_TITLE,
1335 tab_text (t, heading_columns + i * 2 + 1, 2, TAB_CENTER | TAT_TITLE,
/* Headings naming the factor variables of the interaction. */
1339 for (i = 0; i < iact->n_vars; ++i)
1344 var_to_string (iact->vars[i])
1349 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Tracks the factor values of the previous category so that only
   changed values are written out again. */
1352 const union value **prev_values = previous_value_alloc (iact);
1355 tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * n_cats);
1358 0, heading_rows + n_cats * v,
1360 var_to_string (cmd->dep_vars[v])
1364 for (i = 0; i < n_cats; ++i)
1367 const struct exploratory_stats *es;
1369 const struct ccase *c =
1370 categoricals_get_case_by_category_real (cmd->cats,
1374 int diff_idx = previous_value_record (iact, c, prev_values);
/* Draw a rule only when a factor other than the last one changed. */
1376 if ( diff_idx != -1 && diff_idx < iact->n_vars - 1)
1377 tab_hline (t, TAL_1, 1 + diff_idx, nc - 1,
1378 heading_rows + n_cats * v + i );
1380 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
1382 const struct variable *ivar = iact->vars[ivar_idx];
1383 const union value *val = case_data (c, ivar);
1385 if (( diff_idx != -1 && diff_idx <= ivar_idx)
1389 ds_init_empty (&str);
1390 append_value_name (ivar, val, &str);
1393 1 + ivar_idx, heading_rows + n_cats * v + i,
1394 TAT_TITLE | TAB_LEFT,
1404 es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i);
/* Denominator for the percentages written below. */
1407 total = es[v].missing + es[v].non_missing;
1409 heading_columns + 0,
1410 heading_rows + n_cats * v + i,
/* Valid cases as a percentage of the total. */
1417 heading_columns + 1,
1418 heading_rows + n_cats * v + i,
1421 100.0 * es[v].non_missing / total
1426 heading_columns + 2,
1427 heading_rows + n_cats * v + i,
/* Missing cases as a percentage of the total. */
1433 heading_columns + 3,
1434 heading_rows + n_cats * v + i,
1437 100.0 * es[v].missing / total
1440 heading_columns + 4,
1441 heading_rows + n_cats * v + i,
1446 /* This can only be 100%, can't it?  The numerator is the same sum as
   `total'. */
1448 heading_columns + 5,
1449 heading_rows + n_cats * v + i,
1452 100.0 * (es[v].missing + es[v].non_missing)/ total
/* Thin rules beneath the first and second heading rows. */
1458 tab_hline (t, TAL_1, heading_columns, nc - 1, 1);
1459 tab_hline (t, TAL_1, heading_columns, nc - 1, 2);
1464 /* Attempt to parse an interaction from LEXER.
   An interaction is a variable from EX->dict, optionally followed by
   further variables each introduced by the BY token.  A trailing comma,
   if present, is consumed.
   NOTE(review): the return statements are not visible in this view;
   presumably the new interaction is returned on success and NULL
   otherwise -- confirm. */
1465 static struct interaction *
1466 parse_interaction (struct lexer *lexer, struct examine *ex)
1468 const struct variable *v = NULL;
1469 struct interaction *iact = NULL;
1471 if ( lex_match_variable (lexer, ex->dict, &v))
1473 iact = interaction_create (v);
1475 while (lex_match (lexer, T_BY))
/* BY must be followed by a variable; otherwise the partially built
   interaction is destroyed. */
1477 if (!lex_match_variable (lexer, ex->dict, &v))
1479 interaction_destroy (iact);
1482 interaction_add_variable (iact, v);
/* The comma is optional, so the result of lex_match is ignored. */
1484 lex_match (lexer, T_COMMA);
/* Allocates, from the pool of the struct examine passed as AUX1, an array
   of exploratory_stats with one element per dependent variable, and
   initialises each element for accumulation.  AUX2 is unused.
   NOTE(review): the return statement is not visible in this view;
   presumably the new array is returned -- confirm. */
1492 create_n (const void *aux1, void *aux2 UNUSED)
1496 const struct examine *examine = aux1;
1497 struct exploratory_stats *es = pool_calloc (examine->pool, examine->n_dep_vars, sizeof (*es));
/* Sort order shared by all the writers below: ascending on index 0 of
   the ex_proto cases. */
1498 struct subcase ordering;
1499 subcase_init (&ordering, 0, 0, SC_ASCEND);
1501 for (v = 0; v < examine->n_dep_vars; v++)
1503 es[v].sorted_writer = sort_create_writer (&ordering, examine->ex_proto);
1504 es[v].sorted_reader = NULL;
1506 es[v].mom = moments_create (MOMENT_KURTOSIS);
/* Sentinels: any real weight or value replaces them on first comparison. */
1507 es[v].cmin = DBL_MAX;
1509 es[v].maximum = -DBL_MAX;
1510 es[v].minimum = DBL_MAX;
1513 subcase_destroy (&ordering);
/* Accumulates case C, with weight WEIGHT, into the exploratory_stats array
   USER_DATA (one element per dependent variable of the struct examine passed
   as AUX1).  AUX2 is unused.

   For each dependent variable, a value that is missing under
   examine->dep_excl adds WEIGHT to the `missing' total.  Otherwise the
   value updates the running extrema, the `non_missing' total, the first
   moments pass and the minimum weight, and is written together with its ID
   and weight to the sorted writer. */
1518 update_n (const void *aux1, void *aux2 UNUSED, void *user_data,
1519 const struct ccase *c, double weight)
1522 const struct examine *examine = aux1;
1523 struct exploratory_stats *es = user_data;
1525 for (v = 0; v < examine->n_dep_vars; v++)
1527 struct ccase *outcase ;
1528 const struct variable *var = examine->dep_vars[v];
1529 const double x = case_data (c, var)->f;
/* NOTE(review): the statement that follows the increment is not visible
   in this view; presumably the rest of the iteration is skipped for
   missing values -- confirm. */
1531 if (var_is_value_missing (var, case_data (c, var), examine->dep_excl))
1533 es[v].missing += weight;
1537 outcase = case_create (examine->ex_proto);
/* Track the running extrema; the assignments themselves are not visible
   in this view. */
1539 if (x > es[v].maximum)
1542 if (x < es[v].minimum)
1545 es[v].non_missing += weight;
1547 moments_pass_one (es[v].mom, x, weight);
1549 /* Save the value and the ID to the writer */
1550 assert (examine->id_idx != -1);
1551 case_data_rw_idx (outcase, EX_VAL)->f = x;
1552 value_copy (case_data_rw_idx (outcase, EX_ID),
1553 case_data_idx (c, examine->id_idx), examine->id_width);
1555 case_data_rw_idx (outcase, EX_WT)->f = weight;
/* Remember the smallest weight seen; calculate_n passes it to
   tukey_hinges_create. */
1559 if (es[v].cmin > weight)
1560 es[v].cmin = weight;
1562 casewriter_write (es[v].sorted_writer, outcase);
/* Completes the statistics in the exploratory_stats array USER_DATA (one
   element per dependent variable of the struct examine passed as AUX1) once
   all cases have been accumulated.  AUX2 is unused.

   For each dependent variable this: optionally creates a histogram; turns
   the sorted writer into a reader; runs the second moments pass over the
   sorted data while filling the histogram and the minima/maxima arrays;
   accumulates the order statistics (trimmed mean, quartiles, Tukey hinges,
   requested percentiles); and, when requested, the box-whisker and NP-plot
   statistics. */
1567 calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data)
1570 const struct examine *examine = aux1;
1571 struct exploratory_stats *es = user_data;
1573 for (v = 0; v < examine->n_dep_vars; v++)
1576 casenumber imin = 0;
1577 double imax = es[v].cc;
1578 struct casereader *reader;
1581 if (examine->histogram)
/* Bin width: the data range divided by (1 + log2 (cc)); any further
   terms of this expression are not visible in this view. */
1584 double bin_width = fabs (es[v].minimum - es[v].maximum)
1585 / (1 + log2 (es[v].cc))
1589 histogram_create (bin_width, es[v].minimum, es[v].maximum);
/* From here on the data are only read; the writer pointer is cleared
   once the reader has been made from it. */
1592 es[v].sorted_reader = casewriter_make_reader (es[v].sorted_writer);
1593 es[v].sorted_writer = NULL;
1595 es[v].maxima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].maxima));
1596 es[v].minima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].minima));
1597 for (i = 0; i < examine->calc_extremes; ++i)
1599 value_init_pool (examine->pool, &es[v].maxima[i].identity, examine->id_width) ;
1600 value_init_pool (examine->pool, &es[v].minima[i].identity, examine->id_width) ;
/* Single pass over a clone of the sorted reader. */
1603 for (reader = casereader_clone (es[v].sorted_reader);
1604 (c = casereader_read (reader)) != NULL; case_unref (c))
1606 const double val = case_data_idx (c, EX_VAL)->f;
1607 const double wt = case_data_idx (c, EX_WT)->f; /* FIXME: What about fractional weights ??? */
1609 moments_pass_two (es[v].mom, val, wt);
1611 if (es[v].histogram)
1612 histogram_add (es[v].histogram, val, wt);
/* Fill the minima array from the start of the sorted data.  The
   statements that advance imin are not visible in this view. */
1614 if (imin < examine->calc_extremes)
1617 for (x = imin; x < examine->calc_extremes; ++x)
1619 struct extremity *min = &es[v].minima[x];
1621 value_copy (&min->identity, case_data_idx (c, EX_ID), examine->id_width);
/* Fill the maxima array once imax has dropped below calc_extremes.
   The statements that update imax are not visible in this view. */
1627 if (imax < examine->calc_extremes)
1631 for (x = imax; x < imax + wt; ++x)
1633 struct extremity *max;
1635 if (x >= examine->calc_extremes)
1638 max = &es[v].maxima[x];
1640 value_copy (&max->identity, case_data_idx (c, EX_ID), examine->id_width);
1644 casereader_destroy (reader);
/* Sanity check of the redundancy between the extremes arrays and the
   running minimum/maximum. */
1646 if (examine->calc_extremes > 0)
1648 assert (es[v].minima[0].val == es[v].minimum);
1649 assert (es[v].maxima[0].val == es[v].maximum);
/* Order statistics accumulated together: the trimmed mean, the three
   quartiles and the hinges (5 in total) plus the requested percentiles. */
1653 const int n_os = 5 + examine->n_percentiles;
1654 struct order_stats **os ;
1655 es[v].percentiles = pool_calloc (examine->pool, examine->n_percentiles, sizeof (*es[v].percentiles));
1657 es[v].trimmed_mean = trimmed_mean_create (es[v].cc, 0.05);
1659 os = xcalloc (n_os, sizeof *os);
1660 os[0] = &es[v].trimmed_mean->parent;
1662 es[v].quartiles[0] = percentile_create (0.25, es[v].cc);
1663 es[v].quartiles[1] = percentile_create (0.5, es[v].cc);
1664 es[v].quartiles[2] = percentile_create (0.75, es[v].cc);
1666 os[1] = &es[v].quartiles[0]->parent;
1667 os[2] = &es[v].quartiles[1]->parent;
1668 os[3] = &es[v].quartiles[2]->parent;
1670 es[v].hinges = tukey_hinges_create (es[v].cc, es[v].cmin);
1671 os[4] = &es[v].hinges->parent;
/* cmd->ptiles holds percentages; percentile_create is given fractions. */
1673 for (i = 0; i < examine->n_percentiles; ++i)
1675 es[v].percentiles[i] = percentile_create (examine->ptiles[i] / 100.00, es[v].cc);
1676 os[5 + i] = &es[v].percentiles[i]->parent;
1679 order_stats_accumulate_idx (os, n_os,
1680 casereader_clone (es[v].sorted_reader),
/* The box-whisker statistic is created from the hinges computed above,
   so it is accumulated in a separate pass. */
1686 if (examine->boxplot)
1688 struct order_stats *os;
1690 es[v].box_whisker = box_whisker_create (es[v].hinges,
1691 EX_ID, examine->id_var);
1693 os = &es[v].box_whisker->parent;
1694 order_stats_accumulate_idx (&os, 1,
1695 casereader_clone (es[v].sorted_reader),
/* The NP statistic is created from n, mean and variance of the
   completed moments. */
1699 if (examine->npplot)
1701 double n, mean, var;
1702 struct order_stats *os;
1704 moments_calculate (es[v].mom, &n, &mean, &var, NULL, NULL);
1706 es[v].np = np_create (n, mean, var);
1708 os = &es[v].np->parent;
1710 order_stats_accumulate_idx (&os, 1,
1711 casereader_clone (es[v].sorted_reader),
/* Releases the statistics objects that were accumulated for every cell
   of every interaction in CMD.

   For each interaction I (0 .. CMD->n_iacts - 1), each dependent
   variable V (0 .. CMD->n_dep_vars - 1) and each category GRP (count
   obtained from categoricals_n_count), the per-category user data is
   fetched from CMD->cats and the members of ES[V] are released: the
   hinges, the three quartiles, the CMD->n_percentiles percentiles, the
   trimmed mean, the np statistic, the histogram, the moments and the
   sorted casereader.

   NOTE(review): the statements shown here call stat->destroy (stat)
   after each new assignment to OS, but only the first assignment to
   STAT is shown.  Confirm that STAT is re-pointed at &os->parent before
   every destroy call, otherwise the same object is destroyed
   repeatedly. */
1719 cleanup_exploratory_stats (struct examine *cmd)
1722 for (i = 0; i < cmd->n_iacts; ++i)
1725 const size_t n_cats = categoricals_n_count (cmd->cats, i);
1727 for (v = 0; v < cmd->n_dep_vars; ++v)
1730 for (grp = 0; grp < n_cats; ++grp)
/* The user data attached to a category is an array indexed by
   dependent variable. */
1733 const struct exploratory_stats *es =
1734 categoricals_get_user_data_by_category_real (cmd->cats, i, grp);
/* Each order statistic is reached through its `parent' member, and the
   generic statistic through the order statistic's own `parent' member;
   destruction goes through the statistic's destroy callback. */
1736 struct order_stats *os = &es[v].hinges->parent;
1737 struct statistic *stat = &os->parent;
1738 stat->destroy (stat);
/* Lower quartile, median and upper quartile. */
1740 for (q = 0; q < 3 ; q++)
1742 os = &es[v].quartiles[q]->parent;
1744 stat->destroy (stat);
/* Percentiles requested by the user. */
1747 for (q = 0; q < cmd->n_percentiles ; q++)
1749 os = &es[v].percentiles[q]->parent;
1751 stat->destroy (stat);
1754 os = &es[v].trimmed_mean->parent;
1756 stat->destroy (stat);
/* NOTE(review): elsewhere in this file es[v].np is assigned only under
   `if (examine->npplot)'.  Taking the address of a member through a
   possibly-null pointer is fragile; confirm that a null check guards
   the destroy call below. */
1758 os = &es[v].np->parent;
1762 stat->destroy (stat);
/* NOTE(review): other code in this file tests es[v].histogram for null
   before using it, so it may be null here as well; presumably
   statistic_destroy tolerates the resulting pointer -- verify. */
1765 statistic_destroy (&es[v].histogram->parent);
1766 moments_destroy (es[v].mom);
1768 casereader_destroy (es[v].sorted_reader);
/* Runs the analysis described by CMD over the cases supplied by INPUT.

   The function builds a categoricals object over CMD's interactions,
   feeds every case from INPUT into it, and then, for each interaction,
   emits the reports and charts selected in CMD.  Finally it releases
   the per-cell statistics and the categoricals object.

   INPUT is read to exhaustion and destroyed here (after possibly being
   wrapped by further casereaders). */
1776 run_examine (struct examine *cmd, struct casereader *input)
1780 struct casereader *reader;
/* Callbacks that the categoricals object invokes for the per-category
   user data.  No destroy callback is installed; the statistics are
   released explicitly by cleanup_exploratory_stats at the end of this
   function. */
1782 struct payload payload;
1783 payload.create = create_n;
1784 payload.update = update_n;
1785 payload.calculate = calculate_n;
1786 payload.destroy = NULL;
/* The weighting variable is taken from the dictionary; it and the two
   missing-value exclusion classes are handed to categoricals_create. */
1788 cmd->wv = dict_get_weight (cmd->dict);
1791 = categoricals_create (cmd->iacts, cmd->n_iacts,
1792 cmd->wv, cmd->dep_excl, cmd->fctr_excl);
1794 categoricals_set_payload (cmd->cats, &payload, cmd, NULL);
/* An id_idx of -1 means that no ID variable was specified (asserted
   below).  In that case the value count of the first case becomes the
   ID index and INPUT is wrapped in an arithmetic-sequence reader with
   arguments 1.0, 1.0 -- presumably this appends a running case number
   at that index; verify against the casereader documentation.

   NOTE(review): C is passed to case_get_value_cnt without a visible
   null check, and no case_unref of the peeked case is shown.  Confirm
   the behaviour for an empty INPUT and that the peeked case is
   released. */
1796 if (cmd->id_idx == -1)
1798 struct ccase *c = casereader_peek (input, 0);
1800 assert (cmd->id_var == NULL);
1802 cmd->id_idx = case_get_value_cnt (c);
1803 input = casereader_create_arithmetic_sequence (input, 1.0, 1.0);
1808 /* Remove cases on a listwise basis if requested */
1809 if ( cmd->missing_pw == false)
1810 input = casereader_create_filter_missing (input,
/* Feed every case to the categoricals object.  Each case is unreffed
   by the loop's increment expression once it has been consumed. */
1817 for (reader = input;
1818 (c = casereader_read (reader)) != NULL; case_unref (c))
1820 categoricals_update (cmd->cats, c);
1822 casereader_destroy (reader);
1823 categoricals_done (cmd->cats);
/* Produce the output for each interaction in turn. */
1825 for (i = 0; i < cmd->n_iacts; ++i)
1827 summary_report (cmd, i);
1829 if (cmd->disp_extremes > 0)
1830 extremes_report (cmd, i);
1832 if (cmd->n_percentiles > 0)
1833 percentiles_report (cmd, i);
/* The box plot layout depends on the COMPARE setting.
   NOTE(review): the case labels and any guard on cmd->boxplot are not
   shown alongside these calls; confirm them before relying on this
   description. */
1837 switch (cmd->boxplot_mode)
1840 show_boxplot_grouped (cmd, i);
1843 show_boxplot_variabled (cmd, i);
/* NOTE(review): the conditions guarding the histogram and np-plot calls
   are not shown here; presumably they test cmd->histogram and
   cmd->npplot -- verify. */
1852 show_histogram (cmd, i);
1855 show_npplot (cmd, i);
1857 if (cmd->spreadlevel)
1858 show_spreadlevel (cmd, i);
1860 if (cmd->descriptives)
1861 descriptives_report (cmd, i);
/* Release the per-cell statistics before the categoricals object that
   owns the per-category user data is destroyed. */
1864 cleanup_exploratory_stats (cmd);
1865 categoricals_destroy (cmd->cats);
/* Parses the EXAMINE command from LEXER and runs it on dataset DS.

   The function fills in a local `struct examine' with defaults, parses
   the dependent variable list and the optional BY interactions, then
   loops over the subcommands STATISTICS, PERCENTILES, TOTAL, NOTOTAL,
   MISSING, COMPARE, PLOT, CINTERVAL and ID.  After parsing it derives
   the remaining settings, calls run_examine once per split-file group,
   and frees everything it allocated.

   NOTE(review): the return statements are not shown with this code;
   presumably the first cleanup sequence at the bottom is the success
   path and the second one the error path -- verify.

   NOTE(review): no initialisation of examine.dep_vars is visible among
   the defaults below, yet both cleanup sequences pass it to free.
   Confirm that it is initialised before any early exit can reach the
   cleanup code. */
1870 cmd_examine (struct lexer *lexer, struct dataset *ds)
/* TOTAL and NOTOTAL are tracked separately so that specifying both can
   be diagnosed after parsing. */
1873 bool nototals_seen = false;
1874 bool totals_seen = false;
/* IACTS_MEM always points at the start of the interaction array, while
   examine.iacts may later be advanced past the first element. */
1876 struct interaction **iacts_mem = NULL;
1877 struct examine examine;
1878 bool percentiles_seen = false;
/* Defaults: missing_pw false (listwise deletion), no extremes, no
   descriptives, 0.95 confidence level, HAVERAGE percentile algorithm,
   no percentiles, no ID variable (id_idx of -1), box plots compared by
   group. */
1880 examine.missing_pw = false;
1881 examine.disp_extremes = 0;
1882 examine.calc_extremes = 0;
1883 examine.descriptives = false;
1884 examine.conf = 0.95;
1885 examine.pc_alg = PC_HAVERAGE;
1886 examine.ptiles = NULL;
1887 examine.n_percentiles = 0;
1888 examine.id_idx = -1;
1889 examine.id_width = 0;
1890 examine.id_var = NULL;
1891 examine.boxplot_mode = BP_GROUPS;
/* The case prototype starts empty and receives its columns once the ID
   width is known.  The pool owns the interaction array allocated just
   below.  After that, both missing-value exclusion classes default to
   MV_ANY, every plot is switched off, the spread-level power is 0, and
   the dictionary is taken from DS. */
1893 examine.ex_proto = caseproto_create ();
1895 examine.pool = pool_create ();
1897 /* Allocate space for the first interaction.
1898 This interaction is an empty one (for the totals).
1899 If no totals are requested, we will simply ignore this
1902 examine.n_iacts = 1;
1903 examine.iacts = iacts_mem = pool_zalloc (examine.pool, sizeof (struct interaction *));
1904 examine.iacts[0] = interaction_create (NULL);
1906 examine.dep_excl = MV_ANY;
1907 examine.fctr_excl = MV_ANY;
1908 examine.histogram = false;
1909 examine.npplot = false;
1910 examine.boxplot = false;
1911 examine.spreadlevel = false;
1912 examine.sl_power = 0;
1914 examine.dict = dataset_dict (ds);
1916 /* Accept an optional, completely pointless "/VARIABLES=" */
1917 lex_match (lexer, T_SLASH);
1918 if (lex_match_id (lexer, "VARIABLES"))
1920 if (! lex_force_match (lexer, T_EQUALS) )
/* Dependent variables must be numeric and may not be repeated. */
1924 if (!parse_variables_const (lexer, examine.dict,
1925 &examine.dep_vars, &examine.n_dep_vars,
1926 PV_NO_DUPLICATE | PV_NUMERIC))
/* Optional factor list: every parsed interaction is stored in the last
   slot of the pool-allocated array, which is grown with
   pool_nrealloc. */
1929 if (lex_match (lexer, T_BY))
1931 struct interaction *iact = NULL;
1934 iact = parse_interaction (lexer, &examine);
1939 pool_nrealloc (examine.pool, iacts_mem,
1941 sizeof (*iacts_mem));
1943 iacts_mem[examine.n_iacts - 1] = iact;
/* Subcommand loop: runs until the end of the command.  The slash that
   separates subcommands is optional. */
1950 while (lex_token (lexer) != T_ENDCMD)
1952 lex_match (lexer, T_SLASH);
/* STATISTICS [=] { DESCRIPTIVES | EXTREME [(n)] | NONE | ALL } ... */
1954 if (lex_match_id (lexer, "STATISTICS"))
1956 lex_match (lexer, T_EQUALS);
1958 while (lex_token (lexer) != T_ENDCMD
1959 && lex_token (lexer) != T_SLASH)
1961 if (lex_match_id (lexer, "DESCRIPTIVES"))
1963 examine.descriptives = true;
/* EXTREME takes an optional parenthesised count; a warning is issued
   for a negative value.
   NOTE(review): lex_integer is called without a visible check that the
   current token is an integer -- confirm. */
1965 else if (lex_match_id (lexer, "EXTREME"))
1968 if (lex_match (lexer, T_LPAREN))
1970 extr = lex_integer (lexer);
1974 msg (MW, _("%s may not be negative. Using default value (%g)."), "EXTREME", 5.0);
1979 if (! lex_force_match (lexer, T_RPAREN))
1982 examine.disp_extremes = extr;
1984 else if (lex_match_id (lexer, "NONE"))
/* ALL only supplies the default of 5 extremes when no explicit count
   has been set. */
1987 else if (lex_match (lexer, T_ALL))
1989 if (examine.disp_extremes == 0)
1990 examine.disp_extremes = 5;
1994 lex_error (lexer, NULL);
/* PERCENTILES [(p1 [,] p2 ...)] [=] algorithm.  Each percentile must
   lie strictly between 0 and 100 and is appended to examine.ptiles,
   which is grown with xrealloc.  Commas between values are optional. */
1999 else if (lex_match_id (lexer, "PERCENTILES"))
2001 percentiles_seen = true;
2002 if (lex_match (lexer, T_LPAREN))
2004 while (lex_is_number (lexer))
2006 double p = lex_number (lexer);
2008 if ( p <= 0 || p >= 100.0)
2011 _("Percentiles must lie in the range (0, 100)"));
2015 examine.n_percentiles++;
2017 xrealloc (examine.ptiles,
2018 sizeof (*examine.ptiles) *
2019 examine.n_percentiles);
2021 examine.ptiles[examine.n_percentiles - 1] = p;
2024 lex_match (lexer, T_COMMA);
2026 if (!lex_force_match (lexer, T_RPAREN))
2030 lex_match (lexer, T_EQUALS);
/* Percentile algorithm keywords. */
2032 while (lex_token (lexer) != T_ENDCMD
2033 && lex_token (lexer) != T_SLASH)
2035 if (lex_match_id (lexer, "HAVERAGE"))
2037 examine.pc_alg = PC_HAVERAGE;
2039 else if (lex_match_id (lexer, "WAVERAGE"))
2041 examine.pc_alg = PC_WAVERAGE;
2043 else if (lex_match_id (lexer, "ROUND"))
2045 examine.pc_alg = PC_ROUND;
2047 else if (lex_match_id (lexer, "EMPIRICAL"))
2049 examine.pc_alg = PC_EMPIRICAL;
2051 else if (lex_match_id (lexer, "AEMPIRICAL"))
2053 examine.pc_alg = PC_AEMPIRICAL;
2055 else if (lex_match_id (lexer, "NONE"))
2057 examine.pc_alg = PC_NONE;
2061 lex_error (lexer, NULL);
/* NOTE(review): the body of the TOTAL branch is not shown; presumably
   it sets totals_seen -- verify. */
2066 else if (lex_match_id (lexer, "TOTAL"))
2070 else if (lex_match_id (lexer, "NOTOTAL"))
2072 nototals_seen = true;
/* MISSING [=] keywords: LISTWISE and PAIRWISE select the deletion
   mode, EXCLUDE and INCLUDE set the exclusion class for dependent
   variables, REPORT and NOREPORT set it for factors. */
2074 else if (lex_match_id (lexer, "MISSING"))
2076 lex_match (lexer, T_EQUALS);
2078 while (lex_token (lexer) != T_ENDCMD
2079 && lex_token (lexer) != T_SLASH)
2081 if (lex_match_id (lexer, "LISTWISE"))
2083 examine.missing_pw = false;
2085 else if (lex_match_id (lexer, "PAIRWISE"))
2087 examine.missing_pw = true;
2089 else if (lex_match_id (lexer, "EXCLUDE"))
2091 examine.dep_excl = MV_ANY;
2093 else if (lex_match_id (lexer, "INCLUDE"))
2095 examine.dep_excl = MV_SYSTEM;
2097 else if (lex_match_id (lexer, "REPORT"))
2099 examine.fctr_excl = MV_NEVER;
2101 else if (lex_match_id (lexer, "NOREPORT"))
2103 examine.fctr_excl = MV_ANY;
2107 lex_error (lexer, NULL);
/* COMPARE [=] { VARIABLES | GROUPS } selects the box plot layout. */
2112 else if (lex_match_id (lexer, "COMPARE"))
2114 lex_match (lexer, T_EQUALS);
2115 if (lex_match_id (lexer, "VARIABLES"))
2117 examine.boxplot_mode = BP_VARIABLES;
2119 else if (lex_match_id (lexer, "GROUPS"))
2121 examine.boxplot_mode = BP_GROUPS;
2125 lex_error (lexer, NULL);
/* PLOT [=] keywords, optionally separated by commas.  SPREADLEVEL
   accepts an optional parenthesised power.  In the statements shown
   here NONE and ALL affect only the histogram, np-plot and box plot
   flags; the spread-level flag is left as it was. */
2129 else if (lex_match_id (lexer, "PLOT"))
2131 lex_match (lexer, T_EQUALS);
2133 while (lex_token (lexer) != T_ENDCMD
2134 && lex_token (lexer) != T_SLASH)
2136 if (lex_match_id (lexer, "BOXPLOT"))
2138 examine.boxplot = true;
2140 else if (lex_match_id (lexer, "NPPLOT"))
2142 examine.npplot = true;
2144 else if (lex_match_id (lexer, "HISTOGRAM"))
2146 examine.histogram = true;
2148 else if (lex_match_id (lexer, "SPREADLEVEL"))
2150 examine.spreadlevel = true;
2151 examine.sl_power = 0;
2152 if (lex_match (lexer, T_LPAREN))
2154 examine.sl_power = lex_integer (lexer);
2157 if (! lex_force_match (lexer, T_RPAREN))
2161 else if (lex_match_id (lexer, "NONE"))
2163 examine.histogram = false;
2164 examine.npplot = false;
2165 examine.boxplot = false;
2167 else if (lex_match (lexer, T_ALL))
2169 examine.histogram = true;
2170 examine.npplot = true;
2171 examine.boxplot = true;
2175 lex_error (lexer, NULL);
2178 lex_match (lexer, T_COMMA);
/* CINTERVAL requires a number, which becomes the confidence level. */
2181 else if (lex_match_id (lexer, "CINTERVAL"))
2183 if ( !lex_force_num (lexer))
2186 examine.conf = lex_number (lexer);
/* ID [=] variable names the variable used to identify cases. */
2189 else if (lex_match_id (lexer, "ID"))
2191 lex_match (lexer, T_EQUALS);
2193 examine.id_var = parse_variable_const (lexer, examine.dict);
2197 lex_error (lexer, NULL);
2203 if ( totals_seen && nototals_seen)
2205 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
2209 /* If totals have been requested or if there are no factors
2210 in this analysis, then the totals need to be included. */
2211 if ( !nototals_seen || examine.n_iacts == 1)
2213 examine.iacts = &iacts_mem[0];
/* Otherwise skip the empty totals interaction in slot 0 and destroy
   it.
   NOTE(review): the matching adjustment of examine.n_iacts is not
   shown; confirm that the count is reduced to match. */
2218 examine.iacts = &iacts_mem[1];
2219 interaction_destroy (iacts_mem[0]);
/* With an explicit ID variable, take its case index and width. */
2223 if ( examine.id_var )
2225 examine.id_idx = var_get_case_index (examine.id_var);
2226 examine.id_width = var_get_width (examine.id_var);
/* Prototype of the internal cases: value, ID, weight, in that order. */
2229 examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* value */
2230 examine.ex_proto = caseproto_add_width (examine.ex_proto, examine.id_width); /* id */
2231 examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* weight */
/* At least as many extremes must be calculated as will be displayed. */
2234 if (examine.disp_extremes > 0)
2236 examine.calc_extremes = examine.disp_extremes;
2239 if (examine.descriptives && examine.calc_extremes == 0)
2241 /* Descriptives always displays the max and min */
2242 examine.calc_extremes = 1;
/* PERCENTILES given without an explicit list: use the seven default
   percentiles 5, 10, 25, 50, 75, 90 and 95. */
2245 if (percentiles_seen && examine.n_percentiles == 0)
2247 examine.n_percentiles = 7;
2248 examine.ptiles = xcalloc (examine.n_percentiles,
2249 sizeof (*examine.ptiles));
2251 examine.ptiles[0] = 5;
2252 examine.ptiles[1] = 10;
2253 examine.ptiles[2] = 25;
2254 examine.ptiles[3] = 50;
2255 examine.ptiles[4] = 75;
2256 examine.ptiles[5] = 90;
2257 examine.ptiles[6] = 95;
2260 assert (examine.calc_extremes >= examine.disp_extremes);
/* Run the analysis once for every split-file group of the active
   dataset, then commit the procedure. */
2262 struct casegrouper *grouper;
2263 struct casereader *group;
2266 grouper = casegrouper_create_splits (proc_open (ds), examine.dict);
2267 while (casegrouper_get_next_group (grouper, &group))
2268 run_examine (&examine, group);
2269 ok = casegrouper_destroy (grouper);
2270 ok = proc_commit (ds) && ok;
/* First cleanup sequence.
   NOTE(review): unlike the second sequence below, no call to
   interaction_destroy is shown here; confirm that the interactions are
   released on this path as well. */
2273 caseproto_unref (examine.ex_proto);
2275 free (examine.ptiles);
2276 free (examine.dep_vars);
2277 pool_destroy (examine.pool);
/* Second cleanup sequence: examine.iacts is reset to the start of the
   array so that every interaction, including the totals one in slot 0,
   is destroyed.
   NOTE(review): if this sequence can be reached after iacts_mem[0] has
   already been destroyed in the NOTOTAL handling above, slot 0 would be
   destroyed twice -- verify. */
2282 caseproto_unref (examine.ex_proto);
2283 examine.iacts = iacts_mem;
2284 for (i = 0; i < examine.n_iacts; ++i)
2285 interaction_destroy (examine.iacts[i]);
2286 free (examine.dep_vars);
2287 free (examine.ptiles);
2288 pool_destroy (examine.pool);