2 PSPP - a program for statistical analysis.
3 Copyright (C) 2012 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include <gsl/gsl_cdf.h>
24 #include "libpspp/assertion.h"
25 #include "libpspp/message.h"
26 #include "libpspp/pool.h"
29 #include "data/dataset.h"
30 #include "data/dictionary.h"
31 #include "data/casegrouper.h"
32 #include "data/casereader.h"
33 #include "data/casewriter.h"
34 #include "data/caseproto.h"
35 #include "data/subcase.h"
38 #include "data/format.h"
40 #include "math/interaction.h"
41 #include "math/box-whisker.h"
42 #include "math/categoricals.h"
43 #include "math/chart-geometry.h"
44 #include "math/histogram.h"
45 #include "math/moments.h"
47 #include "math/sort.h"
48 #include "math/order-stats.h"
49 #include "math/percentiles.h"
50 #include "math/tukey-hinges.h"
51 #include "math/trimmed-mean.h"
53 #include "output/charts/boxplot.h"
54 #include "output/charts/np-plot.h"
55 #include "output/charts/spreadlevel-plot.h"
56 #include "output/charts/plot-hist.h"
58 #include "language/command.h"
59 #include "language/lexer/lexer.h"
60 #include "language/lexer/value-parser.h"
61 #include "language/lexer/variable-parser.h"
63 #include "output/tab.h"
66 #define _(msgid) gettext (msgid)
67 #define N_(msgid) msgid
76 /* Indices for the ex_proto member (below) */
89 /* A caseproto used to contain the data subsets under examination,
91 struct caseproto *ex_proto;
94 const struct variable **dep_vars;
97 struct interaction **iacts;
99 enum mv_class exclude;
101 const struct dictionary *dict;
103 struct categoricals *cats;
105 /* how many extremities to display */
114 /* The case index of the ID value (or -1 if not applicable) */
120 size_t n_percentiles;
128 enum bp_mode boxplot_mode;
130 const struct variable *id_var;
132 const struct variable *wv;
137 /* The value of this extremity */
140 /* Either the casenumber or the value of the variable specified
141 by the /ID subcommand which corresponds to this extremity */
142 union value identity;
145 struct exploratory_stats
152 /* Most operations need a sorted reader/writer */
153 struct casewriter *sorted_writer;
154 struct casereader *sorted_reader;
156 struct extremity *minima;
157 struct extremity *maxima;
160 Minimum should always equal minima[0].val.
161 Likewise, maximum should always equal maxima[0].val.
162 This redundancy exists as an optimisation effort.
163 Some statistics (eg histogram) require early calculation
169 struct trimmed_mean *trimmed_mean;
170 struct percentile *quartiles[3];
171 struct percentile **percentiles;
173 struct tukey_hinges *hinges;
175 /* The data for the NP Plots */
178 struct histogram *histogram;
180 /* The data for the box plots */
181 struct box_whisker *box_whisker;
186 /* The minimum weight */
191 /* Returns an array of (iact->n_vars) pointers to union value initialised to NULL.
192 The caller must free this array when no longer required. */
/* The report functions in this file hand the returned array to
   previous_value_record, which uses it to remember the interaction
   values of the category it saw last. */
193 static const union value **
194 previous_value_alloc (const struct interaction *iact)
/* One pointer slot per variable of the interaction. */
198 const union value **prev_val = xcalloc (iact->n_vars, sizeof (*prev_val));
/* Store NULL in every slot explicitly; previous_value_record treats a
   NULL slot as "nothing recorded yet". */
200 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
201 prev_val[ivar_idx] = NULL;
206 /* Set the contents of PREV_VAL to the values of C indexed by the variables of IACT */
/* Before PREV_VAL is overwritten, each recorded value is compared with the
   matching value in C.  Callers keep the result in a variable called
   diff_idx and test it against -1.
   NOTE(review): the lines that compute and return that result are not
   visible in this excerpt; presumably it is the index of the first
   interaction variable whose value changed, or -1 if none did -- confirm
   against the full source.
   After the call PREV_VAL holds pointers into C, not copies of the values. */
208 previous_value_record (const struct interaction *iact, const struct ccase *c, const union value **prev_val)
/* Pass 1: compare against what was recorded last time.  Slots that are
   still NULL (nothing recorded yet) are not compared. */
213 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
215 const struct variable *ivar = iact->vars[ivar_idx];
216 const int width = var_get_width (ivar);
217 const union value *val = case_data (c, ivar);
219 if (prev_val[ivar_idx])
220 if (! value_equal (prev_val[ivar_idx], val, width))
/* Pass 2: remember this case's values for the next call. */
227 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
229 const struct variable *ivar = iact->vars[ivar_idx];
230 const union value *val = case_data (c, ivar);
232 prev_val[ivar_idx] = val;
/* Draws one boxplot chart per dependent variable of CMD.  Each chart holds
   one box for every category of the interaction cmd->iacts[IACT_IDX]. */
239 show_boxplot_grouped (const struct examine *cmd, int iact_idx)
243 const struct interaction *iact = cmd->iacts[iact_idx];
244 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
246 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Start from an empty range so that any real minimum/maximum replaces it. */
248 double y_min = DBL_MAX;
249 double y_max = -DBL_MAX;
251 struct boxplot *boxplot;
253 ds_init_empty (&title);
/* With factor variables the title names the dependent variable and the
   interaction; otherwise it names only the dependent variable. */
255 if (iact->n_vars > 0)
258 ds_init_empty (&istr);
259 interaction_to_string (iact, &istr);
260 ds_put_format (&title, _("Boxplot of %s vs. %s"),
261 var_to_string (cmd->dep_vars[v]),
266 ds_put_format (&title, _("Boxplot of %s"), var_to_string (cmd->dep_vars[v]));
/* First pass over the categories: widen [y_min, y_max] until it covers the
   recorded minimum and maximum of this variable in every category. */
268 for (grp = 0; grp < n_cats; ++grp)
270 const struct exploratory_stats *es =
271 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
273 if ( y_min > es[v].minimum)
274 y_min = es[v].minimum;
276 if ( y_max < es[v].maximum)
277 y_max = es[v].maximum;
280 boxplot = boxplot_create (y_min, y_max, ds_cstr (&title));
/* Second pass: add one box per category.  The box label is built from
   "VAR = value; " for each variable of the interaction. */
284 for (grp = 0; grp < n_cats; ++grp)
289 const struct ccase *c =
290 categoricals_get_case_by_category_real (cmd->cats, iact_idx, grp);
292 const struct exploratory_stats *es =
293 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
295 ds_init_empty (&label);
296 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
298 const struct variable *ivar = iact->vars[ivar_idx];
299 const union value *val = case_data (c, ivar);
301 ds_put_cstr (&label, var_to_string (ivar));
302 ds_put_cstr (&label, " = ");
303 var_append_value_name (ivar, val, &label);
304 ds_put_cstr (&label, "; ");
307 boxplot_add_box (boxplot, es[v].box_whisker, ds_cstr (&label));
312 boxplot_submit (boxplot);
/* Draws one boxplot chart per category of the interaction
   cmd->iacts[IACT_IDX].  Each chart holds one box for every dependent
   variable of CMD (the transpose of show_boxplot_grouped). */
317 show_boxplot_variabled (const struct examine *cmd, int iact_idx)
320 const struct interaction *iact = cmd->iacts[iact_idx];
321 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
323 for (grp = 0; grp < n_cats; ++grp)
325 struct boxplot *boxplot;
/* Start from an empty range so that any real minimum/maximum replaces it. */
327 double y_min = DBL_MAX;
328 double y_max = -DBL_MAX;
330 const struct ccase *c =
331 categoricals_get_case_by_category_real (cmd->cats, iact_idx, grp);
334 ds_init_empty (&title);
/* The chart range must cover every dependent variable in this category. */
336 for (v = 0; v < cmd->n_dep_vars; ++v)
338 const struct exploratory_stats *es =
339 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
341 if ( y_min > es[v].minimum)
342 y_min = es[v].minimum;
344 if ( y_max < es[v].maximum)
345 y_max = es[v].maximum;
/* Without factor variables the title is just "Boxplot".  The remaining
   title code builds a "VAR = value; " label from the category's values.
   NOTE(review): the else line and the argument passed to the
   "Boxplot of %s" format are not visible in this excerpt. */
348 if ( iact->n_vars == 0)
349 ds_put_format (&title, _("Boxplot"));
354 ds_init_empty (&label);
355 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
357 const struct variable *ivar = iact->vars[ivar_idx];
358 const union value *val = case_data (c, ivar);
360 ds_put_cstr (&label, var_to_string (ivar));
361 ds_put_cstr (&label, " = ");
362 var_append_value_name (ivar, val, &label);
363 ds_put_cstr (&label, "; ");
366 ds_put_format (&title, _("Boxplot of %s"),
372 boxplot = boxplot_create (y_min, y_max, ds_cstr (&title));
/* One box per dependent variable, labelled with the variable's name. */
376 for (v = 0; v < cmd->n_dep_vars; ++v)
378 const struct exploratory_stats *es =
379 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
381 boxplot_add_box (boxplot, es[v].box_whisker,
382 var_to_string (cmd->dep_vars[v]));
385 boxplot_submit (boxplot);
/* For every dependent variable of CMD and every category of the interaction
   cmd->iacts[IACT_IDX], creates two charts from the same data: one via
   np_plot_create and one via dnp_plot_create. */
391 show_npplot (const struct examine *cmd, int iact_idx)
393 const struct interaction *iact = cmd->iacts[iact_idx];
394 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
398 for (v = 0; v < cmd->n_dep_vars; ++v)
401 for (grp = 0; grp < n_cats; ++grp)
403 struct chart_item *npp, *dnpp;
404 struct casereader *reader;
408 const struct ccase *c =
409 categoricals_get_case_by_category_real (cmd->cats,
412 const struct exploratory_stats *es =
413 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
/* Chart label: the dependent variable's name, followed, when there are
   factor variables, by " (VAR = value; ...)". */
416 ds_init_cstr (&label,
417 var_to_string (cmd->dep_vars[v]));
419 if ( iact->n_vars > 0)
421 ds_put_cstr (&label, " (");
422 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
424 const struct variable *ivar = iact->vars[ivar_idx];
425 const union value *val = case_data (c, ivar);
427 ds_put_cstr (&label, var_to_string (ivar));
428 ds_put_cstr (&label, " = ");
429 var_append_value_name (ivar, val, &label);
430 ds_put_cstr (&label, "; ");
433 ds_put_cstr (&label, ")");
/* The plot data is read back from the writer held by np.
   NOTE(review): the declaration of np is on a line not visible in this
   excerpt; presumably it is es[v].np, which calculate_n fills in. */
437 reader = casewriter_make_reader (np->writer);
440 npp = np_plot_create (np, reader, ds_cstr (&label));
441 dnpp = dnp_plot_create (np, reader, ds_cstr (&label));
/* If either chart could not be created, warn and release both. */
443 if (npp == NULL || dnpp == NULL)
445 msg (MW, _("Not creating NP plot because data set is empty."));
446 chart_item_unref (npp);
447 chart_item_unref (dnpp);
/* NOTE(review): the two submit calls presumably sit in the else branch of
   the test above; the else line is not visible in this excerpt. */
451 chart_item_submit (npp);
452 chart_item_submit (dnpp);
454 casereader_destroy (reader);
/* Creates one spread-vs-level chart per dependent variable of CMD.  Each
   category of the interaction cmd->iacts[IACT_IDX] contributes one point,
   built from that category's interquartile range and median. */
462 show_spreadlevel (const struct examine *cmd, int iact_idx)
464 const struct interaction *iact = cmd->iacts[iact_idx];
465 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
469 /* Spreadlevel when there are no levels is not useful */
/* NOTE(review): the statement controlled by this test is not visible in
   this excerpt; presumably an early return. */
470 if (iact->n_vars == 0)
473 for (v = 0; v < cmd->n_dep_vars; ++v)
476 struct chart_item *sl;
/* Chart label: "DEPVAR (interaction)". */
479 ds_init_cstr (&label,
480 var_to_string (cmd->dep_vars[v]));
482 if (iact->n_vars > 0)
484 ds_put_cstr (&label, " (");
485 interaction_to_string (iact, &label);
486 ds_put_cstr (&label, ")");
489 sl = spreadlevel_plot_create (ds_cstr (&label), cmd->sl_power);
491 for (grp = 0; grp < n_cats; ++grp)
493 const struct exploratory_stats *es =
494 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
/* quartiles[1] is the 0.5 percentile; quartiles[0] and quartiles[2] are the
   0.25 and 0.75 percentiles (see their creation in calculate_n). */
496 double median = percentile_calculate (es[v].quartiles[1], cmd->pc_alg);
498 double iqr = percentile_calculate (es[v].quartiles[2], cmd->pc_alg) -
499 percentile_calculate (es[v].quartiles[0], cmd->pc_alg);
501 spreadlevel_plot_add (sl, iqr, median);
/* NOTE(review): the condition that selects between the warning and the
   submit below is not visible in this excerpt. */
505 msg (MW, _("Not creating spreadlevel chart for %s"), ds_cstr (&label));
507 chart_item_submit (sl);
/* Creates a histogram chart for every dependent variable of CMD in every
   category of the interaction cmd->iacts[IACT_IDX], using the histogram
   stored in that category's exploratory_stats. */
515 show_histogram (const struct examine *cmd, int iact_idx)
517 const struct interaction *iact = cmd->iacts[iact_idx];
518 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
522 for (v = 0; v < cmd->n_dep_vars; ++v)
525 for (grp = 0; grp < n_cats; ++grp)
529 const struct ccase *c =
530 categoricals_get_case_by_category_real (cmd->cats,
533 const struct exploratory_stats *es =
534 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
/* NOTE(review): the statement controlled by this test is not visible in
   this excerpt; presumably the category is skipped when it has no
   histogram. */
538 if (es[v].histogram == NULL)
/* Chart label: the dependent variable's name, followed, when there are
   factor variables, by " (VAR = value; ...)". */
541 ds_init_cstr (&label,
542 var_to_string (cmd->dep_vars[v]));
544 if ( iact->n_vars > 0)
546 ds_put_cstr (&label, " (");
547 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
549 const struct variable *ivar = iact->vars[ivar_idx];
550 const union value *val = case_data (c, ivar);
552 ds_put_cstr (&label, var_to_string (ivar));
553 ds_put_cstr (&label, " = ");
554 var_append_value_name (ivar, val, &label);
555 ds_put_cstr (&label, "; ");
558 ds_put_cstr (&label, ")");
/* n, mean and var come from the moments accumulator; n and mean are passed
   on to the chart. */
562 moments_calculate (es[v].mom, &n, &mean, &var, NULL, NULL);
565 ( histogram_chart_create (es[v].histogram->gsl_hist,
566 ds_cstr (&label), n, mean,
/* Builds the "Percentiles" table for the interaction cmd->iacts[IACT_IDX].
   Layout, as established by the constants below:
     - heading columns: 1 + one per interaction variable + 1;
     - one data column per requested percentile (cmd->n_percentiles);
     - two rows per category: one labelled with the description of the
       percentile algorithm cmd->pc_alg, one labelled "Tukey's Hinges". */
576 percentiles_report (const struct examine *cmd, int iact_idx)
578 const struct interaction *iact = cmd->iacts[iact_idx];
580 const int heading_columns = 1 + iact->n_vars + 1;
581 const int heading_rows = 2;
584 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
586 const int rows_per_cat = 2;
587 const int rows_per_var = n_cats * rows_per_cat;
589 const int nr = heading_rows + cmd->n_dep_vars * rows_per_var;
590 const int nc = heading_columns + cmd->n_percentiles;
592 t = tab_create (nc, nr);
593 tab_title (t, _("Percentiles"));
595 tab_headers (t, heading_columns, 0, heading_rows, 0);
597 /* Internal Vertical lines */
598 tab_box (t, -1, -1, -1, TAL_1,
599 heading_columns, 0, nc - 1, nr - 1);
/* Outer frame of the whole table. */
602 tab_box (t, TAL_2, TAL_2, -1, -1,
603 0, 0, nc - 1, nr - 1);
605 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
606 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
608 tab_joint_text (t, heading_columns, 0,
610 TAT_TITLE | TAB_CENTER,
614 tab_hline (t, TAL_1, heading_columns, nc - 1, 1);
/* Second heading row: the requested percentile values. */
617 for (i = 0; i < cmd->n_percentiles; ++i)
619 tab_text_format (t, heading_columns + i, 1,
620 TAT_TITLE | TAB_CENTER,
621 _("%g"), cmd->ptiles[i]);
/* Heading cells naming the interaction variables. */
624 for (i = 0; i < iact->n_vars; ++i)
629 var_to_string (iact->vars[i])
637 tab_vline (t, TAL_1, heading_columns - 1, heading_rows, nr - 1);
639 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Fresh tracker for each dependent variable, so the first category of every
   variable is compared against "nothing recorded". */
641 const union value **prev_vals = previous_value_alloc (iact);
645 tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var);
648 0, heading_rows + v * rows_per_var,
649 TAT_TITLE | TAB_LEFT,
650 var_to_string (cmd->dep_vars[v])
653 for (i = 0; i < n_cats; ++i)
655 const struct ccase *c =
656 categoricals_get_case_by_category_real (cmd->cats,
659 const struct exploratory_stats *ess =
660 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i);
/* The user data is an array with one element per dependent variable. */
662 const struct exploratory_stats *es = ess + v;
664 int diff_idx = previous_value_record (iact, c, prev_vals);
/* Write the category's factor values into the heading columns.
   NOTE(review): the rest of the condition below is not visible in this
   excerpt. */
669 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
671 const struct variable *ivar = iact->vars[ivar_idx];
672 const union value *val = case_data (c, ivar);
674 if (( diff_idx != -1 && diff_idx <= ivar_idx)
678 ds_init_empty (&str);
679 var_append_value_name (ivar, val, &str);
683 heading_rows + v * rows_per_var + i * rows_per_cat,
684 TAT_TITLE | TAB_LEFT,
/* Rule a line above this category, starting at the column of the first
   factor that differs from the previous category. */
692 if ( diff_idx != -1 && diff_idx < iact->n_vars)
694 tab_hline (t, TAL_1, 1 + diff_idx, nc - 1,
695 heading_rows + v * rows_per_var + i * rows_per_cat
/* First row of the category: label it with the percentile algorithm. */
699 tab_text (t, heading_columns - 1,
700 heading_rows + v * rows_per_var + i * rows_per_cat,
701 TAT_TITLE | TAB_LEFT,
702 gettext (ptile_alg_desc [cmd->pc_alg]));
704 tukey_hinges_calculate (es->hinges, hinges);
706 for (p = 0; p < cmd->n_percentiles; ++p)
708 tab_double (t, heading_columns + p,
709 heading_rows + v * rows_per_var + i * rows_per_cat,
711 percentile_calculate (es->percentiles[p], cmd->pc_alg),
/* The second row gets a value only in the columns whose requested percentile
   is exactly 25, 50 or 75 (exact floating-point comparison). */
714 if (cmd->ptiles[p] == 25.0)
716 tab_double (t, heading_columns + p,
717 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
722 else if (cmd->ptiles[p] == 50.0)
724 tab_double (t, heading_columns + p,
725 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
730 else if (cmd->ptiles[p] == 75.0)
732 tab_double (t, heading_columns + p,
733 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
/* Second row of the category. */
741 tab_text (t, heading_columns - 1,
742 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
743 TAT_TITLE | TAB_LEFT,
744 _("Tukey's Hinges"));
/* Builds the "Descriptives" table for the interaction cmd->iacts[IACT_IDX].
   Every category of every dependent variable occupies rows_per_cat (13)
   rows, addressed below as base row + 0 .. base row + 12, where
   base row = heading_rows + v * rows_per_var + i * rows_per_cat.
   Many row-label lines are not visible in this excerpt; the inline notes
   describe only rows whose computed value is visible. */
755 descriptives_report (const struct examine *cmd, int iact_idx)
757 const struct interaction *iact = cmd->iacts[iact_idx];
759 const int heading_columns = 1 + iact->n_vars + 2;
760 const int heading_rows = 1;
763 size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
765 const int rows_per_cat = 13;
766 const int rows_per_var = n_cats * rows_per_cat;
768 const int nr = heading_rows + cmd->n_dep_vars * rows_per_var;
769 const int nc = 2 + heading_columns;
771 t = tab_create (nc, nr);
772 tab_title (t, _("Descriptives"));
774 tab_headers (t, heading_columns, 0, heading_rows, 0);
776 /* Internal Vertical lines */
777 tab_box (t, -1, -1, -1, TAL_1,
778 heading_columns, 0, nc - 1, nr - 1);
/* Outer frame of the whole table. */
781 tab_box (t, TAL_2, TAL_2, -1, -1,
782 0, 0, nc - 1, nr - 1);
784 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
785 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
/* Titles of the two data columns (their text is not visible in this
   excerpt). */
788 tab_text (t, heading_columns, 0, TAB_CENTER | TAT_TITLE,
791 tab_text (t, heading_columns + 1, 0, TAB_CENTER | TAT_TITLE,
/* Heading cells naming the interaction variables. */
794 for (i = 0; i < iact->n_vars; ++i)
799 var_to_string (iact->vars[i])
803 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Fresh tracker for each dependent variable. */
805 const union value **prev_val = previous_value_alloc (iact);
809 tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var);
812 0, heading_rows + v * rows_per_var,
813 TAT_TITLE | TAB_LEFT,
814 var_to_string (cmd->dep_vars[v])
817 for (i = 0; i < n_cats; ++i)
819 const struct ccase *c =
820 categoricals_get_case_by_category_real (cmd->cats,
823 const struct exploratory_stats *ess =
824 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i);
/* The user data is an array with one element per dependent variable. */
826 const struct exploratory_stats *es = ess + v;
828 const int diff_idx = previous_value_record (iact, c, prev_val);
/* Going by the argument order used in show_histogram, m0 is the count, m1
   the mean and m2 the variance.
   NOTE(review): m3 and m4 are presumably skewness and kurtosis; their use
   is not visible in this excerpt. */
830 double m0, m1, m2, m3, m4;
833 moments_calculate (es->mom, &m0, &m1, &m2, &m3, &m4);
/* Critical value of the t distribution with m0 - 1 degrees of freedom for
   a two-sided interval at confidence level cmd->conf. */
835 tval = gsl_cdf_tdist_Qinv ((1.0 - cmd->conf) / 2.0, m0 - 1.0);
/* Write the category's factor values into the heading columns.
   NOTE(review): the rest of the condition below is not visible in this
   excerpt. */
837 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
839 const struct variable *ivar = iact->vars[ivar_idx];
840 const union value *val = case_data (c, ivar);
842 if (( diff_idx != -1 && diff_idx <= ivar_idx)
846 ds_init_empty (&str);
847 var_append_value_name (ivar, val, &str);
851 heading_rows + v * rows_per_var + i * rows_per_cat,
852 TAT_TITLE | TAB_LEFT,
/* Rule a line above this category, starting at the column of the first
   factor that differs from the previous category. */
860 if ( diff_idx != -1 && diff_idx < iact->n_vars)
862 tab_hline (t, TAL_1, 1 + diff_idx, nc - 1,
863 heading_rows + v * rows_per_var + i * rows_per_cat
/* Row +0: the last column holds calc_semean (m2, m0). */
869 heading_rows + v * rows_per_var + i * rows_per_cat,
875 1 + iact->n_vars + 2,
876 heading_rows + v * rows_per_var + i * rows_per_cat,
880 1 + iact->n_vars + 3,
881 heading_rows + v * rows_per_var + i * rows_per_cat,
882 0, calc_semean (m2, m0), 0);
/* Rows +1 and +2: confidence interval for the mean,
   m1 - tval * semean and m1 + tval * semean respectively. */
886 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
888 _("%g%% Confidence Interval for Mean"),
893 1 + iact->n_vars + 1,
894 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
900 1 + iact->n_vars + 2,
901 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
902 0, m1 - tval * calc_semean (m2, m0), 0);
906 1 + iact->n_vars + 1,
907 heading_rows + v * rows_per_var + i * rows_per_cat + 2,
913 1 + iact->n_vars + 2,
914 heading_rows + v * rows_per_var + i * rows_per_cat + 2,
915 0, m1 + tval * calc_semean (m2, m0), 0);
/* Row +3: trimmed mean. */
920 heading_rows + v * rows_per_var + i * rows_per_cat + 3,
926 1 + iact->n_vars + 2,
927 heading_rows + v * rows_per_var + i * rows_per_cat + 3,
929 trimmed_mean_calculate (es->trimmed_mean),
/* Row +4: the 0.5 percentile (quartiles[1]). */
934 heading_rows + v * rows_per_var + i * rows_per_cat + 4,
940 1 + iact->n_vars + 2,
941 heading_rows + v * rows_per_var + i * rows_per_cat + 4,
943 percentile_calculate (es->quartiles[1], cmd->pc_alg),
/* Rows +5 .. +8: the values written here are not visible in this excerpt. */
949 heading_rows + v * rows_per_var + i * rows_per_cat + 5,
955 1 + iact->n_vars + 2,
956 heading_rows + v * rows_per_var + i * rows_per_cat + 5,
961 heading_rows + v * rows_per_var + i * rows_per_cat + 6,
967 1 + iact->n_vars + 2,
968 heading_rows + v * rows_per_var + i * rows_per_cat + 6,
973 heading_rows + v * rows_per_var + i * rows_per_cat + 7,
979 1 + iact->n_vars + 2,
980 heading_rows + v * rows_per_var + i * rows_per_cat + 7,
987 heading_rows + v * rows_per_var + i * rows_per_cat + 8,
993 1 + iact->n_vars + 2,
994 heading_rows + v * rows_per_var + i * rows_per_cat + 8,
/* Row +9: the range, i.e. largest recorded value minus smallest. */
1001 heading_rows + v * rows_per_var + i * rows_per_cat + 9,
1007 1 + iact->n_vars + 2,
1008 heading_rows + v * rows_per_var + i * rows_per_cat + 9,
1010 es->maxima[0].val - es->minima[0].val,
/* Row +10: 0.75 percentile minus 0.25 percentile. */
1015 heading_rows + v * rows_per_var + i * rows_per_cat + 10,
1017 _("Interquartile Range")
1022 1 + iact->n_vars + 2,
1023 heading_rows + v * rows_per_var + i * rows_per_cat + 10,
1025 percentile_calculate (es->quartiles[2], cmd->pc_alg) -
1026 percentile_calculate (es->quartiles[0], cmd->pc_alg),
/* Row +11: the last column holds calc_seskew (m0). */
1034 heading_rows + v * rows_per_var + i * rows_per_cat + 11,
1040 1 + iact->n_vars + 2,
1041 heading_rows + v * rows_per_var + i * rows_per_cat + 11,
1045 1 + iact->n_vars + 3,
1046 heading_rows + v * rows_per_var + i * rows_per_cat + 11,
1047 0, calc_seskew (m0), 0);
/* Row +12: the last column holds calc_sekurt (m0). */
1051 heading_rows + v * rows_per_var + i * rows_per_cat + 12,
1057 1 + iact->n_vars + 2,
1058 heading_rows + v * rows_per_var + i * rows_per_cat + 12,
1062 1 + iact->n_vars + 3,
1063 heading_rows + v * rows_per_var + i * rows_per_cat + 12,
1064 0, calc_sekurt (m0), 0);
/* Builds the "Extreme Values" table for the interaction
   cmd->iacts[IACT_IDX].  Each category of each dependent variable occupies
   2 * cmd->disp_extremes rows: the first cmd->disp_extremes rows are filled
   from es->maxima, the following cmd->disp_extremes rows from es->minima. */
1074 extremes_report (const struct examine *cmd, int iact_idx)
1076 const struct interaction *iact = cmd->iacts[iact_idx];
1078 const int heading_columns = 1 + iact->n_vars + 2;
1079 const int heading_rows = 1;
1080 struct tab_table *t;
1082 size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
1084 const int rows_per_cat = 2 * cmd->disp_extremes;
1085 const int rows_per_var = n_cats * rows_per_cat;
1087 const int nr = heading_rows + cmd->n_dep_vars * rows_per_var;
1088 const int nc = 2 + heading_columns;
1090 t = tab_create (nc, nr);
1091 tab_title (t, _("Extreme Values"));
1093 tab_headers (t, heading_columns, 0, heading_rows, 0);
1095 /* Internal Vertical lines */
1096 tab_box (t, -1, -1, -1, TAL_1,
1097 heading_columns, 0, nc - 1, nr - 1);
1099 /* External Frame */
1100 tab_box (t, TAL_2, TAL_2, -1, -1,
1101 0, 0, nc - 1, nr - 1);
1103 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
1104 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
/* Title of the identity column: the ID variable's name in one branch, some
   other text in the other.
   NOTE(review): the condition and the alternative text are not visible in
   this excerpt. */
1108 tab_text (t, heading_columns, 0, TAB_CENTER | TAT_TITLE,
1109 var_to_string (cmd->id_var));
1111 tab_text (t, heading_columns, 0, TAB_CENTER | TAT_TITLE,
1114 tab_text (t, heading_columns + 1, 0, TAB_CENTER | TAT_TITLE,
/* Heading cells naming the interaction variables. */
1117 for (i = 0; i < iact->n_vars; ++i)
1122 var_to_string (iact->vars[i])
1126 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Fresh tracker for each dependent variable. */
1128 const union value **prev_val = previous_value_alloc (iact);
1132 tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var);
1135 0, heading_rows + v * rows_per_var,
1137 var_to_string (cmd->dep_vars[v])
1140 for (i = 0; i < n_cats; ++i)
1143 const struct ccase *c =
1144 categoricals_get_case_by_category_real (cmd->cats, iact_idx, i);
1146 const struct exploratory_stats *ess =
1147 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i);
/* The user data is an array with one element per dependent variable. */
1149 const struct exploratory_stats *es = ess + v;
1151 int diff_idx = previous_value_record (iact, c, prev_val);
/* Write the category's factor values into the heading columns.
   NOTE(review): the rest of the condition below is not visible in this
   excerpt. */
1153 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
1155 const struct variable *ivar = iact->vars[ivar_idx];
1156 const union value *val = case_data (c, ivar);
1158 if (( diff_idx != -1 && diff_idx <= ivar_idx)
1162 ds_init_empty (&str);
1163 var_append_value_name (ivar, val, &str);
1167 heading_rows + v * rows_per_var + i * rows_per_cat,
1168 TAT_TITLE | TAB_LEFT,
/* Rule a line above this category, starting at the column of the first
   factor that differs from the previous category. */
1176 if ( diff_idx != -1 && diff_idx < iact->n_vars)
1178 tab_hline (t, TAL_1, 1 + diff_idx, nc - 1,
1179 heading_rows + v * rows_per_var + i * rows_per_cat
/* Label cell of the first half of the category's rows. */
1184 heading_columns - 2,
1185 heading_rows + v * rows_per_var + i * rows_per_cat,
/* A line separates the first half from the second half, followed by the
   label cell of the second half. */
1190 tab_hline (t, TAL_1, heading_columns - 2, nc - 1,
1191 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes
1195 heading_columns - 2,
1196 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes,
/* One row per displayed extremity in each half.  The identity cell is
   written either from the whole identity value or from its numeric member;
   the selecting condition is not visible in this excerpt. */
1200 for (e = 0 ; e < cmd->disp_extremes; ++e)
1203 heading_columns - 1,
1204 heading_rows + v * rows_per_var + i * rows_per_cat + e,
1209 /* The casenumber */
1213 heading_rows + v * rows_per_var + i * rows_per_cat + e,
1215 &es->maxima[e].identity,
1221 heading_rows + v * rows_per_var + i * rows_per_cat + e,
1223 es->maxima[e].identity.f,
1227 heading_columns + 1,
1228 heading_rows + v * rows_per_var + i * rows_per_cat + e,
1231 var_get_print_format (cmd->dep_vars[v]));
/* Same again for the second half, using es->minima. */
1235 heading_columns - 1,
1236 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
1241 /* The casenumber */
1245 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
1247 &es->minima[e].identity,
1253 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
1255 es->minima[e].identity.f,
1259 heading_columns + 1,
1260 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
1263 var_get_print_format (cmd->dep_vars[v]));
/* Builds the "Case Processing Summary" table for the interaction
   cmd->iacts[IACT_IDX]: one row per category per dependent variable, and
   six data columns under the joint heading "Cases", grouped in pairs as
   "Valid", "Missing" and "Total". */
1274 summary_report (const struct examine *cmd, int iact_idx)
1276 const struct interaction *iact = cmd->iacts[iact_idx];
1278 const int heading_columns = 1 + iact->n_vars;
1279 const int heading_rows = 3;
1280 struct tab_table *t;
/* Use the weight variable's print format if there is one, else F8.0. */
1282 const struct fmt_spec *wfmt = cmd->wv ? var_get_print_format (cmd->wv) : &F_8_0;
1284 size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
1286 const int nr = heading_rows + n_cats * cmd->n_dep_vars;
1287 const int nc = 6 + heading_columns;
1289 t = tab_create (nc, nr);
1290 tab_title (t, _("Case Processing Summary"));
1292 tab_headers (t, heading_columns, 0, heading_rows, 0);
1294 /* Internal Vertical lines */
1295 tab_box (t, -1, -1, -1, TAL_1,
1296 heading_columns, 0, nc - 1, nr - 1);
1298 /* External Frame */
1299 tab_box (t, TAL_2, TAL_2, -1, -1,
1300 0, 0, nc - 1, nr - 1);
1302 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
1303 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
/* Heading row 0: "Cases" across all data columns.  Heading row 1: the
   three pair headings. */
1305 tab_joint_text (t, heading_columns, 0,
1306 nc - 1, 0, TAB_CENTER | TAT_TITLE, _("Cases"));
1309 heading_columns + 1, 1,
1310 TAB_CENTER | TAT_TITLE, _("Valid"));
1313 heading_columns + 2, 1,
1314 heading_columns + 3, 1,
1315 TAB_CENTER | TAT_TITLE, _("Missing"));
1318 heading_columns + 4, 1,
1319 heading_columns + 5, 1,
1320 TAB_CENTER | TAT_TITLE, _("Total"));
/* Heading row 2: two sub-headings under each of the three pairs (their
   text is not visible in this excerpt). */
1322 for (i = 0; i < 3; ++i)
1324 tab_text (t, heading_columns + i * 2, 2, TAB_CENTER | TAT_TITLE,
1326 tab_text (t, heading_columns + i * 2 + 1, 2, TAB_CENTER | TAT_TITLE,
/* Heading cells naming the interaction variables. */
1330 for (i = 0; i < iact->n_vars; ++i)
1335 var_to_string (iact->vars[i])
1340 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Fresh tracker for each dependent variable. */
1343 const union value **prev_values = previous_value_alloc (iact);
1346 tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * n_cats);
1349 0, heading_rows + n_cats * v,
1351 var_to_string (cmd->dep_vars[v])
1355 for (i = 0; i < n_cats; ++i)
1358 const struct exploratory_stats *es;
1360 const struct ccase *c =
1361 categoricals_get_case_by_category_real (cmd->cats,
1365 int diff_idx = previous_value_record (iact, c, prev_values);
/* Unlike the other report functions in this file, which test
   diff_idx < iact->n_vars, this one draws no separator when only the last
   factor changed. */
1367 if ( diff_idx != -1 && diff_idx < iact->n_vars - 1)
1368 tab_hline (t, TAL_1, 1 + diff_idx, nc - 1,
1369 heading_rows + n_cats * v + i );
/* Write the category's factor values into the heading columns.
   NOTE(review): the rest of the condition below is not visible in this
   excerpt. */
1371 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
1373 const struct variable *ivar = iact->vars[ivar_idx];
1374 const union value *val = case_data (c, ivar);
1376 if (( diff_idx != -1 && diff_idx <= ivar_idx)
1380 ds_init_empty (&str);
1381 var_append_value_name (ivar, val, &str);
1384 1 + ivar_idx, heading_rows + n_cats * v + i,
1385 TAT_TITLE | TAB_LEFT,
1395 es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i);
/* Denominator for every percentage in this row.
   NOTE(review): nothing visible here guards against total being zero. */
1398 total = es[v].missing + es[v].non_missing;
/* Columns +0/+1: valid cases; the second holds their percentage of total. */
1400 heading_columns + 0,
1401 heading_rows + n_cats * v + i,
1408 heading_columns + 1,
1409 heading_rows + n_cats * v + i,
1412 100.0 * es[v].non_missing / total
/* Columns +2/+3: missing cases; the second holds their percentage of total. */
1417 heading_columns + 2,
1418 heading_rows + n_cats * v + i,
1424 heading_columns + 3,
1425 heading_rows + n_cats * v + i,
1428 100.0 * es[v].missing / total
/* Columns +4/+5: totals. */
1431 heading_columns + 4,
1432 heading_rows + n_cats * v + i,
1437 /* This can only be 100% can't it? */
/* The numerator below is the same sum that total was computed from, so
   the ratio is 1 whenever total is nonzero. */
1439 heading_columns + 5,
1440 heading_rows + n_cats * v + i,
1443 100.0 * (es[v].missing + es[v].non_missing)/ total
/* Thin rules between the three heading rows. */
1449 tab_hline (t, TAL_1, heading_columns, nc - 1, 1);
1450 tab_hline (t, TAL_1, heading_columns, nc - 1, 2);
1456 /* Match a variable.
1457 If the match succeeds, the variable will be placed in VAR.
1458 Returns true if successful */
1460 lex_match_variable (struct lexer *lexer,
1461 const struct dictionary *dict, const struct variable **var)
/* Only an identifier token can start a variable name.
   NOTE(review): the statement controlled by this test is not visible in
   this excerpt; presumably it returns false without consuming the token. */
1463 if (lex_token (lexer) != T_ID)
/* Look the variable up in DICT.
   NOTE(review): how a failed lookup is reported to the caller is not
   visible in this excerpt. */
1467 *var = parse_variable_const (lexer, dict);
1474 /* Attempt to parse an interaction from LEXER */
/* Accepted form, as far as the visible code shows:
     VAR [BY VAR]... [,]
   Variables are looked up in ex->dict.
   NOTE(review): the return statements are not visible in this excerpt;
   presumably the new interaction is returned on success and NULL when
   nothing could be parsed. */
1475 static struct interaction *
1476 parse_interaction (struct lexer *lexer, struct examine *ex)
1478 const struct variable *v = NULL;
1479 struct interaction *iact = NULL;
/* The first variable starts the interaction. */
1481 if ( lex_match_variable (lexer, ex->dict, &v))
1483 iact = interaction_create (v);
/* Every BY must be followed by another variable; if it is not, the
   partially built interaction is destroyed. */
1485 while (lex_match (lexer, T_BY))
1487 if (!lex_match_variable (lexer, ex->dict, &v))
1489 interaction_destroy (iact);
1492 interaction_add_variable (iact, v);
/* Consume an optional separating comma. */
1494 lex_match (lexer, T_COMMA);
/* Creates the per-category user data: an array with one exploratory_stats
   element per dependent variable, allocated from examine->pool.  AUX1 is
   the struct examine; AUX2 is unused.
   NOTE(review): the return statement is not visible in this excerpt;
   presumably the array is returned. */
1502 create_n (const void *aux1, void *aux2 UNUSED)
1506 const struct examine *examine = aux1;
1507 struct exploratory_stats *es = pool_calloc (examine->pool, examine->n_dep_vars, sizeof (*es));
/* Sort specification shared by all the writers created below.
   NOTE(review): presumably "case index 0, width 0, ascending", i.e. sorting
   by the stored value; confirm against subcase_init's parameter list. */
1508 struct subcase ordering;
1509 subcase_init (&ordering, 0, 0, SC_ASCEND);
1511 for (v = 0; v < examine->n_dep_vars; v++)
/* Cases written for this variable are collected through a sorting writer;
   the reader is created later, in calculate_n. */
1513 es[v].sorted_writer = sort_create_writer (&ordering, examine->ex_proto);
1514 es[v].sorted_reader = NULL;
1516 es[v].mom = moments_create (MOMENT_KURTOSIS);
/* Start the running minimum weight and the running value range at the
   extremes, so the first observation replaces them. */
1517 es[v].cmin = DBL_MAX;
1519 es[v].maximum = -DBL_MAX;
1520 es[v].minimum = DBL_MAX;
1523 subcase_destroy (&ordering);
/* Accumulates case C, with weight WEIGHT, into the per-category statistics
   USER_DATA (the array made by create_n).  AUX1 is the struct examine;
   AUX2 is unused.  Runs once per dependent variable. */
1528 update_n (const void *aux1, void *aux2 UNUSED, void *user_data,
1529 const struct ccase *c, double weight)
1532 const struct examine *examine = aux1;
1533 struct exploratory_stats *es = user_data;
1535 for (v = 0; v < examine->n_dep_vars; v++)
1537 struct ccase *outcase ;
1538 const struct variable *var = examine->dep_vars[v];
1539 const double x = case_data (c, var)->f;
/* Values that are missing under examine->exclude only add to the missing
   weight.
   NOTE(review): presumably followed by a continue; that line is not visible
   in this excerpt. */
1541 if (var_is_value_missing (var, case_data (c, var), examine->exclude))
1543 es[v].missing += weight;
1547 outcase = case_create (examine->ex_proto);
/* Track the running range (the assignments are on lines not visible in
   this excerpt). */
1549 if (x > es[v].maximum)
1552 if (x < es[v].minimum)
1555 es[v].non_missing += weight;
/* First pass of the moments computation; calculate_n runs the second. */
1557 moments_pass_one (es[v].mom, x, weight);
1559 /* Save the value and the ID to the writer */
1560 assert (examine->id_idx != -1);
1561 case_data_rw_idx (outcase, EX_VAL)->f = x;
1562 value_copy (case_data_rw_idx (outcase, EX_ID),
1563 case_data_idx (c, examine->id_idx), examine->id_width);
1565 case_data_rw_idx (outcase, EX_WT)->f = weight;
/* Remember the smallest weight seen; calculate_n passes it to
   tukey_hinges_create. */
1569 if (es[v].cmin > weight)
1570 es[v].cmin = weight;
1572 casewriter_write (es[v].sorted_writer, outcase);
/* Finishes the per-category statistics USER_DATA after all cases have been
   passed to update_n.  For each dependent variable it: optionally creates
   the histogram, turns the sorted writer into a reader, fills the minima
   and maxima arrays, runs the second moments pass, and computes the order
   statistics (trimmed mean, quartiles, Tukey hinges, requested percentiles,
   and optionally box-whisker and NP data).  AUX1 is the struct examine;
   AUX2 is unused. */
1577 calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data)
1580 const struct examine *examine = aux1;
1581 struct exploratory_stats *es = user_data;
1583 for (v = 0; v < examine->n_dep_vars; v++)
/* imin counts up from 0 while the minima are filled; imax starts at es[v].cc
   and is used to recognise the cases that belong in the maxima.
   NOTE(review): the lines that update imin and imax are not visible in this
   excerpt. */
1586 casenumber imin = 0;
1587 double imax = es[v].cc;
1588 struct casereader *reader;
1590 casenumber total_cases;
1592 if (examine->histogram)
/* Bin width = data range / (1 + log2 (cc)), rounded to a tidy tick size. */
1595 double bin_width = fabs (es[v].minimum - es[v].maximum)
1596 / (1 + log2 (es[v].cc))
1599 bin_width = chart_rounded_tick (bin_width);
1602 histogram_create (bin_width, es[v].minimum, es[v].maximum);
/* From here on the data is read, in sorted order, through sorted_reader;
   the writer pointer is cleared once the reader has been made from it. */
1605 es[v].sorted_reader = casewriter_make_reader (es[v].sorted_writer);
1606 total_cases = casereader_count_cases (es[v].sorted_reader);
1607 es[v].sorted_writer = NULL;
/* Room for calc_extremes extremities at each end, all from the pool. */
1609 es[v].maxima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].maxima));
1610 es[v].minima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].minima));
1611 for (i = 0; i < examine->calc_extremes; ++i)
1613 value_init_pool (examine->pool, &es[v].maxima[i].identity, examine->id_width) ;
1614 value_init_pool (examine->pool, &es[v].minima[i].identity, examine->id_width) ;
/* Single pass over a clone of the sorted reader; each case is released by
   the loop's increment expression. */
1617 for (reader = casereader_clone (es[v].sorted_reader);
1618 (c = casereader_read (reader)) != NULL; case_unref (c))
1620 const double val = case_data_idx (c, EX_VAL)->f;
1621 const double wt = case_data_idx (c, EX_WT)->f; /* FIXME: What about fractional weights ??? */
1623 moments_pass_two (es[v].mom, val, wt);
1625 if (es[v].histogram)
1626 histogram_add (es[v].histogram, val, wt);
/* The first cases read fill the minima. */
1628 if (imin < examine->calc_extremes)
1631 for (x = imin; x < examine->calc_extremes; ++x)
1633 struct extremity *min = &es[v].minima[x];
1635 value_copy (&min->identity, case_data_idx (c, EX_ID), examine->id_width);
/* The last cases read fill the maxima; indices at or beyond calc_extremes
   are rejected before the array is touched. */
1641 if (imax < examine->calc_extremes)
1645 for (x = imax; x < imax + wt; ++x)
1647 struct extremity *max;
1649 if (x >= examine->calc_extremes)
1652 max = &es[v].maxima[x];
1654 value_copy (&max->identity, case_data_idx (c, EX_ID), examine->id_width);
1658 casereader_destroy (reader);
/* Cross-check the cached range against the first recorded extremities. */
1660 if (examine->calc_extremes > 0)
1662 assert (es[v].minima[0].val == es[v].minimum);
1663 assert (es[v].maxima[0].val == es[v].maximum);
/* Order statistics gathered in one accumulation:
   os[0] trimmed mean, os[1..3] quartiles, os[4] Tukey hinges,
   os[5..] the user-requested percentiles. */
1667 const int n_os = 5 + examine->n_percentiles;
1668 struct order_stats **os ;
1669 es[v].percentiles = pool_calloc (examine->pool, examine->n_percentiles, sizeof (*es[v].percentiles));
1671 es[v].trimmed_mean = trimmed_mean_create (es[v].cc, 0.05);
/* NOTE(review): os comes from xcalloc, not the pool; its release is not
   visible in this excerpt -- confirm it is freed. */
1673 os = xcalloc (n_os, sizeof *os);
1674 os[0] = &es[v].trimmed_mean->parent;
1676 es[v].quartiles[0] = percentile_create (0.25, es[v].cc);
1677 es[v].quartiles[1] = percentile_create (0.5, es[v].cc);
1678 es[v].quartiles[2] = percentile_create (0.75, es[v].cc);
1680 os[1] = &es[v].quartiles[0]->parent;
1681 os[2] = &es[v].quartiles[1]->parent;
1682 os[3] = &es[v].quartiles[2]->parent;
1684 es[v].hinges = tukey_hinges_create (es[v].cc, es[v].cmin);
1685 os[4] = &es[v].hinges->parent;
/* examine->ptiles holds percentages, hence the division by 100. */
1687 for (i = 0; i < examine->n_percentiles; ++i)
1689 es[v].percentiles[i] = percentile_create (examine->ptiles[i] / 100.00, es[v].cc);
1690 os[5 + i] = &es[v].percentiles[i]->parent;
1693 order_stats_accumulate_idx (os, n_os,
1694 casereader_clone (es[v].sorted_reader),
/* The box-whisker statistic is built from the hinges computed above, so it
   needs its own accumulation pass. */
1700 if (examine->boxplot)
1702 struct order_stats *os;
1704 es[v].box_whisker = box_whisker_create (es[v].hinges,
1705 EX_ID, examine->id_var);
1707 os = &es[v].box_whisker->parent;
1708 order_stats_accumulate_idx (&os, 1,
1709 casereader_clone (es[v].sorted_reader),
/* The NP data is created from the final count, mean and variance, and also
   gets its own accumulation pass. */
1713 if (examine->npplot)
1715 double n, mean, var;
1716 struct order_stats *os;
1718 moments_calculate (es[v].mom, &n, &mean, &var, NULL, NULL);
1720 es[v].np = np_create (n, mean, var);
1722 os = &es[v].np->parent;
1724 order_stats_accumulate_idx (&os, 1,
1725 casereader_clone (es[v].sorted_reader),
1733 cleanup_exploratory_stats (struct examine *cmd)
1736 for (i = 0; i < cmd->n_iacts; ++i)
1739 const size_t n_cats = categoricals_n_count (cmd->cats, i);
1741 for (v = 0; v < cmd->n_dep_vars; ++v)
1744 for (grp = 0; grp < n_cats; ++grp)
1747 const struct exploratory_stats *es =
1748 categoricals_get_user_data_by_category_real (cmd->cats, i, grp);
1750 struct order_stats *os = &es[v].hinges->parent;
1751 struct statistic *stat = &os->parent;
1752 stat->destroy (stat);
1754 for (q = 0; q < 3 ; q++)
1756 os = &es[v].quartiles[q]->parent;
1758 stat->destroy (stat);
1761 for (q = 0; q < cmd->n_percentiles ; q++)
1763 os = &es[v].percentiles[q]->parent;
1765 stat->destroy (stat);
1768 os = &es[v].trimmed_mean->parent;
1770 stat->destroy (stat);
1772 os = &es[v].np->parent;
1776 stat->destroy (stat);
1779 statistic_destroy (&es[v].histogram->parent);
1780 moments_destroy (es[v].mom);
1782 casereader_destroy (es[v].sorted_reader);
1790 run_examine (struct examine *cmd, struct casereader *input)
1794 struct casereader *reader;
1796 struct payload payload;
1797 payload.create = create_n;
1798 payload.update = update_n;
1799 payload.destroy = calculate_n;
1801 cmd->wv = dict_get_weight (cmd->dict);
1804 = categoricals_create (cmd->iacts, cmd->n_iacts,
1805 cmd->wv, cmd->exclude);
1807 categoricals_set_payload (cmd->cats, &payload, cmd, NULL);
1809 if (cmd->id_idx == -1)
1811 struct ccase *c = casereader_peek (input, 0);
1813 assert (cmd->id_var == NULL);
1815 cmd->id_idx = case_get_value_cnt (c);
1816 input = casereader_create_arithmetic_sequence (input, 1.0, 1.0);
1821 /* FIXME: Filter out missing factor variables */
1823 /* Remove cases on a listwise basis if requested */
1824 if ( cmd->missing_pw == false)
1825 input = casereader_create_filter_missing (input,
1832 for (reader = input;
1833 (c = casereader_read (reader)) != NULL; case_unref (c))
1835 categoricals_update (cmd->cats, c);
1837 casereader_destroy (reader);
1838 categoricals_done (cmd->cats);
1840 for (i = 0; i < cmd->n_iacts; ++i)
1842 summary_report (cmd, i);
1844 if (cmd->disp_extremes > 0)
1845 extremes_report (cmd, i);
1847 if (cmd->n_percentiles > 0)
1848 percentiles_report (cmd, i);
1852 switch (cmd->boxplot_mode)
1855 show_boxplot_grouped (cmd, i);
1858 show_boxplot_variabled (cmd, i);
1867 show_histogram (cmd, i);
1870 show_npplot (cmd, i);
1872 if (cmd->spreadlevel)
1873 show_spreadlevel (cmd, i);
1875 if (cmd->descriptives)
1876 descriptives_report (cmd, i);
1879 cleanup_exploratory_stats (cmd);
1880 categoricals_destroy (cmd->cats);
1885 cmd_examine (struct lexer *lexer, struct dataset *ds)
1888 bool nototals_seen = false;
1889 bool totals_seen = false;
1891 struct interaction **iacts_mem = NULL;
1892 struct examine examine;
1893 bool percentiles_seen = false;
1895 examine.missing_pw = false;
1896 examine.disp_extremes = 0;
1897 examine.calc_extremes = 0;
1898 examine.descriptives = false;
1899 examine.conf = 0.95;
1900 examine.pc_alg = PC_HAVERAGE;
1901 examine.ptiles = NULL;
1902 examine.n_percentiles = 0;
1903 examine.id_idx = -1;
1904 examine.id_width = 0;
1905 examine.id_var = NULL;
1906 examine.boxplot_mode = BP_GROUPS;
1908 examine.ex_proto = caseproto_create ();
1910 examine.pool = pool_create ();
1912 /* Allocate space for the first interaction.
1913 This is interaction is an empty one (for the totals).
1914 If no totals are requested, we will simply ignore this
1917 examine.n_iacts = 1;
1918 examine.iacts = iacts_mem = pool_zalloc (examine.pool, sizeof (struct interaction *));
1919 examine.iacts[0] = interaction_create (NULL);
1921 examine.exclude = MV_ANY;
1922 examine.histogram = false;
1923 examine.npplot = false;
1924 examine.boxplot = false;
1925 examine.spreadlevel = false;
1926 examine.sl_power = 0;
1928 examine.dict = dataset_dict (ds);
1930 /* Accept an optional, completely pointless "/VARIABLES=" */
1931 lex_match (lexer, T_SLASH);
1932 if (lex_match_id (lexer, "VARIABLES"))
1934 if (! lex_force_match (lexer, T_EQUALS) )
1938 if (!parse_variables_const (lexer, examine.dict,
1939 &examine.dep_vars, &examine.n_dep_vars,
1940 PV_NO_DUPLICATE | PV_NUMERIC))
1943 if (lex_match (lexer, T_BY))
1945 struct interaction *iact = NULL;
1948 iact = parse_interaction (lexer, &examine);
1953 pool_nrealloc (examine.pool, iacts_mem,
1955 sizeof (*iacts_mem));
1957 iacts_mem[examine.n_iacts - 1] = iact;
1964 while (lex_token (lexer) != T_ENDCMD)
1966 lex_match (lexer, T_SLASH);
1968 if (lex_match_id (lexer, "STATISTICS"))
1970 lex_match (lexer, T_EQUALS);
1972 while (lex_token (lexer) != T_ENDCMD
1973 && lex_token (lexer) != T_SLASH)
1975 if (lex_match_id (lexer, "DESCRIPTIVES"))
1977 examine.descriptives = true;
1979 else if (lex_match_id (lexer, "EXTREME"))
1982 if (lex_match (lexer, T_LPAREN))
1984 extr = lex_integer (lexer);
1988 msg (MW, _("%s may not be negative. Using default value (%g)."), "EXTREME", 5.0);
1993 if (! lex_force_match (lexer, T_RPAREN))
1996 examine.disp_extremes = extr;
1998 else if (lex_match_id (lexer, "NONE"))
2001 else if (lex_match (lexer, T_ALL))
2003 if (examine.disp_extremes == 0)
2004 examine.disp_extremes = 5;
2008 lex_error (lexer, NULL);
2013 else if (lex_match_id (lexer, "PERCENTILES"))
2015 percentiles_seen = true;
2016 if (lex_match (lexer, T_LPAREN))
2018 while (lex_is_number (lexer))
2020 double p = lex_number (lexer);
2022 if ( p <= 0 || p >= 100.0)
2025 _("Percentiles must lie in the range (0, 100)"));
2029 examine.n_percentiles++;
2031 xrealloc (examine.ptiles,
2032 sizeof (*examine.ptiles) *
2033 examine.n_percentiles);
2035 examine.ptiles[examine.n_percentiles - 1] = p;
2038 lex_match (lexer, T_COMMA);
2040 if (!lex_force_match (lexer, T_RPAREN))
2044 lex_match (lexer, T_EQUALS);
2046 while (lex_token (lexer) != T_ENDCMD
2047 && lex_token (lexer) != T_SLASH)
2049 if (lex_match_id (lexer, "HAVERAGE"))
2051 examine.pc_alg = PC_HAVERAGE;
2053 else if (lex_match_id (lexer, "WAVERAGE"))
2055 examine.pc_alg = PC_WAVERAGE;
2057 else if (lex_match_id (lexer, "ROUND"))
2059 examine.pc_alg = PC_ROUND;
2061 else if (lex_match_id (lexer, "EMPIRICAL"))
2063 examine.pc_alg = PC_EMPIRICAL;
2065 else if (lex_match_id (lexer, "AEMPIRICAL"))
2067 examine.pc_alg = PC_AEMPIRICAL;
2069 else if (lex_match_id (lexer, "NONE"))
2071 examine.pc_alg = PC_NONE;
2075 lex_error (lexer, NULL);
2080 else if (lex_match_id (lexer, "TOTAL"))
2084 else if (lex_match_id (lexer, "NOTOTAL"))
2086 nototals_seen = true;
2088 else if (lex_match_id (lexer, "MISSING"))
2090 lex_match (lexer, T_EQUALS);
2092 while (lex_token (lexer) != T_ENDCMD
2093 && lex_token (lexer) != T_SLASH)
2095 if (lex_match_id (lexer, "LISTWISE"))
2097 examine.missing_pw = false;
2099 else if (lex_match_id (lexer, "PAIRWISE"))
2101 examine.missing_pw = true;
2103 else if (lex_match_id (lexer, "EXCLUDE"))
2105 examine.exclude = MV_ANY;
2107 else if (lex_match_id (lexer, "INCLUDE"))
2109 examine.exclude = MV_SYSTEM;
2113 lex_error (lexer, NULL);
2118 else if (lex_match_id (lexer, "COMPARE"))
2120 lex_match (lexer, T_EQUALS);
2121 if (lex_match_id (lexer, "VARIABLES"))
2123 examine.boxplot_mode = BP_VARIABLES;
2125 else if (lex_match_id (lexer, "GROUPS"))
2127 examine.boxplot_mode = BP_GROUPS;
2131 lex_error (lexer, NULL);
2135 else if (lex_match_id (lexer, "PLOT"))
2137 lex_match (lexer, T_EQUALS);
2139 while (lex_token (lexer) != T_ENDCMD
2140 && lex_token (lexer) != T_SLASH)
2142 if (lex_match_id (lexer, "BOXPLOT"))
2144 examine.boxplot = true;
2146 else if (lex_match_id (lexer, "NPPLOT"))
2148 examine.npplot = true;
2150 else if (lex_match_id (lexer, "HISTOGRAM"))
2152 examine.histogram = true;
2154 else if (lex_match_id (lexer, "SPREADLEVEL"))
2156 examine.spreadlevel = true;
2157 examine.sl_power = 0;
2158 if (lex_match (lexer, T_LPAREN))
2160 examine.sl_power = lex_integer (lexer);
2163 if (! lex_force_match (lexer, T_RPAREN))
2167 else if (lex_match_id (lexer, "NONE"))
2169 examine.histogram = false;
2170 examine.npplot = false;
2171 examine.boxplot = false;
2173 else if (lex_match (lexer, T_ALL))
2175 examine.histogram = true;
2176 examine.npplot = true;
2177 examine.boxplot = true;
2181 lex_error (lexer, NULL);
2184 lex_match (lexer, T_COMMA);
2187 else if (lex_match_id (lexer, "CINTERVAL"))
2189 if ( !lex_force_num (lexer))
2192 examine.conf = lex_number (lexer);
2195 else if (lex_match_id (lexer, "ID"))
2197 lex_match (lexer, T_EQUALS);
2199 examine.id_var = parse_variable_const (lexer, examine.dict);
2203 lex_error (lexer, NULL);
2209 if ( totals_seen && nototals_seen)
2211 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
2215 /* If totals have been requested or if there are no factors
2216 in this analysis, then the totals need to be included. */
2217 if ( !nototals_seen || examine.n_iacts == 1)
2219 examine.iacts = &iacts_mem[0];
2224 examine.iacts = &iacts_mem[1];
2228 if ( examine.id_var )
2230 examine.id_idx = var_get_case_index (examine.id_var);
2231 examine.id_width = var_get_width (examine.id_var);
2234 examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* value */
2235 examine.ex_proto = caseproto_add_width (examine.ex_proto, examine.id_width); /* id */
2236 examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* weight */
2239 if (examine.disp_extremes > 0)
2241 examine.calc_extremes = examine.disp_extremes;
2244 if (examine.descriptives && examine.calc_extremes == 0)
2246 /* Descriptives always displays the max and min */
2247 examine.calc_extremes = 1;
2250 if (percentiles_seen && examine.n_percentiles == 0)
2252 examine.n_percentiles = 7;
2253 examine.ptiles = xcalloc (examine.n_percentiles,
2254 sizeof (*examine.ptiles));
2256 examine.ptiles[0] = 5;
2257 examine.ptiles[1] = 10;
2258 examine.ptiles[2] = 25;
2259 examine.ptiles[3] = 50;
2260 examine.ptiles[4] = 75;
2261 examine.ptiles[5] = 90;
2262 examine.ptiles[6] = 95;
2265 assert (examine.calc_extremes >= examine.disp_extremes);
2267 struct casegrouper *grouper;
2268 struct casereader *group;
2271 grouper = casegrouper_create_splits (proc_open (ds), examine.dict);
2272 while (casegrouper_get_next_group (grouper, &group))
2273 run_examine (&examine, group);
2274 ok = casegrouper_destroy (grouper);
2275 ok = proc_commit (ds) && ok;
2278 caseproto_unref (examine.ex_proto);
2280 for (i = 0; i < examine.n_iacts; ++i)
2281 interaction_destroy (examine.iacts[i]);
2283 free (examine.ptiles);
2284 free (examine.dep_vars);
2285 pool_destroy (examine.pool);
2290 caseproto_unref (examine.ex_proto);
2291 examine.iacts = iacts_mem;
2292 for (i = 0; i < examine.n_iacts; ++i)
2293 interaction_destroy (examine.iacts[i]);
2294 free (examine.dep_vars);
2295 free (examine.ptiles);
2296 pool_destroy (examine.pool);