2 PSPP - a program for statistical analysis.
3 Copyright (C) 2012, 2013, 2016 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include <gsl/gsl_cdf.h>
24 #include "libpspp/assertion.h"
25 #include "libpspp/message.h"
26 #include "libpspp/pool.h"
29 #include "data/dataset.h"
30 #include "data/dictionary.h"
31 #include "data/casegrouper.h"
32 #include "data/casereader.h"
33 #include "data/casewriter.h"
34 #include "data/caseproto.h"
35 #include "data/subcase.h"
38 #include "data/format.h"
40 #include "math/interaction.h"
41 #include "math/box-whisker.h"
42 #include "math/categoricals.h"
43 #include "math/chart-geometry.h"
44 #include "math/histogram.h"
45 #include "math/moments.h"
47 #include "math/sort.h"
48 #include "math/order-stats.h"
49 #include "math/percentiles.h"
50 #include "math/tukey-hinges.h"
51 #include "math/trimmed-mean.h"
53 #include "output/charts/boxplot.h"
54 #include "output/charts/np-plot.h"
55 #include "output/charts/spreadlevel-plot.h"
56 #include "output/charts/plot-hist.h"
58 #include "language/command.h"
59 #include "language/lexer/lexer.h"
60 #include "language/lexer/value-parser.h"
61 #include "language/lexer/variable-parser.h"
63 #include "output/tab.h"
66 #define _(msgid) gettext (msgid)
67 #define N_(msgid) msgid
/* Appends to STR the name of value VAL of variable VAR, using
   var_append_value_name.  If VAL counts as missing for VAR under the
   MV_ANY class, the translated suffix for missing values is appended
   too. */
70 append_value_name (const struct variable *var, const union value *val, struct string *str)
72 var_append_value_name (var, val, str);
/* Flag values that are missing in any sense. */
73 if ( var_is_value_missing (var, val, MV_ANY))
74 ds_put_cstr (str, _(" (missing)"));
84 /* Indices for the ex_proto member (below) */
97 /* A caseproto used to contain the data subsets under examination,
99 struct caseproto *ex_proto;
102 const struct variable **dep_vars;
105 struct interaction **iacts;
107 enum mv_class dep_excl;
108 enum mv_class fctr_excl;
110 const struct dictionary *dict;
112 struct categoricals *cats;
114 /* how many extremities to display */
123 /* The case index of the ID value, or -1 if not applicable */
129 size_t n_percentiles;
134 bool spreadlevelplot;
137 enum bp_mode boxplot_mode;
139 const struct variable *id_var;
141 const struct variable *wv;
146 /* The value of this extremity */
149 /* Either the casenumber or the value of the variable specified
150 by the /ID subcommand which corresponds to this extremity */
151 union value identity;
154 struct exploratory_stats
161 /* Most operations need a sorted reader/writer */
162 struct casewriter *sorted_writer;
163 struct casereader *sorted_reader;
165 struct extremity *minima;
166 struct extremity *maxima;
169 Minimum should always equal minima[0].val.
170 Likewise, maximum should always equal maxima[0].val.
171 This redundancy exists as an optimisation effort.
172 Some statistics (eg histogram) require early calculation
178 struct trimmed_mean *trimmed_mean;
179 struct percentile *quartiles[3];
180 struct percentile **percentiles;
182 struct tukey_hinges *hinges;
184 /* The data for the NP Plots */
187 struct histogram *histogram;
189 /* The data for the box plots */
190 struct box_whisker *box_whisker;
195 /* The minimum weight */
200 /* Returns an array of (iact->n_vars) pointers to union value initialised to NULL.
201 The caller must free this array when no longer required. */
202 static const union value **
203 previous_value_alloc (const struct interaction *iact)
/* One pointer slot per variable of the interaction. */
207 const union value **prev_val = xcalloc (iact->n_vars, sizeof (*prev_val));
/* Explicitly mark every slot as holding no previous value yet;
   previous_value_record tests each slot for non-NULL before comparing. */
209 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
210 prev_val[ivar_idx] = NULL;
215 /* Set the contents of PREV_VAL to the values of C indexed by the variables of IACT.

   Before overwriting, every slot of PREV_VAL that already holds a value is
   compared (value_equal, using the width of the variable) with the
   corresponding value of C.

   NOTE(review): callers in this file store the result in an int named
   diff_idx and test it against -1 and against variable indices, so the
   result is presumably the index of the first variable whose value
   changed, or -1 if none did.  The statements that compute and return it
   are not visible here; confirm. */
217 previous_value_record (const struct interaction *iact, const struct ccase *c, const union value **prev_val)
/* Pass 1: compare the remembered values with those of C. */
222 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
224 const struct variable *ivar = iact->vars[ivar_idx];
225 const int width = var_get_width (ivar);
226 const union value *val = case_data (c, ivar);
/* A NULL slot means nothing was recorded yet, so there is nothing to
   compare against. */
228 if (prev_val[ivar_idx])
229 if (! value_equal (prev_val[ivar_idx], val, width))
/* Pass 2: remember the values of C for the next call.  Only the pointers
   returned by case_data are stored, not copies of the values. */
236 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
238 const struct variable *ivar = iact->vars[ivar_idx];
239 const union value *val = case_data (c, ivar);
241 prev_val[ivar_idx] = val;
/* Produces boxplots for interaction number IACT_IDX of CMD, grouped by
   category: for each dependent variable a boxplot is created whose boxes
   correspond to the categories of the interaction.

   The vertical range of each chart spans the smallest minimum and the
   largest maximum found among the categories for that variable. */
248 show_boxplot_grouped (const struct examine *cmd, int iact_idx)
252 const struct interaction *iact = cmd->iacts[iact_idx];
253 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
255 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Start with an empty range so that the first category always narrows it. */
257 double y_min = DBL_MAX;
258 double y_max = -DBL_MAX;
260 struct boxplot *boxplot;
262 ds_init_empty (&title);
/* The title names the interaction as well when it has any variables. */
264 if (iact->n_vars > 0)
267 ds_init_empty (&istr);
268 interaction_to_string (iact, &istr);
269 ds_put_format (&title, _("Boxplot of %s vs. %s"),
270 var_to_string (cmd->dep_vars[v]),
275 ds_put_format (&title, _("Boxplot of %s"), var_to_string (cmd->dep_vars[v]));
/* First pass over the categories: find the overall range for the axis. */
277 for (grp = 0; grp < n_cats; ++grp)
279 const struct exploratory_stats *es =
280 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
282 if ( y_min > es[v].minimum)
283 y_min = es[v].minimum;
285 if ( y_max < es[v].maximum)
286 y_max = es[v].maximum;
289 boxplot = boxplot_create (y_min, y_max, ds_cstr (&title));
/* Second pass: add one box per category, labelled with the value names of
   the interaction variables separated by semicolons. */
293 for (grp = 0; grp < n_cats; ++grp)
298 const struct ccase *c =
299 categoricals_get_case_by_category_real (cmd->cats, iact_idx, grp);
301 struct exploratory_stats *es =
302 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
304 ds_init_empty (&label);
305 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
308 const struct variable *ivar = iact->vars[ivar_idx];
309 const union value *val = case_data (c, ivar);
312 append_value_name (ivar, val, &l);
/* Strip leading spaces from the value name before appending it. */
313 ds_ltrim (&l, ss_cstr (" "));
315 ds_put_substring (&label, l.ss);
/* Separator between variables, but not after the last one. */
316 if (ivar_idx < iact->n_vars - 1)
317 ds_put_cstr (&label, "; ");
322 boxplot_add_box (boxplot, es[v].box_whisker, ds_cstr (&label));
/* NOTE(review): the pointer is cleared after being handed to the chart,
   presumably because the chart takes ownership of it; confirm. */
323 es[v].box_whisker = NULL;
328 boxplot_submit (boxplot);
/* Produces boxplots for interaction number IACT_IDX of CMD, grouped by
   variable: for each category of the interaction a boxplot is created
   whose boxes correspond to the dependent variables.

   The vertical range of each chart spans the smallest minimum and the
   largest maximum found among the dependent variables for that category. */
333 show_boxplot_variabled (const struct examine *cmd, int iact_idx)
336 const struct interaction *iact = cmd->iacts[iact_idx];
337 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
339 for (grp = 0; grp < n_cats; ++grp)
341 struct boxplot *boxplot;
/* Start with an empty range so that the first variable always narrows it. */
343 double y_min = DBL_MAX;
344 double y_max = -DBL_MAX;
346 const struct ccase *c =
347 categoricals_get_case_by_category_real (cmd->cats, iact_idx, grp);
350 ds_init_empty (&title);
/* First pass over the dependent variables: find the overall range. */
352 for (v = 0; v < cmd->n_dep_vars; ++v)
354 const struct exploratory_stats *es =
355 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
357 if ( y_min > es[v].minimum)
358 y_min = es[v].minimum;
360 if ( y_max < es[v].maximum)
361 y_max = es[v].maximum;
/* Without interaction variables the title is generic. */
364 if ( iact->n_vars == 0)
365 ds_put_format (&title, _("Boxplot"));
/* A label is built from one clause per interaction variable, each made of
   the variable name, an equals sign and the value name, followed by a
   semicolon.  NOTE(review): presumably this label is what the title
   format below is filled with; its argument line is not visible here. */
370 ds_init_empty (&label);
371 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
373 const struct variable *ivar = iact->vars[ivar_idx];
374 const union value *val = case_data (c, ivar);
376 ds_put_cstr (&label, var_to_string (ivar));
377 ds_put_cstr (&label, " = ");
378 append_value_name (ivar, val, &label);
379 ds_put_cstr (&label, "; ");
382 ds_put_format (&title, _("Boxplot of %s"),
388 boxplot = boxplot_create (y_min, y_max, ds_cstr (&title));
/* Second pass: add one box per dependent variable, labelled with the
   variable name. */
392 for (v = 0; v < cmd->n_dep_vars; ++v)
394 struct exploratory_stats *es =
395 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
397 boxplot_add_box (boxplot, es[v].box_whisker,
398 var_to_string (cmd->dep_vars[v]));
/* NOTE(review): the pointer is cleared after being handed to the chart,
   presumably because the chart takes ownership of it; confirm. */
399 es[v].box_whisker = NULL;
402 boxplot_submit (boxplot);
/* Produces the NP plots for interaction number IACT_IDX of CMD.

   For every dependent variable and every category of the interaction, a
   label is built from the variable name followed, when the interaction
   has variables, by a parenthesised list of clauses giving each
   interaction variable and its value name.  Two charts are then created
   from the same reader with np_plot_create and dnp_plot_create.  If
   either cannot be created, a warning is issued and both are released;
   otherwise both are submitted. */
408 show_npplot (const struct examine *cmd, int iact_idx)
410 const struct interaction *iact = cmd->iacts[iact_idx];
411 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
415 for (v = 0; v < cmd->n_dep_vars; ++v)
418 for (grp = 0; grp < n_cats; ++grp)
420 struct chart_item *npp, *dnpp;
421 struct casereader *reader;
425 const struct ccase *c =
426 categoricals_get_case_by_category_real (cmd->cats,
429 const struct exploratory_stats *es =
430 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
/* The label starts with the name of the dependent variable. */
433 ds_init_cstr (&label,
434 var_to_string (cmd->dep_vars[v]));
436 if ( iact->n_vars > 0)
438 ds_put_cstr (&label, " (");
439 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
441 const struct variable *ivar = iact->vars[ivar_idx];
442 const union value *val = case_data (c, ivar);
444 ds_put_cstr (&label, var_to_string (ivar));
445 ds_put_cstr (&label, " = ");
446 append_value_name (ivar, val, &label);
447 ds_put_cstr (&label, "; ");
450 ds_put_cstr (&label, ")");
/* NOTE(review): np is declared on a line not visible here; presumably it
   is the NP data of es[v].  Confirm. */
454 reader = casewriter_make_reader (np->writer);
457 npp = np_plot_create (np, reader, ds_cstr (&label));
458 dnpp = dnp_plot_create (np, reader, ds_cstr (&label));
/* Both charts are needed; if either is missing, neither is shown. */
460 if (npp == NULL || dnpp == NULL)
462 msg (MW, _("Not creating NP plot because data set is empty."));
463 chart_item_unref (npp);
464 chart_item_unref (dnpp);
468 chart_item_submit (npp);
469 chart_item_submit (dnpp);
471 casereader_destroy (reader);
/* Produces the spread versus level plots for interaction number IACT_IDX
   of CMD.

   For each dependent variable one chart is created with
   spreadlevel_plot_create, using the power cmd->sl_power.  Each category
   of the interaction contributes one point, computed from its median
   (quartiles[1]) and its interquartile range (quartiles[2] minus
   quartiles[0]), all evaluated with the percentile algorithm
   cmd->pc_alg. */
479 show_spreadlevel (const struct examine *cmd, int iact_idx)
481 const struct interaction *iact = cmd->iacts[iact_idx];
482 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
486 /* Spreadlevel when there are no levels is not useful */
487 if (iact->n_vars == 0)
490 for (v = 0; v < cmd->n_dep_vars; ++v)
493 struct chart_item *sl;
/* Label: the dependent variable name, then the interaction in
   parentheses. */
496 ds_init_cstr (&label,
497 var_to_string (cmd->dep_vars[v]));
499 if (iact->n_vars > 0)
501 ds_put_cstr (&label, " (");
502 interaction_to_string (iact, &label);
503 ds_put_cstr (&label, ")");
506 sl = spreadlevel_plot_create (ds_cstr (&label), cmd->sl_power);
/* One point per category. */
508 for (grp = 0; grp < n_cats; ++grp)
510 const struct exploratory_stats *es =
511 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
513 double median = percentile_calculate (es[v].quartiles[1], cmd->pc_alg);
515 double iqr = percentile_calculate (es[v].quartiles[2], cmd->pc_alg) -
516 percentile_calculate (es[v].quartiles[0], cmd->pc_alg);
518 spreadlevel_plot_add (sl, iqr, median);
/* NOTE(review): the condition choosing between the warning and the
   submission below is not visible here; presumably the warning is issued
   when the chart could not be created.  Confirm. */
522 msg (MW, _("Not creating spreadlevel chart for %s"), ds_cstr (&label));
524 chart_item_submit (sl);
/* Produces the histograms for interaction number IACT_IDX of CMD.

   For every dependent variable and every category of the interaction, the
   histogram stored in the exploratory statistics (if any) is turned into
   a chart with histogram_chart_create.  The chart label is the variable
   name followed, when the interaction has variables, by a parenthesised
   list of clauses giving each interaction variable and its value name.
   The count and mean obtained from moments_calculate are passed on to
   the chart. */
532 show_histogram (const struct examine *cmd, int iact_idx)
534 const struct interaction *iact = cmd->iacts[iact_idx];
535 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
539 for (v = 0; v < cmd->n_dep_vars; ++v)
542 for (grp = 0; grp < n_cats; ++grp)
546 const struct ccase *c =
547 categoricals_get_case_by_category_real (cmd->cats,
550 const struct exploratory_stats *es =
551 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp);
/* NOTE(review): the statement guarded by this test is not visible here;
   presumably categories without a histogram are skipped.  Confirm. */
555 if (es[v].histogram == NULL)
558 ds_init_cstr (&label,
559 var_to_string (cmd->dep_vars[v]));
561 if ( iact->n_vars > 0)
563 ds_put_cstr (&label, " (");
564 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
566 const struct variable *ivar = iact->vars[ivar_idx];
567 const union value *val = case_data (c, ivar);
569 ds_put_cstr (&label, var_to_string (ivar));
570 ds_put_cstr (&label, " = ");
571 append_value_name (ivar, val, &label);
572 ds_put_cstr (&label, "; ");
575 ds_put_cstr (&label, ")");
/* Only the first three moments are requested; the last two outputs are
   passed as NULL. */
579 moments_calculate (es[v].mom, &n, &mean, &var, NULL, NULL);
582 ( histogram_chart_create (es[v].histogram->gsl_hist,
583 ds_cstr (&label), n, mean,
/* Builds the table titled Percentiles for interaction number IACT_IDX of
   CMD.

   Layout, as established by the code below:
   - heading columns: one for the dependent variable, one per interaction
     variable, and one for the row description;
   - one data column per requested percentile (cmd->n_percentiles);
   - two heading rows, then for each dependent variable and each category
     two rows: the first holds the percentiles computed with the algorithm
     cmd->pc_alg, the second is labelled as Tukey hinges and is filled
     only in the columns whose percentile is exactly 25, 50 or 75. */
593 percentiles_report (const struct examine *cmd, int iact_idx)
595 const struct interaction *iact = cmd->iacts[iact_idx];
597 const int heading_columns = 1 + iact->n_vars + 1;
598 const int heading_rows = 2;
601 const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
603 const int rows_per_cat = 2;
604 const int rows_per_var = n_cats * rows_per_cat;
606 const int nr = heading_rows + cmd->n_dep_vars * rows_per_var;
607 const int nc = heading_columns + cmd->n_percentiles;
609 t = tab_create (nc, nr);
611 tab_title (t, _("Percentiles"));
613 tab_headers (t, heading_columns, 0, heading_rows, 0);
615 /* Internal Vertical lines */
616 tab_box (t, -1, -1, -1, TAL_1,
617 heading_columns, 0, nc - 1, nr - 1);
/* Frame around the whole table. */
620 tab_box (t, TAL_2, TAL_2, -1, -1,
621 0, 0, nc - 1, nr - 1);
/* Rules separating the headings from the data area. */
623 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
624 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
626 tab_joint_text (t, heading_columns, 0,
628 TAT_TITLE | TAB_CENTER,
632 tab_hline (t, TAL_1, heading_columns, nc - 1, 1);
/* Second heading row: the requested percentiles themselves. */
635 for (i = 0; i < cmd->n_percentiles; ++i)
637 tab_text_format (t, heading_columns + i, 1,
638 TAT_TITLE | TAB_CENTER,
639 _("%g"), cmd->ptiles[i]);
/* Headings naming the interaction variables. */
642 for (i = 0; i < iact->n_vars; ++i)
647 var_to_string (iact->vars[i])
655 tab_vline (t, TAL_1, heading_columns - 1, heading_rows, nr - 1);
657 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Remembers the interaction values of the previous category so that
   repeated values need not be printed again. */
659 const union value **prev_vals = previous_value_alloc (iact);
663 tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var);
666 0, heading_rows + v * rows_per_var,
667 TAT_TITLE | TAB_LEFT,
668 var_to_string (cmd->dep_vars[v])
671 for (i = 0; i < n_cats; ++i)
673 const struct ccase *c =
674 categoricals_get_case_by_category_real (cmd->cats,
677 const struct exploratory_stats *ess =
678 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i);
/* The user data is an array with one element per dependent variable. */
680 const struct exploratory_stats *es = ess + v;
682 int diff_idx = previous_value_record (iact, c, prev_vals);
687 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
689 const struct variable *ivar = iact->vars[ivar_idx];
690 const union value *val = case_data (c, ivar);
/* NOTE(review): the rest of this condition is not visible here.  The
   visible part selects the variables at or after the first one whose
   value changed. */
692 if (( diff_idx != -1 && diff_idx <= ivar_idx)
696 ds_init_empty (&str);
697 append_value_name (ivar, val, &str);
701 heading_rows + v * rows_per_var + i * rows_per_cat,
702 TAT_TITLE | TAB_LEFT,
/* Rule above the category, starting at the column of the first changed
   variable. */
710 if ( diff_idx != -1 && diff_idx < iact->n_vars)
712 tab_hline (t, TAL_1, 1 + diff_idx, nc - 1,
713 heading_rows + v * rows_per_var + i * rows_per_cat
/* First row of the category: described by the name of the percentile
   algorithm in use. */
717 tab_text (t, heading_columns - 1,
718 heading_rows + v * rows_per_var + i * rows_per_cat,
719 TAT_TITLE | TAB_LEFT,
720 gettext (ptile_alg_desc [cmd->pc_alg]));
722 tukey_hinges_calculate (es->hinges, hinges);
724 for (p = 0; p < cmd->n_percentiles; ++p)
726 tab_double (t, heading_columns + p,
727 heading_rows + v * rows_per_var + i * rows_per_cat,
729 percentile_calculate (es->percentiles[p], cmd->pc_alg),
/* The second row gets a value only for the 25th, 50th and 75th
   percentiles.  NOTE(review): the values written are on lines not visible
   here; presumably they are the corresponding hinges.  Confirm. */
732 if (cmd->ptiles[p] == 25.0)
734 tab_double (t, heading_columns + p,
735 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
740 else if (cmd->ptiles[p] == 50.0)
742 tab_double (t, heading_columns + p,
743 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
748 else if (cmd->ptiles[p] == 75.0)
750 tab_double (t, heading_columns + p,
751 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
/* Second row of the category: description of the hinges row. */
759 tab_text (t, heading_columns - 1,
760 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
761 TAT_TITLE | TAB_LEFT,
762 _("Tukey's Hinges"));
/* Builds the table titled Descriptives for interaction number IACT_IDX of
   CMD.

   Layout, as established by the code below:
   - heading columns: one for the dependent variable, one per interaction
     variable, and two for row descriptions;
   - two data columns;
   - one heading row, then 13 rows for each category of each dependent
     variable.

   Within a category the visible row offsets hold:
     0  m1, with calc_semean (m2, m0) in the second data column
     1  m1 - tval * calc_semean (m2, m0)   (lower confidence bound)
     2  m1 + tval * calc_semean (m2, m0)   (upper confidence bound)
     3  the trimmed mean
     4  the 50th percentile (quartiles[1])
     5  m2
     6  sqrt (m2)
     9  maxima[0].val - minima[0].val
     10 the interquartile range (quartiles[2] minus quartiles[0])
     11 m3, with calc_seskew (m0) in the second data column
     12 m4, with calc_sekurt (m0) in the second data column
   The values for offsets 7 and 8 are on lines not visible here.

   m0 to m4 are the five outputs of moments_calculate.  Elsewhere in this
   file the first three are received as n, mean and var.  NOTE(review): m3
   and m4 are presumably skewness and kurtosis; confirm. */
773 descriptives_report (const struct examine *cmd, int iact_idx)
775 const struct interaction *iact = cmd->iacts[iact_idx];
777 const int heading_columns = 1 + iact->n_vars + 2;
778 const int heading_rows = 1;
781 size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
783 const int rows_per_cat = 13;
784 const int rows_per_var = n_cats * rows_per_cat;
786 const int nr = heading_rows + cmd->n_dep_vars * rows_per_var;
787 const int nc = 2 + heading_columns;
789 t = tab_create (nc, nr);
791 tab_title (t, _("Descriptives"));
793 tab_headers (t, heading_columns, 0, heading_rows, 0);
795 /* Internal Vertical lines */
796 tab_box (t, -1, -1, -1, TAL_1,
797 heading_columns, 0, nc - 1, nr - 1);
/* Frame around the whole table. */
800 tab_box (t, TAL_2, TAL_2, -1, -1,
801 0, 0, nc - 1, nr - 1);
/* Rules separating the headings from the data area. */
803 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
804 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
/* Titles of the two data columns. */
807 tab_text (t, heading_columns, 0, TAB_CENTER | TAT_TITLE,
810 tab_text (t, heading_columns + 1, 0, TAB_CENTER | TAT_TITLE,
/* Headings naming the interaction variables. */
813 for (i = 0; i < iact->n_vars; ++i)
818 var_to_string (iact->vars[i])
822 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Remembers the interaction values of the previous category so that
   repeated values need not be printed again. */
824 const union value **prev_val = previous_value_alloc (iact);
828 tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var);
831 0, heading_rows + v * rows_per_var,
832 TAT_TITLE | TAB_LEFT,
833 var_to_string (cmd->dep_vars[v])
836 for (i = 0; i < n_cats; ++i)
838 const struct ccase *c =
839 categoricals_get_case_by_category_real (cmd->cats,
842 const struct exploratory_stats *ess =
843 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i);
/* The user data is an array with one element per dependent variable. */
845 const struct exploratory_stats *es = ess + v;
847 const int diff_idx = previous_value_record (iact, c, prev_val);
849 double m0, m1, m2, m3, m4;
852 moments_calculate (es->mom, &m0, &m1, &m2, &m3, &m4);
/* Upper-tail quantile of the t distribution with m0 - 1 degrees of
   freedom at probability (1 - conf) / 2, used for the confidence
   interval of the mean. */
854 tval = gsl_cdf_tdist_Qinv ((1.0 - cmd->conf) / 2.0, m0 - 1.0);
856 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
858 const struct variable *ivar = iact->vars[ivar_idx];
859 const union value *val = case_data (c, ivar);
/* NOTE(review): the rest of this condition is not visible here.  The
   visible part selects the variables at or after the first one whose
   value changed. */
861 if (( diff_idx != -1 && diff_idx <= ivar_idx)
865 ds_init_empty (&str);
866 append_value_name (ivar, val, &str);
870 heading_rows + v * rows_per_var + i * rows_per_cat,
871 TAT_TITLE | TAB_LEFT,
/* Rule above the category, starting at the column of the first changed
   variable. */
879 if ( diff_idx != -1 && diff_idx < iact->n_vars)
881 tab_hline (t, TAL_1, 1 + diff_idx, nc - 1,
882 heading_rows + v * rows_per_var + i * rows_per_cat
/* Row offset 0: m1 and its standard error. */
888 heading_rows + v * rows_per_var + i * rows_per_cat,
894 1 + iact->n_vars + 2,
895 heading_rows + v * rows_per_var + i * rows_per_cat,
896 0, m1, NULL, RC_OTHER);
899 1 + iact->n_vars + 3,
900 heading_rows + v * rows_per_var + i * rows_per_cat,
901 0, calc_semean (m2, m0), NULL, RC_OTHER);
/* Row offsets 1 and 2: bounds of the confidence interval for the mean. */
905 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
907 _("%g%% Confidence Interval for Mean"),
912 1 + iact->n_vars + 1,
913 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
919 1 + iact->n_vars + 2,
920 heading_rows + v * rows_per_var + i * rows_per_cat + 1,
921 0, m1 - tval * calc_semean (m2, m0), NULL, RC_OTHER);
925 1 + iact->n_vars + 1,
926 heading_rows + v * rows_per_var + i * rows_per_cat + 2,
932 1 + iact->n_vars + 2,
933 heading_rows + v * rows_per_var + i * rows_per_cat + 2,
934 0, m1 + tval * calc_semean (m2, m0), NULL, RC_OTHER);
/* Row offset 3: trimmed mean. */
939 heading_rows + v * rows_per_var + i * rows_per_cat + 3,
945 1 + iact->n_vars + 2,
946 heading_rows + v * rows_per_var + i * rows_per_cat + 3,
948 trimmed_mean_calculate (es->trimmed_mean),
/* Row offset 4: 50th percentile. */
953 heading_rows + v * rows_per_var + i * rows_per_cat + 4,
959 1 + iact->n_vars + 2,
960 heading_rows + v * rows_per_var + i * rows_per_cat + 4,
962 percentile_calculate (es->quartiles[1], cmd->pc_alg),
/* Row offset 5: m2. */
968 heading_rows + v * rows_per_var + i * rows_per_cat + 5,
974 1 + iact->n_vars + 2,
975 heading_rows + v * rows_per_var + i * rows_per_cat + 5,
976 0, m2, NULL, RC_OTHER);
/* Row offset 6: square root of m2. */
980 heading_rows + v * rows_per_var + i * rows_per_cat + 6,
986 1 + iact->n_vars + 2,
987 heading_rows + v * rows_per_var + i * rows_per_cat + 6,
988 0, sqrt (m2), NULL, RC_OTHER);
/* Row offsets 7 and 8: the values written are not visible here. */
992 heading_rows + v * rows_per_var + i * rows_per_cat + 7,
998 1 + iact->n_vars + 2,
999 heading_rows + v * rows_per_var + i * rows_per_cat + 7,
1006 heading_rows + v * rows_per_var + i * rows_per_cat + 8,
1012 1 + iact->n_vars + 2,
1013 heading_rows + v * rows_per_var + i * rows_per_cat + 8,
/* Row offset 9: difference between the first maximum and the first
   minimum. */
1020 heading_rows + v * rows_per_var + i * rows_per_cat + 9,
1026 1 + iact->n_vars + 2,
1027 heading_rows + v * rows_per_var + i * rows_per_cat + 9,
1029 es->maxima[0].val - es->minima[0].val,
/* Row offset 10: interquartile range. */
1034 heading_rows + v * rows_per_var + i * rows_per_cat + 10,
1036 _("Interquartile Range")
1041 1 + iact->n_vars + 2,
1042 heading_rows + v * rows_per_var + i * rows_per_cat + 10,
1044 percentile_calculate (es->quartiles[2], cmd->pc_alg) -
1045 percentile_calculate (es->quartiles[0], cmd->pc_alg),
/* Row offset 11: m3 and the result of calc_seskew. */
1053 heading_rows + v * rows_per_var + i * rows_per_cat + 11,
1059 1 + iact->n_vars + 2,
1060 heading_rows + v * rows_per_var + i * rows_per_cat + 11,
1061 0, m3, NULL, RC_OTHER);
1064 1 + iact->n_vars + 3,
1065 heading_rows + v * rows_per_var + i * rows_per_cat + 11,
1066 0, calc_seskew (m0), NULL, RC_OTHER);
/* Row offset 12: m4 and the result of calc_sekurt. */
1070 heading_rows + v * rows_per_var + i * rows_per_cat + 12,
1076 1 + iact->n_vars + 2,
1077 heading_rows + v * rows_per_var + i * rows_per_cat + 12,
1078 0, m4, NULL, RC_OTHER);
1081 1 + iact->n_vars + 3,
1082 heading_rows + v * rows_per_var + i * rows_per_cat + 12,
1083 0, calc_sekurt (m0), NULL, RC_OTHER);
/* Builds the table titled Extreme Values for interaction number IACT_IDX
   of CMD.

   Layout, as established by the code below:
   - heading columns: one for the dependent variable, one per interaction
     variable, and two for row descriptions;
   - two data columns: the first identifies the case, the second is
     formatted with the print format of the dependent variable;
   - one heading row, then 2 * cmd->disp_extremes rows for each category
     of each dependent variable: the first cmd->disp_extremes rows use the
     maxima array, the remaining rows use the minima array. */
1093 extremes_report (const struct examine *cmd, int iact_idx)
1095 const struct interaction *iact = cmd->iacts[iact_idx];
1097 const int heading_columns = 1 + iact->n_vars + 2;
1098 const int heading_rows = 1;
1099 struct tab_table *t;
1101 size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
1103 const int rows_per_cat = 2 * cmd->disp_extremes;
1104 const int rows_per_var = n_cats * rows_per_cat;
1106 const int nr = heading_rows + cmd->n_dep_vars * rows_per_var;
1107 const int nc = 2 + heading_columns;
1109 t = tab_create (nc, nr);
1111 tab_title (t, _("Extreme Values"));
1113 tab_headers (t, heading_columns, 0, heading_rows, 0);
1115 /* Internal Vertical lines */
1116 tab_box (t, -1, -1, -1, TAL_1,
1117 heading_columns, 0, nc - 1, nr - 1);
1119 /* External Frame */
1120 tab_box (t, TAL_2, TAL_2, -1, -1,
1121 0, 0, nc - 1, nr - 1);
/* Rules separating the headings from the data area. */
1123 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
1124 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
/* Title of the first data column: either the name of the ID variable or
   another text.  NOTE(review): the condition selecting between the two
   calls and the alternative text are not visible here. */
1128 tab_text (t, heading_columns, 0, TAB_CENTER | TAT_TITLE,
1129 var_to_string (cmd->id_var));
1131 tab_text (t, heading_columns, 0, TAB_CENTER | TAT_TITLE,
1134 tab_text (t, heading_columns + 1, 0, TAB_CENTER | TAT_TITLE,
/* Headings naming the interaction variables. */
1137 for (i = 0; i < iact->n_vars; ++i)
1142 var_to_string (iact->vars[i])
1146 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Remembers the interaction values of the previous category so that
   repeated values need not be printed again. */
1148 const union value **prev_val = previous_value_alloc (iact);
1152 tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * rows_per_var);
1155 0, heading_rows + v * rows_per_var,
1157 var_to_string (cmd->dep_vars[v])
1160 for (i = 0; i < n_cats; ++i)
1163 const struct ccase *c =
1164 categoricals_get_case_by_category_real (cmd->cats, iact_idx, i);
1166 const struct exploratory_stats *ess =
1167 categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i);
/* The user data is an array with one element per dependent variable. */
1169 const struct exploratory_stats *es = ess + v;
1171 int diff_idx = previous_value_record (iact, c, prev_val);
1173 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
1175 const struct variable *ivar = iact->vars[ivar_idx];
1176 const union value *val = case_data (c, ivar);
/* NOTE(review): the rest of this condition is not visible here.  The
   visible part selects the variables at or after the first one whose
   value changed. */
1178 if (( diff_idx != -1 && diff_idx <= ivar_idx)
1182 ds_init_empty (&str);
1183 append_value_name (ivar, val, &str);
1187 heading_rows + v * rows_per_var + i * rows_per_cat,
1188 TAT_TITLE | TAB_LEFT,
/* Rule above the category, starting at the column of the first changed
   variable. */
1196 if ( diff_idx != -1 && diff_idx < iact->n_vars)
1198 tab_hline (t, TAL_1, 1 + diff_idx, nc - 1,
1199 heading_rows + v * rows_per_var + i * rows_per_cat
1204 heading_columns - 2,
1205 heading_rows + v * rows_per_var + i * rows_per_cat,
/* Rule between the first and the second half of the category. */
1210 tab_hline (t, TAL_1, heading_columns - 2, nc - 1,
1211 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes
1215 heading_columns - 2,
1216 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes,
/* One row per displayed extremity in each half. */
1220 for (e = 0 ; e < cmd->disp_extremes; ++e)
/* First half: rows taken from the maxima array. */
1223 heading_columns - 1,
1224 heading_rows + v * rows_per_var + i * rows_per_cat + e,
1229 /* The casenumber */
1233 heading_rows + v * rows_per_var + i * rows_per_cat + e,
1235 &es->maxima[e].identity,
1241 heading_rows + v * rows_per_var + i * rows_per_cat + e,
1243 es->maxima[e].identity.f,
1247 heading_columns + 1,
1248 heading_rows + v * rows_per_var + i * rows_per_cat + e,
1251 var_get_print_format (cmd->dep_vars[v]), RC_OTHER);
/* Second half: rows taken from the minima array, offset by
   cmd->disp_extremes. */
1255 heading_columns - 1,
1256 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
1261 /* The casenumber */
1265 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
1267 &es->minima[e].identity,
1273 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
1275 es->minima[e].identity.f,
1279 heading_columns + 1,
1280 heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e,
1283 var_get_print_format (cmd->dep_vars[v]), RC_OTHER);
/* Builds the table titled Case Processing Summary for interaction number
   IACT_IDX of CMD.

   Layout, as established by the code below:
   - heading columns: one for the dependent variable and one per
     interaction variable;
   - six data columns under the joint heading Cases, in three pairs headed
     Valid, Missing and Total;
   - three heading rows, then one row for each category of each dependent
     variable.

   Weights are formatted with the print format of the weight variable, or
   with F_8_0 when there is none.  Percentages are computed relative to
   total = missing + non_missing. */
1294 summary_report (const struct examine *cmd, int iact_idx)
1296 const struct interaction *iact = cmd->iacts[iact_idx];
1298 const int heading_columns = 1 + iact->n_vars;
1299 const int heading_rows = 3;
1300 struct tab_table *t;
1302 const struct fmt_spec *wfmt = cmd->wv ? var_get_print_format (cmd->wv) : &F_8_0;
1304 size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
1306 const int nr = heading_rows + n_cats * cmd->n_dep_vars;
1307 const int nc = 6 + heading_columns;
1309 t = tab_create (nc, nr);
1310 tab_set_format (t, RC_WEIGHT, wfmt);
1311 tab_title (t, _("Case Processing Summary"));
1313 tab_headers (t, heading_columns, 0, heading_rows, 0);
1315 /* Internal Vertical lines */
1316 tab_box (t, -1, -1, -1, TAL_1,
1317 heading_columns, 0, nc - 1, nr - 1);
1319 /* External Frame */
1320 tab_box (t, TAL_2, TAL_2, -1, -1,
1321 0, 0, nc - 1, nr - 1);
/* Rules separating the headings from the data area. */
1323 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
1324 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
/* Heading row 0 spans all data columns; heading row 1 names the three
   column pairs. */
1326 tab_joint_text (t, heading_columns, 0,
1327 nc - 1, 0, TAB_CENTER | TAT_TITLE, _("Cases"));
1330 heading_columns + 1, 1,
1331 TAB_CENTER | TAT_TITLE, _("Valid"));
1334 heading_columns + 2, 1,
1335 heading_columns + 3, 1,
1336 TAB_CENTER | TAT_TITLE, _("Missing"));
1339 heading_columns + 4, 1,
1340 heading_columns + 5, 1,
1341 TAB_CENTER | TAT_TITLE, _("Total"));
/* Heading row 2: two sub-headings under each of the three pairs. */
1343 for (i = 0; i < 3; ++i)
1345 tab_text (t, heading_columns + i * 2, 2, TAB_CENTER | TAT_TITLE,
1347 tab_text (t, heading_columns + i * 2 + 1, 2, TAB_CENTER | TAT_TITLE,
/* Headings naming the interaction variables. */
1351 for (i = 0; i < iact->n_vars; ++i)
1356 var_to_string (iact->vars[i])
1361 for (v = 0; v < cmd->n_dep_vars; ++v)
/* Remembers the interaction values of the previous category so that
   repeated values need not be printed again. */
1364 const union value **prev_values = previous_value_alloc (iact);
1367 tab_hline (t, TAL_1, 0, nc - 1, heading_rows + v * n_cats);
1370 0, heading_rows + n_cats * v,
1372 var_to_string (cmd->dep_vars[v])
1376 for (i = 0; i < n_cats; ++i)
1379 const struct exploratory_stats *es;
1381 const struct ccase *c =
1382 categoricals_get_case_by_category_real (cmd->cats,
1386 int diff_idx = previous_value_record (iact, c, prev_values);
/* Rule above the row when a variable other than the last one changed. */
1388 if ( diff_idx != -1 && diff_idx < iact->n_vars - 1)
1389 tab_hline (t, TAL_1, 1 + diff_idx, nc - 1,
1390 heading_rows + n_cats * v + i );
1392 for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx)
1394 const struct variable *ivar = iact->vars[ivar_idx];
1395 const union value *val = case_data (c, ivar);
/* NOTE(review): the rest of this condition is not visible here.  The
   visible part selects the variables at or after the first one whose
   value changed. */
1397 if (( diff_idx != -1 && diff_idx <= ivar_idx)
1401 ds_init_empty (&str);
1402 append_value_name (ivar, val, &str);
1405 1 + ivar_idx, heading_rows + n_cats * v + i,
1406 TAT_TITLE | TAB_LEFT,
/* The user data is an array with one element per dependent variable. */
1416 es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, i);
1419 total = es[v].missing + es[v].non_missing;
/* Data columns 0 and 1: the Valid pair. */
1421 heading_columns + 0,
1422 heading_rows + n_cats * v + i,
1429 heading_columns + 1,
1430 heading_rows + n_cats * v + i,
1433 100.0 * es[v].non_missing / total
/* Data columns 2 and 3: the Missing pair. */
1438 heading_columns + 2,
1439 heading_rows + n_cats * v + i,
1445 heading_columns + 3,
1446 heading_rows + n_cats * v + i,
1449 100.0 * es[v].missing / total
/* Data columns 4 and 5: the Total pair. */
1452 heading_columns + 4,
1453 heading_rows + n_cats * v + i,
1458 /* This can only be 100% can't it? */
1460 heading_columns + 5,
1461 heading_rows + n_cats * v + i,
1464 100.0 * (es[v].missing + es[v].non_missing)/ total
/* Rules between the three heading rows, over the data columns only. */
1470 tab_hline (t, TAL_1, heading_columns, nc - 1, 1);
1471 tab_hline (t, TAL_1, heading_columns, nc - 1, 2);
1476 /* Attempt to parse an interaction from LEXER.

   An interaction is a variable of the dictionary EX->dict, optionally
   followed by further variables each introduced by the BY token.  A
   trailing comma token, if present, is consumed.

   When a variable is missing after BY, the partially built interaction
   is destroyed.  NOTE(review): the return statements are not visible
   here; presumably the interaction is returned on success and NULL
   otherwise.  Confirm. */
1477 static struct interaction *
1478 parse_interaction (struct lexer *lexer, struct examine *ex)
1480 const struct variable *v = NULL;
1481 struct interaction *iact = NULL;
/* The first variable starts the interaction. */
1483 if ( lex_match_variable (lexer, ex->dict, &v))
1485 iact = interaction_create (v);
/* Every BY must be followed by another variable. */
1487 while (lex_match (lexer, T_BY))
1489 if (!lex_match_variable (lexer, ex->dict, &v))
1491 interaction_destroy (iact);
1494 interaction_add_variable (iact, v);
/* An optional comma after the interaction is skipped. */
1496 lex_match (lexer, T_COMMA);
/* Creates the per-category user data: an array, allocated from the pool
   of the examine command passed as AUX1, holding one exploratory_stats
   element per dependent variable.  AUX2 is unused.

   Each element gets a sorting case writer over examine->ex_proto, a
   moments accumulator created with MOMENT_KURTOSIS, and sentinel values
   for the running minimum, maximum and minimum weight, so that the first
   real value replaces them.

   NOTE(review): the return statement is not visible here; presumably the
   array is returned.  Confirm. */
1504 create_n (const void *aux1, void *aux2 UNUSED)
1508 const struct examine *examine = aux1;
1509 struct exploratory_stats *es = pool_calloc (examine->pool, examine->n_dep_vars, sizeof (*es));
/* Ascending sort order shared by the writers of all dependent variables. */
1510 struct subcase ordering;
1511 subcase_init (&ordering, 0, 0, SC_ASCEND);
1513 for (v = 0; v < examine->n_dep_vars; v++)
1515 es[v].sorted_writer = sort_create_writer (&ordering, examine->ex_proto);
/* The reader is only created later, from the writer, in calculate_n. */
1516 es[v].sorted_reader = NULL;
1518 es[v].mom = moments_create (MOMENT_KURTOSIS);
1519 es[v].cmin = DBL_MAX;
1521 es[v].maximum = -DBL_MAX;
1522 es[v].minimum = DBL_MAX;
1525 subcase_destroy (&ordering);
/* Accumulates case C, with weight WEIGHT, into the per-category user data
   USER_DATA (an array with one exploratory_stats element per dependent
   variable).  AUX1 is the examine command; AUX2 is unused.

   Unless missing values are handled pairwise (examine->missing_pw), a
   case with a missing value in any dependent variable is counted as
   missing for those variables and flagged.  NOTE(review): the statement
   guarded by the flag test is not visible here; presumably the case is
   then skipped entirely.  Confirm.

   Otherwise, for each dependent variable whose value is not missing, the
   value is fed to the first pass of the moments, the non-missing weight
   and the minimum weight are updated, and a case holding the value, the
   ID value and the weight is written to the sorting writer. */
1530 update_n (const void *aux1, void *aux2 UNUSED, void *user_data,
1531 const struct ccase *c, double weight)
1534 const struct examine *examine = aux1;
1535 struct exploratory_stats *es = user_data;
1537 bool this_case_is_missing = false;
1538 /* LISTWISE missing must be dealt with here */
1539 if (!examine->missing_pw)
1541 for (v = 0; v < examine->n_dep_vars; v++)
1543 const struct variable *var = examine->dep_vars[v];
1545 if (var_is_value_missing (var, case_data (c, var), examine->dep_excl))
1547 es[v].missing += weight;
1548 this_case_is_missing = true;
1553 if (this_case_is_missing)
1556 for (v = 0; v < examine->n_dep_vars; v++)
1558 struct ccase *outcase ;
1559 const struct variable *var = examine->dep_vars[v];
1560 const double x = case_data (c, var)->f;
/* Missing values of this variable are only counted.  NOTE(review): the
   rest of the guarded block is not visible here; presumably processing
   moves on to the next variable.  Confirm. */
1562 if (var_is_value_missing (var, case_data (c, var), examine->dep_excl))
1564 es[v].missing += weight;
1568 outcase = case_create (examine->ex_proto);
/* Track the running extremes; the assignments themselves are on lines not
   visible here. */
1570 if (x > es[v].maximum)
1573 if (x < es[v].minimum)
1576 es[v].non_missing += weight;
1578 moments_pass_one (es[v].mom, x, weight);
1580 /* Save the value and the ID to the writer */
1581 assert (examine->id_idx != -1);
1582 case_data_rw_idx (outcase, EX_VAL)->f = x;
1583 value_copy (case_data_rw_idx (outcase, EX_ID),
1584 case_data_idx (c, examine->id_idx), examine->id_width);
1586 case_data_rw_idx (outcase, EX_WT)->f = weight;
/* Remember the smallest weight seen for this variable. */
1590 if (es[v].cmin > weight)
1591 es[v].cmin = weight;
1593 casewriter_write (es[v].sorted_writer, outcase);
/* Finishes the statistics of one category once all of its cases have been
   accumulated.  AUX1 is the examine command, AUX2 is unused and USER_DATA
   is the array with one exploratory_stats element per dependent variable.

   For each dependent variable, in order:
   - when histograms are requested and there is non-missing data, a
     histogram is created over the range from minimum to maximum;
   - the sorting writer is converted into the sorted reader;
   - the arrays of minima and maxima are allocated from the pool, with
     examine->calc_extremes entries each;
   - a clone of the sorted reader is read once to run the second pass of
     the moments, fill the histogram and record the extremities;
   - the order statistics (trimmed mean, the three quartiles, the Tukey
     hinges and the requested percentiles) are created and accumulated
     together over a clone of the sorted reader;
   - when requested, the box-whisker and NP data are created and
     accumulated, each over its own clone of the sorted reader. */
1598 calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data)
1601 const struct examine *examine = aux1;
1602 struct exploratory_stats *es = user_data;
1604 for (v = 0; v < examine->n_dep_vars; v++)
1607 casenumber imin = 0;
1609 struct casereader *reader;
1612 if (examine->histogramplot && es[v].non_missing > 0)
/* Bin width: the data range divided by 1 + log2 of es[v].cc.
   NOTE(review): the expression continues on a line not visible here. */
1615 double bin_width = fabs (es[v].minimum - es[v].maximum)
1616 / (1 + log2 (es[v].cc))
1620 histogram_create (bin_width, es[v].minimum, es[v].maximum);
/* From here on the data is accessed through the reader; the writer
   pointer is cleared once the reader has been made from it. */
1623 es[v].sorted_reader = casewriter_make_reader (es[v].sorted_writer);
1624 es[v].sorted_writer = NULL;
1626 imax = casereader_get_case_cnt (es[v].sorted_reader);
1628 es[v].maxima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].maxima));
1629 es[v].minima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].minima));
/* The identity values are initialised with the width of the ID value. */
1630 for (i = 0; i < examine->calc_extremes; ++i)
1632 value_init_pool (examine->pool, &es[v].maxima[i].identity, examine->id_width) ;
1633 value_init_pool (examine->pool, &es[v].minima[i].identity, examine->id_width) ;
/* Single pass over the sorted cases. */
1636 for (reader = casereader_clone (es[v].sorted_reader);
1637 (c = casereader_read (reader)) != NULL; case_unref (c))
1639 const double val = case_data_idx (c, EX_VAL)->f;
1640 const double wt = case_data_idx (c, EX_WT)->f;
1642 moments_pass_two (es[v].mom, val, wt);
1644 if (es[v].histogram)
1645 histogram_add (es[v].histogram, val, wt);
/* While minima slots remain unfilled, record this case in them.
   NOTE(review): the statements that advance imin and imax are not visible
   here. */
1647 if (imin < examine->calc_extremes)
1650 for (x = imin; x < examine->calc_extremes; ++x)
1652 struct extremity *min = &es[v].minima[x];
1654 value_copy (&min->identity, case_data_idx (c, EX_ID), examine->id_width);
/* Once fewer than calc_extremes cases remain, record this case in the
   maxima slot with index imax. */
1660 if (imax < examine->calc_extremes)
1664 for (x = imax; x < imax + 1; ++x)
1666 struct extremity *max;
1668 if (x >= examine->calc_extremes)
1671 max = &es[v].maxima[x];
1673 value_copy (&max->identity, case_data_idx (c, EX_ID), examine->id_width);
1677 casereader_destroy (reader);
/* Consistency check: the first recorded extremities must agree with the
   running minimum and maximum kept while the cases were accumulated. */
1679 if (examine->calc_extremes > 0 && es[v].non_missing > 0)
1681 assert (es[v].minima[0].val == es[v].minimum);
1682 assert (es[v].maxima[0].val == es[v].maximum);
/* Five fixed order statistics (trimmed mean, three quartiles, hinges)
   plus one per requested percentile. */
1686 const int n_os = 5 + examine->n_percentiles;
1687 struct order_stats **os ;
1688 es[v].percentiles = pool_calloc (examine->pool, examine->n_percentiles, sizeof (*es[v].percentiles));
1690 es[v].trimmed_mean = trimmed_mean_create (es[v].cc, 0.05);
1692 os = xcalloc (n_os, sizeof *os);
1693 os[0] = &es[v].trimmed_mean->parent;
1695 es[v].quartiles[0] = percentile_create (0.25, es[v].cc);
1696 es[v].quartiles[1] = percentile_create (0.5, es[v].cc);
1697 es[v].quartiles[2] = percentile_create (0.75, es[v].cc);
1699 os[1] = &es[v].quartiles[0]->parent;
1700 os[2] = &es[v].quartiles[1]->parent;
1701 os[3] = &es[v].quartiles[2]->parent;
1703 es[v].hinges = tukey_hinges_create (es[v].cc, es[v].cmin);
1704 os[4] = &es[v].hinges->parent;
/* The requested percentiles are given in percent, hence the division by
   100. */
1706 for (i = 0; i < examine->n_percentiles; ++i)
1708 es[v].percentiles[i] = percentile_create (examine->ptiles[i] / 100.00, es[v].cc);
1709 os[5 + i] = &es[v].percentiles[i]->parent;
1712 order_stats_accumulate_idx (os, n_os,
1713 casereader_clone (es[v].sorted_reader),
/* The box-whisker data is built from the hinges computed above. */
1719 if (examine->boxplot)
1721 struct order_stats *os;
1723 es[v].box_whisker = box_whisker_create (es[v].hinges,
1724 EX_ID, examine->id_var);
1726 os = &es[v].box_whisker->parent;
1727 order_stats_accumulate_idx (&os, 1,
1728 casereader_clone (es[v].sorted_reader),
/* The NP data is created from the count, mean and variance. */
1732 if (examine->npplot)
1734 double n, mean, var;
1735 struct order_stats *os;
1737 moments_calculate (es[v].mom, &n, &mean, &var, NULL, NULL);
1739 es[v].np = np_create (n, mean, var);
1741 os = &es[v].np->parent;
1743 order_stats_accumulate_idx (&os, 1,
1744 casereader_clone (es[v].sorted_reader),
/* Releases the statistics objects stored in the exploratory_stats of
   every interaction, dependent variable and category group of CMD:
   hinges, quartiles, percentiles, trimmed mean, NP data, histogram,
   moments, box-whisker data and the sorted case reader.

   NOTE(review): short source lines (braces, brief assignments and
   conditions) are absent from this listing, so some of the statements
   that sit between the visible ones cannot be described here. */
1752 cleanup_exploratory_stats (struct examine *cmd)
1755 for (i = 0; i < cmd->n_iacts; ++i)
/* Group count reported by the categoricals for interaction I. */
1758 const size_t n_cats = categoricals_n_count (cmd->cats, i);
1760 for (v = 0; v < cmd->n_dep_vars; ++v)
1763 for (grp = 0; grp < n_cats; ++grp)
/* The user data of each category is the exploratory_stats array, which
   is indexed by dependent variable below. */
1766 const struct exploratory_stats *es =
1767 categoricals_get_user_data_by_category_real (cmd->cats, i, grp);
/* Each order statistic is destroyed through the destroy callback of
   its embedded struct statistic. */
1769 struct order_stats *os = &es[v].hinges->parent;
1770 struct statistic *stat = &os->parent;
1771 stat->destroy (stat);
/* The three quartiles. */
1773 for (q = 0; q < 3 ; q++)
1775 os = &es[v].quartiles[q]->parent;
1777 stat->destroy (stat);
/* The requested percentiles. */
1780 for (q = 0; q < cmd->n_percentiles ; q++)
1782 os = &es[v].percentiles[q]->parent;
1784 stat->destroy (stat);
1787 os = &es[v].trimmed_mean->parent;
1789 stat->destroy (stat);
/* NOTE(review): calculate_n creates es[v].np only when npplot is set.
   Three source lines between the next two statements are not visible
   here; confirm that they guard this destroy when np was never
   created. */
1791 os = &es[v].np->parent;
1795 stat->destroy (stat);
/* NOTE(review): the histogram is likewise created only on request in
   calculate_n; this relies on statistic_destroy accepting the address
   computed from an absent histogram (confirm). */
1798 statistic_destroy (&es[v].histogram->parent);
1799 moments_destroy (es[v].mom);
/* Box-whisker data exists only when a box plot was requested. */
1801 if (es[v].box_whisker)
1803 stat = &es[v].box_whisker->parent.parent;
1804 stat->destroy (stat);
1807 casereader_destroy (es[v].sorted_reader);
/* Runs the analysis described by CMD over the cases read from INPUT.

   It builds a categoricals object over the interactions in CMD with
   create_n, update_n and calculate_n as payload callbacks, feeds every
   case to it, produces the requested reports and plots for each
   interaction, and finally releases the per-group statistics and the
   categoricals.  INPUT is destroyed here after the last case is read.

   NOTE(review): short source lines (braces, case labels, brief
   statements) are absent from this listing. */
1815 run_examine (struct examine *cmd, struct casereader *input)
1819 struct casereader *reader;
/* Callbacks invoked by the categoricals code; no destroy callback is
   supplied. */
1821 struct payload payload;
1822 payload.create = create_n;
1823 payload.update = update_n;
1824 payload.calculate = calculate_n;
1825 payload.destroy = NULL;
/* Weighting variable of the dictionary, as returned by
   dict_get_weight. */
1827 cmd->wv = dict_get_weight (cmd->dict);
1830 = categoricals_create (cmd->iacts, cmd->n_iacts,
1831 cmd->wv, cmd->dep_excl, cmd->fctr_excl);
/* CMD itself is passed as the first auxiliary pointer, which is what
   calculate_n receives as aux1. */
1833 categoricals_set_payload (cmd->cats, &payload, cmd, NULL);
/* Without an explicit ID variable, ID_IDX is set to the value count of
   the first case and INPUT is wrapped in an arithmetic sequence reader
   (arguments 1.0 and 1.0).  Presumably the sequence value is appended
   after the existing values, so that ID_IDX addresses it (confirm).
   NOTE(review): the peeked case is used without a visible NULL check,
   and its release is not visible in this listing. */
1835 if (cmd->id_var == NULL)
1837 struct ccase *c = casereader_peek (input, 0);
1839 cmd->id_idx = case_get_value_cnt (c);
1840 input = casereader_create_arithmetic_sequence (input, 1.0, 1.0);
/* Feed every case to the categoricals, then finalise them. */
1845 for (reader = input;
1846 (c = casereader_read (reader)) != NULL; case_unref (c))
1848 categoricals_update (cmd->cats, c);
1850 casereader_destroy (reader);
1851 categoricals_done (cmd->cats);
/* Output, one interaction at a time. */
1853 for (i = 0; i < cmd->n_iacts; ++i)
1855 summary_report (cmd, i);
1857 const size_t n_cats = categoricals_n_count (cmd->cats, i);
1861 if (cmd->disp_extremes > 0)
1862 extremes_report (cmd, i);
1864 if (cmd->n_percentiles > 0)
1865 percentiles_report (cmd, i);
/* The box plot layout depends on the COMPARE setting; the case labels
   and the enclosing condition are not visible in this listing. */
1869 switch (cmd->boxplot_mode)
1872 show_boxplot_grouped (cmd, i);
1875 show_boxplot_variabled (cmd, i);
1883 if (cmd->histogramplot)
1884 show_histogram (cmd, i);
1887 show_npplot (cmd, i);
1889 if (cmd->spreadlevelplot)
1890 show_spreadlevel (cmd, i);
1892 if (cmd->descriptives)
1893 descriptives_report (cmd, i);
/* Release everything built for this run. */
1896 cleanup_exploratory_stats (cmd);
1897 categoricals_destroy (cmd->cats);
/* Entry point for the EXAMINE command: parses the syntax available from
   LEXER and runs the analysis once per split group of dataset DS.

   The visible code sets defaults in a local struct examine, parses the
   dependent variables and the optional BY interactions, and then loops
   over the subcommands STATISTICS, PERCENTILES, TOTAL, NOTOTAL,
   MISSING, COMPARE, PLOT, CINTERVAL and ID until the end of the
   command.  Afterwards it derives the remaining settings, calls
   run_examine for every group produced by the case grouper, and
   releases what it allocated.

   NOTE(review): many short source lines (braces, else, return and goto
   statements, brief assignments) are absent from this listing, so the
   return value and the exact branch structure cannot be documented
   from here. */
1902 cmd_examine (struct lexer *lexer, struct dataset *ds)
1905 bool nototals_seen = false;
1906 bool totals_seen = false;
1908 struct interaction **iacts_mem = NULL;
1909 struct examine examine;
1910 bool percentiles_seen = false;
/* Defaults that apply when the corresponding subcommand is absent. */
1912 examine.missing_pw = false;
1913 examine.disp_extremes = 0;
1914 examine.calc_extremes = 0;
1915 examine.descriptives = false;
1916 examine.conf = 0.95;
1917 examine.pc_alg = PC_HAVERAGE;
1918 examine.ptiles = NULL;
1919 examine.n_percentiles = 0;
1920 examine.id_idx = -1;
1921 examine.id_width = 0;
1922 examine.id_var = NULL;
1923 examine.boxplot_mode = BP_GROUPS;
/* Case prototype (its widths are added after parsing, once the ID
   width is known) and the pool used for allocations made below. */
1925 examine.ex_proto = caseproto_create ();
1927 examine.pool = pool_create ();
1929 /* Allocate space for the first interaction.
1930 This interaction is an empty one (for the totals).
1931 If no totals are requested, we will simply ignore this
1934 examine.n_iacts = 1;
1935 examine.iacts = iacts_mem = pool_zalloc (examine.pool, sizeof (struct interaction *));
1936 examine.iacts[0] = interaction_create (NULL);
1938 examine.dep_excl = MV_ANY;
1939 examine.fctr_excl = MV_ANY;
1940 examine.histogramplot = false;
1941 examine.npplot = false;
1942 examine.boxplot = false;
1943 examine.spreadlevelplot = false;
1944 examine.sl_power = 0;
1946 examine.dict = dataset_dict (ds);
1948 /* Accept an optional, completely pointless "/VARIABLES=" */
1949 lex_match (lexer, T_SLASH);
1950 if (lex_match_id (lexer, "VARIABLES"))
/* NOTE(review): no visible line assigns examine.dep_vars before this
   check, while the cleanup sequences at the end of the function pass
   it to free ().  If a failure here jumps to that cleanup, free ()
   would receive an indeterminate pointer; confirm, and if so set
   dep_vars to NULL alongside the other defaults. */
1952 if (! lex_force_match (lexer, T_EQUALS) )
/* Dependent variables: numeric, duplicates rejected. */
1956 if (!parse_variables_const (lexer, examine.dict,
1957 &examine.dep_vars, &examine.n_dep_vars,
1958 PV_NO_DUPLICATE | PV_NUMERIC))
/* Factors: each interaction parsed after BY is stored in IACTS_MEM at
   index n_iacts - 1, with the array regrown from the pool.  The loop
   construct and the increment of n_iacts are not visible here. */
1961 if (lex_match (lexer, T_BY))
1963 struct interaction *iact = NULL;
1966 iact = parse_interaction (lexer, &examine);
1971 pool_nrealloc (examine.pool, iacts_mem,
1973 sizeof (*iacts_mem));
1975 iacts_mem[examine.n_iacts - 1] = iact;
/* Subcommand loop: runs until the end-of-command token, with an
   optional slash before each subcommand. */
1982 while (lex_token (lexer) != T_ENDCMD)
1984 lex_match (lexer, T_SLASH);
/* STATISTICS: DESCRIPTIVES, EXTREME [(n)], NONE or ALL. */
1986 if (lex_match_id (lexer, "STATISTICS"))
1988 lex_match (lexer, T_EQUALS);
1990 while (lex_token (lexer) != T_ENDCMD
1991 && lex_token (lexer) != T_SLASH)
1993 if (lex_match_id (lexer, "DESCRIPTIVES"))
1995 examine.descriptives = true;
/* EXTREME takes an optional parenthesised integer count.  The warning
   text below states that a negative count is replaced by the default
   of 5; the test and the assignment are not visible here. */
1997 else if (lex_match_id (lexer, "EXTREME"))
2000 if (lex_match (lexer, T_LPAREN))
2002 if (!lex_force_int (lexer))
2004 extr = lex_integer (lexer);
2008 msg (MW, _("%s may not be negative. Using default value (%g)."), "EXTREME", 5.0);
2013 if (! lex_force_match (lexer, T_RPAREN))
2016 examine.disp_extremes = extr;
2018 else if (lex_match_id (lexer, "NONE"))
/* ALL only supplies the default count of 5 when no count was set. */
2021 else if (lex_match (lexer, T_ALL))
2023 if (examine.disp_extremes == 0)
2024 examine.disp_extremes = 5;
2028 lex_error (lexer, NULL);
/* PERCENTILES: an optional parenthesised list of values strictly
   between 0 and 100, optionally comma separated, followed by an
   optional algorithm keyword. */
2033 else if (lex_match_id (lexer, "PERCENTILES"))
2035 percentiles_seen = true;
2036 if (lex_match (lexer, T_LPAREN))
2038 while (lex_is_number (lexer))
2040 double p = lex_number (lexer);
2042 if ( p <= 0 || p >= 100.0)
2045 _("Percentiles must lie in the range (0, 100)"));
/* Grow the heap-allocated list by one and append P. */
2049 examine.n_percentiles++;
2051 xrealloc (examine.ptiles,
2052 sizeof (*examine.ptiles) *
2053 examine.n_percentiles);
2055 examine.ptiles[examine.n_percentiles - 1] = p;
2058 lex_match (lexer, T_COMMA);
2060 if (!lex_force_match (lexer, T_RPAREN))
2064 lex_match (lexer, T_EQUALS);
/* Percentile algorithm selection; a later keyword overrides an earlier
   one because each simply assigns pc_alg. */
2066 while (lex_token (lexer) != T_ENDCMD
2067 && lex_token (lexer) != T_SLASH)
2069 if (lex_match_id (lexer, "HAVERAGE"))
2071 examine.pc_alg = PC_HAVERAGE;
2073 else if (lex_match_id (lexer, "WAVERAGE"))
2075 examine.pc_alg = PC_WAVERAGE;
2077 else if (lex_match_id (lexer, "ROUND"))
2079 examine.pc_alg = PC_ROUND;
2081 else if (lex_match_id (lexer, "EMPIRICAL"))
2083 examine.pc_alg = PC_EMPIRICAL;
2085 else if (lex_match_id (lexer, "AEMPIRICAL"))
2087 examine.pc_alg = PC_AEMPIRICAL;
2089 else if (lex_match_id (lexer, "NONE"))
2091 examine.pc_alg = PC_NONE;
2095 lex_error (lexer, NULL);
/* TOTAL and NOTOTAL only record that they were seen; the conflict is
   diagnosed after the loop.  The statement in the TOTAL branch is not
   visible in this listing. */
2100 else if (lex_match_id (lexer, "TOTAL"))
2104 else if (lex_match_id (lexer, "NOTOTAL"))
2106 nototals_seen = true;
/* MISSING: LISTWISE or PAIRWISE set missing_pw, EXCLUDE or INCLUDE set
   the exclusion class for dependent variables, and REPORT or NOREPORT
   set the exclusion class for factors. */
2108 else if (lex_match_id (lexer, "MISSING"))
2110 lex_match (lexer, T_EQUALS);
2112 while (lex_token (lexer) != T_ENDCMD
2113 && lex_token (lexer) != T_SLASH)
2115 if (lex_match_id (lexer, "LISTWISE"))
2117 examine.missing_pw = false;
2119 else if (lex_match_id (lexer, "PAIRWISE"))
2121 examine.missing_pw = true;
2123 else if (lex_match_id (lexer, "EXCLUDE"))
2125 examine.dep_excl = MV_ANY;
2127 else if (lex_match_id (lexer, "INCLUDE"))
2129 examine.dep_excl = MV_SYSTEM;
2131 else if (lex_match_id (lexer, "REPORT"))
2133 examine.fctr_excl = MV_NEVER;
2135 else if (lex_match_id (lexer, "NOREPORT"))
2137 examine.fctr_excl = MV_ANY;
2141 lex_error (lexer, NULL);
/* COMPARE selects the box plot layout used by run_examine. */
2146 else if (lex_match_id (lexer, "COMPARE"))
2148 lex_match (lexer, T_EQUALS);
2149 if (lex_match_id (lexer, "VARIABLES"))
2151 examine.boxplot_mode = BP_VARIABLES;
2153 else if (lex_match_id (lexer, "GROUPS"))
2155 examine.boxplot_mode = BP_GROUPS;
2159 lex_error (lexer, NULL);
/* PLOT: any mix of BOXPLOT, NPPLOT, HISTOGRAM, SPREADLEVEL [(n)], NONE
   and ALL, optionally comma separated. */
2163 else if (lex_match_id (lexer, "PLOT"))
2165 lex_match (lexer, T_EQUALS);
2167 while (lex_token (lexer) != T_ENDCMD
2168 && lex_token (lexer) != T_SLASH)
2170 if (lex_match_id (lexer, "BOXPLOT"))
2172 examine.boxplot = true;
2174 else if (lex_match_id (lexer, "NPPLOT"))
2176 examine.npplot = true;
2178 else if (lex_match_id (lexer, "HISTOGRAM"))
2180 examine.histogramplot = true;
/* SPREADLEVEL resets the power to 0 and then accepts an optional
   parenthesised integer power. */
2182 else if (lex_match_id (lexer, "SPREADLEVEL"))
2184 examine.spreadlevelplot = true;
2185 examine.sl_power = 0;
2186 if (lex_match (lexer, T_LPAREN) && lex_force_int (lexer))
2188 examine.sl_power = lex_integer (lexer);
2191 if (! lex_force_match (lexer, T_RPAREN))
/* NOTE(review): in the visible lines NONE and ALL change the histogram,
   NP and box plot flags but leave spreadlevelplot untouched; confirm
   that this is intended. */
2195 else if (lex_match_id (lexer, "NONE"))
2197 examine.histogramplot = false;
2198 examine.npplot = false;
2199 examine.boxplot = false;
2201 else if (lex_match (lexer, T_ALL))
2203 examine.histogramplot = true;
2204 examine.npplot = true;
2205 examine.boxplot = true;
2209 lex_error (lexer, NULL);
2212 lex_match (lexer, T_COMMA);
/* CINTERVAL: a number stored as the confidence level.
   NOTE(review): no range check on the value is visible here. */
2215 else if (lex_match_id (lexer, "CINTERVAL"))
2217 if ( !lex_force_num (lexer))
2220 examine.conf = lex_number (lexer);
/* ID: the variable used to identify cases.
   NOTE(review): the result of parse_variable_const is not visibly
   checked for failure. */
2223 else if (lex_match_id (lexer, "ID"))
2225 lex_match (lexer, T_EQUALS);
2227 examine.id_var = parse_variable_const (lexer, examine.dict);
2231 lex_error (lexer, NULL);
/* Post-parse validation and derived settings. */
2237 if ( totals_seen && nototals_seen)
2239 msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
2243 /* If totals have been requested or if there are no factors
2244 in this analysis, then the totals need to be included. */
2245 if ( !nototals_seen || examine.n_iacts == 1)
2247 examine.iacts = &iacts_mem[0];
/* Otherwise skip the totals interaction in slot 0 and destroy it now;
   the adjustment of n_iacts is not visible in this listing. */
2252 examine.iacts = &iacts_mem[1];
2253 interaction_destroy (iacts_mem[0]);
/* With an explicit ID variable, take its case index and width;
   otherwise the defaults set above (-1 and 0) remain. */
2257 if ( examine.id_var )
2259 examine.id_idx = var_get_case_index (examine.id_var);
2260 examine.id_width = var_get_width (examine.id_var);
/* The prototype gets three values in this order: value, id, weight. */
2263 examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* value */
2264 examine.ex_proto = caseproto_add_width (examine.ex_proto, examine.id_width); /* id */
2265 examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* weight */
/* At least as many extremes are calculated as are displayed. */
2268 if (examine.disp_extremes > 0)
2270 examine.calc_extremes = examine.disp_extremes;
2273 if (examine.descriptives && examine.calc_extremes == 0)
2275 /* Descriptives always displays the max and min */
2276 examine.calc_extremes = 1;
/* PERCENTILES given without a list: use the seven defaults below. */
2279 if (percentiles_seen && examine.n_percentiles == 0)
2281 examine.n_percentiles = 7;
2282 examine.ptiles = xcalloc (examine.n_percentiles,
2283 sizeof (*examine.ptiles));
2285 examine.ptiles[0] = 5;
2286 examine.ptiles[1] = 10;
2287 examine.ptiles[2] = 25;
2288 examine.ptiles[3] = 50;
2289 examine.ptiles[4] = 75;
2290 examine.ptiles[5] = 90;
2291 examine.ptiles[6] = 95;
2294 assert (examine.calc_extremes >= examine.disp_extremes);
/* Run the analysis once for every group of cases produced by the split
   grouper, then commit the procedure.  OK combines the results of
   casegrouper_destroy and proc_commit. */
2296 struct casegrouper *grouper;
2297 struct casereader *group;
2300 grouper = casegrouper_create_splits (proc_open (ds), examine.dict);
2301 while (casegrouper_get_next_group (grouper, &group))
2302 run_examine (&examine, group);
2303 ok = casegrouper_destroy (grouper);
2304 ok = proc_commit (ds) && ok;
/* First cleanup sequence: destroys the interactions reachable through
   examine.iacts as selected above, and frees the heap and pool
   allocations. */
2307 caseproto_unref (examine.ex_proto);
2309 for (i = 0; i < examine.n_iacts; ++i)
2310 interaction_destroy (examine.iacts[i]);
2311 free (examine.ptiles);
2312 free (examine.dep_vars);
2313 pool_destroy (examine.pool);
/* Second cleanup sequence, presumably the target of the failure paths
   (the label is not visible in this listing).  It resets examine.iacts
   to the start of IACTS_MEM so that the totals interaction in slot 0
   is destroyed as well. */
2318 caseproto_unref (examine.ex_proto);
2319 examine.iacts = iacts_mem;
2320 for (i = 0; i < examine.n_iacts; ++i)
2321 interaction_destroy (examine.iacts[i]);
2322 free (examine.dep_vars);
2323 free (examine.ptiles);
2324 pool_destroy (examine.pool);