+++ /dev/null
-/*
- PSPP - a program for statistical analysis.
- Copyright (C) 2012, 2013, 2016, 2019 Free Software Foundation, Inc.
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-#include <config.h>
-
-#include <math.h>
-#include <gsl/gsl_cdf.h>
-
-#include "data/casegrouper.h"
-#include "data/caseproto.h"
-#include "data/casereader.h"
-#include "data/casewriter.h"
-#include "data/dataset.h"
-#include "data/dictionary.h"
-#include "data/format.h"
-#include "data/subcase.h"
-#include "language/command.h"
-#include "language/lexer/lexer.h"
-#include "language/lexer/value-parser.h"
-#include "language/lexer/variable-parser.h"
-#include "libpspp/assertion.h"
-#include "libpspp/message.h"
-#include "libpspp/pool.h"
-#include "math/box-whisker.h"
-#include "math/categoricals.h"
-#include "math/chart-geometry.h"
-#include "math/histogram.h"
-#include "math/interaction.h"
-#include "math/moments.h"
-#include "math/np.h"
-#include "math/order-stats.h"
-#include "math/percentiles.h"
-#include "math/shapiro-wilk.h"
-#include "math/sort.h"
-#include "math/trimmed-mean.h"
-#include "math/tukey-hinges.h"
-#include "output/charts/boxplot.h"
-#include "output/charts/np-plot.h"
-#include "output/charts/plot-hist.h"
-#include "output/charts/spreadlevel-plot.h"
-#include "output/pivot-table.h"
-
-#include "gettext.h"
-#define _(msgid) gettext (msgid)
-#define N_(msgid) msgid
-
-/* Appends to STR the name of value VAL of variable VAR.  When
-   var_is_value_missing() reports VAL as missing for VAR, the suffix
-   " (missing)" is appended as well. */
-static void
-append_value_name (const struct variable *var, const union value *val,
-                   struct string *str)
-{
-  var_append_value_name (var, val, str);
-
-  const bool is_missing = var_is_value_missing (var, val) != 0;
-  if (!is_missing)
-    return;
-
-  ds_put_cstr (str, _(" (missing)"));
-}
-/* Layout of boxplot charts; stored in struct examine's boxplot_mode member.
-   NOTE(review): the code that dispatches on this value is not visible here;
-   the per-constant notes below are inferred from the names of
-   show_boxplot_grouped() and show_boxplot_variabled() -- confirm. */
-enum bp_mode
- {
- BP_GROUPS, /* presumably: one chart per dependent variable, one box per group */
- BP_VARIABLES /* presumably: one chart per group, one box per dependent variable */
- };
-
-/* Indices for the ex_proto member (below).  update_n() builds one case
-   from ex_proto for every accepted observation and stores its three
-   values at these indexes; calculate_n() reads them back the same way. */
-enum
- {
- EX_VAL, /* value of the dependent variable */
- EX_ID, /* identity: the value copied from the source case at id_idx */
- EX_WT /* weight supplied with the case */
- };
-
-/* Settings and shared state for one run of the command.  A pointer to this
-   structure is what create_n(), update_n() and calculate_n() receive as
-   AUX1, and what the report and chart functions receive as CMD. */
-struct examine
-{
- struct pool *pool; /* pool_calloc() source for the per-category statistics arrays */
-
- /* A caseproto used to contain the data subsets under examination,
- see (enum above) */
- struct caseproto *ex_proto;
-
- size_t n_dep_vars; /* number of elements in dep_vars */
- const struct variable **dep_vars; /* the dependent variables */
-
- size_t n_iacts; /* number of elements in iacts */
- struct interaction **iacts; /* factor interactions; indexed by IACT_IDX in the report functions */
-
- enum mv_class dep_excl; /* missing-value classes that exclude a dependent value (tested in update_n) */
- enum mv_class fctr_excl; /* NOTE(review): not used in the visible code; presumably the same for factor values */
-
- const struct dictionary *dict; /* used to look up variables and the weight variable */
-
- struct categoricals *cats; /* holds the per-category cases and exploratory_stats arrays */
-
- /* how many extremities to display */
- int disp_extremes;
- int calc_extremes; /* how many extremities calculate_n() records per end */
- bool descriptives; /* NOTE(review): not read in the visible code; presumably enables descriptives_report */
-
- double conf; /* confidence level as a fraction; descriptives_report() prints conf * 100 */
-
- bool missing_pw; /* when false, update_n() drops a case if any dependent value is excluded */
-
- /* The case index of the ID value (or -1) if not applicable */
- size_t id_idx; /* NOTE(review): unsigned, so "-1" is stored as SIZE_MAX; update_n() asserts it is not -1 */
- int id_width; /* width passed to value_copy()/value_init_pool() for identity values */
-
- enum pc_alg pc_alg; /* algorithm passed to percentile_calculate() */
- double *ptiles; /* requested percentiles, in percent (calculate_n divides by 100) */
- size_t n_percentiles; /* number of elements in ptiles */
-
- bool plot_histogram; /* the plot_* flags select which per-variable chart data calculate_n() prepares */
- bool plot_boxplot;
- bool plot_npplot;
- bool plot_spreadlevel;
- float sl_power; /* power argument given to spreadlevel_plot_create() */
-
- enum bp_mode boxplot_mode;
-
- const struct variable *id_var; /* optional ID variable; NULL makes extremes_report() show "Case Number" */
-
- const struct variable *wv; /* variable passed to var_force_valid_weight() in calculate_n() */
-};
-/* One entry of the minima or maxima arrays in struct exploratory_stats. */
-struct extremity
-{
- /* The value of this extremity */
- double val;
-
- /* Either the casenumber or the value of the variable specified
- by the /ID subcommand which corresponds to this extremity.
- calculate_n() copies it from index EX_ID of the sorted case. */
- union value identity;
-};
-/* Statistics gathered for one dependent variable within one category.
-   create_n() allocates an array of these, one element per dependent
-   variable, which the categoricals object hands back as user data. */
-struct exploratory_stats
-{
- double missing; /* summed weight of cases rejected as missing (update_n) */
- double non_missing; /* summed weight of cases accepted (update_n) */
-
- struct moments *mom; /* created with MOMENT_KURTOSIS in create_n() */
-
- /* Most operations need a sorted reader/writer */
- struct casewriter *sorted_writer; /* filled by update_n(); set to NULL by calculate_n() */
- struct casereader *sorted_reader; /* made from sorted_writer in calculate_n() */
-
- struct extremity *minima; /* calc_extremes entries, allocated in calculate_n() */
- struct extremity *maxima; /* calc_extremes entries, allocated in calculate_n() */
-
- /*
- Minimum should always equal minima[0].val.
- Likewise, maximum should always equal maxima[0].val.
- This redundancy exists as an optimisation effort.
- Some statistics (eg histogram) require early calculation
- of the min and max
- */
- double minimum;
- double maximum;
-
- struct trimmed_mean *trimmed_mean; /* created with 0.05 in calculate_n() */
- struct percentile *quartiles[3]; /* 0.25, 0.5 and 0.75, in that order */
- struct percentile **percentiles; /* one per entry of struct examine's ptiles */
- struct shapiro_wilk *shapiro_wilk; /* NULL unless calculate_n() creates it */
-
- struct tukey_hinges *hinges;
-
- /* The data for the NP Plots */
- struct np *np;
-
- struct histogram *histogram;
-
- /* The data for the box plots */
- struct box_whisker *box_whisker;
-
- /* Total weight */
- double cc;
-
- /* The minimum weight */
- double cmin;
-};
-
-/* Submits one boxplot chart per dependent variable for interaction
-   IACT_IDX.  Each chart holds one box per category of the interaction,
-   labelled with that category's factor values, and its vertical range
-   spans the smallest minimum and largest maximum over the categories.
-   After a box_whisker has been passed to boxplot_add_box(), the pointer
-   kept in the category's statistics is cleared. */
-static void
-show_boxplot_grouped (const struct examine *cmd, int iact_idx)
-{
-  const struct interaction *iact = cmd->iacts[iact_idx];
-  const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
-
-  for (size_t v = 0; v < cmd->n_dep_vars; v++)
-    {
-      const char *dep_name = var_to_string (cmd->dep_vars[v]);
-
-      /* Chart title. */
-      struct string title;
-      ds_init_empty (&title);
-      if (iact->n_vars == 0)
-        ds_put_format (&title, _("Boxplot of %s"), dep_name);
-      else
-        {
-          struct string iact_name;
-          ds_init_empty (&iact_name);
-          interaction_to_string (iact, &iact_name);
-          ds_put_format (&title, _("Boxplot of %s vs. %s"),
-                         dep_name, ds_cstr (&iact_name));
-          ds_destroy (&iact_name);
-        }
-
-      /* Vertical range over all of the categories. */
-      double lo = DBL_MAX;
-      double hi = -DBL_MAX;
-      for (size_t g = 0; g < n_cats; g++)
-        {
-          const struct exploratory_stats *all
-            = categoricals_get_user_data_by_category_real (cmd->cats,
-                                                           iact_idx, g);
-          const struct exploratory_stats *stats = &all[v];
-
-          if (stats->minimum < lo)
-            lo = stats->minimum;
-          if (stats->maximum > hi)
-            hi = stats->maximum;
-        }
-
-      struct boxplot *chart = boxplot_create (lo, hi, ds_cstr (&title));
-      ds_destroy (&title);
-
-      /* One box per category. */
-      for (size_t g = 0; g < n_cats; g++)
-        {
-          const struct ccase *c
-            = categoricals_get_case_by_category_real (cmd->cats, iact_idx, g);
-          struct exploratory_stats *all
-            = categoricals_get_user_data_by_category_real (cmd->cats,
-                                                           iact_idx, g);
-
-          /* Box label: the factor values, separated by "; ". */
-          struct string label;
-          ds_init_empty (&label);
-          for (size_t k = 0; k < iact->n_vars; k++)
-            {
-              const struct variable *factor = iact->vars[k];
-
-              struct string name;
-              ds_init_empty (&name);
-              append_value_name (factor, case_data (c, factor), &name);
-              ds_ltrim (&name, ss_cstr (" "));
-
-              if (k > 0)
-                ds_put_cstr (&label, "; ");
-              ds_put_substring (&label, name.ss);
-
-              ds_destroy (&name);
-            }
-
-          boxplot_add_box (chart, all[v].box_whisker, ds_cstr (&label));
-          all[v].box_whisker = NULL;
-
-          ds_destroy (&label);
-        }
-
-      boxplot_submit (chart);
-    }
-}
-
-/* Submits one boxplot chart per category of interaction IACT_IDX.  Each
-   chart holds one box per dependent variable, labelled with the variable,
-   and its vertical range spans the smallest minimum and largest maximum
-   over the dependent variables.  After a box_whisker has been passed to
-   boxplot_add_box(), the pointer kept in the statistics is cleared. */
-static void
-show_boxplot_variabled (const struct examine *cmd, int iact_idx)
-{
-  const struct interaction *iact = cmd->iacts[iact_idx];
-  const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
-
-  for (size_t g = 0; g < n_cats; g++)
-    {
-      const struct ccase *c
-        = categoricals_get_case_by_category_real (cmd->cats, iact_idx, g);
-
-      struct string title;
-      ds_init_empty (&title);
-
-      /* Vertical range over all of the dependent variables. */
-      double lo = DBL_MAX;
-      double hi = -DBL_MAX;
-      for (size_t v = 0; v < cmd->n_dep_vars; v++)
-        {
-          const struct exploratory_stats *all
-            = categoricals_get_user_data_by_category_real (cmd->cats,
-                                                           iact_idx, g);
-          const struct exploratory_stats *stats = &all[v];
-
-          if (stats->minimum < lo)
-            lo = stats->minimum;
-          if (stats->maximum > hi)
-            hi = stats->maximum;
-        }
-
-      /* Chart title: names the category when there are factors. */
-      if (iact->n_vars != 0)
-        {
-          struct string desc;
-          ds_init_empty (&desc);
-          for (size_t k = 0; k < iact->n_vars; k++)
-            {
-              const struct variable *factor = iact->vars[k];
-
-              ds_put_cstr (&desc, var_to_string (factor));
-              ds_put_cstr (&desc, " = ");
-              append_value_name (factor, case_data (c, factor), &desc);
-              ds_put_cstr (&desc, "; ");
-            }
-          ds_put_format (&title, _("Boxplot of %s"), ds_cstr (&desc));
-          ds_destroy (&desc);
-        }
-      else
-        ds_put_format (&title, _("Boxplot"));
-
-      struct boxplot *chart = boxplot_create (lo, hi, ds_cstr (&title));
-      ds_destroy (&title);
-
-      /* One box per dependent variable. */
-      for (size_t v = 0; v < cmd->n_dep_vars; v++)
-        {
-          struct exploratory_stats *all
-            = categoricals_get_user_data_by_category_real (cmd->cats,
-                                                           iact_idx, g);
-
-          boxplot_add_box (chart, all[v].box_whisker,
-                           var_to_string (cmd->dep_vars[v]));
-          all[v].box_whisker = NULL;
-        }
-
-      boxplot_submit (chart);
-    }
-}
-
-
-/* Submits a normal probability plot and a detrended normal probability
-   plot for every (dependent variable, category) pair of interaction
-   IACT_IDX.  If either chart cannot be created, a warning is issued and
-   neither chart is submitted. */
-static void
-show_npplot (const struct examine *cmd, int iact_idx)
-{
-  const struct interaction *iact = cmd->iacts[iact_idx];
-  const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
-
-  for (size_t v = 0; v < cmd->n_dep_vars; v++)
-    for (size_t g = 0; g < n_cats; g++)
-      {
-        const struct ccase *c
-          = categoricals_get_case_by_category_real (cmd->cats, iact_idx, g);
-        const struct exploratory_stats *all
-          = categoricals_get_user_data_by_category_real (cmd->cats,
-                                                         iact_idx, g);
-
-        /* Chart label: the dependent variable, followed by the factor
-           settings in parentheses when there are factors. */
-        struct string label;
-        ds_init_cstr (&label, var_to_string (cmd->dep_vars[v]));
-        if (iact->n_vars != 0)
-          {
-            ds_put_cstr (&label, " (");
-            for (size_t k = 0; k < iact->n_vars; k++)
-              {
-                const struct variable *factor = iact->vars[k];
-
-                ds_put_cstr (&label, var_to_string (factor));
-                ds_put_cstr (&label, " = ");
-                append_value_name (factor, case_data (c, factor), &label);
-                ds_put_cstr (&label, "; ");
-              }
-            ds_put_cstr (&label, ")");
-          }
-
-        /* Turn the NP writer into a reader that both charts draw from. */
-        struct np *np = all[v].np;
-        struct casereader *reader = casewriter_make_reader (np->writer);
-        np->writer = NULL;
-
-        const char *text = ds_cstr (&label);
-        struct chart *npp = np_plot_create (np, reader, text);
-        struct chart *dnpp = dnp_plot_create (np, reader, text);
-
-        if (npp != NULL && dnpp != NULL)
-          {
-            chart_submit (npp);
-            chart_submit (dnpp);
-          }
-        else
-          {
-            msg (MW, _("Not creating NP plot because data set is empty."));
-            chart_unref (npp);
-            chart_unref (dnpp);
-          }
-
-        casereader_destroy (reader);
-        ds_destroy (&label);
-      }
-}
-
-/* Submits one spread-versus-level chart per dependent variable for
-   interaction IACT_IDX.  Each category of the interaction contributes one
-   point: its interquartile range against its median, both computed with
-   CMD->pc_alg.  Nothing is produced for an interaction without variables.
-
-   If the chart cannot be created, a warning is issued and the dependent
-   variable is skipped.  The check is made before any point is added, so
-   that a null chart is never passed to spreadlevel_plot_add(). */
-static void
-show_spreadlevel (const struct examine *cmd, int iact_idx)
-{
-  const struct interaction *iact = cmd->iacts[iact_idx];
-  const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
-
-  /* Spreadlevel when there are no levels is not useful */
-  if (iact->n_vars == 0)
-    return;
-
-  for (size_t v = 0; v < cmd->n_dep_vars; ++v)
-    {
-      /* The interaction is known to be non-empty here, so the label always
-         names it in parentheses. */
-      struct string label;
-      ds_init_cstr (&label, var_to_string (cmd->dep_vars[v]));
-      ds_put_cstr (&label, " (");
-      interaction_to_string (iact, &label);
-      ds_put_cstr (&label, ")");
-
-      struct chart *sl = spreadlevel_plot_create (ds_cstr (&label),
-                                                  cmd->sl_power);
-      if (sl == NULL)
-        {
-          msg (MW, _("Not creating spreadlevel chart for %s"),
-               ds_cstr (&label));
-          ds_destroy (&label);
-          continue;
-        }
-
-      for (size_t grp = 0; grp < n_cats; ++grp)
-        {
-          const struct exploratory_stats *es =
-            categoricals_get_user_data_by_category_real (cmd->cats,
-                                                         iact_idx, grp);
-
-          double median = percentile_calculate (es[v].quartiles[1],
-                                                cmd->pc_alg);
-
-          double iqr = percentile_calculate (es[v].quartiles[2], cmd->pc_alg) -
-            percentile_calculate (es[v].quartiles[0], cmd->pc_alg);
-
-          spreadlevel_plot_add (sl, iqr, median);
-        }
-
-      chart_submit (sl);
-
-      ds_destroy (&label);
-    }
-}
-
-
-/* Submits a histogram chart for every (dependent variable, category) pair
-   of interaction IACT_IDX whose statistics carry a histogram.  The chart
-   is given the count, mean and standard deviation obtained from the
-   pair's moments. */
-static void
-show_histogram (const struct examine *cmd, int iact_idx)
-{
-  const struct interaction *iact = cmd->iacts[iact_idx];
-  const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
-
-  for (size_t v = 0; v < cmd->n_dep_vars; v++)
-    for (size_t g = 0; g < n_cats; g++)
-      {
-        const struct ccase *c
-          = categoricals_get_case_by_category_real (cmd->cats, iact_idx, g);
-        const struct exploratory_stats *all
-          = categoricals_get_user_data_by_category_real (cmd->cats,
-                                                         iact_idx, g);
-        const struct exploratory_stats *stats = &all[v];
-
-        if (stats->histogram == NULL)
-          continue;
-
-        /* Chart label: the dependent variable, followed by the factor
-           settings in parentheses when there are factors. */
-        struct string label;
-        ds_init_cstr (&label, var_to_string (cmd->dep_vars[v]));
-        if (iact->n_vars != 0)
-          {
-            ds_put_cstr (&label, " (");
-            for (size_t k = 0; k < iact->n_vars; k++)
-              {
-                const struct variable *factor = iact->vars[k];
-
-                ds_put_cstr (&label, var_to_string (factor));
-                ds_put_cstr (&label, " = ");
-                append_value_name (factor, case_data (c, factor), &label);
-                ds_put_cstr (&label, "; ");
-              }
-            ds_put_cstr (&label, ")");
-          }
-
-        double n, mean, var;
-        moments_calculate (stats->mom, &n, &mean, &var, NULL, NULL);
-
-        struct chart *chart
-          = histogram_chart_create (stats->histogram->gsl_hist,
-                                    ds_cstr (&label), n, mean,
-                                    sqrt (var), false);
-        chart_submit (chart);
-
-        ds_destroy (&label);
-      }
-}
-
-/* Returns a new pivot value for VALUE of variable VAR.  MISSING_FOOTNOTE
-   is attached to it when var_is_value_missing() classifies VALUE as
-   MV_USER. */
-static struct pivot_value *
-new_value_with_missing_footnote (const struct variable *var,
-                                 const union value *value,
-                                 struct pivot_footnote *missing_footnote)
-{
-  struct pivot_value *result = pivot_value_new_var_value (var, value);
-  const bool user_missing = var_is_value_missing (var, value) == MV_USER;
-
-  if (user_missing)
-    pivot_value_add_footnote (result, missing_footnote);
-
-  return result;
-}
-
-/* Adds one row dimension to TABLE for each variable of IACT, taking the
-   variables from last to first.  Each dimension is labelled with its
-   variable and gets one leaf per value that CATS reports for that
-   variable; leaves for user-missing values carry MISSING_FOOTNOTE. */
-static void
-create_interaction_dimensions (struct pivot_table *table,
-                               const struct categoricals *cats,
-                               const struct interaction *iact,
-                               struct pivot_footnote *missing_footnote)
-{
-  size_t remaining = iact->n_vars;
-  while (remaining > 0)
-    {
-      const struct variable *var = iact->vars[--remaining];
-
-      struct pivot_dimension *dim = pivot_dimension_create__ (
-        table, PIVOT_AXIS_ROW, pivot_value_new_variable (var));
-      dim->root->show_label = true;
-
-      size_t n_values;
-      union value *values = categoricals_get_var_values (cats, var,
-                                                         &n_values);
-      for (const union value *value = values; value < values + n_values;
-           value++)
-        {
-          struct pivot_value *leaf
-            = new_value_with_missing_footnote (var, value, missing_footnote);
-          pivot_category_create_leaf (dim->root, leaf);
-        }
-    }
-}
-
-/* Creates in TABLE, and returns, the footnote whose text is
-   "User-missing value.". */
-static struct pivot_footnote *
-create_missing_footnote (struct pivot_table *table)
-{
-  struct pivot_value *text = pivot_value_new_text (N_("User-missing value."));
-  return pivot_table_create_footnote (table, text);
-}
-
-/* Builds and submits the "Percentiles" pivot table for interaction
-   IACT_IDX.
-
-   Dimension 0 (columns) has one leaf per requested percentile and
-   dimension 1 (rows) distinguishes the weighted average from Tukey's
-   hinges.  The interaction's variables and then the dependent variables
-   supply the remaining row dimensions.  A hinge is reported only for the
-   percentiles 25, 50 and 75. */
-static void
-percentiles_report (const struct examine *cmd, int iact_idx)
-{
-  struct pivot_table *table = pivot_table_create (N_("Percentiles"));
-
-  struct pivot_dimension *pc_dim = pivot_dimension_create (
-    table, PIVOT_AXIS_COLUMN, N_("Percentiles"));
-  pc_dim->root->show_label = true;
-  for (size_t k = 0; k < cmd->n_percentiles; k++)
-    {
-      char *text = xasprintf ("%g", cmd->ptiles[k]);
-      pivot_category_create_leaf (pc_dim->root,
-                                  pivot_value_new_user_text_nocopy (text));
-    }
-
-  pivot_dimension_create (table, PIVOT_AXIS_ROW, N_("Statistics"),
-                          N_("Weighted Average"), N_("Tukey's Hinges"));
-
-  const struct interaction *iact = cmd->iacts[iact_idx];
-  struct pivot_footnote *missing_footnote = create_missing_footnote (table);
-  create_interaction_dimensions (table, cmd->cats, iact, missing_footnote);
-
-  struct pivot_dimension *dep_dim = pivot_dimension_create (
-    table, PIVOT_AXIS_ROW, N_("Dependent Variables"));
-
-  const size_t n_dims = table->n_dimensions;
-  size_t *indexes = xnmalloc (n_dims, sizeof *indexes);
-
-  const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
-  for (size_t v = 0; v < cmd->n_dep_vars; v++)
-    {
-      indexes[n_dims - 1] = pivot_category_create_leaf (
-        dep_dim->root, pivot_value_new_variable (cmd->dep_vars[v]));
-
-      for (size_t cat = 0; cat < n_cats; cat++)
-        {
-          /* Locate the category along each interaction dimension. */
-          for (size_t k = 0; k < iact->n_vars; k++)
-            {
-              int value_idx = categoricals_get_value_index_by_category_real (
-                cmd->cats, iact_idx, cat, k);
-              indexes[n_dims - 2 - k] = value_idx;
-            }
-
-          const struct exploratory_stats *all
-            = categoricals_get_user_data_by_category_real (cmd->cats,
-                                                           iact_idx, cat);
-          const struct exploratory_stats *stats = &all[v];
-
-          double hinges[3];
-          tukey_hinges_calculate (stats->hinges, hinges);
-
-          for (size_t p = 0; p < cmd->n_percentiles; p++)
-            {
-              indexes[0] = p;
-
-              /* Weighted average. */
-              indexes[1] = 0;
-              double average = percentile_calculate (stats->percentiles[p],
-                                                     cmd->pc_alg);
-              pivot_table_put (table, indexes, n_dims,
-                               pivot_value_new_number (average));
-
-              /* Tukey's hinge, where one corresponds to this percentile. */
-              const double requested = cmd->ptiles[p];
-              double hinge;
-              if (requested == 25.0)
-                hinge = hinges[0];
-              else if (requested == 50.0)
-                hinge = hinges[1];
-              else if (requested == 75.0)
-                hinge = hinges[2];
-              else
-                hinge = SYSMIS;
-
-              if (hinge != SYSMIS)
-                {
-                  indexes[1] = 1;
-                  pivot_table_put (table, indexes, n_dims,
-                                   pivot_value_new_number (hinge));
-                }
-            }
-        }
-    }
-  free (indexes);
-
-  pivot_table_submit (table);
-}
-
-/* Builds and submits the "Tests of Normality" pivot table for interaction
-   IACT_IDX.
-
-   Dimension 0 (columns) holds the Shapiro-Wilk statistic, its df and its
-   significance.  The interaction's variables and then the dependent
-   variables supply the row dimensions.  Pairs of dependent variable and
-   category that have no Shapiro-Wilk data are left empty.
-
-   Each category is located along every interaction dimension with
-   categoricals_get_value_index_by_category_real(), in the same way as the
-   other reports in this file.  Writing the raw category number into
-   indexes[1] instead would overwrite the dependent variable's index when
-   the interaction has no variables, and would leave index slots unset
-   when it has more than one. */
-static void
-normality_report (const struct examine *cmd, int iact_idx)
-{
-  struct pivot_table *table = pivot_table_create (N_("Tests of Normality"));
-
-  struct pivot_dimension *test =
-    pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Shapiro-Wilk"),
-                            N_("Statistic"),
-                            N_("df"), PIVOT_RC_COUNT,
-                            N_("Sig."));
-
-  test->root->show_label = true;
-
-  const struct interaction *iact = cmd->iacts[iact_idx];
-  struct pivot_footnote *missing_footnote = create_missing_footnote (table);
-  create_interaction_dimensions (table, cmd->cats, iact, missing_footnote);
-
-  struct pivot_dimension *dep_dim = pivot_dimension_create (
-    table, PIVOT_AXIS_ROW, N_("Dependent Variables"));
-
-  size_t *indexes = xnmalloc (table->n_dimensions, sizeof *indexes);
-
-  size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
-  for (size_t v = 0; v < cmd->n_dep_vars; ++v)
-    {
-      indexes[table->n_dimensions - 1] =
-        pivot_category_create_leaf (dep_dim->root,
-                                    pivot_value_new_variable (cmd->dep_vars[v]));
-
-      for (size_t i = 0; i < n_cats; ++i)
-        {
-          /* Interaction variable J lives in dimension n_dimensions - 2 - J
-             (see create_interaction_dimensions). */
-          for (size_t j = 0; j < iact->n_vars; j++)
-            {
-              int idx = categoricals_get_value_index_by_category_real (
-                cmd->cats, iact_idx, i, j);
-              indexes[table->n_dimensions - 2 - j] = idx;
-            }
-
-          const struct exploratory_stats *es
-            = categoricals_get_user_data_by_category_real (
-              cmd->cats, iact_idx, i);
-
-          struct shapiro_wilk *sw = es[v].shapiro_wilk;
-
-          if (sw == NULL)
-            continue;
-
-          double w = shapiro_wilk_calculate (sw);
-
-          /* Statistic. */
-          indexes[0] = 0;
-          pivot_table_put (table, indexes, table->n_dimensions,
-                           pivot_value_new_number (w));
-
-          /* df. */
-          indexes[0] = 1;
-          pivot_table_put (table, indexes, table->n_dimensions,
-                           pivot_value_new_number (sw->n));
-
-          /* Sig. */
-          indexes[0] = 2;
-          pivot_table_put (table, indexes, table->n_dimensions,
-                           pivot_value_new_number (
-                             shapiro_wilk_significance (sw->n, w)));
-        }
-    }
-
-  free (indexes);
-
-  pivot_table_submit (table);
-}
-
-
-/* Builds and submits the "Descriptives" pivot table for interaction
-   IACT_IDX.
-
-   Dimension 0 (columns) distinguishes a statistic from its standard
-   error; dimension 1 (rows) lists the statistics.  The interaction's
-   variables and then the dependent variables supply the remaining row
-   dimensions.  The confidence interval for the mean uses the t
-   distribution with N - 1 degrees of freedom at level CMD->conf. */
-static void
-descriptives_report (const struct examine *cmd, int iact_idx)
-{
-  struct pivot_table *table = pivot_table_create (N_("Descriptives"));
-
-  pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Aspect"),
-                          N_("Statistic"), N_("Std. Error"));
-
-  struct pivot_dimension *stat_dim = pivot_dimension_create (
-    table, PIVOT_AXIS_ROW, N_("Statistics"), N_("Mean"));
-  struct pivot_category *ci_group = pivot_category_create_group__ (
-    stat_dim->root,
-    pivot_value_new_text_format (N_("%g%% Confidence Interval for Mean"),
-                                 cmd->conf * 100.0));
-  pivot_category_create_leaves (ci_group, N_("Lower Bound"),
-                                N_("Upper Bound"));
-  pivot_category_create_leaves (
-    stat_dim->root, N_("5% Trimmed Mean"), N_("Median"), N_("Variance"),
-    N_("Std. Deviation"), N_("Minimum"), N_("Maximum"), N_("Range"),
-    N_("Interquartile Range"), N_("Skewness"), N_("Kurtosis"));
-
-  const struct interaction *iact = cmd->iacts[iact_idx];
-  struct pivot_footnote *missing_footnote = create_missing_footnote (table);
-  create_interaction_dimensions (table, cmd->cats, iact, missing_footnote);
-
-  struct pivot_dimension *dep_dim = pivot_dimension_create (
-    table, PIVOT_AXIS_ROW, N_("Dependent Variables"));
-
-  const size_t n_dims = table->n_dimensions;
-  size_t *indexes = xnmalloc (n_dims, sizeof *indexes);
-
-  const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
-  for (size_t v = 0; v < cmd->n_dep_vars; v++)
-    {
-      indexes[n_dims - 1] = pivot_category_create_leaf (
-        dep_dim->root, pivot_value_new_variable (cmd->dep_vars[v]));
-
-      for (size_t cat = 0; cat < n_cats; cat++)
-        {
-          /* Locate the category along each interaction dimension. */
-          for (size_t k = 0; k < iact->n_vars; k++)
-            {
-              int value_idx = categoricals_get_value_index_by_category_real (
-                cmd->cats, iact_idx, cat, k);
-              indexes[n_dims - 2 - k] = value_idx;
-            }
-
-          const struct exploratory_stats *all
-            = categoricals_get_user_data_by_category_real (cmd->cats,
-                                                           iact_idx, cat);
-          const struct exploratory_stats *stats = &all[v];
-
-          double n, mean, variance, skewness, kurtosis;
-          moments_calculate (stats->mom, &n, &mean, &variance,
-                             &skewness, &kurtosis);
-
-          const double tval = gsl_cdf_tdist_Qinv ((1.0 - cmd->conf) / 2.0,
-                                                  n - 1.0);
-          const double se_mean = calc_semean (variance, n);
-          const double trimmed = trimmed_mean_calculate (stats->trimmed_mean);
-          const double median = percentile_calculate (stats->quartiles[1],
-                                                      cmd->pc_alg);
-          const double q3 = percentile_calculate (stats->quartiles[2],
-                                                  cmd->pc_alg);
-          const double q1 = percentile_calculate (stats->quartiles[0],
-                                                  cmd->pc_alg);
-          const double lowest = stats->minima[0].val;
-          const double highest = stats->maxima[0].val;
-
-          /* Cells to fill, as (row in "Statistics", column in "Aspect",
-             value) triples. */
-          const struct
-            {
-              int stat_idx;
-              int aspect_idx;
-              double x;
-            }
-          cells[] = {
-            { 0, 0, mean },
-            { 0, 1, se_mean },
-            { 1, 0, mean - tval * se_mean },
-            { 2, 0, mean + tval * se_mean },
-            { 3, 0, trimmed },
-            { 4, 0, median },
-            { 5, 0, variance },
-            { 6, 0, sqrt (variance) },
-            { 7, 0, lowest },
-            { 8, 0, highest },
-            { 9, 0, highest - lowest },
-            { 10, 0, q3 - q1 },
-            { 11, 0, skewness },
-            { 11, 1, calc_seskew (n) },
-            { 12, 0, kurtosis },
-            { 12, 1, calc_sekurt (n) },
-          };
-          const size_t n_cells = sizeof cells / sizeof *cells;
-
-          for (size_t k = 0; k < n_cells; k++)
-            {
-              indexes[0] = cells[k].aspect_idx;
-              indexes[1] = cells[k].stat_idx;
-              pivot_table_put (table, indexes, n_dims,
-                               pivot_value_new_number (cells[k].x));
-            }
-        }
-    }
-
-  free (indexes);
-
-  pivot_table_submit (table);
-}
-
-
-/* Builds and submits the "Extreme Values" pivot table for interaction
-   IACT_IDX.
-
-   Dimension 0 (columns) holds the identity of a case (the ID variable's
-   value, or the case number when there is no ID variable) and its value.
-   Dimension 1 (rows) is the rank, 1 to CMD->disp_extremes, and dimension
-   2 (rows) separates the highest values from the lowest.  The
-   interaction's variables and then the dependent variables supply the
-   remaining row dimensions. */
-static void
-extremes_report (const struct examine *cmd, int iact_idx)
-{
-  struct pivot_table *table = pivot_table_create (N_("Extreme Values"));
-
-  struct pivot_dimension *stat_dim = pivot_dimension_create (
-    table, PIVOT_AXIS_COLUMN, N_("Statistics"));
-  struct pivot_value *id_heading;
-  if (cmd->id_var)
-    id_heading = pivot_value_new_variable (cmd->id_var);
-  else
-    id_heading = pivot_value_new_text (N_("Case Number"));
-  pivot_category_create_leaf (stat_dim->root, id_heading);
-  pivot_category_create_leaves (stat_dim->root, N_("Value"));
-
-  struct pivot_dimension *order_dim = pivot_dimension_create (
-    table, PIVOT_AXIS_ROW, N_("Order"));
-  for (size_t rank = 0; rank < cmd->disp_extremes; rank++)
-    pivot_category_create_leaf (order_dim->root,
-                                pivot_value_new_integer (rank + 1));
-
-  pivot_dimension_create (table, PIVOT_AXIS_ROW,
-                          /* TRANSLATORS: This is a noun, not an adjective.  */
-                          N_("Extreme"),
-                          N_("Highest"), N_("Lowest"));
-
-  const struct interaction *iact = cmd->iacts[iact_idx];
-  struct pivot_footnote *missing_footnote = create_missing_footnote (table);
-  create_interaction_dimensions (table, cmd->cats, iact, missing_footnote);
-
-  struct pivot_dimension *dep_dim = pivot_dimension_create (
-    table, PIVOT_AXIS_ROW, N_("Dependent Variables"));
-
-  const size_t n_dims = table->n_dimensions;
-  size_t *indexes = xnmalloc (n_dims, sizeof *indexes);
-
-  const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
-  for (size_t v = 0; v < cmd->n_dep_vars; v++)
-    {
-      indexes[n_dims - 1] = pivot_category_create_leaf (
-        dep_dim->root, pivot_value_new_variable (cmd->dep_vars[v]));
-
-      for (size_t cat = 0; cat < n_cats; cat++)
-        {
-          /* Locate the category along each interaction dimension. */
-          for (size_t k = 0; k < iact->n_vars; k++)
-            {
-              int value_idx = categoricals_get_value_index_by_category_real (
-                cmd->cats, iact_idx, cat, k);
-              indexes[n_dims - 2 - k] = value_idx;
-            }
-
-          const struct exploratory_stats *all
-            = categoricals_get_user_data_by_category_real (cmd->cats,
-                                                           iact_idx, cat);
-          const struct exploratory_stats *stats = &all[v];
-
-          /* Leaf 0 of "Extreme" is "Highest", leaf 1 is "Lowest". */
-          const struct extremity *ends[2] = { stats->maxima, stats->minima };
-
-          for (int rank = 0; rank < cmd->disp_extremes; rank++)
-            {
-              indexes[1] = rank;
-
-              for (size_t end = 0; end < 2; end++)
-                {
-                  const struct extremity *extremity = &ends[end][rank];
-                  indexes[2] = end;
-
-                  /* Identity of the case. */
-                  struct pivot_value *identity;
-                  if (cmd->id_var)
-                    identity = new_value_with_missing_footnote (
-                      cmd->id_var, &extremity->identity, missing_footnote);
-                  else
-                    identity = pivot_value_new_integer (extremity->identity.f);
-                  indexes[0] = 0;
-                  pivot_table_put (table, indexes, n_dims, identity);
-
-                  /* Value of the dependent variable. */
-                  union value val = { .f = extremity->val };
-                  indexes[0] = 1;
-                  pivot_table_put (
-                    table, indexes, n_dims,
-                    new_value_with_missing_footnote (cmd->dep_vars[v], &val,
-                                                     missing_footnote));
-                }
-            }
-        }
-    }
-  free (indexes);
-
-  pivot_table_submit (table);
-}
-
-
-/* Builds and submits the "Case Processing Summary" pivot table for
-   interaction IACT_IDX.
-
-   Dimension 0 (columns) holds N and Percent; dimension 1 (columns)
-   separates valid, missing and total cases.  The interaction's variables
-   and then the dependent variables supply the row dimensions.
-   Percentages are taken relative to the sum of the missing and
-   non-missing weights. */
-static void
-summary_report (const struct examine *cmd, int iact_idx)
-{
-  struct pivot_table *table = pivot_table_create (
-    N_("Case Processing Summary"));
-  pivot_table_set_weight_var (table, dict_get_weight (cmd->dict));
-
-  pivot_dimension_create (table, PIVOT_AXIS_COLUMN, N_("Statistics"),
-                          N_("N"), PIVOT_RC_COUNT,
-                          N_("Percent"), PIVOT_RC_PERCENT);
-  struct pivot_dimension *cases_dim = pivot_dimension_create (
-    table, PIVOT_AXIS_COLUMN, N_("Cases"), N_("Valid"), N_("Missing"),
-    N_("Total"));
-  cases_dim->root->show_label = true;
-
-  const struct interaction *iact = cmd->iacts[iact_idx];
-  struct pivot_footnote *missing_footnote = create_missing_footnote (table);
-  create_interaction_dimensions (table, cmd->cats, iact, missing_footnote);
-
-  struct pivot_dimension *dep_dim = pivot_dimension_create (
-    table, PIVOT_AXIS_ROW, N_("Dependent Variables"));
-
-  const size_t n_dims = table->n_dimensions;
-  size_t *indexes = xnmalloc (n_dims, sizeof *indexes);
-
-  const size_t n_cats = categoricals_n_count (cmd->cats, iact_idx);
-  for (size_t v = 0; v < cmd->n_dep_vars; v++)
-    {
-      indexes[n_dims - 1] = pivot_category_create_leaf (
-        dep_dim->root, pivot_value_new_variable (cmd->dep_vars[v]));
-
-      for (size_t cat = 0; cat < n_cats; cat++)
-        {
-          /* Locate the category along each interaction dimension. */
-          for (size_t k = 0; k < iact->n_vars; k++)
-            {
-              int value_idx = categoricals_get_value_index_by_category_real (
-                cmd->cats, iact_idx, cat, k);
-              indexes[n_dims - 2 - k] = value_idx;
-            }
-
-          const struct exploratory_stats *all
-            = categoricals_get_user_data_by_category_real (cmd->cats,
-                                                           iact_idx, cat);
-          const double valid = all[v].non_missing;
-          const double missing = all[v].missing;
-          const double total = missing + valid;
-
-          /* Cells to fill, as (column in "Statistics", column in "Cases",
-             value) triples. */
-          const struct
-            {
-              int stat_idx;
-              int case_idx;
-              double x;
-            }
-          cells[] = {
-            { 0, 0, valid },
-            { 1, 0, 100.0 * valid / total },
-            { 0, 1, missing },
-            { 1, 1, 100.0 * missing / total },
-            { 0, 2, total },
-            { 1, 2, 100.0 },
-          };
-          const size_t n_cells = sizeof cells / sizeof *cells;
-
-          for (size_t k = 0; k < n_cells; k++)
-            {
-              indexes[0] = cells[k].stat_idx;
-              indexes[1] = cells[k].case_idx;
-              pivot_table_put (table, indexes, n_dims,
-                               pivot_value_new_number (cells[k].x));
-            }
-        }
-    }
-
-  free (indexes);
-
-  pivot_table_submit (table);
-}
-
-/* Attempts to parse an interaction from LEXER: a variable from EX's
-   dictionary, optionally followed by any number of "BY variable" pairs
-   and then an optional comma.
-
-   Returns the new interaction.  Returns NULL if no variable is found at
-   the start or after a BY; in the latter case the partly built
-   interaction is destroyed first. */
-static struct interaction *
-parse_interaction (struct lexer *lexer, struct examine *ex)
-{
-  const struct variable *var;
-  if (!lex_match_variable (lexer, ex->dict, &var))
-    return NULL;
-
-  struct interaction *result = interaction_create (var);
-  for (;;)
-    {
-      if (!lex_match (lexer, T_BY))
-        break;
-
-      if (lex_match_variable (lexer, ex->dict, &var))
-        interaction_add_variable (result, var);
-      else
-        {
-          interaction_destroy (result);
-          return NULL;
-        }
-    }
-
-  lex_match (lexer, T_COMMA);
-  return result;
-}
-
-
-/* Allocates, from the pool of the struct examine passed as AUX1, an array
-   of exploratory_stats with one element per dependent variable, and
-   returns it.
-
-   Each element receives a sorting casewriter for cases of the ex_proto
-   prototype, ordered ascending on value index 0, along with a moments
-   accumulator created with MOMENT_KURTOSIS.  The running minimum,
-   maximum and minimum weight start at their extreme values so that the
-   first case replaces them. */
-static void *
-create_n (const void *aux1, void *aux2 UNUSED)
-{
-  const struct examine *examine = aux1;
-  const size_t n_vars = examine->n_dep_vars;
-
-  struct exploratory_stats *es = pool_calloc (examine->pool, n_vars,
-                                              sizeof *es);
-
-  struct subcase ordering;
-  subcase_init (&ordering, 0, 0, SC_ASCEND);
-
-  for (size_t i = 0; i < n_vars; i++)
-    {
-      struct exploratory_stats *stats = &es[i];
-
-      stats->sorted_writer = sort_create_writer (&ordering,
-                                                 examine->ex_proto);
-      stats->sorted_reader = NULL;
-
-      stats->mom = moments_create (MOMENT_KURTOSIS);
-
-      stats->cmin = DBL_MAX;
-      stats->maximum = -DBL_MAX;
-      stats->minimum = DBL_MAX;
-    }
-
-  subcase_uninit (&ordering);
-  return es;
-}
-
-/* Adds case C, with weight WEIGHT, to the statistics array USER_DATA that
-   create_n() made.  AUX1 is the struct examine.
-
-   A dependent value is rejected when its missing-value class intersects
-   dep_excl, and WEIGHT is then added to that variable's missing total.
-   Unless missing_pw is set, a single rejected value causes the whole
-   case to be dropped for every dependent variable.
-
-   Each accepted value updates the variable's minimum, maximum, weight
-   totals and first-pass moments, and is written to the variable's sorting
-   writer together with the case's identity and weight. */
-static void
-update_n (const void *aux1, void *aux2 UNUSED, void *user_data,
-          const struct ccase *c, double weight)
-{
-  const struct examine *examine = aux1;
-  struct exploratory_stats *es = user_data;
-
-  /* LISTWISE missing must be dealt with here */
-  if (!examine->missing_pw)
-    {
-      bool drop_case = false;
-      for (size_t i = 0; i < examine->n_dep_vars; i++)
-        {
-          const struct variable *var = examine->dep_vars[i];
-
-          if (var_is_value_missing (var, case_data (c, var))
-              & examine->dep_excl)
-            {
-              es[i].missing += weight;
-              drop_case = true;
-            }
-        }
-
-      if (drop_case)
-        return;
-    }
-
-  for (size_t i = 0; i < examine->n_dep_vars; i++)
-    {
-      const struct variable *var = examine->dep_vars[i];
-      struct exploratory_stats *stats = &es[i];
-      const double x = case_num (c, var);
-
-      if (var_is_value_missing (var, case_data (c, var)) & examine->dep_excl)
-        {
-          stats->missing += weight;
-          continue;
-        }
-
-      /* Running totals. */
-      if (stats->maximum < x)
-        stats->maximum = x;
-      if (stats->minimum > x)
-        stats->minimum = x;
-      stats->non_missing += weight;
-      stats->cc += weight;
-      if (weight < stats->cmin)
-        stats->cmin = weight;
-
-      moments_pass_one (stats->mom, x, weight);
-
-      /* Save the value, the ID and the weight to the writer */
-      assert (examine->id_idx != -1);
-      struct ccase *outcase = case_create (examine->ex_proto);
-      *case_num_rw_idx (outcase, EX_VAL) = x;
-      value_copy (case_data_rw_idx (outcase, EX_ID),
-                  case_data_idx (c, examine->id_idx), examine->id_width);
-      *case_num_rw_idx (outcase, EX_WT) = weight;
-
-      casewriter_write (stats->sorted_writer, outcase);
-    }
-}
-/* Finishes the statistics array USER_DATA (made by create_n() and filled
-   by update_n()) once all cases have been seen.  AUX1 is the struct
-   examine.
-
-   For every dependent variable this turns the sorting writer into a
-   reader, makes one pass over the sorted cases to complete the moments,
-   fill the histogram (if requested) and record the extremities, and then
-   accumulates the order statistics.  The box-and-whisker, Shapiro-Wilk
-   and NP data are accumulated only when a plot that needs them was
-   requested. */
-static void
-calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data)
-{
- int v;
- const struct examine *examine = aux1;
- struct exploratory_stats *es = user_data;
-
- for (v = 0; v < examine->n_dep_vars; v++)
- {
- int i;
- casenumber imin = 0; /* number of cases read so far, up to calc_extremes */
- casenumber imax; /* number of cases still to come after the current one */
- struct casereader *reader;
- struct ccase *c;
-
- if (examine->plot_histogram && es[v].non_missing > 0)
- {
- /* Sturges Rule */
- double bin_width = fabs (es[v].minimum - es[v].maximum)
- / (1 + log2 (es[v].cc));
-
- es[v].histogram =
- histogram_create (bin_width, es[v].minimum, es[v].maximum);
- }
-
- es[v].sorted_reader = casewriter_make_reader (es[v].sorted_writer);
- es[v].sorted_writer = NULL; /* the writer is not used again after a reader is made from it */
-
- imax = casereader_get_n_cases (es[v].sorted_reader);
-
- es[v].maxima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].maxima));
- es[v].minima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].minima));
- for (i = 0; i < examine->calc_extremes; ++i)
- {
- value_init_pool (examine->pool, &es[v].maxima[i].identity, examine->id_width);
- value_init_pool (examine->pool, &es[v].minima[i].identity, examine->id_width);
- }
-
- bool warn = true;
- for (reader = casereader_clone (es[v].sorted_reader);
- (c = casereader_read (reader)) != NULL; case_unref (c))
- {
- const double val = case_num_idx (c, EX_VAL);
- double wt = case_num_idx (c, EX_WT);
- wt = var_force_valid_weight (examine->wv, wt, &warn);
-
- moments_pass_two (es[v].mom, val, wt);
-
- if (es[v].histogram)
- histogram_add (es[v].histogram, val, wt);
-
- if (imin < examine->calc_extremes) /* still among the first calc_extremes cases */
- {
- int x;
- for (x = imin; x < examine->calc_extremes; ++x) /* fills slot imin and every later slot; later cases overwrite the later slots */
- {
- struct extremity *min = &es[v].minima[x];
- min->val = val;
- value_copy (&min->identity, case_data_idx (c, EX_ID), examine->id_width);
- }
- imin ++;
- }
-
- imax --;
- if (imax < examine->calc_extremes) /* among the last calc_extremes cases */
- {
- int x;
-
- for (x = imax; x < imax + 1; ++x) /* runs at most once, for slot imax; the final case lands in maxima[0] */
- {
- struct extremity *max;
-
- if (x >= examine->calc_extremes)
- break;
-
- max = &es[v].maxima[x];
- max->val = val;
- value_copy (&max->identity, case_data_idx (c, EX_ID), examine->id_width);
- }
- }
- }
- casereader_destroy (reader);
-
- if (examine->calc_extremes > 0 && es[v].non_missing > 0)
- {
- assert (es[v].minima[0].val == es[v].minimum); /* cross-check against the values tracked by update_n() */
- assert (es[v].maxima[0].val == es[v].maximum);
- }
-
- {
- const int n_os = 5 + examine->n_percentiles; /* trimmed mean, three quartiles, hinges, then the percentiles */
- es[v].percentiles = pool_calloc (examine->pool, examine->n_percentiles, sizeof (*es[v].percentiles));
-
- es[v].trimmed_mean = trimmed_mean_create (es[v].cc, 0.05);
- es[v].shapiro_wilk = NULL;
-
- struct order_stats **os = XCALLOC (n_os, struct order_stats *);
- os[0] = &es[v].trimmed_mean->parent;
-
- es[v].quartiles[0] = percentile_create (0.25, es[v].cc);
- es[v].quartiles[1] = percentile_create (0.5, es[v].cc);
- es[v].quartiles[2] = percentile_create (0.75, es[v].cc);
-
- os[1] = &es[v].quartiles[0]->parent;
- os[2] = &es[v].quartiles[1]->parent;
- os[3] = &es[v].quartiles[2]->parent;
-
- es[v].hinges = tukey_hinges_create (es[v].cc, es[v].cmin);
- os[4] = &es[v].hinges->parent;
-
- for (i = 0; i < examine->n_percentiles; ++i)
- {
- es[v].percentiles[i] = percentile_create (examine->ptiles[i] / 100.00, es[v].cc); /* ptiles are in percent */
- os[5 + i] = &es[v].percentiles[i]->parent;
- }
-
- order_stats_accumulate_idx (os, n_os,
- casereader_clone (es[v].sorted_reader),
- EX_WT, EX_VAL);
-
- free (os); /* frees only the pointer array; the statistics stay in es[v] */
- }
-
- if (examine->plot_boxplot)
- {
- struct order_stats *os;
-
- es[v].box_whisker = box_whisker_create (es[v].hinges,
- EX_ID, examine->id_var);
-
- os = &es[v].box_whisker->parent;
- order_stats_accumulate_idx (&os, 1,
- casereader_clone (es[v].sorted_reader),
- EX_WT, EX_VAL);
- }
-
- if (examine->plot_boxplot || examine->plot_histogram
- || examine->plot_npplot || examine->plot_spreadlevel)
- {
- double mean;
-
- moments_calculate (es[v].mom, NULL, &mean, NULL, NULL, NULL);
-
- es[v].shapiro_wilk = shapiro_wilk_create (es[v].non_missing, mean);
-
- if (es[v].shapiro_wilk) /* shapiro_wilk_create() may return NULL; then no test is accumulated */
- {
- struct order_stats *os = &es[v].shapiro_wilk->parent;
- order_stats_accumulate_idx (&os, 1,
- casereader_clone (es[v].sorted_reader),
- EX_WT, EX_VAL);
- }
- }
-
- if (examine->plot_npplot)
- {
- double n, mean, var;
- struct order_stats *os;
-
- moments_calculate (es[v].mom, &n, &mean, &var, NULL, NULL);
-
- es[v].np = np_create (n, mean, var);
-
- os = &es[v].np->parent;
-
- order_stats_accumulate_idx (&os, 1,
- casereader_clone (es[v].sorted_reader),
- EX_WT, EX_VAL);
- }
-
- }
-}
-
-/* Releases the resources held by the exploratory statistics of every
-   dependent variable, in every category of every interaction of CMD:
-   the order statistics, the histogram, the moments and the sorted case
-   reader.
-
-   The hinges, quartiles, percentiles and trimmed mean are destroyed
-   unconditionally.  The Shapiro-Wilk, normal-probability, histogram and
-   box-whisker statistics are destroyed only when their pointer is
-   non-null, since they exist only when the corresponding output was
-   requested. */
-static void
-cleanup_exploratory_stats (struct examine *cmd)
-{
-  for (int i = 0; i < cmd->n_iacts; ++i)
-    {
-      const size_t n_cats = categoricals_n_count (cmd->cats, i);
-
-      for (int v = 0; v < cmd->n_dep_vars; ++v)
-        {
-          for (size_t grp = 0; grp < n_cats; ++grp)
-            {
-              const struct exploratory_stats *es =
-                categoricals_get_user_data_by_category_real (cmd->cats, i, grp);
-              struct statistic *stat;
-
-              stat = &es[v].hinges->parent.parent;
-              stat->destroy (stat);
-
-              for (int q = 0; q < 3; q++)
-                {
-                  stat = &es[v].quartiles[q]->parent.parent;
-                  stat->destroy (stat);
-                }
-
-              for (int q = 0; q < cmd->n_percentiles; q++)
-                {
-                  stat = &es[v].percentiles[q]->parent.parent;
-                  stat->destroy (stat);
-                }
-
-              if (es[v].shapiro_wilk)
-                {
-                  stat = &es[v].shapiro_wilk->parent.parent;
-                  stat->destroy (stat);
-                }
-
-              stat = &es[v].trimmed_mean->parent.parent;
-              stat->destroy (stat);
-
-              /* Test the owning pointer itself.  Taking the address of a
-                 member through a null pointer and comparing that against
-                 NULL only works by accident when the member happens to
-                 sit at offset 0. */
-              if (es[v].np)
-                {
-                  stat = &es[v].np->parent.parent;
-                  stat->destroy (stat);
-                }
-
-              if (es[v].histogram)
-                statistic_destroy (&es[v].histogram->parent);
-              moments_destroy (es[v].mom);
-
-              if (es[v].box_whisker)
-                {
-                  stat = &es[v].box_whisker->parent.parent;
-                  stat->destroy (stat);
-                }
-
-              casereader_destroy (es[v].sorted_reader);
-            }
-        }
-    }
-}
-
-
-/* Performs the analysis described by CMD on the cases in INPUT and emits
-   the requested reports and charts.
-
-   INPUT is consumed: every case is read from it and it is destroyed
-   before the reports are produced.  The categoricals object created here
-   is stored in CMD->cats for the duration of the call and destroyed
-   before returning. */
-static void
-run_examine (struct examine *cmd, struct casereader *input)
-{
-  struct payload payload = {
-    .create = create_n,
-    .update = update_n,
-    .calculate = calculate_n,
-    .destroy = NULL,
-  };
-
-  cmd->wv = dict_get_weight (cmd->dict);
-  cmd->cats = categoricals_create (cmd->iacts, cmd->n_iacts,
-                                   cmd->wv, cmd->fctr_excl);
-  categoricals_set_payload (cmd->cats, &payload, cmd, NULL);
-
-  if (cmd->id_var == NULL)
-    {
-      /* No ID variable was given.  Record the number of values in a case
-         as the ID index and wrap INPUT in an arithmetic-sequence reader
-         (presumably this appends a case number at that index -- confirm
-         against casereader_create_arithmetic_sequence). */
-      struct ccase *first = casereader_peek (input, 0);
-
-      cmd->id_idx = case_get_n_values (first);
-      input = casereader_create_arithmetic_sequence (input, 1.0, 1.0);
-
-      case_unref (first);
-    }
-
-  /* Feed every case to the categoricals, then finalize them. */
-  struct ccase *c;
-  while ((c = casereader_read (input)) != NULL)
-    {
-      categoricals_update (cmd->cats, c);
-      case_unref (c);
-    }
-  casereader_destroy (input);
-  categoricals_done (cmd->cats);
-
-  const bool any_plot = (cmd->plot_histogram || cmd->plot_npplot
-                         || cmd->plot_spreadlevel || cmd->plot_boxplot);
-
-  for (int i = 0; i < cmd->n_iacts; ++i)
-    {
-      summary_report (cmd, i);
-
-      /* Only the summary is produced for an interaction without
-         categories. */
-      if (categoricals_n_count (cmd->cats, i) == 0)
-        continue;
-
-      if (cmd->disp_extremes > 0)
-        extremes_report (cmd, i);
-
-      if (cmd->n_percentiles > 0)
-        percentiles_report (cmd, i);
-
-      if (cmd->plot_boxplot)
-        {
-          if (cmd->boxplot_mode == BP_GROUPS)
-            show_boxplot_grouped (cmd, i);
-          else if (cmd->boxplot_mode == BP_VARIABLES)
-            show_boxplot_variabled (cmd, i);
-          else
-            NOT_REACHED ();
-        }
-
-      if (cmd->plot_histogram)
-        show_histogram (cmd, i);
-
-      if (cmd->plot_npplot)
-        show_npplot (cmd, i);
-
-      if (cmd->plot_spreadlevel)
-        show_spreadlevel (cmd, i);
-
-      if (cmd->descriptives)
-        descriptives_report (cmd, i);
-
-      if (any_plot)
-        normality_report (cmd, i);
-    }
-
-  cleanup_exploratory_stats (cmd);
-  categoricals_destroy (cmd->cats);
-}
-
-/* Appends IACT to the array of interactions in EXAMINE.  The array is
-   grown from EXAMINE's pool whenever it is full; *ALLOCATED_IACTS holds
-   its current capacity and is updated when it grows. */
-static void
-add_interaction (struct examine *examine, struct interaction *iact,
-                 size_t *allocated_iacts)
-{
-  const bool full = examine->n_iacts >= *allocated_iacts;
-
-  if (full)
-    {
-      examine->iacts = pool_2nrealloc (examine->pool, examine->iacts,
-                                       allocated_iacts,
-                                       sizeof *examine->iacts);
-    }
-
-  examine->iacts[examine->n_iacts] = iact;
-  examine->n_iacts++;
-}
-
-/* Parses and executes the EXAMINE command.
-
-   The syntax accepted is an optional "/VARIABLES=", a list of numeric
-   dependent variables, an optional BY clause of interactions, and then
-   any number of the subcommands STATISTICS, PERCENTILES, TOTAL, NOTOTAL,
-   MISSING, COMPARE, PLOT, CINTERVAL and ID.  After parsing, the analysis
-   is run once per split-file group of the active dataset DS.
-
-   Returns CMD_SUCCESS if the command was parsed and the data pass
-   completed without error, otherwise CMD_FAILURE. */
-int
-cmd_examine (struct lexer *lexer, struct dataset *ds)
-{
-  /* Stays CMD_FAILURE unless the whole command completes successfully. */
-  int result = CMD_FAILURE;
-
-  bool nototals_seen = false;
-  bool totals_seen = false;
-
-  bool percentiles_seen = false;
-
-  size_t allocated_iacts = 0;
-  struct examine examine = {
-    .pool = pool_create (),
-    .dict = dataset_dict (ds),
-
-    .conf = 0.95,
-    .pc_alg = PC_HAVERAGE,
-    .id_idx = -1,
-    .boxplot_mode = BP_GROUPS,
-
-    .ex_proto = caseproto_create (),
-
-    .dep_excl = MV_ANY,
-    .fctr_excl = MV_ANY,
-  };
-
-  /* Allocate space for the first interaction.
-     This interaction is an empty one (for the totals).
-     If no totals are requested, we will simply ignore this
-     interaction.
-  */
-  add_interaction (&examine, interaction_create (NULL), &allocated_iacts);
-
-  /* Accept an optional, completely pointless "/VARIABLES=" */
-  lex_match (lexer, T_SLASH);
-  if (lex_match_id (lexer, "VARIABLES") && !lex_force_match (lexer, T_EQUALS))
-    goto done;
-
-  if (!parse_variables_const (lexer, examine.dict,
-                              &examine.dep_vars, &examine.n_dep_vars,
-                              PV_NO_DUPLICATE | PV_NUMERIC))
-    goto done;
-
-  if (lex_match (lexer, T_BY))
-    {
-      for (;;)
-        {
-          struct interaction *iact = parse_interaction (lexer, &examine);
-          if (!iact)
-            break;
-
-          add_interaction (&examine, iact, &allocated_iacts);
-        }
-    }
-
-  /* Token offset of the NOTOTAL keyword, kept for error reporting. */
-  int nototals_ofs = 0;
-  while (lex_token (lexer) != T_ENDCMD)
-    {
-      lex_match (lexer, T_SLASH);
-
-      if (lex_match_id (lexer, "STATISTICS"))
-        {
-          lex_match (lexer, T_EQUALS);
-
-          while (lex_token (lexer) != T_ENDCMD
-                 && lex_token (lexer) != T_SLASH)
-            {
-              if (lex_match_id (lexer, "DESCRIPTIVES"))
-                examine.descriptives = true;
-              else if (lex_match_id (lexer, "EXTREME"))
-                {
-                  /* Number of extreme values shown when no count is
-                     given in parentheses. */
-                  int extr = 5;
-                  if (lex_match (lexer, T_LPAREN))
-                    {
-                      if (!lex_force_int_range (lexer, "EXTREME", 0, INT_MAX))
-                        goto done;
-                      extr = lex_integer (lexer);
-
-                      lex_get (lexer);
-                      if (!lex_force_match (lexer, T_RPAREN))
-                        goto done;
-                    }
-                  examine.disp_extremes = extr;
-                }
-              else if (lex_match_id (lexer, "NONE"))
-                {
-                }
-              else if (lex_match (lexer, T_ALL))
-                {
-                  /* NOTE(review): ALL only enables the extremes table
-                     here; it does not set examine.descriptives.
-                     Confirm that this is intended. */
-                  if (examine.disp_extremes == 0)
-                    examine.disp_extremes = 5;
-                }
-              else
-                {
-                  lex_error_expecting (lexer, "DESCRIPTIVES", "EXTREME",
-                                       "NONE", "ALL");
-                  goto done;
-                }
-            }
-        }
-      else if (lex_match_id (lexer, "PERCENTILES"))
-        {
-          percentiles_seen = true;
-          if (lex_match (lexer, T_LPAREN))
-            {
-              size_t allocated_percentiles = examine.n_percentiles;
-              while (lex_is_number (lexer))
-                {
-                  if (!lex_force_num_range_open (lexer, "PERCENTILES", 0, 100))
-                    goto done;
-                  double p = lex_number (lexer);
-
-                  if (examine.n_percentiles >= allocated_percentiles)
-                    examine.ptiles = x2nrealloc (examine.ptiles,
-                                                 &allocated_percentiles,
-                                                 sizeof *examine.ptiles);
-                  examine.ptiles[examine.n_percentiles++] = p;
-
-                  lex_get (lexer);
-                  lex_match (lexer, T_COMMA);
-                }
-              if (!lex_force_match (lexer, T_RPAREN))
-                goto done;
-            }
-
-          lex_match (lexer, T_EQUALS);
-
-          while (lex_token (lexer) != T_ENDCMD
-                 && lex_token (lexer) != T_SLASH)
-            {
-              if (lex_match_id (lexer, "HAVERAGE"))
-                examine.pc_alg = PC_HAVERAGE;
-              else if (lex_match_id (lexer, "WAVERAGE"))
-                examine.pc_alg = PC_WAVERAGE;
-              else if (lex_match_id (lexer, "ROUND"))
-                examine.pc_alg = PC_ROUND;
-              else if (lex_match_id (lexer, "EMPIRICAL"))
-                examine.pc_alg = PC_EMPIRICAL;
-              else if (lex_match_id (lexer, "AEMPIRICAL"))
-                examine.pc_alg = PC_AEMPIRICAL;
-              else if (lex_match_id (lexer, "NONE"))
-                examine.pc_alg = PC_NONE;
-              else
-                {
-                  lex_error_expecting (lexer, "HAVERAGE", "WAVERAGE",
-                                       "ROUND", "EMPIRICAL", "AEMPIRICAL",
-                                       "NONE");
-                  goto done;
-                }
-            }
-        }
-      else if (lex_match_id (lexer, "TOTAL"))
-        totals_seen = true;
-      else if (lex_match_id (lexer, "NOTOTAL"))
-        {
-          nototals_seen = true;
-          nototals_ofs = lex_ofs (lexer) - 1;
-        }
-      else if (lex_match_id (lexer, "MISSING"))
-        {
-          lex_match (lexer, T_EQUALS);
-
-          while (lex_token (lexer) != T_ENDCMD
-                 && lex_token (lexer) != T_SLASH)
-            {
-              if (lex_match_id (lexer, "LISTWISE"))
-                examine.missing_pw = false;
-              else if (lex_match_id (lexer, "PAIRWISE"))
-                examine.missing_pw = true;
-              else if (lex_match_id (lexer, "EXCLUDE"))
-                examine.dep_excl = MV_ANY;
-              else if (lex_match_id (lexer, "INCLUDE"))
-                examine.dep_excl = MV_SYSTEM;
-              else if (lex_match_id (lexer, "REPORT"))
-                examine.fctr_excl = 0;
-              else if (lex_match_id (lexer, "NOREPORT"))
-                examine.fctr_excl = MV_ANY;
-              else
-                {
-                  lex_error_expecting (lexer, "LISTWISE", "PAIRWISE",
-                                       "EXCLUDE", "INCLUDE", "REPORT",
-                                       "NOREPORT");
-                  goto done;
-                }
-            }
-        }
-      else if (lex_match_id (lexer, "COMPARE"))
-        {
-          lex_match (lexer, T_EQUALS);
-          if (lex_match_id (lexer, "VARIABLES"))
-            examine.boxplot_mode = BP_VARIABLES;
-          else if (lex_match_id (lexer, "GROUPS"))
-            examine.boxplot_mode = BP_GROUPS;
-          else
-            {
-              lex_error_expecting (lexer, "VARIABLES", "GROUPS");
-              goto done;
-            }
-        }
-      else if (lex_match_id (lexer, "PLOT"))
-        {
-          lex_match (lexer, T_EQUALS);
-
-          while (lex_token (lexer) != T_ENDCMD
-                 && lex_token (lexer) != T_SLASH)
-            {
-              if (lex_match_id (lexer, "BOXPLOT"))
-                examine.plot_boxplot = true;
-              else if (lex_match_id (lexer, "NPPLOT"))
-                examine.plot_npplot = true;
-              else if (lex_match_id (lexer, "HISTOGRAM"))
-                examine.plot_histogram = true;
-              else if (lex_match_id (lexer, "SPREADLEVEL"))
-                {
-                  examine.plot_spreadlevel = true;
-                  examine.sl_power = 0;
-                  if (lex_match (lexer, T_LPAREN))
-                    {
-                      /* Once the parenthesis is open a number is
-                         mandatory.  Fail right away instead of carrying
-                         on after the error has been reported. */
-                      if (!lex_force_num (lexer))
-                        goto done;
-                      examine.sl_power = lex_number (lexer);
-
-                      lex_get (lexer);
-                      if (!lex_force_match (lexer, T_RPAREN))
-                        goto done;
-                    }
-                }
-              else if (lex_match_id (lexer, "NONE"))
-                examine.plot_boxplot = examine.plot_npplot
-                  = examine.plot_histogram = examine.plot_spreadlevel = false;
-              else if (lex_match (lexer, T_ALL))
-                examine.plot_boxplot = examine.plot_npplot
-                  = examine.plot_histogram = examine.plot_spreadlevel = true;
-              else
-                {
-                  lex_error_expecting (lexer, "BOXPLOT", "NPPLOT",
-                                       "HISTOGRAM", "SPREADLEVEL",
-                                       "NONE", "ALL");
-                  goto done;
-                }
-              lex_match (lexer, T_COMMA);
-            }
-        }
-      else if (lex_match_id (lexer, "CINTERVAL"))
-        {
-          if (!lex_force_num (lexer))
-            goto done;
-
-          examine.conf = lex_number (lexer);
-          lex_get (lexer);
-        }
-      else if (lex_match_id (lexer, "ID"))
-        {
-          lex_match (lexer, T_EQUALS);
-
-          examine.id_var = parse_variable_const (lexer, examine.dict);
-          if (!examine.id_var)
-            goto done;
-        }
-      else
-        {
-          lex_error_expecting (lexer, "STATISTICS", "PERCENTILES",
-                               "TOTAL", "NOTOTAL", "MISSING", "COMPARE",
-                               "PLOT", "CINTERVAL", "ID");
-          goto done;
-        }
-    }
-
-
-  if (totals_seen && nototals_seen)
-    {
-      lex_ofs_error (lexer, nototals_ofs, nototals_ofs,
-                     _("%s and %s are mutually exclusive."),
-                     "TOTAL", "NOTOTAL");
-      goto done;
-    }
-
-  /* The totals interaction is dropped only if NOTOTAL was given and
-     there is at least one other interaction.  With no factors in the
-     analysis the totals are always kept. */
-  if (nototals_seen && examine.n_iacts > 1)
-    {
-      interaction_destroy (examine.iacts[0]);
-      /* The array itself was allocated from examine.pool (see
-         add_interaction) and is never freed through this pointer, so
-         stepping past the first element is safe. */
-      examine.iacts++;
-      examine.n_iacts--;
-    }
-
-  if (examine.id_var)
-    {
-      examine.id_idx = var_get_case_index (examine.id_var);
-      examine.id_width = var_get_width (examine.id_var);
-    }
-
-  examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* value */
-  examine.ex_proto = caseproto_add_width (examine.ex_proto, examine.id_width); /* id */
-  examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* weight */
-
-  if (examine.disp_extremes > 0)
-    examine.calc_extremes = examine.disp_extremes;
-
-  if (examine.descriptives && examine.calc_extremes == 0)
-    {
-      /* Descriptives always displays the max and min */
-      examine.calc_extremes = 1;
-    }
-
-  if (percentiles_seen && examine.n_percentiles == 0)
-    {
-      /* PERCENTILES was given without an explicit list: use the default
-         set of percentiles. */
-      examine.n_percentiles = 7;
-      examine.ptiles = xmalloc (examine.n_percentiles * sizeof *examine.ptiles);
-
-      examine.ptiles[0] = 5;
-      examine.ptiles[1] = 10;
-      examine.ptiles[2] = 25;
-      examine.ptiles[3] = 50;
-      examine.ptiles[4] = 75;
-      examine.ptiles[5] = 90;
-      examine.ptiles[6] = 95;
-    }
-
-  assert (examine.calc_extremes >= examine.disp_extremes);
-
-  struct casegrouper *grouper = casegrouper_create_splits (proc_open (ds), examine.dict);
-  struct casereader *group;
-  while (casegrouper_get_next_group (grouper, &group))
-    run_examine (&examine, group);
-  bool ok = casegrouper_destroy (grouper);
-  ok = proc_commit (ds) && ok;
-
-  /* Report a failure of the data pass to the caller instead of silently
-     discarding it. */
-  result = ok ? CMD_SUCCESS : CMD_FAILURE;
-
- done:
-  /* Common exit: release everything owned by EXAMINE, whether parsing
-     failed or the command ran to completion. */
-  caseproto_unref (examine.ex_proto);
-  for (size_t i = 0; i < examine.n_iacts; ++i)
-    interaction_destroy (examine.iacts[i]);
-  free (examine.ptiles);
-  free (examine.dep_vars);
-  pool_destroy (examine.pool);
-
-  return result;
-}