-/* PSPP - EXAMINE data for normality . -*-c-*-
+/* PSPP - a program for statistical analysis.
+ Copyright (C) 2004, 2008, 2009, 2010 Free Software Foundation, Inc.
-Copyright (C) 2004 Free Software Foundation, Inc.
-Author: John Darrington 2004
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General Public License as
-published by the Free Software Foundation; either version 2 of the
-License, or (at your option) any later version.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-02110-1301, USA. */
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
+
#include <gsl/gsl_cdf.h>
-#include <libpspp/message.h>
+#include <math.h>
#include <stdio.h>
#include <stdlib.h>
-#include <math.h>
-#include <libpspp/alloc.h>
-#include <libpspp/str.h>
+
+#include <math/sort.h>
+#include <math/order-stats.h>
+#include <math/percentiles.h>
+#include <math/tukey-hinges.h>
+#include <math/box-whisker.h>
+#include <math/trimmed-mean.h>
+#include <math/extrema.h>
+#include <math/np.h>
#include <data/case.h>
+#include <data/casegrouper.h>
+#include <data/casereader.h>
+#include <data/casewriter.h>
#include <data/dictionary.h>
+#include <data/procedure.h>
+#include <data/subcase.h>
+#include <data/value-labels.h>
+#include <data/variable.h>
#include <language/command.h>
-#include <libpspp/compiler.h>
+#include <language/dictionary/split-file.h>
#include <language/lexer/lexer.h>
+#include <libpspp/compiler.h>
#include <libpspp/message.h>
-#include <libpspp/magic.h>
#include <libpspp/misc.h>
-#include <output/table.h>
-#include <output/manager.h>
-#include <data/value-labels.h>
-#include <data/variable.h>
-#include <procedure.h>
-#include <libpspp/hash.h>
-#include <data/casefile.h>
-#include <math/factor-stats.h>
+#include <libpspp/str.h>
#include <math/moments.h>
-#include <math/percentiles.h>
-#include <output/charts/box-whisker.h>
-#include <output/charts/cartesian.h>
+#include <output/chart-item.h>
+#include <output/charts/boxplot.h>
+#include <output/charts/np-plot.h>
+#include <output/tab.h>
+
+#include "minmax.h"
+#include "xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
#define N_(msgid) msgid
/* (headers) */
-#include <output/chart.h>
#include <output/charts/plot-hist.h>
-#include <output/charts/plot-chart.h>
+#include <math/histogram.h>
/* (specification)
"EXAMINE" (xmn_):
*^variables=custom;
+total=custom;
+nototal=custom;
- +missing=miss:pairwise/!listwise,
+ missing=miss:pairwise/!listwise,
rep:report/!noreport,
incl:include/!exclude;
+compare=cmp:variables/!groups;
/* (functions) */
-
static struct cmd_examine cmd;
-static struct variable **dependent_vars;
-
+static const struct variable **dependent_vars;
static size_t n_dependent_vars;
+/* PERCENTILES */
+
+static subc_list_double percentile_list;
+static enum pc_alg percentile_algorithm;
-struct factor
+struct factor_metrics
{
- /* The independent variable */
- struct variable *indep_var[2];
+ struct moments1 *moments;
+ struct percentile **ptl;
+ size_t n_ptiles;
- /* Hash table of factor stats indexed by 2 values */
- struct hsh_table *fstats;
+ struct tukey_hinges *tukey_hinges;
+ struct box_whisker *box_whisker;
+ struct trimmed_mean *trimmed_mean;
+ struct histogram *histogram;
+ struct np *np;
- /* The hash table after it has been crunched */
- struct factor_statistics **fs;
+ /* Three quartiles indexing into PTL */
+ struct percentile **quartiles;
- struct factor *next;
+ /* A reader sorted in ASCENDING order */
+ struct casereader *up_reader;
-};
+ /* The minimum value of all the weights */
+ double cmin;
-/* Linked list of factors */
-static struct factor *factors=0;
+ /* Sum of all weights, including those for missing values */
+ double n;
-static struct metrics *totals=0;
+ /* Sum of weights of non_missing values */
+ double n_valid;
-/* Parse the clause specifying the factors */
-static int examine_parse_independent_vars(struct cmd_examine *cmd);
+ double mean;
+ double variance;
+ double skewness;
-/* Output functions */
-static void show_summary(struct variable **dependent_var, int n_dep_var,
- const struct factor *f);
+ double kurtosis;
+
+ double se_mean;
+
+ struct extrema *minima;
+ struct extrema *maxima;
+};
+
+struct factor_result
+{
+ struct ll ll;
-static void show_extremes(struct variable **dependent_var,
- int n_dep_var,
- const struct factor *factor,
- int n_extremities);
+ union value value[2];
-static void show_descriptives(struct variable **dependent_var,
- int n_dep_var,
- struct factor *factor);
+ /* An array of factor metrics, one for each variable */
+ struct factor_metrics *metrics;
+};
-static void show_percentiles(struct variable **dependent_var,
- int n_dep_var,
- struct factor *factor);
+struct xfactor
+{
+ /* We need to make a list of this structure */
+ struct ll ll;
+ /* The independent variable */
+ const struct variable const* indep_var[2];
+ /* A list of results for this factor */
+ struct ll_list result_list ;
+};
-void np_plot(const struct metrics *m, const char *factorname);
+static void
+factor_destroy (struct xfactor *fctr)
+{
+ struct ll *ll = ll_head (&fctr->result_list);
+ while (ll != ll_null (&fctr->result_list))
+ {
+ int v;
+ struct factor_result *result =
+ ll_data (ll, struct factor_result, ll);
+ int i;
+ for (v = 0; v < n_dependent_vars; ++v)
+ {
+ int i;
+ moments1_destroy (result->metrics[v].moments);
+ extrema_destroy (result->metrics[v].minima);
+ extrema_destroy (result->metrics[v].maxima);
+ statistic_destroy (&result->metrics[v].trimmed_mean->parent.parent);
+ statistic_destroy (&result->metrics[v].tukey_hinges->parent.parent);
+ statistic_destroy (&result->metrics[v].box_whisker->parent.parent);
+ statistic_destroy (&result->metrics[v].histogram->parent);
+ for (i = 0 ; i < result->metrics[v].n_ptiles; ++i)
+ statistic_destroy (&result->metrics[v].ptl[i]->parent.parent);
+ free (result->metrics[v].ptl);
+ free (result->metrics[v].quartiles);
+ casereader_destroy (result->metrics[v].up_reader);
+ }
-void box_plot_group(const struct factor *fctr,
- const struct variable **vars, int n_vars,
- const struct variable *id
- ) ;
+ for (i = 0; i < 2; i++)
+ if (fctr->indep_var[i])
+ value_destroy (&result->value[i],
+ var_get_width (fctr->indep_var[i]));
+ free (result->metrics);
+ ll = ll_next (ll);
+ free (result);
+ }
+}
+static struct xfactor level0_factor;
+static struct ll_list factor_list;
-void box_plot_variables(const struct factor *fctr,
- const struct variable **vars, int n_vars,
- const struct variable *id
- );
+/* Parse the clause specifying the factors */
+static int examine_parse_independent_vars (struct lexer *lexer,
+ const struct dictionary *dict,
+ struct cmd_examine *cmd);
+/* Output functions */
+static void show_summary (const struct variable **dependent_var, int n_dep_var,
+ const struct dictionary *dict,
+ const struct xfactor *f);
-/* Per Split function */
-static bool run_examine(const struct casefile *cf, void *cmd_);
+static void show_descriptives (const struct variable **dependent_var,
+ int n_dep_var,
+ const struct xfactor *f);
-static void output_examine(void);
+static void show_percentiles (const struct variable **dependent_var,
+ int n_dep_var,
+ const struct xfactor *f);
-void factor_calc(struct ccase *c, int case_no,
- double weight, int case_missing);
+static void show_extremes (const struct variable **dependent_var,
+ int n_dep_var,
+ const struct xfactor *f);
-/* Represent a factor as a string, so it can be
- printed in a human readable fashion */
-const char * factor_to_string(const struct factor *fctr,
- struct factor_statistics *fs,
- const struct variable *var);
-/* Represent a factor as a string, so it can be
- printed in a human readable fashion,
- but sacrificing some readablility for the sake of brevity */
-const char *factor_to_string_concise(const struct factor *fctr,
- struct factor_statistics *fs);
+/* Per Split function */
+static void run_examine (struct cmd_examine *, struct casereader *,
+ struct dataset *);
+static void output_examine (const struct dictionary *dict);
-/* Function to use for testing for missing values */
-static is_missing_func *value_is_missing;
+void factor_calc (const struct ccase *c, int case_no,
+ double weight, bool case_missing);
-/* PERCENTILES */
+/* Represent a factor as a string, so it can be
+ printed in a human readable fashion */
+static void factor_to_string (const struct xfactor *fctr,
+ const struct factor_result *result,
+ struct string *str);
-static subc_list_double percentile_list;
+/* Represent a factor as a string, so it can be
+ printed in a human readable fashion,
+ but sacrificing some readablility for the sake of brevity */
+static void
+factor_to_string_concise (const struct xfactor *fctr,
+ const struct factor_result *result,
+ struct string *str
+ );
-static enum pc_alg percentile_algorithm;
-static short sbc_percentile;
+/* Categories of missing values to exclude. */
+static enum mv_class exclude_values;
int
-cmd_examine(void)
+cmd_examine (struct lexer *lexer, struct dataset *ds)
{
+ struct casegrouper *grouper;
+ struct casereader *group;
bool ok;
- subc_list_double_create(&percentile_list);
+ subc_list_double_create (&percentile_list);
percentile_algorithm = PC_HAVERAGE;
- if ( !parse_examine(&cmd) )
- return CMD_FAILURE;
+ ll_init (&factor_list);
+
+ if ( !parse_examine (lexer, ds, &cmd, NULL) )
+ {
+ subc_list_double_destroy (&percentile_list);
+ return CMD_FAILURE;
+ }
/* If /MISSING=INCLUDE is set, then user missing values are ignored */
- if (cmd.incl == XMN_INCLUDE )
- value_is_missing = mv_is_value_system_missing;
- else
- value_is_missing = mv_is_value_missing;
+ exclude_values = cmd.incl == XMN_INCLUDE ? MV_SYSTEM : MV_ANY;
- if ( cmd.st_n == SYSMIS )
+ if ( cmd.st_n == SYSMIS )
cmd.st_n = 5;
- if ( ! cmd.sbc_cinterval)
+ if ( ! cmd.sbc_cinterval)
cmd.n_cinterval[0] = 95.0;
- /* If descriptives have been requested, make sure the
+ /* If descriptives have been requested, make sure the
quartiles are calculated */
if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
{
- subc_list_double_push(&percentile_list, 25);
- subc_list_double_push(&percentile_list, 50);
- subc_list_double_push(&percentile_list, 75);
+ subc_list_double_push (&percentile_list, 25);
+ subc_list_double_push (&percentile_list, 50);
+ subc_list_double_push (&percentile_list, 75);
}
- ok = multipass_procedure_with_splits (run_examine, &cmd);
+ grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds));
- if ( totals )
+ while (casegrouper_get_next_group (grouper, &group))
{
- free( totals );
+ struct casereader *reader =
+ casereader_create_arithmetic_sequence (group, 1, 1);
+
+ run_examine (&cmd, reader, ds);
}
-
- if ( dependent_vars )
- free (dependent_vars);
- {
- struct factor *f = factors ;
- while ( f )
- {
- struct factor *ff = f;
+ ok = casegrouper_destroy (grouper);
+ ok = proc_commit (ds) && ok;
- f = f->next;
- free ( ff->fs );
- hsh_destroy ( ff->fstats ) ;
- free ( ff ) ;
- }
- factors = 0;
- }
+ if ( dependent_vars )
+ free (dependent_vars);
- subc_list_double_destroy(&percentile_list);
+ subc_list_double_destroy (&percentile_list);
return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
};
-/* Show all the appropriate tables */
+
static void
-output_examine(void)
+show_npplot (const struct variable **dependent_var,
+ int n_dep_var,
+ const struct xfactor *fctr)
{
- struct factor *fctr;
+ int v;
- /* Show totals if appropriate */
- if ( ! cmd.sbc_nototal || factors == 0 )
+ for (v = 0; v < n_dep_var; ++v)
{
- show_summary(dependent_vars, n_dependent_vars, 0);
-
- if ( cmd.sbc_statistics )
+ struct ll *ll;
+ for (ll = ll_head (&fctr->result_list);
+ ll != ll_null (&fctr->result_list);
+ ll = ll_next (ll))
{
- if ( cmd.a_statistics[XMN_ST_EXTREME])
- show_extremes(dependent_vars, n_dependent_vars, 0, cmd.st_n);
-
- if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
- show_descriptives(dependent_vars, n_dependent_vars, 0);
-
+ struct string label;
+ const struct factor_result *result =
+ ll_data (ll, struct factor_result, ll);
+ struct chart_item *npp, *dnpp;
+ struct casereader *reader;
+ struct np *np;
+
+ ds_init_empty (&label);
+ ds_put_format (&label, "%s ", var_get_name (dependent_var[v]));
+ factor_to_string (fctr, result, &label);
+
+ np = result->metrics[v].np;
+ reader = casewriter_make_reader (np->writer);
+ npp = np_plot_create (np, reader, ds_cstr (&label));
+ dnpp = dnp_plot_create (np, reader, ds_cstr (&label));
+
+ ds_destroy (&label);
+
+ if (npp == NULL || dnpp == NULL)
+ {
+ msg (MW, _("Not creating NP plot because data set is empty."));
+ chart_item_unref (npp);
+ chart_item_unref (dnpp);
+ }
+ else
+ {
+ chart_item_submit (npp);
+ chart_item_submit (dnpp);
+ }
+
+ statistic_destroy (&np->parent.parent);
+ casereader_destroy (reader);
}
- if ( sbc_percentile )
- show_percentiles(dependent_vars, n_dependent_vars, 0);
-
- if ( cmd.sbc_plot)
- {
- int v;
- if ( cmd.a_plot[XMN_PLT_NPPLOT] )
- {
- for ( v = 0 ; v < n_dependent_vars; ++v )
- np_plot(&totals[v], var_to_string(dependent_vars[v]));
- }
+ }
+}
- if ( cmd.a_plot[XMN_PLT_BOXPLOT] )
- {
- if ( cmd.cmp == XMN_GROUPS )
- {
- box_plot_group(0, dependent_vars, n_dependent_vars,
- cmd.v_id);
- }
- else
- box_plot_variables(0, dependent_vars, n_dependent_vars,
- cmd.v_id);
- }
- if ( cmd.a_plot[XMN_PLT_HISTOGRAM] )
- {
- for ( v = 0 ; v < n_dependent_vars; ++v )
- {
- struct normal_curve normal;
-
- normal.N = totals[v].n;
- normal.mean = totals[v].mean;
- normal.stddev = totals[v].stddev;
-
- histogram_plot(totals[v].histogram,
- var_to_string(dependent_vars[v]),
- &normal, 0);
- }
- }
+static void
+show_histogram (const struct variable **dependent_var,
+ int n_dep_var,
+ const struct xfactor *fctr)
+{
+ int v;
+ for (v = 0; v < n_dep_var; ++v)
+ {
+ struct ll *ll;
+ for (ll = ll_head (&fctr->result_list);
+ ll != ll_null (&fctr->result_list);
+ ll = ll_next (ll))
+ {
+ struct string str;
+ const struct factor_result *result =
+ ll_data (ll, struct factor_result, ll);
+ struct histogram *histogram;
+ double mean, var, n;
+
+ histogram = result->metrics[v].histogram;
+ if (histogram == NULL)
+ {
+ /* Probably all values are SYSMIS. */
+ continue;
+ }
+
+ ds_init_empty (&str);
+ ds_put_format (&str, "%s ", var_get_name (dependent_var[v]));
+
+ factor_to_string (fctr, result, &str);
+
+ moments1_calculate (result->metrics[v].moments,
+ &n, &mean, &var, NULL, NULL);
+ chart_item_submit (histogram_chart_create (histogram->gsl_hist,
+ ds_cstr (&str), n, mean,
+ sqrt (var), false));
+
+ ds_destroy (&str);
}
-
}
+}
+
- /* Show grouped statistics as appropriate */
- fctr = factors;
- while ( fctr )
+static void
+show_boxplot_groups (const struct variable **dependent_var,
+ int n_dep_var,
+ const struct xfactor *fctr)
+{
+ int v;
+
+ for (v = 0; v < n_dep_var; ++v)
{
- show_summary(dependent_vars, n_dependent_vars, fctr);
+ const struct factor_result *result;
+ struct boxplot *boxplot;
+ double y_min = DBL_MAX;
+ double y_max = -DBL_MAX;
+ char *title;
- if ( cmd.sbc_statistics )
+ ll_for_each (result, struct factor_result, ll, &fctr->result_list)
{
- if ( cmd.a_statistics[XMN_ST_EXTREME])
- show_extremes(dependent_vars, n_dependent_vars, fctr, cmd.st_n);
+ struct factor_metrics *metrics = &result->metrics[v];
+ const struct ll_list *max_list = extrema_list (metrics->maxima);
+ const struct ll_list *min_list = extrema_list (metrics->minima);
+ const struct extremum *max, *min;
- if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES])
- show_descriptives(dependent_vars, n_dependent_vars, fctr);
- }
+ if ( ll_is_empty (max_list))
+ {
+ msg (MW, _("Not creating plot because data set is empty."));
+ continue;
+ }
+
+ max = ll_data (ll_head(max_list), struct extremum, ll);
+ min = ll_data (ll_head (min_list), struct extremum, ll);
- if ( sbc_percentile )
- show_percentiles(dependent_vars, n_dependent_vars, fctr);
+ y_max = MAX (y_max, max->value);
+ y_min = MIN (y_min, min->value);
+ }
+ if (fctr->indep_var[0])
+ title = xasprintf (_("Boxplot of %s vs. %s"),
+ var_to_string (dependent_var[v]),
+ var_to_string (fctr->indep_var[0]));
+ else
+ title = xasprintf (_("Boxplot of %s"),
+ var_to_string (dependent_var[v]));
+ boxplot = boxplot_create (y_min, y_max, title);
+ free (title);
- if ( cmd.sbc_plot)
+ ll_for_each (result, struct factor_result, ll, &fctr->result_list)
{
- size_t v;
+ struct factor_metrics *metrics = &result->metrics[v];
+ struct string str = DS_EMPTY_INITIALIZER;
+ factor_to_string_concise (fctr, result, &str);
+ boxplot_add_box (boxplot, metrics->box_whisker, ds_cstr (&str));
+ metrics->box_whisker = NULL;
+ ds_destroy (&str);
+ }
- struct factor_statistics **fs = fctr->fs ;
+ boxplot_submit (boxplot);
+ }
+}
- if ( cmd.a_plot[XMN_PLT_BOXPLOT] )
- {
- if ( cmd.cmp == XMN_VARIABLES )
- box_plot_variables(fctr, dependent_vars, n_dependent_vars,
- cmd.v_id);
- else
- box_plot_group(fctr, dependent_vars, n_dependent_vars,
- cmd.v_id);
- }
- for ( v = 0 ; v < n_dependent_vars; ++v )
- {
- for ( fs = fctr->fs ; *fs ; ++fs )
- {
- const char *s = factor_to_string(fctr, *fs, dependent_vars[v]);
+static void
+show_boxplot_variables (const struct variable **dependent_var,
+ int n_dep_var,
+ const struct xfactor *fctr
+ )
- if ( cmd.a_plot[XMN_PLT_NPPLOT] )
- np_plot(&(*fs)->m[v], s);
+{
+ const struct factor_result *result;
+ int v;
- if ( cmd.a_plot[XMN_PLT_HISTOGRAM] )
- {
- struct normal_curve normal;
+ ll_for_each (result, struct factor_result, ll, &fctr->result_list)
+ {
+ struct string title;
+ double y_min = DBL_MAX;
+ double y_max = -DBL_MAX;
+ struct boxplot *boxplot;
- normal.N = (*fs)->m[v].n;
- normal.mean = (*fs)->m[v].mean;
- normal.stddev = (*fs)->m[v].stddev;
-
- histogram_plot((*fs)->m[v].histogram,
- s, &normal, 0);
- }
-
- } /* for ( fs .... */
+ for (v = 0; v < n_dep_var; ++v)
+ {
+ const struct factor_metrics *metrics = &result->metrics[v];
+ const struct ll *max_ll = ll_head (extrema_list (metrics->maxima));
+ const struct ll *min_ll = ll_head (extrema_list (metrics->minima));
+ const struct extremum *max = ll_data (max_ll, struct extremum, ll);
+ const struct extremum *min = ll_data (min_ll, struct extremum, ll);
+
+ y_max = MAX (y_max, max->value);
+ y_min = MIN (y_min, min->value);
+ }
- } /* for ( v = 0 ..... */
+ ds_init_empty (&title);
+ factor_to_string (fctr, result, &title);
+ boxplot = boxplot_create (y_min, y_max, ds_cstr (&title));
+ ds_destroy (&title);
+ for (v = 0; v < n_dep_var; ++v)
+ {
+ struct factor_metrics *metrics = &result->metrics[v];
+ boxplot_add_box (boxplot, metrics->box_whisker,
+ var_get_name (dependent_var[v]));
+ metrics->box_whisker = NULL;
}
- fctr = fctr->next;
+ boxplot_submit (boxplot);
}
-
}
-/* Create a hash table of percentiles and their values from the list of
- percentiles */
-static struct hsh_table *
-list_to_ptile_hash(const subc_list_double *l)
+/* Show all the appropriate tables */
+static void
+output_examine (const struct dictionary *dict)
{
- int i;
-
- struct hsh_table *h ;
+ struct ll *ll;
+
+ show_summary (dependent_vars, n_dependent_vars, dict, &level0_factor);
+
+ if ( cmd.a_statistics[XMN_ST_EXTREME] )
+ show_extremes (dependent_vars, n_dependent_vars, &level0_factor);
- h = hsh_create(subc_list_double_count(l),
- (hsh_compare_func *) ptile_compare,
- (hsh_hash_func *) ptile_hash,
- (hsh_free_func *) free,
- 0);
+ if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
+ show_descriptives (dependent_vars, n_dependent_vars, &level0_factor);
+ if ( cmd.sbc_percentiles)
+ show_percentiles (dependent_vars, n_dependent_vars, &level0_factor);
- for ( i = 0 ; i < subc_list_double_count(l) ; ++i )
+ if ( cmd.sbc_plot)
{
- struct percentile *p = xmalloc (sizeof *p);
-
- p->p = subc_list_double_at(l,i);
- p->v = SYSMIS;
+ if (cmd.a_plot[XMN_PLT_BOXPLOT])
+ show_boxplot_groups (dependent_vars, n_dependent_vars, &level0_factor);
- hsh_insert(h, p);
+ if (cmd.a_plot[XMN_PLT_HISTOGRAM])
+ show_histogram (dependent_vars, n_dependent_vars, &level0_factor);
+ if (cmd.a_plot[XMN_PLT_NPPLOT])
+ show_npplot (dependent_vars, n_dependent_vars, &level0_factor);
}
- return h;
+ for (ll = ll_head (&factor_list);
+ ll != ll_null (&factor_list); ll = ll_next (ll))
+ {
+ struct xfactor *factor = ll_data (ll, struct xfactor, ll);
+ show_summary (dependent_vars, n_dependent_vars, dict, factor);
+
+ if ( cmd.a_statistics[XMN_ST_EXTREME] )
+ show_extremes (dependent_vars, n_dependent_vars, factor);
+
+ if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
+ show_descriptives (dependent_vars, n_dependent_vars, factor);
+
+ if ( cmd.sbc_percentiles)
+ show_percentiles (dependent_vars, n_dependent_vars, factor);
+
+ if (cmd.a_plot[XMN_PLT_BOXPLOT])
+ {
+ if (cmd.cmp == XMN_GROUPS)
+ show_boxplot_groups (dependent_vars, n_dependent_vars, factor);
+ else if (cmd.cmp == XMN_VARIABLES)
+ show_boxplot_variables (dependent_vars, n_dependent_vars, factor);
+ }
+
+ if (cmd.a_plot[XMN_PLT_HISTOGRAM])
+ show_histogram (dependent_vars, n_dependent_vars, factor);
+ if (cmd.a_plot[XMN_PLT_NPPLOT])
+ show_npplot (dependent_vars, n_dependent_vars, factor);
+ }
}
/* Parse the PERCENTILES subcommand */
static int
-xmn_custom_percentiles(struct cmd_examine *p UNUSED)
+xmn_custom_percentiles (struct lexer *lexer, struct dataset *ds UNUSED,
+ struct cmd_examine *p UNUSED, void *aux UNUSED)
{
- sbc_percentile = 1;
-
- lex_match('=');
+ lex_match (lexer, T_EQUALS);
- lex_match('(');
+ lex_match (lexer, T_LPAREN);
- while ( lex_is_number() )
+ while ( lex_is_number (lexer) )
{
- subc_list_double_push(&percentile_list,lex_number());
+ subc_list_double_push (&percentile_list, lex_number (lexer));
- lex_get();
+ lex_get (lexer);
- lex_match(',') ;
+ lex_match (lexer, T_COMMA) ;
}
- lex_match(')');
+ lex_match (lexer, T_RPAREN);
- lex_match('=');
+ lex_match (lexer, T_EQUALS);
- if ( lex_match_id("HAVERAGE"))
- percentile_algorithm = PC_HAVERAGE;
+ if ( lex_match_id (lexer, "HAVERAGE"))
+ percentile_algorithm = PC_HAVERAGE;
- else if ( lex_match_id("WAVERAGE"))
- percentile_algorithm = PC_WAVERAGE;
+ else if ( lex_match_id (lexer, "WAVERAGE"))
+ percentile_algorithm = PC_WAVERAGE;
- else if ( lex_match_id("ROUND"))
+ else if ( lex_match_id (lexer, "ROUND"))
percentile_algorithm = PC_ROUND;
- else if ( lex_match_id("EMPIRICAL"))
+ else if ( lex_match_id (lexer, "EMPIRICAL"))
percentile_algorithm = PC_EMPIRICAL;
- else if ( lex_match_id("AEMPIRICAL"))
- percentile_algorithm = PC_AEMPIRICAL;
+ else if ( lex_match_id (lexer, "AEMPIRICAL"))
+ percentile_algorithm = PC_AEMPIRICAL;
- else if ( lex_match_id("NONE"))
- percentile_algorithm = PC_NONE;
+ else if ( lex_match_id (lexer, "NONE"))
+ percentile_algorithm = PC_NONE;
- if ( 0 == subc_list_double_count(&percentile_list))
+ if ( 0 == subc_list_double_count (&percentile_list))
{
- subc_list_double_push(&percentile_list, 5);
- subc_list_double_push(&percentile_list, 10);
- subc_list_double_push(&percentile_list, 25);
- subc_list_double_push(&percentile_list, 50);
- subc_list_double_push(&percentile_list, 75);
- subc_list_double_push(&percentile_list, 90);
- subc_list_double_push(&percentile_list, 95);
+ subc_list_double_push (&percentile_list, 5);
+ subc_list_double_push (&percentile_list, 10);
+ subc_list_double_push (&percentile_list, 25);
+ subc_list_double_push (&percentile_list, 50);
+ subc_list_double_push (&percentile_list, 75);
+ subc_list_double_push (&percentile_list, 90);
+ subc_list_double_push (&percentile_list, 95);
}
return 1;
/* TOTAL and NOTOTAL are simple, mutually exclusive flags */
static int
-xmn_custom_total(struct cmd_examine *p)
+xmn_custom_total (struct lexer *lexer UNUSED, struct dataset *ds UNUSED,
+ struct cmd_examine *p, void *aux UNUSED)
{
- if ( p->sbc_nototal )
+ if ( p->sbc_nototal )
{
msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
return 0;
}
static int
-xmn_custom_nototal(struct cmd_examine *p)
+xmn_custom_nototal (struct lexer *lexer UNUSED, struct dataset *ds UNUSED,
+ struct cmd_examine *p, void *aux UNUSED)
{
- if ( p->sbc_total )
+ if ( p->sbc_total )
{
- msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL");
+ msg (SE, _("%s and %s are mutually exclusive"), "TOTAL", "NOTOTAL");
return 0;
}
-/* Parser for the variables sub command
+/* Parser for the variables sub command
Returns 1 on success */
static int
-xmn_custom_variables(struct cmd_examine *cmd )
+xmn_custom_variables (struct lexer *lexer, struct dataset *ds,
+ struct cmd_examine *cmd,
+ void *aux UNUSED)
{
- lex_match('=');
+ const struct dictionary *dict = dataset_dict (ds);
+ lex_match (lexer, T_EQUALS);
- if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
- && token != T_ALL)
+ if ( (lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokcstr (lexer)) == NULL)
+ && lex_token (lexer) != T_ALL)
{
return 2;
}
-
- if (!parse_variables (default_dict, &dependent_vars, &n_dependent_vars,
- PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
+
+ if (!parse_variables_const (lexer, dict, &dependent_vars, &n_dependent_vars,
+ PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) )
{
free (dependent_vars);
return 0;
}
- assert(n_dependent_vars);
+ assert (n_dependent_vars);
- totals = xnmalloc (n_dependent_vars, sizeof *totals);
- if ( lex_match(T_BY))
+ if ( lex_match (lexer, T_BY))
{
- int success ;
- success = examine_parse_independent_vars(cmd);
- if ( success != 1 ) {
- free (dependent_vars);
- free (totals) ;
- }
+ int success ;
+ success = examine_parse_independent_vars (lexer, dict, cmd);
+ if ( success != 1 )
+ {
+ free (dependent_vars);
+ }
return success;
}
/* Parse the clause specifying the factors */
static int
-examine_parse_independent_vars(struct cmd_examine *cmd)
+examine_parse_independent_vars (struct lexer *lexer,
+ const struct dictionary *dict,
+ struct cmd_examine *cmd)
{
int success;
- struct factor *sf = xmalloc (sizeof *sf);
+ struct xfactor *sf = xmalloc (sizeof *sf);
+
+ ll_init (&sf->result_list);
- if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
- && token != T_ALL)
+ if ( (lex_token (lexer) != T_ID ||
+ dict_lookup_var (dict, lex_tokcstr (lexer)) == NULL)
+ && lex_token (lexer) != T_ALL)
{
free ( sf ) ;
return 2;
}
+ sf->indep_var[0] = parse_variable (lexer, dict);
+ sf->indep_var[1] = NULL;
- sf->indep_var[0] = parse_variable();
- sf->indep_var[1] = 0;
-
- if ( token == T_BY )
+ if ( lex_token (lexer) == T_BY )
{
+ lex_match (lexer, T_BY);
- lex_match(T_BY);
-
- if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL)
- && token != T_ALL)
+ if ( (lex_token (lexer) != T_ID ||
+ dict_lookup_var (dict, lex_tokcstr (lexer)) == NULL)
+ && lex_token (lexer) != T_ALL)
{
- free ( sf ) ;
+ free (sf);
return 2;
}
- sf->indep_var[1] = parse_variable();
+ sf->indep_var[1] = parse_variable (lexer, dict);
+ ll_push_tail (&factor_list, &sf->ll);
}
+ else
+ ll_push_tail (&factor_list, &sf->ll);
+ lex_match (lexer, T_COMMA);
- sf->fstats = hsh_create(4,
- (hsh_compare_func *) factor_statistics_compare,
- (hsh_hash_func *) factor_statistics_hash,
- (hsh_free_func *) factor_statistics_free,
- 0);
-
- sf->next = factors;
- factors = sf;
-
- lex_match(',');
-
- if ( token == '.' || token == '/' )
+ if ( lex_token (lexer) == T_ENDCMD || lex_token (lexer) == T_SLASH )
return 1;
- success = examine_parse_independent_vars(cmd);
-
- if ( success != 1 )
- free ( sf ) ;
+ success = examine_parse_independent_vars (lexer, dict, cmd);
+
+ if ( success != 1 )
+ free ( sf ) ;
return success;
}
+static void
+examine_group (struct cmd_examine *cmd, struct casereader *reader, int level,
+ const struct dictionary *dict, struct xfactor *factor)
+{
+ struct ccase *c;
+ const struct variable *wv = dict_get_weight (dict);
+ int v;
+ int n_extrema = 1;
+ struct factor_result *result = xzalloc (sizeof (*result));
+ int i;
+ for (i = 0; i < 2; i++)
+ if (factor->indep_var[i])
+ value_init (&result->value[i], var_get_width (factor->indep_var[i]));
+ result->metrics = xcalloc (n_dependent_vars, sizeof (*result->metrics));
-void populate_percentiles(struct tab_table *tbl, int col, int row,
- const struct metrics *m);
-
-void populate_descriptives(struct tab_table *t, int col, int row,
- const struct metrics *fs);
+ if ( cmd->a_statistics[XMN_ST_EXTREME] )
+ n_extrema = cmd->st_n;
-void populate_extremes(struct tab_table *t, int col, int row, int n,
- const struct metrics *m);
-void populate_summary(struct tab_table *t, int col, int row,
- const struct metrics *m);
+ c = casereader_peek (reader, 0);
+ if (c != NULL)
+ {
+ if ( level > 0)
+ for (i = 0; i < 2; i++)
+ if (factor->indep_var[i])
+ value_copy (&result->value[i], case_data (c, factor->indep_var[i]),
+ var_get_width (factor->indep_var[i]));
+ case_unref (c);
+ }
+ for (v = 0; v < n_dependent_vars; ++v)
+ {
+ struct casewriter *writer;
+ struct casereader *input = casereader_clone (reader);
+
+ result->metrics[v].moments = moments1_create (MOMENT_KURTOSIS);
+ result->metrics[v].minima = extrema_create (n_extrema, EXTREME_MINIMA);
+ result->metrics[v].maxima = extrema_create (n_extrema, EXTREME_MAXIMA);
+ result->metrics[v].cmin = DBL_MAX;
+
+ if (cmd->a_statistics[XMN_ST_DESCRIPTIVES] ||
+ cmd->a_plot[XMN_PLT_BOXPLOT] ||
+ cmd->a_plot[XMN_PLT_NPPLOT] ||
+ cmd->sbc_percentiles)
+ {
+ /* In this case, we need to sort the data, so we create a sorting
+ casewriter */
+ struct subcase up_ordering;
+ subcase_init_var (&up_ordering, dependent_vars[v], SC_ASCEND);
+ writer = sort_create_writer (&up_ordering,
+ casereader_get_proto (reader));
+ subcase_destroy (&up_ordering);
+ }
+ else
+ {
+ /* but in this case, sorting is unnecessary, so an ordinary
+ casewriter is sufficient */
+ writer =
+ autopaging_writer_create (casereader_get_proto (reader));
+ }
+ /* Sort or just iterate, whilst calculating moments etc */
+ while ((c = casereader_read (input)) != NULL)
+ {
+ int n_vals = caseproto_get_n_widths (casereader_get_proto (reader));
+ const casenumber loc = case_data_idx (c, n_vals - 1)->f;
-static int bad_weight_warn = 1;
+ const double weight = wv ? case_data (c, wv)->f : 1.0;
+ const union value *value = case_data (c, dependent_vars[v]);
+ if (weight != SYSMIS)
+ minimize (&result->metrics[v].cmin, weight);
-/* Perform calculations for the sub factors */
-void
-factor_calc(struct ccase *c, int case_no, double weight, int case_missing)
-{
- size_t v;
- struct factor *fctr = factors;
+ moments1_add (result->metrics[v].moments,
+ value->f,
+ weight);
- while ( fctr)
- {
- struct factor_statistics **foo ;
- union value indep_vals[2] ;
+ result->metrics[v].n += weight;
- indep_vals[0] = * case_data(c, fctr->indep_var[0]->fv);
+ if ( ! var_is_value_missing (dependent_vars[v], value, MV_ANY) )
+ result->metrics[v].n_valid += weight;
- if ( fctr->indep_var[1] )
- indep_vals[1] = * case_data(c, fctr->indep_var[1]->fv);
- else
- indep_vals[1].f = SYSMIS;
+ extrema_add (result->metrics[v].maxima,
+ value->f,
+ weight,
+ loc);
- assert(fctr->fstats);
+ extrema_add (result->metrics[v].minima,
+ value->f,
+ weight,
+ loc);
- foo = ( struct factor_statistics ** )
- hsh_probe(fctr->fstats, (void *) &indep_vals);
+ casewriter_write (writer, c);
+ }
+ casereader_destroy (input);
+ result->metrics[v].up_reader = casewriter_make_reader (writer);
+ }
- if ( !*foo )
+ /* If percentiles or descriptives have been requested, then a
+ second pass through the data (which has now been sorted)
+ is necessary */
+ if ( cmd->a_statistics[XMN_ST_DESCRIPTIVES] ||
+ cmd->a_plot[XMN_PLT_BOXPLOT] ||
+ cmd->a_plot[XMN_PLT_NPPLOT] ||
+ cmd->sbc_percentiles)
+ {
+ for (v = 0; v < n_dependent_vars; ++v)
{
+ int i;
+ int n_os;
+ struct order_stats **os ;
+ struct factor_metrics *metric = &result->metrics[v];
+
+ metric->n_ptiles = percentile_list.n_data;
+
+ metric->ptl = xcalloc (metric->n_ptiles, sizeof *metric->ptl);
- *foo = create_factor_statistics(n_dependent_vars,
- &indep_vals[0],
- &indep_vals[1]);
+ metric->quartiles = xcalloc (3, sizeof (*metric->quartiles));
- for ( v = 0 ; v < n_dependent_vars ; ++v )
+ for (i = 0 ; i < metric->n_ptiles; ++i)
{
- metrics_precalc( &(*foo)->m[v] );
+ metric->ptl[i] = percentile_create (percentile_list.data[i] / 100.0, metric->n_valid);
+
+ if ( percentile_list.data[i] == 25)
+ metric->quartiles[0] = metric->ptl[i];
+ else if ( percentile_list.data[i] == 50)
+ metric->quartiles[1] = metric->ptl[i];
+ else if ( percentile_list.data[i] == 75)
+ metric->quartiles[2] = metric->ptl[i];
}
- }
+ metric->tukey_hinges = tukey_hinges_create (metric->n_valid, metric->cmin);
+ metric->trimmed_mean = trimmed_mean_create (metric->n_valid, 0.05);
- for ( v = 0 ; v < n_dependent_vars ; ++v )
- {
- const struct variable *var = dependent_vars[v];
- const union value *val = case_data (c, var->fv);
-
- if ( value_is_missing (&var->miss, val) || case_missing )
- val = 0;
-
- metrics_calc( &(*foo)->m[v], val, weight, case_no);
-
- }
+ n_os = metric->n_ptiles + 2;
- fctr = fctr->next;
- }
+ if ( cmd->a_plot[XMN_PLT_NPPLOT] )
+ {
+ metric->np = np_create (metric->moments);
+ n_os ++;
+ }
+ os = xcalloc (n_os, sizeof *os);
-}
+ for (i = 0 ; i < metric->n_ptiles ; ++i )
+ {
+ os[i] = &metric->ptl[i]->parent;
+ }
-static bool
-run_examine(const struct casefile *cf, void *cmd_ )
-{
- struct casereader *r;
- struct ccase c;
- int v;
+ os[i] = &metric->tukey_hinges->parent;
+ os[i+1] = &metric->trimmed_mean->parent;
+
+ if (cmd->a_plot[XMN_PLT_NPPLOT])
+ os[i+2] = &metric->np->parent;
- const struct cmd_examine *cmd = (struct cmd_examine *) cmd_;
+ order_stats_accumulate (os, n_os,
+ casereader_clone (metric->up_reader),
+ wv, dependent_vars[v], MV_ANY);
+ free (os);
+ }
+ }
- /* Make sure we haven't got rubbish left over from a
- previous split */
- struct factor *fctr = factors;
- while (fctr)
+ /* FIXME: Do this in the above loop */
+ if ( cmd->a_plot[XMN_PLT_HISTOGRAM] )
{
- struct factor *next = fctr->next;
+ struct ccase *c;
+ struct casereader *input = casereader_clone (reader);
- hsh_clear(fctr->fstats);
+ for (v = 0; v < n_dependent_vars; ++v)
+ {
+ const struct extremum *max, *min;
+ struct factor_metrics *metric = &result->metrics[v];
- fctr->fs = 0;
+ const struct ll_list *max_list =
+ extrema_list (result->metrics[v].maxima);
- fctr = next;
- }
+ const struct ll_list *min_list =
+ extrema_list (result->metrics[v].minima);
+ if ( ll_is_empty (max_list))
+ {
+ msg (MW, _("Not creating plot because data set is empty."));
+ continue;
+ }
+ assert (! ll_is_empty (min_list));
- for ( v = 0 ; v < n_dependent_vars ; ++v )
- metrics_precalc(&totals[v]);
+ max = (const struct extremum *)
+ ll_data (ll_head(max_list), struct extremum, ll);
- for(r = casefile_get_reader (cf);
- casereader_read (r, &c) ;
- case_destroy (&c) )
- {
- int case_missing=0;
- const int case_no = casereader_cnum(r);
+ min = (const struct extremum *)
+ ll_data (ll_head (min_list), struct extremum, ll);
- const double weight =
- dict_get_case_weight(default_dict, &c, &bad_weight_warn);
+ metric->histogram = histogram_create (10, min->value, max->value);
+ }
- if ( cmd->miss == XMN_LISTWISE )
+ while ((c = casereader_read (input)) != NULL)
{
- for ( v = 0 ; v < n_dependent_vars ; ++v )
- {
- const struct variable *var = dependent_vars[v];
- const union value *val = case_data (&c, var->fv);
+ const double weight = wv ? case_data (c, wv)->f : 1.0;
- if ( value_is_missing(&var->miss, val))
- case_missing = 1;
-
+ for (v = 0; v < n_dependent_vars; ++v)
+ {
+ struct factor_metrics *metric = &result->metrics[v];
+ if ( metric->histogram)
+ histogram_add (metric->histogram,
+ case_data (c, dependent_vars[v])->f, weight);
}
+ case_unref (c);
}
+ casereader_destroy (input);
+ }
- for ( v = 0 ; v < n_dependent_vars ; ++v )
+ /* In this case, a third iteration is required */
+ if (cmd->a_plot[XMN_PLT_BOXPLOT])
+ {
+ for (v = 0; v < n_dependent_vars; ++v)
{
- const struct variable *var = dependent_vars[v];
- const union value *val = case_data (&c, var->fv);
-
- if ( value_is_missing(&var->miss, val) || case_missing )
- val = 0;
-
- metrics_calc(&totals[v], val, weight, case_no);
-
+ struct factor_metrics *metric = &result->metrics[v];
+ int n_vals = caseproto_get_n_widths (casereader_get_proto (
+ metric->up_reader));
+ struct order_stats *os;
+
+ metric->box_whisker =
+ box_whisker_create ( metric->tukey_hinges, cmd->v_id, n_vals - 1);
+
+ os = &metric->box_whisker->parent;
+ order_stats_accumulate ( &os, 1,
+ casereader_clone (metric->up_reader),
+ wv, dependent_vars[v], MV_ANY);
}
+ }
- factor_calc(&c, case_no, weight, case_missing);
+ ll_push_tail (&factor->result_list, &result->ll);
+ casereader_destroy (reader);
+}
- }
+static void
+run_examine (struct cmd_examine *cmd, struct casereader *input,
+ struct dataset *ds)
+{
+ struct ll *ll;
+ const struct dictionary *dict = dataset_dict (ds);
+ struct ccase *c;
+ struct casereader *level0 = casereader_clone (input);
- for ( v = 0 ; v < n_dependent_vars ; ++v)
+ c = casereader_peek (input, 0);
+ if (c == NULL)
{
- fctr = factors;
- while ( fctr )
- {
- struct hsh_iterator hi;
- struct factor_statistics *fs;
+ casereader_destroy (input);
+ return;
+ }
- for ( fs = hsh_first(fctr->fstats, &hi);
- fs != 0 ;
- fs = hsh_next(fctr->fstats, &hi))
- {
-
- fs->m[v].ptile_hash = list_to_ptile_hash(&percentile_list);
- fs->m[v].ptile_alg = percentile_algorithm;
- metrics_postcalc(&fs->m[v]);
- }
+ output_split_file_values (ds, c);
+ case_unref (c);
- fctr = fctr->next;
- }
+ ll_init (&level0_factor.result_list);
- totals[v].ptile_hash = list_to_ptile_hash(&percentile_list);
- totals[v].ptile_alg = percentile_algorithm;
- metrics_postcalc(&totals[v]);
- }
+ examine_group (cmd, level0, 0, dict, &level0_factor);
+ for (ll = ll_head (&factor_list);
+ ll != ll_null (&factor_list);
+ ll = ll_next (ll))
+ {
+ struct xfactor *factor = ll_data (ll, struct xfactor, ll);
- /* Make sure that the combination of factors are complete */
+ struct casereader *group = NULL;
+ struct casereader *level1;
+ struct casegrouper *grouper1 = NULL;
- fctr = factors;
- while ( fctr )
- {
- struct hsh_iterator hi;
- struct hsh_iterator hi0;
- struct hsh_iterator hi1;
- struct factor_statistics *fs;
-
- struct hsh_table *idh0=0;
- struct hsh_table *idh1=0;
- union value *val0;
- union value *val1;
-
- idh0 = hsh_create(4, (hsh_compare_func *) compare_values,
- (hsh_hash_func *) hash_value,
- 0,0);
-
- idh1 = hsh_create(4, (hsh_compare_func *) compare_values,
- (hsh_hash_func *) hash_value,
- 0,0);
-
-
- for ( fs = hsh_first(fctr->fstats, &hi);
- fs != 0 ;
- fs = hsh_next(fctr->fstats, &hi))
- {
- hsh_insert(idh0,(void *) &fs->id[0]);
- hsh_insert(idh1,(void *) &fs->id[1]);
- }
+ level1 = casereader_clone (input);
+ level1 = sort_execute_1var (level1, factor->indep_var[0]);
+ grouper1 = casegrouper_create_vars (level1, &factor->indep_var[0], 1);
- /* Ensure that the factors combination is complete */
- for ( val0 = hsh_first(idh0, &hi0);
- val0 != 0 ;
- val0 = hsh_next(idh0, &hi0))
+ while (casegrouper_get_next_group (grouper1, &group))
{
- for ( val1 = hsh_first(idh1, &hi1);
- val1 != 0 ;
- val1 = hsh_next(idh1, &hi1))
+ struct casereader *group_copy = casereader_clone (group);
+
+ if ( !factor->indep_var[1])
+ examine_group (cmd, group_copy, 1, dict, factor);
+ else
{
- struct factor_statistics **ffs;
- union value key[2];
- key[0] = *val0;
- key[1] = *val1;
-
- ffs = (struct factor_statistics **)
- hsh_probe(fctr->fstats, (void *) &key );
-
- if ( !*ffs ) {
- size_t i;
- (*ffs) = create_factor_statistics (n_dependent_vars,
- &key[0], &key[1]);
- for ( i = 0 ; i < n_dependent_vars ; ++i )
- metrics_precalc( &(*ffs)->m[i]);
- }
- }
- }
+ int n_groups = 0;
+ struct casereader *group2 = NULL;
+ struct casegrouper *grouper2 = NULL;
+
+ group_copy = sort_execute_1var (group_copy,
+ factor->indep_var[1]);
- hsh_destroy(idh0);
- hsh_destroy(idh1);
+ grouper2 = casegrouper_create_vars (group_copy,
+ &factor->indep_var[1], 1);
- fctr->fs = (struct factor_statistics **) hsh_sort_copy(fctr->fstats);
+ while (casegrouper_get_next_group (grouper2, &group2))
+ {
+ examine_group (cmd, group2, 2, dict, factor);
+ n_groups++;
+ }
+ casegrouper_destroy (grouper2);
+ }
- fctr = fctr->next;
+ casereader_destroy (group);
+ }
+ casegrouper_destroy (grouper1);
}
- output_examine();
+ casereader_destroy (input);
+ output_examine (dict);
- if ( totals )
- {
- size_t i;
- for ( i = 0 ; i < n_dependent_vars ; ++i )
- {
- metrics_destroy(&totals[i]);
- }
- }
+ factor_destroy (&level0_factor);
+
+ {
+ struct ll *ll;
+ for (ll = ll_head (&factor_list);
+ ll != ll_null (&factor_list);
+ ll = ll_next (ll))
+ {
+ struct xfactor *f = ll_data (ll, struct xfactor, ll);
+ factor_destroy (f);
+ }
+ }
- return true;
}
static void
-show_summary(struct variable **dependent_var, int n_dep_var,
- const struct factor *fctr)
+show_summary (const struct variable **dependent_var, int n_dep_var,
+ const struct dictionary *dict,
+ const struct xfactor *fctr)
{
+ const struct variable *wv = dict_get_weight (dict);
+ const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0;
+
static const char *subtitle[]=
{
N_("Valid"),
N_("Total")
};
- int i;
- int heading_columns ;
+ int v, j;
+ int heading_columns = 1;
int n_cols;
const int heading_rows = 3;
struct tab_table *tbl;
int n_rows ;
- int n_factors = 1;
+ n_rows = n_dep_var;
+
+ assert (fctr);
- if ( fctr )
+ if ( fctr->indep_var[0] )
{
heading_columns = 2;
- n_factors = hsh_count(fctr->fstats);
- n_rows = n_dep_var * n_factors ;
if ( fctr->indep_var[1] )
- heading_columns = 3;
- }
- else
- {
- heading_columns = 1;
- n_rows = n_dep_var;
+ {
+ heading_columns = 3;
+ }
}
+ n_rows *= ll_count (&fctr->result_list);
n_rows += heading_rows;
n_cols = heading_columns + 6;
- tbl = tab_create (n_cols,n_rows,0);
+ tbl = tab_create (n_cols, n_rows);
tab_headers (tbl, heading_columns, 0, heading_rows, 0);
- tab_dim (tbl, tab_natural_dimensions);
-
/* Outline the box */
- tab_box (tbl,
+ tab_box (tbl,
TAL_2, TAL_2,
-1, -1,
0, 0,
n_cols - 1, n_rows - 1);
/* Vertical lines for the data only */
- tab_box (tbl,
+ tab_box (tbl,
-1, -1,
-1, TAL_1,
heading_columns, 0,
tab_title (tbl, _("Case Processing Summary"));
-
- tab_joint_text(tbl, heading_columns, 0,
- n_cols -1, 0,
- TAB_CENTER | TAT_TITLE,
- _("Cases"));
+ tab_joint_text (tbl, heading_columns, 0,
+ n_cols -1, 0,
+ TAB_CENTER | TAT_TITLE,
+ _("Cases"));
/* Remove lines ... */
- tab_box (tbl,
+ tab_box (tbl,
-1, -1,
TAL_0, TAL_0,
heading_columns, 0,
n_cols - 1, 0);
- for ( i = 0 ; i < 3 ; ++i )
+ for (j = 0 ; j < 3 ; ++j)
{
- tab_text (tbl, heading_columns + i*2 , 2, TAB_CENTER | TAT_TITLE,
+ tab_text (tbl, heading_columns + j * 2 , 2, TAB_CENTER | TAT_TITLE,
_("N"));
- tab_text (tbl, heading_columns + i*2 + 1, 2, TAB_CENTER | TAT_TITLE,
+ tab_text (tbl, heading_columns + j * 2 + 1, 2, TAB_CENTER | TAT_TITLE,
_("Percent"));
- tab_joint_text(tbl, heading_columns + i*2 , 1,
- heading_columns + i*2 + 1, 1,
- TAB_CENTER | TAT_TITLE,
- subtitle[i]);
+ tab_joint_text (tbl, heading_columns + j * 2 , 1,
+ heading_columns + j * 2 + 1, 1,
+ TAB_CENTER | TAT_TITLE,
+ subtitle[j]);
tab_box (tbl, -1, -1,
TAL_0, TAL_0,
- heading_columns + i*2, 1,
- heading_columns + i*2 + 1, 1);
-
+ heading_columns + j * 2, 1,
+ heading_columns + j * 2 + 1, 1);
}
/* Titles for the independent variables */
- if ( fctr )
+ if ( fctr->indep_var[0] )
{
- tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
- var_to_string(fctr->indep_var[0]));
+ tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
+ var_to_string (fctr->indep_var[0]));
- if ( fctr->indep_var[1] )
+ if ( fctr->indep_var[1] )
{
- tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
- var_to_string(fctr->indep_var[1]));
+ tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
+ var_to_string (fctr->indep_var[1]));
}
-
}
-
- for ( i = 0 ; i < n_dep_var ; ++i )
+ for (v = 0 ; v < n_dep_var ; ++v)
{
- int n_factors = 1;
- if ( fctr )
- n_factors = hsh_count(fctr->fstats);
-
-
- if ( i > 0 )
- tab_hline(tbl, TAL_1, 0, n_cols -1 , i * n_factors + heading_rows);
-
- tab_text (tbl,
- 0, i * n_factors + heading_rows,
- TAB_LEFT | TAT_TITLE,
- var_to_string(dependent_var[i])
+ int j = 0;
+ struct ll *ll;
+ const union value *last_value = NULL;
+
+ if ( v > 0 )
+ tab_hline (tbl, TAL_1, 0, n_cols -1 ,
+ v * ll_count (&fctr->result_list)
+ + heading_rows);
+
+ tab_text (tbl,
+ 0,
+ v * ll_count (&fctr->result_list) + heading_rows,
+ TAB_LEFT | TAT_TITLE,
+ var_to_string (dependent_var[v])
);
- if ( !fctr )
- populate_summary(tbl, heading_columns,
- (i * n_factors) + heading_rows,
- &totals[i]);
-
-
- else
+ for (ll = ll_head (&fctr->result_list);
+ ll != ll_null (&fctr->result_list); ll = ll_next (ll))
{
- struct factor_statistics **fs = fctr->fs;
- int count = 0 ;
+ double n;
+ const struct factor_result *result =
+ ll_data (ll, struct factor_result, ll);
- while (*fs)
+ if ( fctr->indep_var[0] )
{
- static union value prev;
-
- if ( 0 != compare_values(&prev, &(*fs)->id[0],
- fctr->indep_var[0]->width))
- {
- tab_text (tbl,
- 1,
- (i * n_factors ) + count +
- heading_rows,
- TAB_LEFT | TAT_TITLE,
- value_to_string(&(*fs)->id[0], fctr->indep_var[0])
- );
-
- if (fctr->indep_var[1] && count > 0 )
- tab_hline(tbl, TAL_1, 1, n_cols - 1,
- (i * n_factors ) + count + heading_rows);
-
- }
-
- prev = (*fs)->id[0];
-
-
- if ( fctr->indep_var[1])
- tab_text (tbl,
- 2,
- (i * n_factors ) + count +
- heading_rows,
- TAB_LEFT | TAT_TITLE,
- value_to_string(&(*fs)->id[1], fctr->indep_var[1])
- );
- populate_summary(tbl, heading_columns,
- (i * n_factors) + count
- + heading_rows,
- &(*fs)->m[i]);
-
- count++ ;
- fs++;
- }
- }
- }
+ if ( last_value == NULL ||
+ !value_equal (last_value, &result->value[0],
+ var_get_width (fctr->indep_var[0])))
+ {
+ struct string str;
- tab_submit (tbl);
-}
+ last_value = &result->value[0];
+ ds_init_empty (&str);
+ var_append_value_name (fctr->indep_var[0], &result->value[0],
+ &str);
-void
-populate_summary(struct tab_table *t, int col, int row,
- const struct metrics *m)
+ tab_text (tbl, 1,
+ heading_rows + j +
+ v * ll_count (&fctr->result_list),
+ TAB_LEFT | TAT_TITLE,
+ ds_cstr (&str));
-{
- const double total = m->n + m->n_missing ;
+ ds_destroy (&str);
- tab_float(t, col + 0, row + 0, TAB_RIGHT, m->n, 8, 0);
- tab_float(t, col + 2, row + 0, TAB_RIGHT, m->n_missing, 8, 0);
- tab_float(t, col + 4, row + 0, TAB_RIGHT, total, 8, 0);
+ if ( fctr->indep_var[1] && j > 0)
+ tab_hline (tbl, TAL_1, 1, n_cols - 1,
+ heading_rows + j +
+ v * ll_count (&fctr->result_list));
+ }
+ if ( fctr->indep_var[1])
+ {
+ struct string str;
- if ( total > 0 ) {
- tab_text (t, col + 1, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%",
- 100.0 * m->n / total );
+ ds_init_empty (&str);
- tab_text (t, col + 3, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%",
- 100.0 * m->n_missing / total );
+ var_append_value_name (fctr->indep_var[1],
+ &result->value[1], &str);
- /* This seems a bit pointless !!! */
- tab_text (t, col + 5, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%",
- 100.0 * total / total );
+ tab_text (tbl, 2,
+ heading_rows + j +
+ v * ll_count (&fctr->result_list),
+ TAB_LEFT | TAT_TITLE,
+ ds_cstr (&str));
+ ds_destroy (&str);
+ }
+ }
- }
+ moments1_calculate (result->metrics[v].moments,
+ &n, &result->metrics[v].mean,
+ &result->metrics[v].variance,
+ &result->metrics[v].skewness,
+ &result->metrics[v].kurtosis);
+
+ result->metrics[v].se_mean = sqrt (result->metrics[v].variance / n) ;
+
+ /* Total Valid */
+ tab_double (tbl, heading_columns,
+ heading_rows + j + v * ll_count (&fctr->result_list),
+ TAB_LEFT,
+ n, wfmt);
+
+ tab_text_format (tbl, heading_columns + 1,
+ heading_rows + j + v * ll_count (&fctr->result_list),
+ TAB_RIGHT,
+ "%g%%", n * 100.0 / result->metrics[v].n);
+
+ /* Total Missing */
+ tab_double (tbl, heading_columns + 2,
+ heading_rows + j + v * ll_count (&fctr->result_list),
+ TAB_LEFT,
+ result->metrics[v].n - n,
+ wfmt);
+
+ tab_text_format (tbl, heading_columns + 3,
+ heading_rows + j + v * ll_count (&fctr->result_list),
+ TAB_RIGHT,
+ "%g%%",
+ (result->metrics[v].n - n) * 100.0 / result->metrics[v].n
+ );
+
+ /* Total Valid + Missing */
+ tab_double (tbl, heading_columns + 4,
+ heading_rows + j + v * ll_count (&fctr->result_list),
+ TAB_LEFT,
+ result->metrics[v].n,
+ wfmt);
+
+ tab_text_format (tbl, heading_columns + 5,
+ heading_rows + j + v * ll_count (&fctr->result_list),
+ TAB_RIGHT,
+ "%g%%",
+ ((result->metrics[v].n) * 100.0
+ / result->metrics[v].n));
+
+ ++j;
+ }
+ }
-}
+ tab_submit (tbl);
+}
+#define DESCRIPTIVE_ROWS 13
-static void
-show_extremes(struct variable **dependent_var, int n_dep_var,
- const struct factor *fctr, int n_extremities)
+static void
+show_descriptives (const struct variable **dependent_var,
+ int n_dep_var,
+ const struct xfactor *fctr)
{
- int i;
- int heading_columns ;
+ int v;
+ int heading_columns = 3;
int n_cols;
const int heading_rows = 1;
struct tab_table *tbl;
- int n_factors = 1;
int n_rows ;
+ n_rows = n_dep_var;
- if ( fctr )
- {
- heading_columns = 2;
- n_factors = hsh_count(fctr->fstats);
+ assert (fctr);
- n_rows = n_dep_var * 2 * n_extremities * n_factors;
+ if ( fctr->indep_var[0] )
+ {
+ heading_columns = 4;
if ( fctr->indep_var[1] )
- heading_columns = 3;
- }
- else
- {
- heading_columns = 1;
- n_rows = n_dep_var * 2 * n_extremities;
+ {
+ heading_columns = 5;
+ }
}
+ n_rows *= ll_count (&fctr->result_list) * DESCRIPTIVE_ROWS;
n_rows += heading_rows;
- heading_columns += 2;
n_cols = heading_columns + 2;
- tbl = tab_create (n_cols,n_rows,0);
+ tbl = tab_create (n_cols, n_rows);
tab_headers (tbl, heading_columns, 0, heading_rows, 0);
- tab_dim (tbl, tab_natural_dimensions);
-
- /* Outline the box, No internal lines*/
- tab_box (tbl,
+ /* Outline the box */
+ tab_box (tbl,
TAL_2, TAL_2,
-1, -1,
0, 0,
n_cols - 1, n_rows - 1);
- tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
- tab_title (tbl, _("Extreme Values"));
+ tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
+ tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows );
- tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows -1);
- tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows -1);
+ tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1);
- if ( fctr )
- {
- tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
- var_to_string(fctr->indep_var[0]));
- if ( fctr->indep_var[1] )
- tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
- var_to_string(fctr->indep_var[1]));
- }
+ if ( fctr->indep_var[0])
+ tab_text (tbl, 1, 0, TAT_TITLE, var_to_string (fctr->indep_var[0]));
- tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value"));
- tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number"));
+ if ( fctr->indep_var[1])
+ tab_text (tbl, 2, 0, TAT_TITLE, var_to_string (fctr->indep_var[1]));
- for ( i = 0 ; i < n_dep_var ; ++i )
+ for (v = 0 ; v < n_dep_var ; ++v )
{
+ struct ll *ll;
+ int i = 0;
- if ( i > 0 )
- tab_hline(tbl, TAL_1, 0, n_cols -1 ,
- i * 2 * n_extremities * n_factors + heading_rows);
-
- tab_text (tbl, 0,
- i * 2 * n_extremities * n_factors + heading_rows,
- TAB_LEFT | TAT_TITLE,
- var_to_string(dependent_var[i])
- );
-
+ const int row_var_start =
+ v * DESCRIPTIVE_ROWS * ll_count(&fctr->result_list);
- if ( !fctr )
- populate_extremes(tbl, heading_columns - 2,
- i * 2 * n_extremities * n_factors + heading_rows,
- n_extremities, &totals[i]);
+ tab_text (tbl,
+ 0,
+ heading_rows + row_var_start,
+ TAB_LEFT | TAT_TITLE,
+ var_to_string (dependent_var[v])
+ );
- else
+ for (ll = ll_head (&fctr->result_list);
+ ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll))
{
- struct factor_statistics **fs = fctr->fs;
- int count = 0 ;
+ const struct factor_result *result =
+ ll_data (ll, struct factor_result, ll);
- while (*fs)
- {
- static union value prev ;
+ const double t =
+ gsl_cdf_tdist_Qinv ((1 - cmd.n_cinterval[0] / 100.0) / 2.0,
+ result->metrics[v].n - 1);
- const int row = heading_rows + ( 2 * n_extremities ) *
- ( ( i * n_factors ) + count );
-
-
- if ( 0 != compare_values(&prev, &(*fs)->id[0],
- fctr->indep_var[0]->width))
- {
-
- if ( count > 0 )
- tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
-
- tab_text (tbl,
- 1, row,
- TAB_LEFT | TAT_TITLE,
- value_to_string(&(*fs)->id[0], fctr->indep_var[0])
- );
- }
-
- prev = (*fs)->id[0];
-
- if (fctr->indep_var[1] && count > 0 )
- tab_hline(tbl, TAL_1, 2, n_cols - 1, row);
-
- if ( fctr->indep_var[1])
- tab_text (tbl, 2, row,
- TAB_LEFT | TAT_TITLE,
- value_to_string(&(*fs)->id[1], fctr->indep_var[1])
- );
-
- populate_extremes(tbl, heading_columns - 2,
- row, n_extremities,
- &(*fs)->m[i]);
-
- count++ ;
- fs++;
+ if ( i > 0 || v > 0 )
+ {
+ const int left_col = (i == 0) ? 0 : 1;
+ tab_hline (tbl, TAL_1, left_col, n_cols - 1,
+ heading_rows + row_var_start + i * DESCRIPTIVE_ROWS);
}
- }
- }
-
- tab_submit(tbl);
-}
-
-
-
-/* Fill in the extremities table */
-void
-populate_extremes(struct tab_table *t,
- int col, int row, int n, const struct metrics *m)
-{
- int extremity;
- int idx=0;
-
-
- tab_text(t, col, row,
- TAB_RIGHT | TAT_TITLE ,
- _("Highest")
- );
- tab_text(t, col, row + n ,
- TAB_RIGHT | TAT_TITLE ,
- _("Lowest")
- );
-
-
- tab_hline(t, TAL_1, col, col + 3, row + n );
-
- for (extremity = 0; extremity < n ; ++extremity )
- {
- /* Highest */
- tab_float(t, col + 1, row + extremity,
- TAB_RIGHT,
- extremity + 1, 8, 0);
-
-
- /* Lowest */
- tab_float(t, col + 1, row + extremity + n,
- TAB_RIGHT,
- extremity + 1, 8, 0);
-
- }
-
-
- /* Lowest */
- for (idx = 0, extremity = 0; extremity < n && idx < m->n_data ; ++idx )
- {
- int j;
- const struct weighted_value *wv = m->wvp[idx];
- struct case_node *cn = wv->case_nos;
-
-
- for (j = 0 ; j < wv->w ; ++j )
- {
- if ( extremity + j >= n )
- break ;
+ if ( fctr->indep_var[0])
+ {
+ struct string vstr;
+ ds_init_empty (&vstr);
+ var_append_value_name (fctr->indep_var[0],
+ &result->value[0], &vstr);
+
+ tab_text (tbl, 1,
+ heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
+ TAB_LEFT,
+ ds_cstr (&vstr)
+ );
- tab_float(t, col + 3, row + extremity + j + n,
- TAB_RIGHT,
- wv->v.f, 8, 2);
+ ds_destroy (&vstr);
+ }
- tab_float(t, col + 2, row + extremity + j + n,
- TAB_RIGHT,
- cn->num, 8, 0);
- if ( cn->next )
- cn = cn->next;
+ tab_text (tbl, n_cols - 4,
+ heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
+ TAB_LEFT,
+ _("Mean"));
+
+ tab_text_format (tbl, n_cols - 4,
+ heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS,
+ TAB_LEFT,
+ _("%g%% Confidence Interval for Mean"),
+ cmd.n_cinterval[0]);
+
+ tab_text (tbl, n_cols - 3,
+ heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS,
+ TAB_LEFT,
+ _("Lower Bound"));
+ tab_text (tbl, n_cols - 3,
+ heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS,
+ TAB_LEFT,
+ _("Upper Bound"));
+
+ tab_text (tbl, n_cols - 4,
+ heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS,
+ TAB_LEFT, _("5% Trimmed Mean"));
+
+ tab_text (tbl, n_cols - 4,
+ heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS,
+ TAB_LEFT,
+ _("Median"));
+
+ tab_text (tbl, n_cols - 4,
+ heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS,
+ TAB_LEFT,
+ _("Variance"));
+
+ tab_text (tbl, n_cols - 4,
+ heading_rows + row_var_start + 6 + i * DESCRIPTIVE_ROWS,
+ TAB_LEFT,
+ _("Std. Deviation"));
+
+ tab_text (tbl, n_cols - 4,
+ heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS,
+ TAB_LEFT,
+ _("Minimum"));
+
+ tab_text (tbl, n_cols - 4,
+ heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS,
+ TAB_LEFT,
+ _("Maximum"));
+
+ tab_text (tbl, n_cols - 4,
+ heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS,
+ TAB_LEFT,
+ _("Range"));
+
+ tab_text (tbl, n_cols - 4,
+ heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS,
+ TAB_LEFT,
+ _("Interquartile Range"));
+
+
+ tab_text (tbl, n_cols - 4,
+ heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS,
+ TAB_LEFT,
+ _("Skewness"));
+
+ tab_text (tbl, n_cols - 4,
+ heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS,
+ TAB_LEFT,
+ _("Kurtosis"));
+
+
+ /* Now the statistics ... */
+
+ tab_double (tbl, n_cols - 2,
+ heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
+ TAB_CENTER,
+ result->metrics[v].mean,
+ NULL);
+
+ tab_double (tbl, n_cols - 1,
+ heading_rows + row_var_start + i * DESCRIPTIVE_ROWS,
+ TAB_CENTER,
+ result->metrics[v].se_mean,
+ NULL);
+
+
+ tab_double (tbl, n_cols - 2,
+ heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS,
+ TAB_CENTER,
+ result->metrics[v].mean - t *
+ result->metrics[v].se_mean,
+ NULL);
+
+ tab_double (tbl, n_cols - 2,
+ heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS,
+ TAB_CENTER,
+ result->metrics[v].mean + t *
+ result->metrics[v].se_mean,
+ NULL);
+
+
+ tab_double (tbl, n_cols - 2,
+ heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS,
+ TAB_CENTER,
+ trimmed_mean_calculate (result->metrics[v].trimmed_mean),
+ NULL);
+
+
+ tab_double (tbl, n_cols - 2,
+ heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS,
+ TAB_CENTER,
+ percentile_calculate (result->metrics[v].quartiles[1], percentile_algorithm),
+ NULL);
+
+
+ tab_double (tbl, n_cols - 2,
+ heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS,
+ TAB_CENTER,
+ result->metrics[v].variance,
+ NULL);
+
+ tab_double (tbl, n_cols - 2,
+ heading_rows + row_var_start + 6 + i * DESCRIPTIVE_ROWS,
+ TAB_CENTER,
+ sqrt (result->metrics[v].variance),
+ NULL);
+
+ tab_double (tbl, n_cols - 2,
+ heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS,
+ TAB_CENTER,
+ percentile_calculate (result->metrics[v].quartiles[2],
+ percentile_algorithm) -
+ percentile_calculate (result->metrics[v].quartiles[0],
+ percentile_algorithm),
+ NULL);
+
+
+ tab_double (tbl, n_cols - 2,
+ heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS,
+ TAB_CENTER,
+ result->metrics[v].skewness,
+ NULL);
+
+ tab_double (tbl, n_cols - 2,
+ heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS,
+ TAB_CENTER,
+ result->metrics[v].kurtosis,
+ NULL);
+
+ tab_double (tbl, n_cols - 1,
+ heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS,
+ TAB_CENTER,
+ calc_seskew (result->metrics[v].n),
+ NULL);
+
+ tab_double (tbl, n_cols - 1,
+ heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS,
+ TAB_CENTER,
+ calc_sekurt (result->metrics[v].n),
+ NULL);
+
+ {
+ struct extremum *minimum, *maximum ;
+
+ struct ll *max_ll = ll_head (extrema_list (result->metrics[v].maxima));
+ struct ll *min_ll = ll_head (extrema_list (result->metrics[v].minima));
+
+ maximum = ll_data (max_ll, struct extremum, ll);
+ minimum = ll_data (min_ll, struct extremum, ll);
+
+ tab_double (tbl, n_cols - 2,
+ heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS,
+ TAB_CENTER,
+ minimum->value,
+ NULL);
+
+ tab_double (tbl, n_cols - 2,
+ heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS,
+ TAB_CENTER,
+ maximum->value,
+ NULL);
+
+ tab_double (tbl, n_cols - 2,
+ heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS,
+ TAB_CENTER,
+ maximum->value - minimum->value,
+ NULL);
+ }
}
-
- extremity += wv->w ;
}
+ tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
- /* Highest */
- for (idx = m->n_data - 1, extremity = 0; extremity < n && idx >= 0; --idx )
- {
- int j;
- const struct weighted_value *wv = m->wvp[idx];
- struct case_node *cn = wv->case_nos;
-
- for (j = 0 ; j < wv->w ; ++j )
- {
- if ( extremity + j >= n )
- break ;
-
- tab_float(t, col + 3, row + extremity + j,
- TAB_RIGHT,
- wv->v.f, 8, 2);
-
- tab_float(t, col + 2, row + extremity + j,
- TAB_RIGHT,
- cn->num, 8, 0);
+ tab_title (tbl, _("Descriptives"));
- if ( cn->next )
- cn = cn->next;
+ tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE,
+ _("Statistic"));
- }
+ tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE,
+ _("Std. Error"));
- extremity += wv->w ;
- }
+ tab_submit (tbl);
}
-/* Show the descriptives table */
-void
-show_descriptives(struct variable **dependent_var,
- int n_dep_var,
- struct factor *fctr)
+
+static void
+show_extremes (const struct variable **dependent_var,
+ int n_dep_var,
+ const struct xfactor *fctr)
{
- int i;
- int heading_columns ;
+ int v;
+ int heading_columns = 3;
int n_cols;
- const int n_stat_rows = 13;
-
const int heading_rows = 1;
-
struct tab_table *tbl;
- int n_factors = 1;
int n_rows ;
+ n_rows = n_dep_var;
+
+ assert (fctr);
- if ( fctr )
+ if ( fctr->indep_var[0] )
{
heading_columns = 4;
- n_factors = hsh_count(fctr->fstats);
-
- n_rows = n_dep_var * n_stat_rows * n_factors;
if ( fctr->indep_var[1] )
- heading_columns = 5;
- }
- else
- {
- heading_columns = 3;
- n_rows = n_dep_var * n_stat_rows;
+ {
+ heading_columns = 5;
+ }
}
+ n_rows *= ll_count (&fctr->result_list) * cmd.st_n * 2;
n_rows += heading_rows;
n_cols = heading_columns + 2;
+ tbl = tab_create (n_cols, n_rows);
+ tab_headers (tbl, heading_columns, 0, heading_rows, 0);
- tbl = tab_create (n_cols, n_rows, 0);
-
- tab_headers (tbl, heading_columns + 1, 0, heading_rows, 0);
-
- tab_dim (tbl, tab_natural_dimensions);
-
- /* Outline the box and have no internal lines*/
- tab_box (tbl,
+ /* Outline the box */
+ tab_box (tbl,
TAL_2, TAL_2,
-1, -1,
0, 0,
n_cols - 1, n_rows - 1);
- tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
- tab_vline (tbl, TAL_1, heading_columns, 0, n_rows - 1);
- tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows - 1);
+ tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
+ tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows );
tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1);
- tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Statistic"));
- tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Std. Error"));
-
- tab_title (tbl, _("Descriptives"));
+ if ( fctr->indep_var[0])
+ tab_text (tbl, 1, 0, TAT_TITLE, var_to_string (fctr->indep_var[0]));
+ if ( fctr->indep_var[1])
+ tab_text (tbl, 2, 0, TAT_TITLE, var_to_string (fctr->indep_var[1]));
- for ( i = 0 ; i < n_dep_var ; ++i )
+ for (v = 0 ; v < n_dep_var ; ++v )
{
- const int row = heading_rows + i * n_stat_rows * n_factors ;
-
- if ( i > 0 )
- tab_hline(tbl, TAL_1, 0, n_cols - 1, row );
-
- tab_text (tbl, 0,
- i * n_stat_rows * n_factors + heading_rows,
- TAB_LEFT | TAT_TITLE,
- var_to_string(dependent_var[i])
+ struct ll *ll;
+ int i = 0;
+ const int row_var_start = v * cmd.st_n * 2 * ll_count(&fctr->result_list);
+
+ tab_text (tbl,
+ 0,
+ heading_rows + row_var_start,
+ TAB_LEFT | TAT_TITLE,
+ var_to_string (dependent_var[v])
);
-
- if ( fctr )
+ for (ll = ll_head (&fctr->result_list);
+ ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll))
{
- struct factor_statistics **fs = fctr->fs;
- int count = 0;
+ int e ;
+ struct ll *min_ll;
+ struct ll *max_ll;
+ const int row_result_start = i * cmd.st_n * 2;
- tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE,
- var_to_string(fctr->indep_var[0]));
+ const struct factor_result *result =
+ ll_data (ll, struct factor_result, ll);
+ if (i > 0 || v > 0)
+ tab_hline (tbl, TAL_1, 1, n_cols - 1,
+ heading_rows + row_var_start + row_result_start);
- if ( fctr->indep_var[1])
- tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE,
- var_to_string(fctr->indep_var[1]));
+ tab_hline (tbl, TAL_1, heading_columns - 2, n_cols - 1,
+ heading_rows + row_var_start + row_result_start + cmd.st_n);
- while( *fs )
+ for ( e = 1; e <= cmd.st_n; ++e )
{
+ tab_text_format (tbl, n_cols - 3,
+ heading_rows + row_var_start + row_result_start + e - 1,
+ TAB_RIGHT,
+ "%d", e);
+
+ tab_text_format (tbl, n_cols - 3,
+ heading_rows + row_var_start + row_result_start + cmd.st_n + e - 1,
+ TAB_RIGHT,
+ "%d", e);
+ }
- static union value prev ;
-
- const int row = heading_rows + n_stat_rows *
- ( ( i * n_factors ) + count );
+ min_ll = ll_head (extrema_list (result->metrics[v].minima));
+ for (e = 0; e < cmd.st_n;)
+ {
+ struct extremum *minimum = ll_data (min_ll, struct extremum, ll);
+ double weight = minimum->weight;
- if ( 0 != compare_values(&prev, &(*fs)->id[0],
- fctr->indep_var[0]->width))
+ while (weight-- > 0 && e < cmd.st_n)
{
-
- if ( count > 0 )
- tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
-
- tab_text (tbl,
- 1, row,
- TAB_LEFT | TAT_TITLE,
- value_to_string(&(*fs)->id[0], fctr->indep_var[0])
- );
+ tab_double (tbl, n_cols - 1,
+ heading_rows + row_var_start + row_result_start + cmd.st_n + e,
+ TAB_RIGHT,
+ minimum->value,
+ NULL);
+
+
+ tab_fixed (tbl, n_cols - 2,
+ heading_rows + row_var_start +
+ row_result_start + cmd.st_n + e,
+ TAB_RIGHT,
+ minimum->location,
+ 10, 0);
+ ++e;
}
- prev = (*fs)->id[0];
-
- if (fctr->indep_var[1] && count > 0 )
- tab_hline(tbl, TAL_1, 2, n_cols - 1, row);
-
- if ( fctr->indep_var[1])
- tab_text (tbl, 2, row,
- TAB_LEFT | TAT_TITLE,
- value_to_string(&(*fs)->id[1], fctr->indep_var[1])
- );
-
- populate_descriptives(tbl, heading_columns - 2,
- row, &(*fs)->m[i]);
-
- count++ ;
- fs++;
+ min_ll = ll_next (min_ll);
}
- }
-
- else
- {
-
- populate_descriptives(tbl, heading_columns - 2,
- i * n_stat_rows * n_factors + heading_rows,
- &totals[i]);
- }
- }
-
- tab_submit(tbl);
-
-}
-
-
-
-
-/* Fill in the descriptives data */
-void
-populate_descriptives(struct tab_table *tbl, int col, int row,
- const struct metrics *m)
-{
-
- const double t = gsl_cdf_tdist_Qinv(1 - cmd.n_cinterval[0]/100.0/2.0, \
- m->n -1);
-
-
- tab_text (tbl, col,
- row,
- TAB_LEFT | TAT_TITLE,
- _("Mean"));
-
- tab_float (tbl, col + 2,
- row,
- TAB_CENTER,
- m->mean,
- 8,2);
-
- tab_float (tbl, col + 3,
- row,
- TAB_CENTER,
- m->se_mean,
- 8,3);
-
-
- tab_text (tbl, col,
- row + 1,
- TAB_LEFT | TAT_TITLE | TAT_PRINTF,
- _("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]);
-
-
- tab_text (tbl, col + 1,
- row + 1,
- TAB_LEFT | TAT_TITLE,
- _("Lower Bound"));
-
- tab_float (tbl, col + 2,
- row + 1,
- TAB_CENTER,
- m->mean - t * m->se_mean,
- 8,3);
-
- tab_text (tbl, col + 1,
- row + 2,
- TAB_LEFT | TAT_TITLE,
- _("Upper Bound"));
-
-
- tab_float (tbl, col + 2,
- row + 2,
- TAB_CENTER,
- m->mean + t * m->se_mean,
- 8,3);
-
- tab_text (tbl, col,
- row + 3,
- TAB_LEFT | TAT_TITLE | TAT_PRINTF,
- _("5%% Trimmed Mean"));
-
- tab_float (tbl, col + 2,
- row + 3,
- TAB_CENTER,
- m->trimmed_mean,
- 8,2);
-
- tab_text (tbl, col,
- row + 4,
- TAB_LEFT | TAT_TITLE,
- _("Median"));
-
- {
- struct percentile *p;
- double d = 50;
-
- p = hsh_find(m->ptile_hash, &d);
-
- assert(p);
-
-
- tab_float (tbl, col + 2,
- row + 4,
- TAB_CENTER,
- p->v,
- 8, 2);
- }
-
-
- tab_text (tbl, col,
- row + 5,
- TAB_LEFT | TAT_TITLE,
- _("Variance"));
-
- tab_float (tbl, col + 2,
- row + 5,
- TAB_CENTER,
- m->var,
- 8,3);
-
-
- tab_text (tbl, col,
- row + 6,
- TAB_LEFT | TAT_TITLE,
- _("Std. Deviation"));
-
-
- tab_float (tbl, col + 2,
- row + 6,
- TAB_CENTER,
- m->stddev,
- 8,3);
-
-
- tab_text (tbl, col,
- row + 7,
- TAB_LEFT | TAT_TITLE,
- _("Minimum"));
-
- tab_float (tbl, col + 2,
- row + 7,
- TAB_CENTER,
- m->min,
- 8,3);
-
- tab_text (tbl, col,
- row + 8,
- TAB_LEFT | TAT_TITLE,
- _("Maximum"));
-
- tab_float (tbl, col + 2,
- row + 8,
- TAB_CENTER,
- m->max,
- 8,3);
-
-
- tab_text (tbl, col,
- row + 9,
- TAB_LEFT | TAT_TITLE,
- _("Range"));
-
-
- tab_float (tbl, col + 2,
- row + 9,
- TAB_CENTER,
- m->max - m->min,
- 8,3);
-
- tab_text (tbl, col,
- row + 10,
- TAB_LEFT | TAT_TITLE,
- _("Interquartile Range"));
-
- {
- struct percentile *p1;
- struct percentile *p2;
-
- double d = 75;
- p1 = hsh_find(m->ptile_hash, &d);
-
- d = 25;
- p2 = hsh_find(m->ptile_hash, &d);
-
- assert(p1);
- assert(p2);
-
- tab_float (tbl, col + 2,
- row + 10,
- TAB_CENTER,
- p1->v - p2->v,
- 8, 2);
- }
-
-
-
- tab_text (tbl, col,
- row + 11,
- TAB_LEFT | TAT_TITLE,
- _("Skewness"));
-
-
- tab_float (tbl, col + 2,
- row + 11,
- TAB_CENTER,
- m->skewness,
- 8,3);
-
- /* stderr of skewness */
- tab_float (tbl, col + 3,
- row + 11,
- TAB_CENTER,
- calc_seskew(m->n),
- 8,3);
-
-
- tab_text (tbl, col,
- row + 12,
- TAB_LEFT | TAT_TITLE,
- _("Kurtosis"));
-
-
- tab_float (tbl, col + 2,
- row + 12,
- TAB_CENTER,
- m->kurtosis,
- 8,3);
-
- /* stderr of kurtosis */
- tab_float (tbl, col + 3,
- row + 12,
- TAB_CENTER,
- calc_sekurt(m->n),
- 8,3);
-
-
-}
-
-
-
-void
-box_plot_variables(const struct factor *fctr,
- const struct variable **vars, int n_vars,
- const struct variable *id)
-{
-
- int i;
- struct factor_statistics **fs ;
+ max_ll = ll_head (extrema_list (result->metrics[v].maxima));
+ for (e = 0; e < cmd.st_n;)
+ {
+ struct extremum *maximum = ll_data (max_ll, struct extremum, ll);
+ double weight = maximum->weight;
- if ( ! fctr )
- {
- box_plot_group(fctr, vars, n_vars, id);
- return;
- }
+ while (weight-- > 0 && e < cmd.st_n)
+ {
+ tab_double (tbl, n_cols - 1,
+ heading_rows + row_var_start +
+ row_result_start + e,
+ TAB_RIGHT,
+ maximum->value,
+ NULL);
+
+
+ tab_fixed (tbl, n_cols - 2,
+ heading_rows + row_var_start +
+ row_result_start + e,
+ TAB_RIGHT,
+ maximum->location,
+ 10, 0);
+ ++e;
+ }
- for ( fs = fctr->fs ; *fs ; ++fs )
- {
- double y_min = DBL_MAX;
- double y_max = -DBL_MAX;
- struct chart *ch = chart_create();
- const char *s = factor_to_string(fctr, *fs, 0 );
+ max_ll = ll_next (max_ll);
+ }
- chart_write_title(ch, s);
- for ( i = 0 ; i < n_vars ; ++i )
- {
- y_max = max(y_max, (*fs)->m[i].max);
- y_min = min(y_min, (*fs)->m[i].min);
- }
-
- boxplot_draw_yscale(ch, y_max, y_min);
-
- for ( i = 0 ; i < n_vars ; ++i )
- {
+ if ( fctr->indep_var[0])
+ {
+ struct string vstr;
+ ds_init_empty (&vstr);
+ var_append_value_name (fctr->indep_var[0],
+ &result->value[0], &vstr);
+
+ tab_text (tbl, 1,
+ heading_rows + row_var_start + row_result_start,
+ TAB_LEFT,
+ ds_cstr (&vstr)
+ );
- const double box_width = (ch->data_right - ch->data_left)
- / (n_vars * 2.0 ) ;
+ ds_destroy (&vstr);
+ }
- const double box_centre = ( i * 2 + 1) * box_width
- + ch->data_left;
-
- boxplot_draw_boxplot(ch,
- box_centre, box_width,
- &(*fs)->m[i],
- var_to_string(vars[i]));
+ tab_text (tbl, n_cols - 4,
+ heading_rows + row_var_start + row_result_start,
+ TAB_RIGHT,
+ _("Highest"));
+ tab_text (tbl, n_cols - 4,
+ heading_rows + row_var_start + row_result_start + cmd.st_n,
+ TAB_RIGHT,
+ _("Lowest"));
}
-
- chart_submit(ch);
-
}
-}
-
-
-
-/* Do a box plot, grouping all factors into one plot ;
- each dependent variable has its own plot.
-*/
-void
-box_plot_group(const struct factor *fctr,
- const struct variable **vars,
- int n_vars,
- const struct variable *id UNUSED)
-{
-
- int i;
- for ( i = 0 ; i < n_vars ; ++i )
- {
- struct factor_statistics **fs ;
- struct chart *ch;
-
- ch = chart_create();
+ tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
- boxplot_draw_yscale(ch, totals[i].max, totals[i].min);
- if ( fctr )
- {
- int n_factors = 0;
- int f=0;
- for ( fs = fctr->fs ; *fs ; ++fs )
- ++n_factors;
+ tab_title (tbl, _("Extreme Values"));
- chart_write_title(ch, _("Boxplot of %s vs. %s"),
- var_to_string(vars[i]), var_to_string(fctr->indep_var[0]) );
- for ( fs = fctr->fs ; *fs ; ++fs )
- {
-
- const char *s = factor_to_string_concise(fctr, *fs);
-
- const double box_width = (ch->data_right - ch->data_left)
- / (n_factors * 2.0 ) ;
-
- const double box_centre = ( f++ * 2 + 1) * box_width
- + ch->data_left;
-
- boxplot_draw_boxplot(ch,
- box_centre, box_width,
- &(*fs)->m[i],
- s);
- }
- }
- else if ( ch )
- {
- const double box_width = (ch->data_right - ch->data_left) / 3.0;
- const double box_centre = (ch->data_right + ch->data_left) / 2.0;
+ tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE,
+ _("Case Number"));
- chart_write_title(ch, _("Boxplot"));
- boxplot_draw_boxplot(ch,
- box_centre, box_width,
- &totals[i],
- var_to_string(vars[i]) );
-
- }
+ tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE,
+ _("Value"));
- chart_submit(ch);
- }
+ tab_submit (tbl);
}
+#define PERCENTILE_ROWS 2
-/* Plot the normal and detrended normal plots for m
- Label the plots with factorname */
-void
-np_plot(const struct metrics *m, const char *factorname)
+static void
+show_percentiles (const struct variable **dependent_var,
+ int n_dep_var,
+ const struct xfactor *fctr)
{
int i;
- double yfirst=0, ylast=0;
-
- /* Normal Plot */
- struct chart *np_chart;
-
- /* Detrended Normal Plot */
- struct chart *dnp_chart;
-
- /* The slope and intercept of the ideal normal probability line */
- const double slope = 1.0 / m->stddev;
- const double intercept = - m->mean / m->stddev;
-
- /* Cowardly refuse to plot an empty data set */
- if ( m->n_data == 0 )
- return ;
-
- np_chart = chart_create();
- dnp_chart = chart_create();
-
- if ( !np_chart || ! dnp_chart )
- return ;
-
- chart_write_title(np_chart, _("Normal Q-Q Plot of %s"), factorname);
- chart_write_xlabel(np_chart, _("Observed Value"));
- chart_write_ylabel(np_chart, _("Expected Normal"));
-
-
- chart_write_title(dnp_chart, _("Detrended Normal Q-Q Plot of %s"),
- factorname);
- chart_write_xlabel(dnp_chart, _("Observed Value"));
- chart_write_ylabel(dnp_chart, _("Dev from Normal"));
-
- yfirst = gsl_cdf_ugaussian_Pinv (m->wvp[0]->rank / ( m->n + 1));
- ylast = gsl_cdf_ugaussian_Pinv (m->wvp[m->n_data-1]->rank / ( m->n + 1));
-
-
- {
- /* Need to make sure that both the scatter plot and the ideal fit into the
- plot */
- double x_lower = min(m->min, (yfirst - intercept) / slope) ;
- double x_upper = max(m->max, (ylast - intercept) / slope) ;
- double slack = (x_upper - x_lower) * 0.05 ;
-
- chart_write_xscale(np_chart, x_lower - slack, x_upper + slack, 5);
-
- chart_write_xscale(dnp_chart, m->min, m->max, 5);
-
- }
-
- chart_write_yscale(np_chart, yfirst, ylast, 5);
-
- {
- /* We have to cache the detrended data, beacause we need to
- find its limits before we can plot it */
- double *d_data = xnmalloc (m->n_data, sizeof *d_data);
- double d_max = -DBL_MAX;
- double d_min = DBL_MAX;
- for ( i = 0 ; i < m->n_data; ++i )
- {
- const double ns = gsl_cdf_ugaussian_Pinv (m->wvp[i]->rank / ( m->n + 1));
-
- chart_datum(np_chart, 0, m->wvp[i]->v.f, ns);
-
- d_data[i] = (m->wvp[i]->v.f - m->mean) / m->stddev - ns;
-
- if ( d_data[i] < d_min ) d_min = d_data[i];
- if ( d_data[i] > d_max ) d_max = d_data[i];
- }
- chart_write_yscale(dnp_chart, d_min, d_max, 5);
-
- for ( i = 0 ; i < m->n_data; ++i )
- chart_datum(dnp_chart, 0, m->wvp[i]->v.f, d_data[i]);
-
- free(d_data);
- }
-
- chart_line(np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y);
- chart_line(dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X);
-
- chart_submit(np_chart);
- chart_submit(dnp_chart);
-}
-
-
-
-
-/* Show the percentiles */
-void
-show_percentiles(struct variable **dependent_var,
- int n_dep_var,
- struct factor *fctr)
-{
+ int v;
+ int heading_columns = 2;
+ int n_cols;
+ const int n_percentiles = subc_list_double_count (&percentile_list);
+ const int heading_rows = 2;
struct tab_table *tbl;
- int i;
-
- int n_cols, n_rows;
- int n_factors;
- struct hsh_table *ptiles ;
-
- int n_heading_columns;
- const int n_heading_rows = 2;
- const int n_stat_rows = 2;
+ int n_rows ;
+ n_rows = n_dep_var;
- int n_ptiles ;
+ assert (fctr);
- if ( fctr )
+ if ( fctr->indep_var[0] )
{
- struct factor_statistics **fs = fctr->fs ;
- n_heading_columns = 3;
- n_factors = hsh_count(fctr->fstats);
-
- ptiles = (*fs)->m[0].ptile_hash;
+ heading_columns = 3;
if ( fctr->indep_var[1] )
- n_heading_columns = 4;
- }
- else
- {
- n_factors = 1;
- n_heading_columns = 2;
-
- ptiles = totals[0].ptile_hash;
+ {
+ heading_columns = 4;
+ }
}
- n_ptiles = hsh_count(ptiles);
-
- n_rows = n_heading_rows + n_dep_var * n_stat_rows * n_factors;
-
- n_cols = n_heading_columns + n_ptiles ;
-
- tbl = tab_create (n_cols, n_rows, 0);
+ n_rows *= ll_count (&fctr->result_list) * PERCENTILE_ROWS;
+ n_rows += heading_rows;
- tab_headers (tbl, n_heading_columns + 1, 0, n_heading_rows, 0);
+ n_cols = heading_columns + n_percentiles;
- tab_dim (tbl, tab_natural_dimensions);
+ tbl = tab_create (n_cols, n_rows);
+ tab_headers (tbl, heading_columns, 0, heading_rows, 0);
- /* Outline the box and have no internal lines*/
- tab_box (tbl,
+ /* Outline the box */
+ tab_box (tbl,
TAL_2, TAL_2,
-1, -1,
0, 0,
n_cols - 1, n_rows - 1);
- tab_hline (tbl, TAL_2, 0, n_cols - 1, n_heading_rows );
-
- tab_vline (tbl, TAL_2, n_heading_columns, 0, n_rows - 1);
-
-
- tab_title (tbl, _("Percentiles"));
-
-
- tab_hline (tbl, TAL_1, n_heading_columns, n_cols - 1, 1 );
-
-
- tab_box (tbl,
- -1, -1,
- -1, TAL_1,
- 0, n_heading_rows,
- n_heading_columns - 1, n_rows - 1);
-
-
- tab_box (tbl,
- -1, -1,
- -1, TAL_1,
- n_heading_columns, n_heading_rows - 1,
- n_cols - 1, n_rows - 1);
-
- tab_joint_text(tbl, n_heading_columns + 1, 0,
- n_cols - 1 , 0,
- TAB_CENTER | TAT_TITLE ,
- _("Percentiles"));
-
-
- {
- /* Put in the percentile break points as headings */
- struct percentile **p = (struct percentile **) hsh_sort(ptiles);
+ tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows );
+ tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows );
- i = 0;
- while ( (*p) )
- {
- tab_float(tbl, n_heading_columns + i++ , 1,
- TAB_CENTER,
- (*p)->p, 8, 0);
-
- p++;
- }
+ if ( fctr->indep_var[0])
+ tab_text (tbl, 1, 1, TAT_TITLE, var_to_string (fctr->indep_var[0]));
- }
+ if ( fctr->indep_var[1])
+ tab_text (tbl, 2, 1, TAT_TITLE, var_to_string (fctr->indep_var[1]));
- for ( i = 0 ; i < n_dep_var ; ++i )
+ for (v = 0 ; v < n_dep_var ; ++v )
{
- const int n_stat_rows = 2;
- const int row = n_heading_rows + i * n_stat_rows * n_factors ;
-
- if ( i > 0 )
- tab_hline(tbl, TAL_1, 0, n_cols - 1, row );
-
- tab_text (tbl, 0,
- i * n_stat_rows * n_factors + n_heading_rows,
- TAB_LEFT | TAT_TITLE,
- var_to_string(dependent_var[i])
+ double hinges[3];
+ struct ll *ll;
+ int i = 0;
+
+ const int row_var_start =
+ v * PERCENTILE_ROWS * ll_count(&fctr->result_list);
+
+ tab_text (tbl,
+ 0,
+ heading_rows + row_var_start,
+ TAB_LEFT | TAT_TITLE,
+ var_to_string (dependent_var[v])
);
- if ( fctr )
+ for (ll = ll_head (&fctr->result_list);
+ ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll))
{
- struct factor_statistics **fs = fctr->fs;
- int count = 0;
+ int j;
+ const struct factor_result *result =
+ ll_data (ll, struct factor_result, ll);
- tab_text (tbl, 1, n_heading_rows - 1,
- TAB_CENTER | TAT_TITLE,
- var_to_string(fctr->indep_var[0]));
-
-
- if ( fctr->indep_var[1])
- tab_text (tbl, 2, n_heading_rows - 1, TAB_CENTER | TAT_TITLE,
- var_to_string(fctr->indep_var[1]));
-
- while( *fs )
+ if ( i > 0 || v > 0 )
{
+ const int left_col = (i == 0) ? 0 : 1;
+ tab_hline (tbl, TAL_1, left_col, n_cols - 1,
+ heading_rows + row_var_start + i * PERCENTILE_ROWS);
+ }
- static union value prev ;
-
- const int row = n_heading_rows + n_stat_rows *
- ( ( i * n_factors ) + count );
-
-
- if ( 0 != compare_values(&prev, &(*fs)->id[0],
- fctr->indep_var[0]->width))
- {
-
- if ( count > 0 )
- tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
+ if ( fctr->indep_var[0])
+ {
+ struct string vstr;
+ ds_init_empty (&vstr);
+ var_append_value_name (fctr->indep_var[0],
+ &result->value[0], &vstr);
+
+ tab_text (tbl, 1,
+ heading_rows + row_var_start + i * PERCENTILE_ROWS,
+ TAB_LEFT,
+ ds_cstr (&vstr)
+ );
- tab_text (tbl,
- 1, row,
- TAB_LEFT | TAT_TITLE,
- value_to_string(&(*fs)->id[0], fctr->indep_var[0])
- );
+ ds_destroy (&vstr);
+ }
- }
+ tab_text (tbl, n_cols - n_percentiles - 1,
+ heading_rows + row_var_start + i * PERCENTILE_ROWS,
+ TAB_LEFT,
+ ptile_alg_desc [percentile_algorithm]);
- prev = (*fs)->id[0];
- if (fctr->indep_var[1] && count > 0 )
- tab_hline(tbl, TAL_1, 2, n_cols - 1, row);
+ tab_text (tbl, n_cols - n_percentiles - 1,
+ heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS,
+ TAB_LEFT,
+ _("Tukey's Hinges"));
- if ( fctr->indep_var[1])
- tab_text (tbl, 2, row,
- TAB_LEFT | TAT_TITLE,
- value_to_string(&(*fs)->id[1], fctr->indep_var[1])
- );
+ tab_vline (tbl, TAL_1, n_cols - n_percentiles -1, heading_rows, n_rows - 1);
- populate_percentiles(tbl, n_heading_columns - 1,
- row, &(*fs)->m[i]);
+ tukey_hinges_calculate (result->metrics[v].tukey_hinges, hinges);
+ for (j = 0; j < n_percentiles; ++j)
+ {
+ double hinge = SYSMIS;
+ tab_double (tbl, n_cols - n_percentiles + j,
+ heading_rows + row_var_start + i * PERCENTILE_ROWS,
+ TAB_CENTER,
+ percentile_calculate (result->metrics[v].ptl[j],
+ percentile_algorithm),
+ NULL
+ );
+
+ if ( result->metrics[v].ptl[j]->ptile == 0.5)
+ hinge = hinges[1];
+ else if ( result->metrics[v].ptl[j]->ptile == 0.25)
+ hinge = hinges[0];
+ else if ( result->metrics[v].ptl[j]->ptile == 0.75)
+ hinge = hinges[2];
+
+ if ( hinge != SYSMIS)
+ tab_double (tbl, n_cols - n_percentiles + j,
+ heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS,
+ TAB_CENTER,
+ hinge,
+ NULL
+ );
- count++ ;
- fs++;
}
-
-
- }
- else
- {
- populate_percentiles(tbl, n_heading_columns - 1,
- i * n_stat_rows * n_factors + n_heading_rows,
- &totals[i]);
}
-
-
}
+ tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1);
- tab_submit(tbl);
-
+ tab_title (tbl, _("Percentiles"));
-}
+ for (i = 0 ; i < n_percentiles; ++i )
+ {
+ tab_text_format (tbl, n_cols - n_percentiles + i, 1,
+ TAB_CENTER | TAT_TITLE,
+ _("%g"),
+ subc_list_double_at (&percentile_list, i));
+ }
-void
-populate_percentiles(struct tab_table *tbl, int col, int row,
- const struct metrics *m)
-{
- int i;
+ tab_joint_text (tbl,
+ n_cols - n_percentiles, 0,
+ n_cols - 1, 0,
+ TAB_CENTER | TAT_TITLE,
+ _("Percentiles"));
- struct percentile **p = (struct percentile **) hsh_sort(m->ptile_hash);
-
- tab_text (tbl,
- col, row + 1,
- TAB_LEFT | TAT_TITLE,
- _("Tukey\'s Hinges")
- );
+ /* Vertical lines for the data only */
+ tab_box (tbl,
+ -1, -1,
+ -1, TAL_1,
+ n_cols - n_percentiles, 1,
+ n_cols - 1, n_rows - 1);
- tab_text (tbl,
- col, row,
- TAB_LEFT | TAT_TITLE,
- ptile_alg_desc[m->ptile_alg]
- );
+ tab_hline (tbl, TAL_1, n_cols - n_percentiles, n_cols - 1, 1);
- i = 0;
- while ( (*p) )
- {
- tab_float(tbl, col + i + 1 , row,
- TAB_CENTER,
- (*p)->v, 8, 2);
- if ( (*p)->p == 25 )
- tab_float(tbl, col + i + 1 , row + 1,
- TAB_CENTER,
- m->hinge[0], 8, 2);
+ tab_submit (tbl);
+}
- if ( (*p)->p == 50 )
- tab_float(tbl, col + i + 1 , row + 1,
- TAB_CENTER,
- m->hinge[1], 8, 2);
- if ( (*p)->p == 75 )
- tab_float(tbl, col + i + 1 , row + 1,
- TAB_CENTER,
- m->hinge[2], 8, 2);
+static void
+factor_to_string_concise (const struct xfactor *fctr,
+ const struct factor_result *result,
+ struct string *str
+ )
+{
+ if (fctr->indep_var[0])
+ {
+ var_append_value_name (fctr->indep_var[0], &result->value[0], str);
+ if ( fctr->indep_var[1] )
+ {
+ ds_put_cstr (str, ",");
- i++;
+ var_append_value_name (fctr->indep_var[1], &result->value[1], str);
- p++;
+ ds_put_cstr (str, ")");
+ }
}
-
}
-
-const char *
-factor_to_string(const struct factor *fctr,
- struct factor_statistics *fs,
- const struct variable *var)
+static void
+factor_to_string (const struct xfactor *fctr,
+ const struct factor_result *result,
+ struct string *str
+ )
{
+ if (fctr->indep_var[0])
+ {
+ ds_put_format (str, "(%s = ", var_get_name (fctr->indep_var[0]));
- static char buf1[100];
- char buf2[100];
-
- strcpy(buf1,"");
+ var_append_value_name (fctr->indep_var[0], &result->value[0], str);
- if (var)
- sprintf(buf1, "%s (",var_to_string(var) );
+ if ( fctr->indep_var[1] )
+ {
+ ds_put_cstr (str, ",");
+ ds_put_format (str, "%s = ", var_get_name (fctr->indep_var[1]));
-
- snprintf(buf2, 100, "%s = %s",
- var_to_string(fctr->indep_var[0]),
- value_to_string(&fs->id[0],fctr->indep_var[0]));
-
- strcat(buf1, buf2);
-
- if ( fctr->indep_var[1] )
- {
- sprintf(buf2, "; %s = %s)",
- var_to_string(fctr->indep_var[1]),
- value_to_string(&fs->id[1],
- fctr->indep_var[1]));
- strcat(buf1, buf2);
- }
- else
- {
- if ( var )
- strcat(buf1, ")");
+ var_append_value_name (fctr->indep_var[1], &result->value[1], str);
+ }
+ ds_put_cstr (str, ")");
}
-
- return buf1;
}
-const char *
-factor_to_string_concise(const struct factor *fctr,
- struct factor_statistics *fs)
-
-{
-
- static char buf[100];
-
- char buf2[100];
- snprintf(buf, 100, "%s",
- value_to_string(&fs->id[0], fctr->indep_var[0]));
-
- if ( fctr->indep_var[1] )
- {
- sprintf(buf2, ",%s)", value_to_string(&fs->id[1], fctr->indep_var[1]) );
- strcat(buf, buf2);
- }
-
-
- return buf;
-}
+/*
+ Local Variables:
+ mode: c
+ End:
+*/