X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fexamine.q;h=e8333b1ceeaa5257d922b9be4700bc6aec8ef7d1;hb=2cf38ce51a9f34961d68a75e0b312a591b5c9abf;hp=940a4391fc733734a91965f4f988ce8a1a6b7142;hpb=dcf9b154cbcaa35c3d8459a201b77eec8bcb30bd;p=pspp-builds.git diff --git a/src/language/stats/examine.q b/src/language/stats/examine.q index 940a4391..e8333b1c 100644 --- a/src/language/stats/examine.q +++ b/src/language/stats/examine.q @@ -1,66 +1,77 @@ -/* PSPP - EXAMINE data for normality . -*-c-*- +/* PSPP - a program for statistical analysis. + Copyright (C) 2004, 2008, 2009 Free Software Foundation, Inc. -Copyright (C) 2004 Free Software Foundation, Inc. -Author: John Darrington 2004 + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. -This program is free software; you can redistribute it and/or -modify it under the terms of the GNU General Public License as -published by the Free Software Foundation; either version 2 of the -License, or (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -02110-1301, USA. */ + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ #include + #include -#include "message.h" +#include +#include #include #include -#include -#include "alloc.h" -#include "str.h" -#include "case.h" -#include "dictionary.h" -#include "command.h" -#include "lexer.h" -#include "message.h" -#include "magic.h" -#include "misc.h" -#include "table.h" -#include "manager.h" -#include "value-labels.h" -#include "variable.h" -#include "procedure.h" -#include "hash.h" -#include "casefile.h" -#include "factor-stats.h" -#include "moments.h" -#include "percentiles.h" -#include "box-whisker.h" -#include "cartesian.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "minmax.h" +#include "xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) #define N_(msgid) msgid /* (headers) */ -#include "chart.h" -#include "plot-hist.h" -#include "plot-chart.h" +#include +#include +#include +#include /* (specification) "EXAMINE" (xmn_): *^variables=custom; +total=custom; +nototal=custom; - +missing=miss:pairwise/!listwise, + missing=miss:pairwise/!listwise, rep:report/!noreport, incl:include/!exclude; +compare=cmp:variables/!groups; @@ -76,393 +87,660 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA /* (functions) */ - static struct cmd_examine cmd; -static struct variable **dependent_vars; - +static const struct variable **dependent_vars; static size_t n_dependent_vars; +/* PERCENTILES */ + +static subc_list_double percentile_list; +static enum pc_alg percentile_algorithm; -struct factor +struct factor_metrics { - /* The independent variable */ - struct variable *indep_var[2]; + struct moments1 *moments; + struct percentile **ptl; + size_t n_ptiles; - /* Hash table of factor stats indexed by 2 values */ - struct hsh_table *fstats; + struct statistic *tukey_hinges; + struct statistic *box_whisker; + struct statistic *trimmed_mean; + struct statistic *histogram; + struct order_stats *np; - /* The hash table after it has been crunched */ - struct factor_statistics **fs; + /* Three quartiles indexing into PTL */ + struct percentile **quartiles; - struct factor *next; + /* A reader sorted in ASCENDING order */ + struct casereader *up_reader; -}; + /* The minimum value of all the weights */ + double cmin; -/* Linked list of factors */ -static struct factor *factors=0; + /* Sum of all weights, including those for missing values */ + double n; -static struct metrics *totals=0; + /* Sum of weights of non_missing values */ + double n_valid; -/* Parse the clause specifying the factors */ -static int examine_parse_independent_vars(struct cmd_examine *cmd); + double mean; + double variance; + double skewness; -/* Output functions */ -static void show_summary(struct variable **dependent_var, int n_dep_var, - const struct factor *f); + double kurtosis; + + double se_mean; -static void show_extremes(struct variable **dependent_var, - int n_dep_var, - const struct factor *factor, - int n_extremities); + struct extrema *minima; + struct extrema *maxima; +}; -static void show_descriptives(struct variable **dependent_var, - int n_dep_var, - struct factor *factor); +struct factor_result +{ + struct ll ll; -static void show_percentiles(struct variable **dependent_var, - int n_dep_var, - struct factor *factor); + union value value[2]; + /* An array of factor metrics, one for each variable */ + struct factor_metrics *metrics; +}; +struct xfactor +{ + /* We need to make a list of this structure */ + struct ll ll; + /* The independent variable */ + const struct variable const* indep_var[2]; -void np_plot(const struct metrics *m, const char *factorname); + /* A list of results for this factor */ + struct ll_list result_list ; +}; -void box_plot_group(const struct factor *fctr, - const struct variable **vars, int n_vars, - const struct variable *id - ) ; +static void +factor_destroy (struct xfactor *fctr) +{ + struct ll *ll = ll_head (&fctr->result_list); + while (ll != ll_null (&fctr->result_list)) + { + int v; + struct factor_result *result = + ll_data (ll, struct factor_result, ll); + int i; + for (v = 0; v < n_dependent_vars; ++v) + { + int i; + moments1_destroy (result->metrics[v].moments); + extrema_destroy (result->metrics[v].minima); + extrema_destroy (result->metrics[v].maxima); + statistic_destroy (result->metrics[v].trimmed_mean); + statistic_destroy (result->metrics[v].tukey_hinges); + statistic_destroy (result->metrics[v].box_whisker); + statistic_destroy (result->metrics[v].histogram); + for (i = 0 ; i < result->metrics[v].n_ptiles; ++i) + statistic_destroy ((struct statistic *) result->metrics[v].ptl[i]); + free (result->metrics[v].ptl); + free (result->metrics[v].quartiles); + casereader_destroy (result->metrics[v].up_reader); + } -void box_plot_variables(const struct factor *fctr, - const struct variable **vars, int n_vars, - const struct variable *id - ); + for (i = 0; i < 2; i++) + if (fctr->indep_var[i]) + value_destroy (&result->value[i], + var_get_width (fctr->indep_var[i])); + free (result->metrics); + ll = ll_next (ll); + free (result); + } +} +static struct xfactor level0_factor; +static struct ll_list factor_list; +/* Parse the clause specifying the factors */ +static int examine_parse_independent_vars (struct lexer *lexer, + const struct dictionary *dict, + struct cmd_examine *cmd); -/* Per Split function */ -static bool run_examine(const struct casefile *cf, void *cmd_); +/* Output functions */ +static void show_summary (const struct variable **dependent_var, int n_dep_var, + const struct dictionary *dict, + const struct xfactor *f); -static void output_examine(void); +static void show_descriptives (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *f); -void factor_calc(struct ccase *c, int case_no, - double weight, int case_missing); +static void show_percentiles (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *f); -/* Represent a factor as a string, so it can be - printed in a human readable fashion */ -const char * factor_to_string(const struct factor *fctr, - struct factor_statistics *fs, - const struct variable *var); +static void show_extremes (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *f); -/* Represent a factor as a string, so it can be - printed in a human readable fashion, - but sacrificing some readablility for the sake of brevity */ -const char *factor_to_string_concise(const struct factor *fctr, - struct factor_statistics *fs); +/* Per Split function */ +static void run_examine (struct cmd_examine *, struct casereader *, + struct dataset *); -/* Function to use for testing for missing values */ -static is_missing_func *value_is_missing; +static void output_examine (const struct dictionary *dict); -/* PERCENTILES */ +void factor_calc (const struct ccase *c, int case_no, + double weight, bool case_missing); -static subc_list_double percentile_list; -static enum pc_alg percentile_algorithm; +/* Represent a factor as a string, so it can be + printed in a human readable fashion */ +static void factor_to_string (const struct xfactor *fctr, + const struct factor_result *result, + struct string *str); + +/* Represent a factor as a string, so it can be + printed in a human readable fashion, + but sacrificing some readablility for the sake of brevity */ +static void +factor_to_string_concise (const struct xfactor *fctr, + const struct factor_result *result, + struct string *str + ); -static short sbc_percentile; +/* Categories of missing values to exclude. */ +static enum mv_class exclude_values; + int -cmd_examine(void) +cmd_examine (struct lexer *lexer, struct dataset *ds) { + struct casegrouper *grouper; + struct casereader *group; bool ok; - subc_list_double_create(&percentile_list); + subc_list_double_create (&percentile_list); percentile_algorithm = PC_HAVERAGE; - if ( !parse_examine(&cmd) ) - return CMD_FAILURE; + ll_init (&factor_list); + + if ( !parse_examine (lexer, ds, &cmd, NULL) ) + { + subc_list_double_destroy (&percentile_list); + return CMD_FAILURE; + } /* If /MISSING=INCLUDE is set, then user missing values are ignored */ - if (cmd.incl == XMN_INCLUDE ) - value_is_missing = mv_is_value_system_missing; - else - value_is_missing = mv_is_value_missing; + exclude_values = cmd.incl == XMN_INCLUDE ? MV_SYSTEM : MV_ANY; - if ( cmd.st_n == SYSMIS ) + if ( cmd.st_n == SYSMIS ) cmd.st_n = 5; - if ( ! cmd.sbc_cinterval) + if ( ! cmd.sbc_cinterval) cmd.n_cinterval[0] = 95.0; - /* If descriptives have been requested, make sure the + /* If descriptives have been requested, make sure the quartiles are calculated */ if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] ) { - subc_list_double_push(&percentile_list, 25); - subc_list_double_push(&percentile_list, 50); - subc_list_double_push(&percentile_list, 75); + subc_list_double_push (&percentile_list, 25); + subc_list_double_push (&percentile_list, 50); + subc_list_double_push (&percentile_list, 75); } - ok = multipass_procedure_with_splits (run_examine, &cmd); + grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); - if ( totals ) + while (casegrouper_get_next_group (grouper, &group)) { - free( totals ); + struct casereader *reader = + casereader_create_arithmetic_sequence (group, 1, 1); + + run_examine (&cmd, reader, ds); } - - if ( dependent_vars ) + + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; + + if ( dependent_vars ) free (dependent_vars); + subc_list_double_destroy (&percentile_list); + + return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; +}; + + +/* Plot the normal and detrended normal plots for RESULT. + Label the plots with LABEL */ +static void +np_plot (struct np *np, const char *label) +{ + double yfirst = 0, ylast = 0; + + double x_lower; + double x_upper; + double slack; + + /* Normal Plot */ + struct chart *np_chart; + + /* Detrended Normal Plot */ + struct chart *dnp_chart; + + /* The slope and intercept of the ideal normal probability line */ + const double slope = 1.0 / np->stddev; + const double intercept = -np->mean / np->stddev; + + if ( np->n < 1.0 ) + { + msg (MW, _("Not creating plot because data set is empty.")); + return ; + } + + np_chart = chart_create (); + dnp_chart = chart_create (); + + if ( !np_chart || ! dnp_chart ) + return ; + + chart_write_title (np_chart, _("Normal Q-Q Plot of %s"), label); + chart_write_xlabel (np_chart, _("Observed Value")); + chart_write_ylabel (np_chart, _("Expected Normal")); + + chart_write_title (dnp_chart, _("Detrended Normal Q-Q Plot of %s"), + label); + chart_write_xlabel (dnp_chart, _("Observed Value")); + chart_write_ylabel (dnp_chart, _("Dev from Normal")); + + yfirst = gsl_cdf_ugaussian_Pinv (1 / (np->n + 1)); + ylast = gsl_cdf_ugaussian_Pinv (np->n / (np->n + 1)); + + /* Need to make sure that both the scatter plot and the ideal fit into the + plot */ + x_lower = MIN (np->y_min, (yfirst - intercept) / slope) ; + x_upper = MAX (np->y_max, (ylast - intercept) / slope) ; + slack = (x_upper - x_lower) * 0.05 ; + + chart_write_xscale (np_chart, x_lower - slack, x_upper + slack, 5); + chart_write_xscale (dnp_chart, np->y_min, np->y_max, 5); + + chart_write_yscale (np_chart, yfirst, ylast, 5); + chart_write_yscale (dnp_chart, np->dns_min, np->dns_max, 5); + { - struct factor *f = factors ; - while ( f ) + struct casereader *reader = casewriter_make_reader (np->writer); + struct ccase *c; + while ((c = casereader_read (reader)) != NULL) { - struct factor *ff = f; + chart_datum (np_chart, 0, case_data_idx (c, NP_IDX_Y)->f, case_data_idx (c, NP_IDX_NS)->f); + chart_datum (dnp_chart, 0, case_data_idx (c, NP_IDX_Y)->f, case_data_idx (c, NP_IDX_DNS)->f); - f = f->next; - free ( ff->fs ); - hsh_destroy ( ff->fstats ) ; - free ( ff ) ; + case_unref (c); } - factors = 0; + casereader_destroy (reader); } - subc_list_double_destroy(&percentile_list); - - return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; -}; + chart_line (dnp_chart, 0, 0, np->y_min, np->y_max , CHART_DIM_X); + chart_line (np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y); + chart_submit (np_chart); + chart_submit (dnp_chart); +} -/* Show all the appropriate tables */ static void -output_examine(void) +show_npplot (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr) { - struct factor *fctr; + int v; - /* Show totals if appropriate */ - if ( ! cmd.sbc_nototal || factors == 0 ) + for (v = 0; v < n_dep_var; ++v) { - show_summary(dependent_vars, n_dependent_vars, 0); + struct ll *ll; + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); + ll = ll_next (ll)) + { + struct string str; + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); + + ds_init_empty (&str); + ds_put_format (&str, "%s ", var_get_name (dependent_var[v])); + + factor_to_string (fctr, result, &str); - if ( cmd.sbc_statistics ) + np_plot ((struct np*) result->metrics[v].np, ds_cstr(&str)); + + statistic_destroy ((struct statistic *)result->metrics[v].np); + + ds_destroy (&str); + } + } +} + + +static void +show_histogram (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr) +{ + int v; + + for (v = 0; v < n_dep_var; ++v) + { + struct ll *ll; + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); + ll = ll_next (ll)) { - if ( cmd.a_statistics[XMN_ST_EXTREME]) - show_extremes(dependent_vars, n_dependent_vars, 0, cmd.st_n); + struct string str; + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); - if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES]) - show_descriptives(dependent_vars, n_dependent_vars, 0); + ds_init_empty (&str); + ds_put_format (&str, "%s ", var_get_name (dependent_var[v])); + factor_to_string (fctr, result, &str); + + histogram_plot ((struct histogram *) result->metrics[v].histogram, + ds_cstr (&str), + (struct moments1 *) result->metrics[v].moments); + + ds_destroy (&str); } - if ( sbc_percentile ) - show_percentiles(dependent_vars, n_dependent_vars, 0); + } +} - if ( cmd.sbc_plot) + + +static void +show_boxplot_groups (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr) +{ + int v; + + for (v = 0; v < n_dep_var; ++v) + { + struct ll *ll; + int f = 0; + struct chart *ch = chart_create (); + double y_min = DBL_MAX; + double y_max = -DBL_MAX; + + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); + ll = ll_next (ll)) { - int v; - if ( cmd.a_plot[XMN_PLT_NPPLOT] ) - { - for ( v = 0 ; v < n_dependent_vars; ++v ) - np_plot(&totals[v], var_to_string(dependent_vars[v])); - } + const struct extremum *max, *min; + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); - if ( cmd.a_plot[XMN_PLT_BOXPLOT] ) - { - if ( cmd.cmp == XMN_GROUPS ) - { - box_plot_group(0, dependent_vars, n_dependent_vars, - cmd.v_id); - } - else - box_plot_variables(0, dependent_vars, n_dependent_vars, - cmd.v_id); - } + const struct ll_list *max_list = + extrema_list (result->metrics[v].maxima); - if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) + const struct ll_list *min_list = + extrema_list (result->metrics[v].minima); + + if ( ll_is_empty (max_list)) { - for ( v = 0 ; v < n_dependent_vars; ++v ) - { - struct normal_curve normal; - - normal.N = totals[v].n; - normal.mean = totals[v].mean; - normal.stddev = totals[v].stddev; - - histogram_plot(totals[v].histogram, - var_to_string(dependent_vars[v]), - &normal, 0); - } + msg (MW, _("Not creating plot because data set is empty.")); + continue; } - } + max = (const struct extremum *) + ll_data (ll_head(max_list), struct extremum, ll); - } + min = (const struct extremum *) + ll_data (ll_head (min_list), struct extremum, ll); + + y_max = MAX (y_max, max->value); + y_min = MIN (y_min, min->value); + } + boxplot_draw_yscale (ch, y_max, y_min); - /* Show grouped statistics as appropriate */ - fctr = factors; - while ( fctr ) - { - show_summary(dependent_vars, n_dependent_vars, fctr); + if ( fctr->indep_var[0]) + chart_write_title (ch, _("Boxplot of %s vs. %s"), + var_to_string (dependent_var[v]), + var_to_string (fctr->indep_var[0]) ); + else + chart_write_title (ch, _("Boxplot of %s"), + var_to_string (dependent_var[v])); - if ( cmd.sbc_statistics ) + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); + ll = ll_next (ll)) { - if ( cmd.a_statistics[XMN_ST_EXTREME]) - show_extremes(dependent_vars, n_dependent_vars, fctr, cmd.st_n); + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); + + struct string str; + const double box_width = (ch->data_right - ch->data_left) + / (ll_count (&fctr->result_list) * 2.0 ) ; + + const double box_centre = (f++ * 2 + 1) * box_width + ch->data_left; + + ds_init_empty (&str); + factor_to_string_concise (fctr, result, &str); - if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES]) - show_descriptives(dependent_vars, n_dependent_vars, fctr); + boxplot_draw_boxplot (ch, + box_centre, box_width, + (const struct box_whisker *) + result->metrics[v].box_whisker, + ds_cstr (&str)); + + ds_destroy (&str); } - if ( sbc_percentile ) - show_percentiles(dependent_vars, n_dependent_vars, fctr); + chart_submit (ch); + } +} + - if ( cmd.sbc_plot) +static void +show_boxplot_variables (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr + ) + +{ + int v; + struct ll *ll; + const struct ll_list *result_list = &fctr->result_list; + + for (ll = ll_head (result_list); + ll != ll_null (result_list); + ll = ll_next (ll)) + + { + struct string title; + struct chart *ch = chart_create (); + double y_min = DBL_MAX; + double y_max = -DBL_MAX; + + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); + + const double box_width = (ch->data_right - ch->data_left) + / (n_dep_var * 2.0 ) ; + + for (v = 0; v < n_dep_var; ++v) { - size_t v; + const struct ll *max_ll = + ll_head (extrema_list (result->metrics[v].maxima)); + const struct ll *min_ll = + ll_head (extrema_list (result->metrics[v].minima)); - struct factor_statistics **fs = fctr->fs ; + const struct extremum *max = + (const struct extremum *) ll_data (max_ll, struct extremum, ll); - if ( cmd.a_plot[XMN_PLT_BOXPLOT] ) - { - if ( cmd.cmp == XMN_VARIABLES ) - box_plot_variables(fctr, dependent_vars, n_dependent_vars, - cmd.v_id); - else - box_plot_group(fctr, dependent_vars, n_dependent_vars, - cmd.v_id); - } + const struct extremum *min = + (const struct extremum *) ll_data (min_ll, struct extremum, ll); - for ( v = 0 ; v < n_dependent_vars; ++v ) - { + y_max = MAX (y_max, max->value); + y_min = MIN (y_min, min->value); + } - for ( fs = fctr->fs ; *fs ; ++fs ) - { - const char *s = factor_to_string(fctr, *fs, dependent_vars[v]); - if ( cmd.a_plot[XMN_PLT_NPPLOT] ) - np_plot(&(*fs)->m[v], s); + boxplot_draw_yscale (ch, y_max, y_min); + + ds_init_empty (&title); + factor_to_string (fctr, result, &title); + +#if 0 + ds_put_format (&title, "%s = ", var_get_name (fctr->indep_var[0])); + var_append_value_name (fctr->indep_var[0], &result->value[0], &title); +#endif + + chart_write_title (ch, ds_cstr (&title)); + ds_destroy (&title); - if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) - { - struct normal_curve normal; + for (v = 0; v < n_dep_var; ++v) + { + struct string str; + const double box_centre = (v * 2 + 1) * box_width + ch->data_left; - normal.N = (*fs)->m[v].n; - normal.mean = (*fs)->m[v].mean; - normal.stddev = (*fs)->m[v].stddev; - - histogram_plot((*fs)->m[v].histogram, - s, &normal, 0); - } - - } /* for ( fs .... */ + ds_init_empty (&str); + ds_init_cstr (&str, var_get_name (dependent_var[v])); - } /* for ( v = 0 ..... */ + boxplot_draw_boxplot (ch, + box_centre, box_width, + (const struct box_whisker *) result->metrics[v].box_whisker, + ds_cstr (&str)); + ds_destroy (&str); } - fctr = fctr->next; + chart_submit (ch); } - } -/* Create a hash table of percentiles and their values from the list of - percentiles */ -static struct hsh_table * -list_to_ptile_hash(const subc_list_double *l) +/* Show all the appropriate tables */ +static void +output_examine (const struct dictionary *dict) { - int i; - - struct hsh_table *h ; + struct ll *ll; + + show_summary (dependent_vars, n_dependent_vars, dict, &level0_factor); - h = hsh_create(subc_list_double_count(l), - (hsh_compare_func *) ptile_compare, - (hsh_hash_func *) ptile_hash, - (hsh_free_func *) free, - 0); + if ( cmd.a_statistics[XMN_ST_EXTREME] ) + show_extremes (dependent_vars, n_dependent_vars, &level0_factor); + + if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] ) + show_descriptives (dependent_vars, n_dependent_vars, &level0_factor); + if ( cmd.sbc_percentiles) + show_percentiles (dependent_vars, n_dependent_vars, &level0_factor); - for ( i = 0 ; i < subc_list_double_count(l) ; ++i ) + if ( cmd.sbc_plot) { - struct percentile *p = xmalloc (sizeof *p); - - p->p = subc_list_double_at(l,i); - p->v = SYSMIS; + if (cmd.a_plot[XMN_PLT_BOXPLOT]) + show_boxplot_groups (dependent_vars, n_dependent_vars, &level0_factor); - hsh_insert(h, p); + if (cmd.a_plot[XMN_PLT_HISTOGRAM]) + show_histogram (dependent_vars, n_dependent_vars, &level0_factor); + if (cmd.a_plot[XMN_PLT_NPPLOT]) + show_npplot (dependent_vars, n_dependent_vars, &level0_factor); } - return h; + for (ll = ll_head (&factor_list); + ll != ll_null (&factor_list); ll = ll_next (ll)) + { + struct xfactor *factor = ll_data (ll, struct xfactor, ll); + show_summary (dependent_vars, n_dependent_vars, dict, factor); + + if ( cmd.a_statistics[XMN_ST_EXTREME] ) + show_extremes (dependent_vars, n_dependent_vars, factor); + + if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] ) + show_descriptives (dependent_vars, n_dependent_vars, factor); + + if ( cmd.sbc_percentiles) + show_percentiles (dependent_vars, n_dependent_vars, factor); + if (cmd.a_plot[XMN_PLT_BOXPLOT] && + cmd.cmp == XMN_GROUPS) + show_boxplot_groups (dependent_vars, n_dependent_vars, factor); + + + if (cmd.a_plot[XMN_PLT_BOXPLOT] && + cmd.cmp == XMN_VARIABLES) + show_boxplot_variables (dependent_vars, n_dependent_vars, + factor); + + if (cmd.a_plot[XMN_PLT_HISTOGRAM]) + show_histogram (dependent_vars, n_dependent_vars, factor); + + if (cmd.a_plot[XMN_PLT_NPPLOT]) + show_npplot (dependent_vars, n_dependent_vars, factor); + } } /* Parse the PERCENTILES subcommand */ static int -xmn_custom_percentiles(struct cmd_examine *p UNUSED) +xmn_custom_percentiles (struct lexer *lexer, struct dataset *ds UNUSED, + struct cmd_examine *p UNUSED, void *aux UNUSED) { - sbc_percentile = 1; - - lex_match('='); + lex_match (lexer, '='); - lex_match('('); + lex_match (lexer, '('); - while ( lex_is_number() ) + while ( lex_is_number (lexer) ) { - subc_list_double_push(&percentile_list,lex_number()); + subc_list_double_push (&percentile_list, lex_number (lexer)); - lex_get(); + lex_get (lexer); - lex_match(',') ; + lex_match (lexer, ',') ; } - lex_match(')'); + lex_match (lexer, ')'); - lex_match('='); + lex_match (lexer, '='); - if ( lex_match_id("HAVERAGE")) - percentile_algorithm = PC_HAVERAGE; + if ( lex_match_id (lexer, "HAVERAGE")) + percentile_algorithm = PC_HAVERAGE; - else if ( lex_match_id("WAVERAGE")) - percentile_algorithm = PC_WAVERAGE; + else if ( lex_match_id (lexer, "WAVERAGE")) + percentile_algorithm = PC_WAVERAGE; - else if ( lex_match_id("ROUND")) + else if ( lex_match_id (lexer, "ROUND")) percentile_algorithm = PC_ROUND; - else if ( lex_match_id("EMPIRICAL")) + else if ( lex_match_id (lexer, "EMPIRICAL")) percentile_algorithm = PC_EMPIRICAL; - else if ( lex_match_id("AEMPIRICAL")) - percentile_algorithm = PC_AEMPIRICAL; + else if ( lex_match_id (lexer, "AEMPIRICAL")) + percentile_algorithm = PC_AEMPIRICAL; - else if ( lex_match_id("NONE")) - percentile_algorithm = PC_NONE; + else if ( lex_match_id (lexer, "NONE")) + percentile_algorithm = PC_NONE; - if ( 0 == subc_list_double_count(&percentile_list)) + if ( 0 == subc_list_double_count (&percentile_list)) { - subc_list_double_push(&percentile_list, 5); - subc_list_double_push(&percentile_list, 10); - subc_list_double_push(&percentile_list, 25); - subc_list_double_push(&percentile_list, 50); - subc_list_double_push(&percentile_list, 75); - subc_list_double_push(&percentile_list, 90); - subc_list_double_push(&percentile_list, 95); + subc_list_double_push (&percentile_list, 5); + subc_list_double_push (&percentile_list, 10); + subc_list_double_push (&percentile_list, 25); + subc_list_double_push (&percentile_list, 50); + subc_list_double_push (&percentile_list, 75); + subc_list_double_push (&percentile_list, 90); + subc_list_double_push (&percentile_list, 95); } return 1; @@ -470,9 +748,10 @@ xmn_custom_percentiles(struct cmd_examine *p UNUSED) /* TOTAL and NOTOTAL are simple, mutually exclusive flags */ static int -xmn_custom_total(struct cmd_examine *p) +xmn_custom_total (struct lexer *lexer UNUSED, struct dataset *ds UNUSED, + struct cmd_examine *p, void *aux UNUSED) { - if ( p->sbc_nototal ) + if ( p->sbc_nototal ) { msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL"); return 0; @@ -482,11 +761,12 @@ xmn_custom_total(struct cmd_examine *p) } static int -xmn_custom_nototal(struct cmd_examine *p) +xmn_custom_nototal (struct lexer *lexer UNUSED, struct dataset *ds UNUSED, + struct cmd_examine *p, void *aux UNUSED) { - if ( p->sbc_total ) + if ( p->sbc_total ) { - msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL"); + msg (SE, _("%s and %s are mutually exclusive"), "TOTAL", "NOTOTAL"); return 0; } @@ -495,38 +775,40 @@ xmn_custom_nototal(struct cmd_examine *p) -/* Parser for the variables sub command +/* Parser for the variables sub command Returns 1 on success */ static int -xmn_custom_variables(struct cmd_examine *cmd ) +xmn_custom_variables (struct lexer *lexer, struct dataset *ds, + struct cmd_examine *cmd, + void *aux UNUSED) { - lex_match('='); + const struct dictionary *dict = dataset_dict (ds); + lex_match (lexer, '='); - if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) - && token != T_ALL) + if ( (lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokid (lexer)) == NULL) + && lex_token (lexer) != T_ALL) { return 2; } - - if (!parse_variables (default_dict, &dependent_vars, &n_dependent_vars, - PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) ) + + if (!parse_variables_const (lexer, dict, &dependent_vars, &n_dependent_vars, + PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) ) { free (dependent_vars); return 0; } - assert(n_dependent_vars); + assert (n_dependent_vars); - totals = xnmalloc (n_dependent_vars, sizeof *totals); - if ( lex_match(T_BY)) + if ( lex_match (lexer, T_BY)) { - int success ; - success = examine_parse_independent_vars(cmd); - if ( success != 1 ) { - free (dependent_vars); - free (totals) ; - } + int success ; + success = examine_parse_independent_vars (lexer, dict, cmd); + if ( success != 1 ) + { + free (dependent_vars); + } return success; } @@ -537,369 +819,443 @@ xmn_custom_variables(struct cmd_examine *cmd ) /* Parse the clause specifying the factors */ static int -examine_parse_independent_vars(struct cmd_examine *cmd) +examine_parse_independent_vars (struct lexer *lexer, + const struct dictionary *dict, + struct cmd_examine *cmd) { int success; - struct factor *sf = xmalloc (sizeof *sf); + struct xfactor *sf = xmalloc (sizeof *sf); + + ll_init (&sf->result_list); - if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) - && token != T_ALL) + if ( (lex_token (lexer) != T_ID || + dict_lookup_var (dict, lex_tokid (lexer)) == NULL) + && lex_token (lexer) != T_ALL) { free ( sf ) ; return 2; } + sf->indep_var[0] = parse_variable (lexer, dict); + sf->indep_var[1] = NULL; - sf->indep_var[0] = parse_variable(); - sf->indep_var[1] = 0; - - if ( token == T_BY ) + if ( lex_token (lexer) == T_BY ) { + lex_match (lexer, T_BY); - lex_match(T_BY); - - if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) - && token != T_ALL) + if ( (lex_token (lexer) != T_ID || + dict_lookup_var (dict, lex_tokid (lexer)) == NULL) + && lex_token (lexer) != T_ALL) { - free ( sf ) ; + free (sf); return 2; } - sf->indep_var[1] = parse_variable(); + sf->indep_var[1] = parse_variable (lexer, dict); + ll_push_tail (&factor_list, &sf->ll); } + else + ll_push_tail (&factor_list, &sf->ll); + lex_match (lexer, ','); - sf->fstats = hsh_create(4, - (hsh_compare_func *) factor_statistics_compare, - (hsh_hash_func *) factor_statistics_hash, - (hsh_free_func *) factor_statistics_free, - 0); - - sf->next = factors; - factors = sf; - - lex_match(','); - - if ( token == '.' || token == '/' ) + if ( lex_token (lexer) == '.' || lex_token (lexer) == '/' ) return 1; - success = examine_parse_independent_vars(cmd); - - if ( success != 1 ) - free ( sf ) ; + success = examine_parse_independent_vars (lexer, dict, cmd); + + if ( success != 1 ) + free ( sf ) ; return success; } +static void +examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, + const struct dictionary *dict, struct xfactor *factor) +{ + struct ccase *c; + const struct variable *wv = dict_get_weight (dict); + int v; + int n_extrema = 1; + struct factor_result *result = xzalloc (sizeof (*result)); + int i; + for (i = 0; i < 2; i++) + if (factor->indep_var[i]) + value_init (&result->value[i], var_get_width (factor->indep_var[i])); + result->metrics = xcalloc (n_dependent_vars, sizeof (*result->metrics)); -void populate_percentiles(struct tab_table *tbl, int col, int row, - const struct metrics *m); - -void populate_descriptives(struct tab_table *t, int col, int row, - const struct metrics *fs); - -void populate_extremes(struct tab_table *t, int col, int row, int n, - const struct metrics *m); - -void populate_summary(struct tab_table *t, int col, int row, - const struct metrics *m); - - - - -static int bad_weight_warn = 1; - + if ( cmd->a_statistics[XMN_ST_EXTREME] ) + n_extrema = cmd->st_n; -/* Perform calculations for the sub factors */ -void -factor_calc(struct ccase *c, int case_no, double weight, int case_missing) -{ - size_t v; - struct factor *fctr = factors; - while ( fctr) + c = casereader_peek (reader, 0); + if (c != NULL) { - struct factor_statistics **foo ; - union value indep_vals[2] ; - - indep_vals[0] = * case_data(c, fctr->indep_var[0]->fv); + if ( level > 0) + for (i = 0; i < 2; i++) + if (factor->indep_var[i]) + value_copy (&result->value[i], case_data (c, factor->indep_var[i]), + var_get_width (factor->indep_var[i])); + case_unref (c); + } - if ( fctr->indep_var[1] ) - indep_vals[1] = * case_data(c, fctr->indep_var[1]->fv); + for (v = 0; v < n_dependent_vars; ++v) + { + struct casewriter *writer; + struct casereader *input = casereader_clone (reader); + + result->metrics[v].moments = moments1_create (MOMENT_KURTOSIS); + result->metrics[v].minima = extrema_create (n_extrema, EXTREME_MINIMA); + result->metrics[v].maxima = extrema_create (n_extrema, EXTREME_MAXIMA); + result->metrics[v].cmin = DBL_MAX; + + if (cmd->a_statistics[XMN_ST_DESCRIPTIVES] || + cmd->a_plot[XMN_PLT_BOXPLOT] || + cmd->a_plot[XMN_PLT_NPPLOT] || + cmd->sbc_percentiles) + { + /* In this case, we need to sort the data, so we create a sorting + casewriter */ + struct subcase up_ordering; + subcase_init_var (&up_ordering, dependent_vars[v], SC_ASCEND); + writer = sort_create_writer (&up_ordering, + casereader_get_proto (reader)); + subcase_destroy (&up_ordering); + } else - indep_vals[1].f = SYSMIS; - - assert(fctr->fstats); + { + /* but in this case, sorting is unnecessary, so an ordinary + casewriter is sufficient */ + writer = + autopaging_writer_create (casereader_get_proto (reader)); + } - foo = ( struct factor_statistics ** ) - hsh_probe(fctr->fstats, (void *) &indep_vals); - if ( !*foo ) + /* Sort or just iterate, whilst calculating moments etc */ + while ((c = casereader_read (input)) != NULL) { + int n_vals = caseproto_get_n_widths (casereader_get_proto (reader)); + const casenumber loc = case_data_idx (c, n_vals - 1)->f; - *foo = create_factor_statistics(n_dependent_vars, - &indep_vals[0], - &indep_vals[1]); - - for ( v = 0 ; v < n_dependent_vars ; ++v ) - { - metrics_precalc( &(*foo)->m[v] ); - } + const double weight = wv ? case_data (c, wv)->f : 1.0; + const union value *value = case_data (c, dependent_vars[v]); - } + if (weight != SYSMIS) + minimize (&result->metrics[v].cmin, weight); - for ( v = 0 ; v < n_dependent_vars ; ++v ) - { - const struct variable *var = dependent_vars[v]; - const union value *val = case_data (c, var->fv); - - if ( value_is_missing (&var->miss, val) || case_missing ) - val = 0; - - metrics_calc( &(*foo)->m[v], val, weight, case_no); - - } + moments1_add (result->metrics[v].moments, + value->f, + weight); - fctr = fctr->next; - } + result->metrics[v].n += weight; + if ( ! var_is_value_missing (dependent_vars[v], value, MV_ANY) ) + result->metrics[v].n_valid += weight; -} + extrema_add (result->metrics[v].maxima, + value->f, + weight, + loc); -static bool -run_examine(const struct casefile *cf, void *cmd_ ) -{ - struct casereader *r; - struct ccase c; - int v; + extrema_add (result->metrics[v].minima, + value->f, + weight, + loc); - const struct cmd_examine *cmd = (struct cmd_examine *) cmd_; + casewriter_write (writer, c); + } + casereader_destroy (input); + result->metrics[v].up_reader = casewriter_make_reader (writer); + } - /* Make sure we haven't got rubbish left over from a - previous split */ - struct factor *fctr = factors; - while (fctr) + /* If percentiles or descriptives have been requested, then a + second pass through the data (which has now been sorted) + is necessary */ + if ( cmd->a_statistics[XMN_ST_DESCRIPTIVES] || + cmd->a_plot[XMN_PLT_BOXPLOT] || + cmd->a_plot[XMN_PLT_NPPLOT] || + cmd->sbc_percentiles) { - struct factor *next = fctr->next; + for (v = 0; v < n_dependent_vars; ++v) + { + int i; + int n_os; + struct order_stats **os ; + struct factor_metrics *metric = &result->metrics[v]; - hsh_clear(fctr->fstats); + metric->n_ptiles = percentile_list.n_data; - fctr->fs = 0; + metric->ptl = xcalloc (metric->n_ptiles, + sizeof (struct percentile *)); - fctr = next; - } + metric->quartiles = xcalloc (3, sizeof (*metric->quartiles)); + for (i = 0 ; i < metric->n_ptiles; ++i) + { + metric->ptl[i] = (struct percentile *) + percentile_create (percentile_list.data[i] / 100.0, metric->n_valid); + + if ( percentile_list.data[i] == 25) + metric->quartiles[0] = metric->ptl[i]; + else if ( percentile_list.data[i] == 50) + metric->quartiles[1] = metric->ptl[i]; + else if ( percentile_list.data[i] == 75) + metric->quartiles[2] = metric->ptl[i]; + } + metric->tukey_hinges = tukey_hinges_create (metric->n_valid, metric->cmin); + metric->trimmed_mean = trimmed_mean_create (metric->n_valid, 0.05); - for ( v = 0 ; v < n_dependent_vars ; ++v ) - metrics_precalc(&totals[v]); + n_os = metric->n_ptiles + 2; - for(r = casefile_get_reader (cf); - casereader_read (r, &c) ; - case_destroy (&c) ) - { - int case_missing=0; - const int case_no = casereader_cnum(r); + if ( cmd->a_plot[XMN_PLT_NPPLOT] ) + { + metric->np = np_create (metric->moments); + n_os ++; + } - const double weight = - dict_get_case_weight(default_dict, &c, &bad_weight_warn); + os = xcalloc (sizeof (struct order_stats *), n_os); - if ( cmd->miss == XMN_LISTWISE ) - { - for ( v = 0 ; v < n_dependent_vars ; ++v ) + for (i = 0 ; i < metric->n_ptiles ; ++i ) { - const struct variable *var = dependent_vars[v]; - const union value *val = case_data (&c, var->fv); - - if ( value_is_missing(&var->miss, val)) - case_missing = 1; - + os[i] = (struct order_stats *) metric->ptl[i]; } - } - for ( v = 0 ; v < n_dependent_vars ; ++v ) - { - const struct variable *var = dependent_vars[v]; - const union value *val = case_data (&c, var->fv); + os[i] = (struct order_stats *) metric->tukey_hinges; + os[i+1] = (struct order_stats *) metric->trimmed_mean; - if ( value_is_missing(&var->miss, val) || case_missing ) - val = 0; + if (cmd->a_plot[XMN_PLT_NPPLOT]) + os[i+2] = metric->np; - metrics_calc(&totals[v], val, weight, case_no); - + order_stats_accumulate (os, n_os, + casereader_clone (metric->up_reader), + wv, dependent_vars[v], MV_ANY); + free (os); } - - factor_calc(&c, case_no, weight, case_missing); - } - - for ( v = 0 ; v < n_dependent_vars ; ++v) + /* FIXME: Do this in the above loop */ + if ( cmd->a_plot[XMN_PLT_HISTOGRAM] ) { - fctr = factors; - while ( fctr ) + struct ccase *c; + struct casereader *input = casereader_clone (reader); + + for (v = 0; v < n_dependent_vars; ++v) { - struct hsh_iterator hi; - struct factor_statistics *fs; + const struct extremum *max, *min; + struct factor_metrics *metric = &result->metrics[v]; + + const struct ll_list *max_list = + extrema_list (result->metrics[v].maxima); + + const struct ll_list *min_list = + extrema_list (result->metrics[v].minima); - for ( fs = hsh_first(fctr->fstats, &hi); - fs != 0 ; - fs = hsh_next(fctr->fstats, &hi)) + if ( ll_is_empty (max_list)) { - - fs->m[v].ptile_hash = list_to_ptile_hash(&percentile_list); - fs->m[v].ptile_alg = percentile_algorithm; - metrics_postcalc(&fs->m[v]); + msg (MW, _("Not creating plot because data set is empty.")); + continue; } - fctr = fctr->next; - } - - totals[v].ptile_hash = list_to_ptile_hash(&percentile_list); - totals[v].ptile_alg = percentile_algorithm; - metrics_postcalc(&totals[v]); - } + assert (! ll_is_empty (min_list)); + max = (const struct extremum *) + ll_data (ll_head(max_list), struct extremum, ll); - /* Make sure that the combination of factors are complete */ + min = (const struct extremum *) + ll_data (ll_head (min_list), struct extremum, ll); - fctr = factors; - while ( fctr ) - { - struct hsh_iterator hi; - struct hsh_iterator hi0; - struct hsh_iterator hi1; - struct factor_statistics *fs; - - struct hsh_table *idh0=0; - struct hsh_table *idh1=0; - union value *val0; - union value *val1; - - idh0 = hsh_create(4, (hsh_compare_func *) compare_values, - (hsh_hash_func *) hash_value, - 0,0); - - idh1 = hsh_create(4, (hsh_compare_func *) compare_values, - (hsh_hash_func *) hash_value, - 0,0); - - - for ( fs = hsh_first(fctr->fstats, &hi); - fs != 0 ; - fs = hsh_next(fctr->fstats, &hi)) - { - hsh_insert(idh0,(void *) &fs->id[0]); - hsh_insert(idh1,(void *) &fs->id[1]); + metric->histogram = histogram_create (10, min->value, max->value); } - /* Ensure that the factors combination is complete */ - for ( val0 = hsh_first(idh0, &hi0); - val0 != 0 ; - val0 = hsh_next(idh0, &hi0)) + while ((c = casereader_read (input)) != NULL) { - for ( val1 = hsh_first(idh1, &hi1); - val1 != 0 ; - val1 = hsh_next(idh1, &hi1)) + const double weight = wv ? case_data (c, wv)->f : 1.0; + + for (v = 0; v < n_dependent_vars; ++v) { - struct factor_statistics **ffs; - union value key[2]; - key[0] = *val0; - key[1] = *val1; - - ffs = (struct factor_statistics **) - hsh_probe(fctr->fstats, (void *) &key ); - - if ( !*ffs ) { - size_t i; - (*ffs) = create_factor_statistics (n_dependent_vars, - &key[0], &key[1]); - for ( i = 0 ; i < n_dependent_vars ; ++i ) - metrics_precalc( &(*ffs)->m[i]); - } + struct factor_metrics *metric = &result->metrics[v]; + if ( metric->histogram) + histogram_add ((struct histogram *) metric->histogram, + case_data (c, dependent_vars[v])->f, weight); } + case_unref (c); } - - hsh_destroy(idh0); - hsh_destroy(idh1); - - fctr->fs = (struct factor_statistics **) hsh_sort_copy(fctr->fstats); - - fctr = fctr->next; + casereader_destroy (input); } - output_examine(); - - - if ( totals ) + /* In this case, a third iteration is required */ + if (cmd->a_plot[XMN_PLT_BOXPLOT]) { - size_t i; - for ( i = 0 ; i < n_dependent_vars ; ++i ) + for (v = 0; v < n_dependent_vars; ++v) { - metrics_destroy(&totals[i]); + struct factor_metrics *metric = &result->metrics[v]; + int n_vals = caseproto_get_n_widths (casereader_get_proto ( + metric->up_reader)); + + metric->box_whisker = + box_whisker_create ((struct tukey_hinges *) metric->tukey_hinges, + cmd->v_id, n_vals - 1); + + order_stats_accumulate ((struct order_stats **) &metric->box_whisker, + 1, + casereader_clone (metric->up_reader), + wv, dependent_vars[v], MV_ANY); } } - return true; + ll_push_tail (&factor->result_list, &result->ll); + casereader_destroy (reader); } static void -show_summary(struct variable **dependent_var, int n_dep_var, - const struct factor *fctr) +run_examine (struct cmd_examine *cmd, struct casereader *input, + struct dataset *ds) { - static const char *subtitle[]= + struct ll *ll; + const struct dictionary *dict = dataset_dict (ds); + struct ccase *c; + struct casereader *level0 = casereader_clone (input); + + c = casereader_peek (input, 0); + if (c == NULL) + { + casereader_destroy (input); + return; + } + + output_split_file_values (ds, c); + case_unref (c); + + ll_init (&level0_factor.result_list); + + examine_group (cmd, level0, 0, dict, &level0_factor); + + for (ll = ll_head (&factor_list); + ll != ll_null (&factor_list); + ll = ll_next (ll)) + { + struct xfactor *factor = ll_data (ll, struct xfactor, ll); + + struct casereader *group = NULL; + struct casereader *level1; + struct casegrouper *grouper1 = NULL; + + level1 = casereader_clone (input); + level1 = sort_execute_1var (level1, factor->indep_var[0]); + grouper1 = casegrouper_create_vars (level1, &factor->indep_var[0], 1); + + while (casegrouper_get_next_group (grouper1, &group)) + { + struct casereader *group_copy = casereader_clone (group); + + if ( !factor->indep_var[1]) + examine_group (cmd, group_copy, 1, dict, factor); + else + { + int n_groups = 0; + struct casereader *group2 = NULL; + struct casegrouper *grouper2 = NULL; + + group_copy = sort_execute_1var (group_copy, + factor->indep_var[1]); + + grouper2 = casegrouper_create_vars (group_copy, + &factor->indep_var[1], 1); + + while (casegrouper_get_next_group (grouper2, &group2)) + { + examine_group (cmd, group2, 2, dict, factor); + n_groups++; + } + casegrouper_destroy (grouper2); + } + + casereader_destroy (group); + } + casegrouper_destroy (grouper1); + } + + casereader_destroy (input); + + output_examine (dict); + + factor_destroy (&level0_factor); + + { + struct ll *ll; + for (ll = ll_head (&factor_list); + ll != ll_null (&factor_list); + ll = ll_next (ll)) + { + struct xfactor *f = ll_data (ll, struct xfactor, ll); + factor_destroy (f); + } + } + +} + + +static void +show_summary (const struct variable **dependent_var, int n_dep_var, + const struct dictionary *dict, + const struct xfactor *fctr) +{ + const struct variable *wv = dict_get_weight (dict); + const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0; + + static const char *subtitle[]= { N_("Valid"), N_("Missing"), N_("Total") }; - int i; - int heading_columns ; + int v, j; + int heading_columns = 1; int n_cols; const int heading_rows = 3; struct tab_table *tbl; int n_rows ; - int n_factors = 1; + n_rows = n_dep_var; - if ( fctr ) + assert (fctr); + + if ( fctr->indep_var[0] ) { heading_columns = 2; - n_factors = hsh_count(fctr->fstats); - n_rows = n_dep_var * n_factors ; if ( fctr->indep_var[1] ) - heading_columns = 3; - } - else - { - heading_columns = 1; - n_rows = n_dep_var; + { + heading_columns = 3; + } } + n_rows *= ll_count (&fctr->result_list); n_rows += heading_rows; n_cols = heading_columns + 6; - tbl = tab_create (n_cols,n_rows,0); + tbl = tab_create (n_cols, n_rows, 0); tab_headers (tbl, heading_columns, 0, heading_rows, 0); - tab_dim (tbl, tab_natural_dimensions); - + tab_dim (tbl, tab_natural_dimensions, NULL); + /* Outline the box */ - tab_box (tbl, + tab_box (tbl, TAL_2, TAL_2, -1, -1, 0, 0, n_cols - 1, n_rows - 1); /* Vertical lines for the data only */ - tab_box (tbl, + tab_box (tbl, -1, -1, -1, TAL_1, heading_columns, 0, @@ -913,1296 +1269,934 @@ show_summary(struct variable **dependent_var, int n_dep_var, tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1); - tab_title (tbl, 0, _("Case Processing Summary")); - + tab_title (tbl, _("Case Processing Summary")); - tab_joint_text(tbl, heading_columns, 0, - n_cols -1, 0, - TAB_CENTER | TAT_TITLE, - _("Cases")); + tab_joint_text (tbl, heading_columns, 0, + n_cols -1, 0, + TAB_CENTER | TAT_TITLE, + _("Cases")); /* Remove lines ... */ - tab_box (tbl, + tab_box (tbl, -1, -1, TAL_0, TAL_0, heading_columns, 0, n_cols - 1, 0); - for ( i = 0 ; i < 3 ; ++i ) + for (j = 0 ; j < 3 ; ++j) { - tab_text (tbl, heading_columns + i*2 , 2, TAB_CENTER | TAT_TITLE, + tab_text (tbl, heading_columns + j * 2 , 2, TAB_CENTER | TAT_TITLE, _("N")); - tab_text (tbl, heading_columns + i*2 + 1, 2, TAB_CENTER | TAT_TITLE, + tab_text (tbl, heading_columns + j * 2 + 1, 2, TAB_CENTER | TAT_TITLE, _("Percent")); - tab_joint_text(tbl, heading_columns + i*2 , 1, - heading_columns + i*2 + 1, 1, - TAB_CENTER | TAT_TITLE, - subtitle[i]); + tab_joint_text (tbl, heading_columns + j * 2 , 1, + heading_columns + j * 2 + 1, 1, + TAB_CENTER | TAT_TITLE, + subtitle[j]); tab_box (tbl, -1, -1, TAL_0, TAL_0, - heading_columns + i*2, 1, - heading_columns + i*2 + 1, 1); - + heading_columns + j * 2, 1, + heading_columns + j * 2 + 1, 1); } /* Titles for the independent variables */ - if ( fctr ) + if ( fctr->indep_var[0] ) { - tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string(fctr->indep_var[0])); + tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, + var_to_string (fctr->indep_var[0])); - if ( fctr->indep_var[1] ) + if ( fctr->indep_var[1] ) { - tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string(fctr->indep_var[1])); + tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, + var_to_string (fctr->indep_var[1])); } - } - - for ( i = 0 ; i < n_dep_var ; ++i ) + for (v = 0 ; v < n_dep_var ; ++v) { - int n_factors = 1; - if ( fctr ) - n_factors = hsh_count(fctr->fstats); - - - if ( i > 0 ) - tab_hline(tbl, TAL_1, 0, n_cols -1 , i * n_factors + heading_rows); - - tab_text (tbl, - 0, i * n_factors + heading_rows, - TAB_LEFT | TAT_TITLE, - var_to_string(dependent_var[i]) + int j = 0; + struct ll *ll; + const union value *last_value = NULL; + + if ( v > 0 ) + tab_hline (tbl, TAL_1, 0, n_cols -1 , + v * ll_count (&fctr->result_list) + + heading_rows); + + tab_text (tbl, + 0, + v * ll_count (&fctr->result_list) + heading_rows, + TAB_LEFT | TAT_TITLE, + var_to_string (dependent_var[v]) ); - if ( !fctr ) - populate_summary(tbl, heading_columns, - (i * n_factors) + heading_rows, - &totals[i]); - - - else + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); ll = ll_next (ll)) { - struct factor_statistics **fs = fctr->fs; - int count = 0 ; + double n; + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); - while (*fs) + if ( fctr->indep_var[0] ) { - static union value prev; - - if ( 0 != compare_values(&prev, &(*fs)->id[0], - fctr->indep_var[0]->width)) - { - tab_text (tbl, - 1, - (i * n_factors ) + count + - heading_rows, - TAB_LEFT | TAT_TITLE, - value_to_string(&(*fs)->id[0], fctr->indep_var[0]) - ); - - if (fctr->indep_var[1] && count > 0 ) - tab_hline(tbl, TAL_1, 1, n_cols - 1, - (i * n_factors ) + count + heading_rows); - - } - - prev = (*fs)->id[0]; - - - if ( fctr->indep_var[1]) - tab_text (tbl, - 2, - (i * n_factors ) + count + - heading_rows, - TAB_LEFT | TAT_TITLE, - value_to_string(&(*fs)->id[1], fctr->indep_var[1]) - ); - populate_summary(tbl, heading_columns, - (i * n_factors) + count - + heading_rows, - &(*fs)->m[i]); + if ( last_value == NULL || + !value_equal (last_value, &result->value[0], + var_get_width (fctr->indep_var[0]))) + { + struct string str; - count++ ; - fs++; - } - } - } + last_value = &result->value[0]; + ds_init_empty (&str); - tab_submit (tbl); -} + var_append_value_name (fctr->indep_var[0], &result->value[0], + &str); + tab_text (tbl, 1, + heading_rows + j + + v * ll_count (&fctr->result_list), + TAB_LEFT | TAT_TITLE, + ds_cstr (&str)); -void -populate_summary(struct tab_table *t, int col, int row, - const struct metrics *m) + ds_destroy (&str); -{ - const double total = m->n + m->n_missing ; - - tab_float(t, col + 0, row + 0, TAB_RIGHT, m->n, 8, 0); - tab_float(t, col + 2, row + 0, TAB_RIGHT, m->n_missing, 8, 0); - tab_float(t, col + 4, row + 0, TAB_RIGHT, total, 8, 0); + if ( fctr->indep_var[1] && j > 0) + tab_hline (tbl, TAL_1, 1, n_cols - 1, + heading_rows + j + + v * ll_count (&fctr->result_list)); + } + if ( fctr->indep_var[1]) + { + struct string str; - if ( total > 0 ) { - tab_text (t, col + 1, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", - 100.0 * m->n / total ); + ds_init_empty (&str); - tab_text (t, col + 3, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", - 100.0 * m->n_missing / total ); + var_append_value_name (fctr->indep_var[1], + &result->value[1], &str); - /* This seems a bit pointless !!! */ - tab_text (t, col + 5, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", - 100.0 * total / total ); + tab_text (tbl, 2, + heading_rows + j + + v * ll_count (&fctr->result_list), + TAB_LEFT | TAT_TITLE, + ds_cstr (&str)); + ds_destroy (&str); + } + } - } + moments1_calculate (result->metrics[v].moments, + &n, &result->metrics[v].mean, + &result->metrics[v].variance, + &result->metrics[v].skewness, + &result->metrics[v].kurtosis); + + result->metrics[v].se_mean = sqrt (result->metrics[v].variance / n) ; + + /* Total Valid */ + tab_double (tbl, heading_columns, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_LEFT, + n, wfmt); + + tab_text_format (tbl, heading_columns + 1, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_RIGHT, + "%g%%", n * 100.0 / result->metrics[v].n); + + /* Total Missing */ + tab_double (tbl, heading_columns + 2, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_LEFT, + result->metrics[v].n - n, + wfmt); + + tab_text_format (tbl, heading_columns + 3, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_RIGHT, + "%g%%", + (result->metrics[v].n - n) * 100.0 / result->metrics[v].n + ); + + /* Total Valid + Missing */ + tab_double (tbl, heading_columns + 4, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_LEFT, + result->metrics[v].n, + wfmt); + + tab_text_format (tbl, heading_columns + 5, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_RIGHT, + "%g%%", + ((result->metrics[v].n) * 100.0 + / result->metrics[v].n)); + + ++j; + } + } -} + tab_submit (tbl); +} +#define DESCRIPTIVE_ROWS 13 -static void -show_extremes(struct variable **dependent_var, int n_dep_var, - const struct factor *fctr, int n_extremities) +static void +show_descriptives (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr) { - int i; - int heading_columns ; + int v; + int heading_columns = 3; int n_cols; const int heading_rows = 1; struct tab_table *tbl; - int n_factors = 1; int n_rows ; + n_rows = n_dep_var; - if ( fctr ) - { - heading_columns = 2; - n_factors = hsh_count(fctr->fstats); + assert (fctr); - n_rows = n_dep_var * 2 * n_extremities * n_factors; + if ( fctr->indep_var[0] ) + { + heading_columns = 4; if ( fctr->indep_var[1] ) - heading_columns = 3; - } - else - { - heading_columns = 1; - n_rows = n_dep_var * 2 * n_extremities; + { + heading_columns = 5; + } } + n_rows *= ll_count (&fctr->result_list) * DESCRIPTIVE_ROWS; n_rows += heading_rows; - heading_columns += 2; n_cols = heading_columns + 2; - tbl = tab_create (n_cols,n_rows,0); + tbl = tab_create (n_cols, n_rows, 0); tab_headers (tbl, heading_columns, 0, heading_rows, 0); - tab_dim (tbl, tab_natural_dimensions); - - /* Outline the box, No internal lines*/ - tab_box (tbl, + tab_dim (tbl, tab_natural_dimensions, NULL); + + /* Outline the box */ + tab_box (tbl, TAL_2, TAL_2, -1, -1, 0, 0, n_cols - 1, n_rows - 1); - tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); - tab_title (tbl, 0, _("Extreme Values")); + tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); + tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows ); - tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows -1); - tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows -1); + tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1); - if ( fctr ) - { - tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string(fctr->indep_var[0])); - if ( fctr->indep_var[1] ) - tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string(fctr->indep_var[1])); - } + if ( fctr->indep_var[0]) + tab_text (tbl, 1, 0, TAT_TITLE, var_to_string (fctr->indep_var[0])); - tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value")); - tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number")); + if ( fctr->indep_var[1]) + tab_text (tbl, 2, 0, TAT_TITLE, var_to_string (fctr->indep_var[1])); - for ( i = 0 ; i < n_dep_var ; ++i ) + for (v = 0 ; v < n_dep_var ; ++v ) { + struct ll *ll; + int i = 0; - if ( i > 0 ) - tab_hline(tbl, TAL_1, 0, n_cols -1 , - i * 2 * n_extremities * n_factors + heading_rows); - - tab_text (tbl, 0, - i * 2 * n_extremities * n_factors + heading_rows, - TAB_LEFT | TAT_TITLE, - var_to_string(dependent_var[i]) - ); + const int row_var_start = + v * DESCRIPTIVE_ROWS * ll_count(&fctr->result_list); + tab_text (tbl, + 0, + heading_rows + row_var_start, + TAB_LEFT | TAT_TITLE, + var_to_string (dependent_var[v]) + ); - if ( !fctr ) - populate_extremes(tbl, heading_columns - 2, - i * 2 * n_extremities * n_factors + heading_rows, - n_extremities, &totals[i]); - - else + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll)) { - struct factor_statistics **fs = fctr->fs; - int count = 0 ; + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); - while (*fs) - { - static union value prev ; - - const int row = heading_rows + ( 2 * n_extremities ) * - ( ( i * n_factors ) + count ); - - - if ( 0 != compare_values(&prev, &(*fs)->id[0], - fctr->indep_var[0]->width)) - { - - if ( count > 0 ) - tab_hline (tbl, TAL_1, 1, n_cols - 1, row); - - tab_text (tbl, - 1, row, - TAB_LEFT | TAT_TITLE, - value_to_string(&(*fs)->id[0], fctr->indep_var[0]) - ); - } + const double t = + gsl_cdf_tdist_Qinv ((1 - cmd.n_cinterval[0] / 100.0) / 2.0, + result->metrics[v].n - 1); - prev = (*fs)->id[0]; - - if (fctr->indep_var[1] && count > 0 ) - tab_hline(tbl, TAL_1, 2, n_cols - 1, row); - - if ( fctr->indep_var[1]) - tab_text (tbl, 2, row, - TAB_LEFT | TAT_TITLE, - value_to_string(&(*fs)->id[1], fctr->indep_var[1]) - ); - - populate_extremes(tbl, heading_columns - 2, - row, n_extremities, - &(*fs)->m[i]); - - count++ ; - fs++; + if ( i > 0 || v > 0 ) + { + const int left_col = (i == 0) ? 0 : 1; + tab_hline (tbl, TAL_1, left_col, n_cols - 1, + heading_rows + row_var_start + i * DESCRIPTIVE_ROWS); } - } - } - - tab_submit(tbl); -} - - - -/* Fill in the extremities table */ -void -populate_extremes(struct tab_table *t, - int col, int row, int n, const struct metrics *m) -{ - int extremity; - int idx=0; - - - tab_text(t, col, row, - TAB_RIGHT | TAT_TITLE , - _("Highest") - ); - - tab_text(t, col, row + n , - TAB_RIGHT | TAT_TITLE , - _("Lowest") - ); - - - tab_hline(t, TAL_1, col, col + 3, row + n ); - - for (extremity = 0; extremity < n ; ++extremity ) - { - /* Highest */ - tab_float(t, col + 1, row + extremity, - TAB_RIGHT, - extremity + 1, 8, 0); - - - /* Lowest */ - tab_float(t, col + 1, row + extremity + n, - TAB_RIGHT, - extremity + 1, 8, 0); - - } - - - /* Lowest */ - for (idx = 0, extremity = 0; extremity < n && idx < m->n_data ; ++idx ) - { - int j; - const struct weighted_value *wv = m->wvp[idx]; - struct case_node *cn = wv->case_nos; - - for (j = 0 ; j < wv->w ; ++j ) - { - if ( extremity + j >= n ) - break ; + if ( fctr->indep_var[0]) + { + struct string vstr; + ds_init_empty (&vstr); + var_append_value_name (fctr->indep_var[0], + &result->value[0], &vstr); + + tab_text (tbl, 1, + heading_rows + row_var_start + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + ds_cstr (&vstr) + ); - tab_float(t, col + 3, row + extremity + j + n, - TAB_RIGHT, - wv->v.f, 8, 2); + ds_destroy (&vstr); + } - tab_float(t, col + 2, row + extremity + j + n, - TAB_RIGHT, - cn->num, 8, 0); - if ( cn->next ) - cn = cn->next; + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Mean")); + + tab_text_format (tbl, n_cols - 4, + heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("%g%% Confidence Interval for Mean"), + cmd.n_cinterval[0]); + + tab_text (tbl, n_cols - 3, + heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Lower Bound")); + tab_text (tbl, n_cols - 3, + heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Upper Bound")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, _("5% Trimmed Mean")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Median")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Variance")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 6 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Std. Deviation")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Minimum")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Maximum")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Range")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Interquartile Range")); + + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Skewness")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Kurtosis")); + + + /* Now the statistics ... */ + + tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].mean, + NULL); + + tab_double (tbl, n_cols - 1, + heading_rows + row_var_start + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].se_mean, + NULL); + + + tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].mean - t * + result->metrics[v].se_mean, + NULL); + + tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].mean + t * + result->metrics[v].se_mean, + NULL); + + + tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + trimmed_mean_calculate ((struct trimmed_mean *) result->metrics[v].trimmed_mean), + NULL); + + + tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + percentile_calculate (result->metrics[v].quartiles[1], percentile_algorithm), + NULL); + + + tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].variance, + NULL); + + tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 6 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + sqrt (result->metrics[v].variance), + NULL); + + tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + percentile_calculate (result->metrics[v].quartiles[2], + percentile_algorithm) - + percentile_calculate (result->metrics[v].quartiles[0], + percentile_algorithm), + NULL); + + + tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].skewness, + NULL); + + tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].kurtosis, + NULL); + + tab_double (tbl, n_cols - 1, + heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + calc_seskew (result->metrics[v].n), + NULL); + + tab_double (tbl, n_cols - 1, + heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + calc_sekurt (result->metrics[v].n), + NULL); + + { + struct extremum *minimum, *maximum ; + + struct ll *max_ll = ll_head (extrema_list (result->metrics[v].maxima)); + struct ll *min_ll = ll_head (extrema_list (result->metrics[v].minima)); + + maximum = ll_data (max_ll, struct extremum, ll); + minimum = ll_data (min_ll, struct extremum, ll); + + tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + minimum->value, + NULL); + + tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + maximum->value, + NULL); + + tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + maximum->value - minimum->value, + NULL); + } } - - extremity += wv->w ; } + tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1); - /* Highest */ - for (idx = m->n_data - 1, extremity = 0; extremity < n && idx >= 0; --idx ) - { - int j; - const struct weighted_value *wv = m->wvp[idx]; - struct case_node *cn = wv->case_nos; - - for (j = 0 ; j < wv->w ; ++j ) - { - if ( extremity + j >= n ) - break ; - - tab_float(t, col + 3, row + extremity + j, - TAB_RIGHT, - wv->v.f, 8, 2); + tab_title (tbl, _("Descriptives")); - tab_float(t, col + 2, row + extremity + j, - TAB_RIGHT, - cn->num, 8, 0); + tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, + _("Statistic")); - if ( cn->next ) - cn = cn->next; + tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, + _("Std. Error")); - } - - extremity += wv->w ; - } + tab_submit (tbl); } -/* Show the descriptives table */ -void -show_descriptives(struct variable **dependent_var, - int n_dep_var, - struct factor *fctr) + +static void +show_extremes (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr) { - int i; - int heading_columns ; + int v; + int heading_columns = 3; int n_cols; - const int n_stat_rows = 13; - const int heading_rows = 1; - struct tab_table *tbl; - int n_factors = 1; int n_rows ; + n_rows = n_dep_var; - if ( fctr ) + assert (fctr); + + if ( fctr->indep_var[0] ) { heading_columns = 4; - n_factors = hsh_count(fctr->fstats); - - n_rows = n_dep_var * n_stat_rows * n_factors; if ( fctr->indep_var[1] ) - heading_columns = 5; - } - else - { - heading_columns = 3; - n_rows = n_dep_var * n_stat_rows; + { + heading_columns = 5; + } } + n_rows *= ll_count (&fctr->result_list) * cmd.st_n * 2; n_rows += heading_rows; n_cols = heading_columns + 2; - tbl = tab_create (n_cols, n_rows, 0); + tab_headers (tbl, heading_columns, 0, heading_rows, 0); - tab_headers (tbl, heading_columns + 1, 0, heading_rows, 0); - - tab_dim (tbl, tab_natural_dimensions); + tab_dim (tbl, tab_natural_dimensions, NULL); - /* Outline the box and have no internal lines*/ - tab_box (tbl, + /* Outline the box */ + tab_box (tbl, TAL_2, TAL_2, -1, -1, 0, 0, n_cols - 1, n_rows - 1); - tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); - tab_vline (tbl, TAL_1, heading_columns, 0, n_rows - 1); - tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows - 1); + tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); + tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows ); tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1); - tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Statistic")); - tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Std. Error")); + if ( fctr->indep_var[0]) + tab_text (tbl, 1, 0, TAT_TITLE, var_to_string (fctr->indep_var[0])); - tab_title (tbl, 0, _("Descriptives")); + if ( fctr->indep_var[1]) + tab_text (tbl, 2, 0, TAT_TITLE, var_to_string (fctr->indep_var[1])); - - for ( i = 0 ; i < n_dep_var ; ++i ) + for (v = 0 ; v < n_dep_var ; ++v ) { - const int row = heading_rows + i * n_stat_rows * n_factors ; - - if ( i > 0 ) - tab_hline(tbl, TAL_1, 0, n_cols - 1, row ); - - tab_text (tbl, 0, - i * n_stat_rows * n_factors + heading_rows, - TAB_LEFT | TAT_TITLE, - var_to_string(dependent_var[i]) + struct ll *ll; + int i = 0; + const int row_var_start = v * cmd.st_n * 2 * ll_count(&fctr->result_list); + + tab_text (tbl, + 0, + heading_rows + row_var_start, + TAB_LEFT | TAT_TITLE, + var_to_string (dependent_var[v]) ); - - if ( fctr ) + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll)) { - struct factor_statistics **fs = fctr->fs; - int count = 0; + int e ; + struct ll *min_ll; + struct ll *max_ll; + const int row_result_start = i * cmd.st_n * 2; - tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string(fctr->indep_var[0])); + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); + if (i > 0 || v > 0) + tab_hline (tbl, TAL_1, 1, n_cols - 1, + heading_rows + row_var_start + row_result_start); - if ( fctr->indep_var[1]) - tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string(fctr->indep_var[1])); + tab_hline (tbl, TAL_1, heading_columns - 2, n_cols - 1, + heading_rows + row_var_start + row_result_start + cmd.st_n); - while( *fs ) + for ( e = 1; e <= cmd.st_n; ++e ) { + tab_text_format (tbl, n_cols - 3, + heading_rows + row_var_start + row_result_start + e - 1, + TAB_RIGHT, + "%d", e); + + tab_text_format (tbl, n_cols - 3, + heading_rows + row_var_start + row_result_start + cmd.st_n + e - 1, + TAB_RIGHT, + "%d", e); + } - static union value prev ; - - const int row = heading_rows + n_stat_rows * - ( ( i * n_factors ) + count ); + min_ll = ll_head (extrema_list (result->metrics[v].minima)); + for (e = 0; e < cmd.st_n;) + { + struct extremum *minimum = ll_data (min_ll, struct extremum, ll); + double weight = minimum->weight; - if ( 0 != compare_values(&prev, &(*fs)->id[0], - fctr->indep_var[0]->width)) + while (weight-- > 0 && e < cmd.st_n) { - - if ( count > 0 ) - tab_hline (tbl, TAL_1, 1, n_cols - 1, row); - - tab_text (tbl, - 1, row, - TAB_LEFT | TAT_TITLE, - value_to_string(&(*fs)->id[0], fctr->indep_var[0]) - ); + tab_double (tbl, n_cols - 1, + heading_rows + row_var_start + row_result_start + cmd.st_n + e, + TAB_RIGHT, + minimum->value, + NULL); + + + tab_fixed (tbl, n_cols - 2, + heading_rows + row_var_start + + row_result_start + cmd.st_n + e, + TAB_RIGHT, + minimum->location, + 10, 0); + ++e; } - prev = (*fs)->id[0]; - - if (fctr->indep_var[1] && count > 0 ) - tab_hline(tbl, TAL_1, 2, n_cols - 1, row); - - if ( fctr->indep_var[1]) - tab_text (tbl, 2, row, - TAB_LEFT | TAT_TITLE, - value_to_string(&(*fs)->id[1], fctr->indep_var[1]) - ); - - populate_descriptives(tbl, heading_columns - 2, - row, &(*fs)->m[i]); - - count++ ; - fs++; + min_ll = ll_next (min_ll); } - } - - else - { - - populate_descriptives(tbl, heading_columns - 2, - i * n_stat_rows * n_factors + heading_rows, - &totals[i]); - } - } - - tab_submit(tbl); - -} - - - - -/* Fill in the descriptives data */ -void -populate_descriptives(struct tab_table *tbl, int col, int row, - const struct metrics *m) -{ - - const double t = gsl_cdf_tdist_Qinv(1 - cmd.n_cinterval[0]/100.0/2.0, \ - m->n -1); - - - tab_text (tbl, col, - row, - TAB_LEFT | TAT_TITLE, - _("Mean")); - - tab_float (tbl, col + 2, - row, - TAB_CENTER, - m->mean, - 8,2); - - tab_float (tbl, col + 3, - row, - TAB_CENTER, - m->se_mean, - 8,3); - - - tab_text (tbl, col, - row + 1, - TAB_LEFT | TAT_TITLE | TAT_PRINTF, - _("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]); - - - tab_text (tbl, col + 1, - row + 1, - TAB_LEFT | TAT_TITLE, - _("Lower Bound")); - - tab_float (tbl, col + 2, - row + 1, - TAB_CENTER, - m->mean - t * m->se_mean, - 8,3); - - tab_text (tbl, col + 1, - row + 2, - TAB_LEFT | TAT_TITLE, - _("Upper Bound")); - - - tab_float (tbl, col + 2, - row + 2, - TAB_CENTER, - m->mean + t * m->se_mean, - 8,3); - - tab_text (tbl, col, - row + 3, - TAB_LEFT | TAT_TITLE | TAT_PRINTF, - _("5%% Trimmed Mean")); - - tab_float (tbl, col + 2, - row + 3, - TAB_CENTER, - m->trimmed_mean, - 8,2); - - tab_text (tbl, col, - row + 4, - TAB_LEFT | TAT_TITLE, - _("Median")); - - { - struct percentile *p; - double d = 50; - - p = hsh_find(m->ptile_hash, &d); - - assert(p); - - - tab_float (tbl, col + 2, - row + 4, - TAB_CENTER, - p->v, - 8, 2); - } - - - tab_text (tbl, col, - row + 5, - TAB_LEFT | TAT_TITLE, - _("Variance")); - - tab_float (tbl, col + 2, - row + 5, - TAB_CENTER, - m->var, - 8,3); - - - tab_text (tbl, col, - row + 6, - TAB_LEFT | TAT_TITLE, - _("Std. Deviation")); - - - tab_float (tbl, col + 2, - row + 6, - TAB_CENTER, - m->stddev, - 8,3); - - - tab_text (tbl, col, - row + 7, - TAB_LEFT | TAT_TITLE, - _("Minimum")); - - tab_float (tbl, col + 2, - row + 7, - TAB_CENTER, - m->min, - 8,3); - - tab_text (tbl, col, - row + 8, - TAB_LEFT | TAT_TITLE, - _("Maximum")); - - tab_float (tbl, col + 2, - row + 8, - TAB_CENTER, - m->max, - 8,3); - - - tab_text (tbl, col, - row + 9, - TAB_LEFT | TAT_TITLE, - _("Range")); - - tab_float (tbl, col + 2, - row + 9, - TAB_CENTER, - m->max - m->min, - 8,3); - - tab_text (tbl, col, - row + 10, - TAB_LEFT | TAT_TITLE, - _("Interquartile Range")); - - { - struct percentile *p1; - struct percentile *p2; - - double d = 75; - p1 = hsh_find(m->ptile_hash, &d); - - d = 25; - p2 = hsh_find(m->ptile_hash, &d); - - assert(p1); - assert(p2); - - tab_float (tbl, col + 2, - row + 10, - TAB_CENTER, - p1->v - p2->v, - 8, 2); - } - - - - tab_text (tbl, col, - row + 11, - TAB_LEFT | TAT_TITLE, - _("Skewness")); - - - tab_float (tbl, col + 2, - row + 11, - TAB_CENTER, - m->skewness, - 8,3); - - /* stderr of skewness */ - tab_float (tbl, col + 3, - row + 11, - TAB_CENTER, - calc_seskew(m->n), - 8,3); - - - tab_text (tbl, col, - row + 12, - TAB_LEFT | TAT_TITLE, - _("Kurtosis")); - - - tab_float (tbl, col + 2, - row + 12, - TAB_CENTER, - m->kurtosis, - 8,3); - - /* stderr of kurtosis */ - tab_float (tbl, col + 3, - row + 12, - TAB_CENTER, - calc_sekurt(m->n), - 8,3); - - -} - - - -void -box_plot_variables(const struct factor *fctr, - const struct variable **vars, int n_vars, - const struct variable *id) -{ - - int i; - struct factor_statistics **fs ; + max_ll = ll_head (extrema_list (result->metrics[v].maxima)); + for (e = 0; e < cmd.st_n;) + { + struct extremum *maximum = ll_data (max_ll, struct extremum, ll); + double weight = maximum->weight; - if ( ! fctr ) - { - box_plot_group(fctr, vars, n_vars, id); - return; - } + while (weight-- > 0 && e < cmd.st_n) + { + tab_double (tbl, n_cols - 1, + heading_rows + row_var_start + + row_result_start + e, + TAB_RIGHT, + maximum->value, + NULL); + + + tab_fixed (tbl, n_cols - 2, + heading_rows + row_var_start + + row_result_start + e, + TAB_RIGHT, + maximum->location, + 10, 0); + ++e; + } - for ( fs = fctr->fs ; *fs ; ++fs ) - { - double y_min = DBL_MAX; - double y_max = -DBL_MAX; - struct chart *ch = chart_create(); - const char *s = factor_to_string(fctr, *fs, 0 ); + max_ll = ll_next (max_ll); + } - chart_write_title(ch, s); - for ( i = 0 ; i < n_vars ; ++i ) - { - y_max = max(y_max, (*fs)->m[i].max); - y_min = min(y_min, (*fs)->m[i].min); - } - - boxplot_draw_yscale(ch, y_max, y_min); - - for ( i = 0 ; i < n_vars ; ++i ) - { + if ( fctr->indep_var[0]) + { + struct string vstr; + ds_init_empty (&vstr); + var_append_value_name (fctr->indep_var[0], + &result->value[0], &vstr); + + tab_text (tbl, 1, + heading_rows + row_var_start + row_result_start, + TAB_LEFT, + ds_cstr (&vstr) + ); - const double box_width = (ch->data_right - ch->data_left) - / (n_vars * 2.0 ) ; + ds_destroy (&vstr); + } - const double box_centre = ( i * 2 + 1) * box_width - + ch->data_left; - - boxplot_draw_boxplot(ch, - box_centre, box_width, - &(*fs)->m[i], - var_to_string(vars[i])); + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + row_result_start, + TAB_RIGHT, + _("Highest")); + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + row_result_start + cmd.st_n, + TAB_RIGHT, + _("Lowest")); } - - chart_submit(ch); - } -} - - - -/* Do a box plot, grouping all factors into one plot ; - each dependent variable has its own plot. -*/ -void -box_plot_group(const struct factor *fctr, - const struct variable **vars, - int n_vars, - const struct variable *id UNUSED) -{ - int i; - - for ( i = 0 ; i < n_vars ; ++i ) - { - struct factor_statistics **fs ; - struct chart *ch; - - ch = chart_create(); + tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1); - boxplot_draw_yscale(ch, totals[i].max, totals[i].min); - if ( fctr ) - { - int n_factors = 0; - int f=0; - for ( fs = fctr->fs ; *fs ; ++fs ) - ++n_factors; + tab_title (tbl, _("Extreme Values")); - chart_write_title(ch, _("Boxplot of %s vs. %s"), - var_to_string(vars[i]), var_to_string(fctr->indep_var[0]) ); - for ( fs = fctr->fs ; *fs ; ++fs ) - { - - const char *s = factor_to_string_concise(fctr, *fs); - - const double box_width = (ch->data_right - ch->data_left) - / (n_factors * 2.0 ) ; - - const double box_centre = ( f++ * 2 + 1) * box_width - + ch->data_left; - - boxplot_draw_boxplot(ch, - box_centre, box_width, - &(*fs)->m[i], - s); - } - } - else if ( ch ) - { - const double box_width = (ch->data_right - ch->data_left) / 3.0; - const double box_centre = (ch->data_right + ch->data_left) / 2.0; + tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, + _("Case Number")); - chart_write_title(ch, _("Boxplot")); - boxplot_draw_boxplot(ch, - box_centre, box_width, - &totals[i], - var_to_string(vars[i]) ); - - } + tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, + _("Value")); - chart_submit(ch); - } + tab_submit (tbl); } +#define PERCENTILE_ROWS 2 -/* Plot the normal and detrended normal plots for m - Label the plots with factorname */ -void -np_plot(const struct metrics *m, const char *factorname) +static void +show_percentiles (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr) { int i; - double yfirst=0, ylast=0; - - /* Normal Plot */ - struct chart *np_chart; - - /* Detrended Normal Plot */ - struct chart *dnp_chart; - - /* The slope and intercept of the ideal normal probability line */ - const double slope = 1.0 / m->stddev; - const double intercept = - m->mean / m->stddev; - - /* Cowardly refuse to plot an empty data set */ - if ( m->n_data == 0 ) - return ; - - np_chart = chart_create(); - dnp_chart = chart_create(); - - if ( !np_chart || ! dnp_chart ) - return ; - - chart_write_title(np_chart, _("Normal Q-Q Plot of %s"), factorname); - chart_write_xlabel(np_chart, _("Observed Value")); - chart_write_ylabel(np_chart, _("Expected Normal")); - - - chart_write_title(dnp_chart, _("Detrended Normal Q-Q Plot of %s"), - factorname); - chart_write_xlabel(dnp_chart, _("Observed Value")); - chart_write_ylabel(dnp_chart, _("Dev from Normal")); - - yfirst = gsl_cdf_ugaussian_Pinv (m->wvp[0]->rank / ( m->n + 1)); - ylast = gsl_cdf_ugaussian_Pinv (m->wvp[m->n_data-1]->rank / ( m->n + 1)); - - - { - /* Need to make sure that both the scatter plot and the ideal fit into the - plot */ - double x_lower = min(m->min, (yfirst - intercept) / slope) ; - double x_upper = max(m->max, (ylast - intercept) / slope) ; - double slack = (x_upper - x_lower) * 0.05 ; - - chart_write_xscale(np_chart, x_lower - slack, x_upper + slack, 5); - - chart_write_xscale(dnp_chart, m->min, m->max, 5); - - } - - chart_write_yscale(np_chart, yfirst, ylast, 5); - - { - /* We have to cache the detrended data, beacause we need to - find its limits before we can plot it */ - double *d_data = xnmalloc (m->n_data, sizeof *d_data); - double d_max = -DBL_MAX; - double d_min = DBL_MAX; - for ( i = 0 ; i < m->n_data; ++i ) - { - const double ns = gsl_cdf_ugaussian_Pinv (m->wvp[i]->rank / ( m->n + 1)); - - chart_datum(np_chart, 0, m->wvp[i]->v.f, ns); - - d_data[i] = (m->wvp[i]->v.f - m->mean) / m->stddev - ns; - - if ( d_data[i] < d_min ) d_min = d_data[i]; - if ( d_data[i] > d_max ) d_max = d_data[i]; - } - chart_write_yscale(dnp_chart, d_min, d_max, 5); - - for ( i = 0 ; i < m->n_data; ++i ) - chart_datum(dnp_chart, 0, m->wvp[i]->v.f, d_data[i]); - - free(d_data); - } - - chart_line(np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y); - chart_line(dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X); - - chart_submit(np_chart); - chart_submit(dnp_chart); -} - - - - -/* Show the percentiles */ -void -show_percentiles(struct variable **dependent_var, - int n_dep_var, - struct factor *fctr) -{ + int v; + int heading_columns = 2; + int n_cols; + const int n_percentiles = subc_list_double_count (&percentile_list); + const int heading_rows = 2; struct tab_table *tbl; - int i; - - int n_cols, n_rows; - int n_factors; - - struct hsh_table *ptiles ; - int n_heading_columns; - const int n_heading_rows = 2; - const int n_stat_rows = 2; + int n_rows ; + n_rows = n_dep_var; - int n_ptiles ; + assert (fctr); - if ( fctr ) + if ( fctr->indep_var[0] ) { - struct factor_statistics **fs = fctr->fs ; - n_heading_columns = 3; - n_factors = hsh_count(fctr->fstats); - - ptiles = (*fs)->m[0].ptile_hash; + heading_columns = 3; if ( fctr->indep_var[1] ) - n_heading_columns = 4; - } - else - { - n_factors = 1; - n_heading_columns = 2; - - ptiles = totals[0].ptile_hash; + { + heading_columns = 4; + } } - n_ptiles = hsh_count(ptiles); - - n_rows = n_heading_rows + n_dep_var * n_stat_rows * n_factors; + n_rows *= ll_count (&fctr->result_list) * PERCENTILE_ROWS; + n_rows += heading_rows; - n_cols = n_heading_columns + n_ptiles ; + n_cols = heading_columns + n_percentiles; tbl = tab_create (n_cols, n_rows, 0); + tab_headers (tbl, heading_columns, 0, heading_rows, 0); - tab_headers (tbl, n_heading_columns + 1, 0, n_heading_rows, 0); - - tab_dim (tbl, tab_natural_dimensions); + tab_dim (tbl, tab_natural_dimensions, NULL); - /* Outline the box and have no internal lines*/ - tab_box (tbl, + /* Outline the box */ + tab_box (tbl, TAL_2, TAL_2, -1, -1, 0, 0, n_cols - 1, n_rows - 1); - tab_hline (tbl, TAL_2, 0, n_cols - 1, n_heading_rows ); - - tab_vline (tbl, TAL_2, n_heading_columns, 0, n_rows - 1); - - - tab_title (tbl, 0, _("Percentiles")); - - - tab_hline (tbl, TAL_1, n_heading_columns, n_cols - 1, 1 ); - - - tab_box (tbl, - -1, -1, - -1, TAL_1, - 0, n_heading_rows, - n_heading_columns - 1, n_rows - 1); - - - tab_box (tbl, - -1, -1, - -1, TAL_1, - n_heading_columns, n_heading_rows - 1, - n_cols - 1, n_rows - 1); - - tab_joint_text(tbl, n_heading_columns + 1, 0, - n_cols - 1 , 0, - TAB_CENTER | TAT_TITLE , - _("Percentiles")); + tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); + tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows ); - { - /* Put in the percentile break points as headings */ + if ( fctr->indep_var[0]) + tab_text (tbl, 1, 1, TAT_TITLE, var_to_string (fctr->indep_var[0])); - struct percentile **p = (struct percentile **) hsh_sort(ptiles); + if ( fctr->indep_var[1]) + tab_text (tbl, 2, 1, TAT_TITLE, var_to_string (fctr->indep_var[1])); - i = 0; - while ( (*p) ) - { - tab_float(tbl, n_heading_columns + i++ , 1, - TAB_CENTER, - (*p)->p, 8, 0); - - p++; - } - - } - - for ( i = 0 ; i < n_dep_var ; ++i ) + for (v = 0 ; v < n_dep_var ; ++v ) { - const int n_stat_rows = 2; - const int row = n_heading_rows + i * n_stat_rows * n_factors ; - - if ( i > 0 ) - tab_hline(tbl, TAL_1, 0, n_cols - 1, row ); - - tab_text (tbl, 0, - i * n_stat_rows * n_factors + n_heading_rows, - TAB_LEFT | TAT_TITLE, - var_to_string(dependent_var[i]) + double hinges[3]; + struct ll *ll; + int i = 0; + + const int row_var_start = + v * PERCENTILE_ROWS * ll_count(&fctr->result_list); + + tab_text (tbl, + 0, + heading_rows + row_var_start, + TAB_LEFT | TAT_TITLE, + var_to_string (dependent_var[v]) ); - if ( fctr ) + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll)) { - struct factor_statistics **fs = fctr->fs; - int count = 0; - - tab_text (tbl, 1, n_heading_rows - 1, - TAB_CENTER | TAT_TITLE, - var_to_string(fctr->indep_var[0])); + int j; + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); - - if ( fctr->indep_var[1]) - tab_text (tbl, 2, n_heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string(fctr->indep_var[1])); - - while( *fs ) + if ( i > 0 || v > 0 ) { + const int left_col = (i == 0) ? 0 : 1; + tab_hline (tbl, TAL_1, left_col, n_cols - 1, + heading_rows + row_var_start + i * PERCENTILE_ROWS); + } - static union value prev ; - - const int row = n_heading_rows + n_stat_rows * - ( ( i * n_factors ) + count ); - - - if ( 0 != compare_values(&prev, &(*fs)->id[0], - fctr->indep_var[0]->width)) - { - - if ( count > 0 ) - tab_hline (tbl, TAL_1, 1, n_cols - 1, row); + if ( fctr->indep_var[0]) + { + struct string vstr; + ds_init_empty (&vstr); + var_append_value_name (fctr->indep_var[0], + &result->value[0], &vstr); + + tab_text (tbl, 1, + heading_rows + row_var_start + i * PERCENTILE_ROWS, + TAB_LEFT, + ds_cstr (&vstr) + ); - tab_text (tbl, - 1, row, - TAB_LEFT | TAT_TITLE, - value_to_string(&(*fs)->id[0], fctr->indep_var[0]) - ); + ds_destroy (&vstr); + } - } + tab_text (tbl, n_cols - n_percentiles - 1, + heading_rows + row_var_start + i * PERCENTILE_ROWS, + TAB_LEFT, + ptile_alg_desc [percentile_algorithm]); - prev = (*fs)->id[0]; - if (fctr->indep_var[1] && count > 0 ) - tab_hline(tbl, TAL_1, 2, n_cols - 1, row); + tab_text (tbl, n_cols - n_percentiles - 1, + heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS, + TAB_LEFT, + _("Tukey's Hinges")); - if ( fctr->indep_var[1]) - tab_text (tbl, 2, row, - TAB_LEFT | TAT_TITLE, - value_to_string(&(*fs)->id[1], fctr->indep_var[1]) - ); + tab_vline (tbl, TAL_1, n_cols - n_percentiles -1, heading_rows, n_rows - 1); - populate_percentiles(tbl, n_heading_columns - 1, - row, &(*fs)->m[i]); + tukey_hinges_calculate ((struct tukey_hinges *) result->metrics[v].tukey_hinges, + hinges); + for (j = 0; j < n_percentiles; ++j) + { + double hinge = SYSMIS; + tab_double (tbl, n_cols - n_percentiles + j, + heading_rows + row_var_start + i * PERCENTILE_ROWS, + TAB_CENTER, + percentile_calculate (result->metrics[v].ptl[j], + percentile_algorithm), + NULL + ); + + if ( result->metrics[v].ptl[j]->ptile == 0.5) + hinge = hinges[1]; + else if ( result->metrics[v].ptl[j]->ptile == 0.25) + hinge = hinges[0]; + else if ( result->metrics[v].ptl[j]->ptile == 0.75) + hinge = hinges[2]; + + if ( hinge != SYSMIS) + tab_double (tbl, n_cols - n_percentiles + j, + heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS, + TAB_CENTER, + hinge, + NULL + ); - count++ ; - fs++; } - - - } - else - { - populate_percentiles(tbl, n_heading_columns - 1, - i * n_stat_rows * n_factors + n_heading_rows, - &totals[i]); } - - } + tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1); - tab_submit(tbl); - + tab_title (tbl, _("Percentiles")); -} + for (i = 0 ; i < n_percentiles; ++i ) + { + tab_text_format (tbl, n_cols - n_percentiles + i, 1, + TAB_CENTER | TAT_TITLE, + _("%g"), + subc_list_double_at (&percentile_list, i)); + } -void -populate_percentiles(struct tab_table *tbl, int col, int row, - const struct metrics *m) -{ - int i; + tab_joint_text (tbl, + n_cols - n_percentiles, 0, + n_cols - 1, 0, + TAB_CENTER | TAT_TITLE, + _("Percentiles")); - struct percentile **p = (struct percentile **) hsh_sort(m->ptile_hash); - - tab_text (tbl, - col, row + 1, - TAB_LEFT | TAT_TITLE, - _("Tukey\'s Hinges") - ); + /* Vertical lines for the data only */ + tab_box (tbl, + -1, -1, + -1, TAL_1, + n_cols - n_percentiles, 1, + n_cols - 1, n_rows - 1); - tab_text (tbl, - col, row, - TAB_LEFT | TAT_TITLE, - ptile_alg_desc[m->ptile_alg] - ); + tab_hline (tbl, TAL_1, n_cols - n_percentiles, n_cols - 1, 1); - i = 0; - while ( (*p) ) - { - tab_float(tbl, col + i + 1 , row, - TAB_CENTER, - (*p)->v, 8, 2); - if ( (*p)->p == 25 ) - tab_float(tbl, col + i + 1 , row + 1, - TAB_CENTER, - m->hinge[0], 8, 2); + tab_submit (tbl); +} - if ( (*p)->p == 50 ) - tab_float(tbl, col + i + 1 , row + 1, - TAB_CENTER, - m->hinge[1], 8, 2); - if ( (*p)->p == 75 ) - tab_float(tbl, col + i + 1 , row + 1, - TAB_CENTER, - m->hinge[2], 8, 2); +static void +factor_to_string_concise (const struct xfactor *fctr, + const struct factor_result *result, + struct string *str + ) +{ + if (fctr->indep_var[0]) + { + var_append_value_name (fctr->indep_var[0], &result->value[0], str); + if ( fctr->indep_var[1] ) + { + ds_put_cstr (str, ","); - i++; + var_append_value_name (fctr->indep_var[1], &result->value[1], str); - p++; + ds_put_cstr (str, ")"); + } } - } - -const char * -factor_to_string(const struct factor *fctr, - struct factor_statistics *fs, - const struct variable *var) +static void +factor_to_string (const struct xfactor *fctr, + const struct factor_result *result, + struct string *str + ) { + if (fctr->indep_var[0]) + { + ds_put_format (str, "(%s = ", var_get_name (fctr->indep_var[0])); - static char buf1[100]; - char buf2[100]; - - strcpy(buf1,""); + var_append_value_name (fctr->indep_var[0], &result->value[0], str); - if (var) - sprintf(buf1, "%s (",var_to_string(var) ); + if ( fctr->indep_var[1] ) + { + ds_put_cstr (str, ","); + ds_put_format (str, "%s = ", var_get_name (fctr->indep_var[1])); - - snprintf(buf2, 100, "%s = %s", - var_to_string(fctr->indep_var[0]), - value_to_string(&fs->id[0],fctr->indep_var[0])); - - strcat(buf1, buf2); - - if ( fctr->indep_var[1] ) - { - sprintf(buf2, "; %s = %s)", - var_to_string(fctr->indep_var[1]), - value_to_string(&fs->id[1], - fctr->indep_var[1])); - strcat(buf1, buf2); - } - else - { - if ( var ) - strcat(buf1, ")"); + var_append_value_name (fctr->indep_var[1], &result->value[1], str); + } + ds_put_cstr (str, ")"); } - - return buf1; } -const char * -factor_to_string_concise(const struct factor *fctr, - struct factor_statistics *fs) - -{ - - static char buf[100]; - - char buf2[100]; - snprintf(buf, 100, "%s", - value_to_string(&fs->id[0], fctr->indep_var[0])); - - if ( fctr->indep_var[1] ) - { - sprintf(buf2, ",%s)", value_to_string(&fs->id[1], fctr->indep_var[1]) ); - strcat(buf, buf2); - } - - - return buf; -} +/* + Local Variables: + mode: c + End: +*/