From ade89522516fe8cc47299d8da6e28e129d96de94 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Thu, 7 Dec 2006 06:31:04 +0000 Subject: [PATCH] Changed factors' independent variables to be copies on the heap. Fixes part of bug #15911. --- src/data/ChangeLog | 4 + src/data/variable.c | 15 + src/data/variable.h | 2 + src/language/stats/ChangeLog | 5 + src/language/stats/examine.q | 772 ++++++++++++++++++----------------- src/math/ChangeLog | 5 + src/math/factor-stats.c | 102 +++-- src/math/factor-stats.h | 29 +- src/math/sort.c | 1 - 9 files changed, 494 insertions(+), 441 deletions(-) diff --git a/src/data/ChangeLog b/src/data/ChangeLog index 805a466f..3be6d529 100644 --- a/src/data/ChangeLog +++ b/src/data/ChangeLog @@ -1,3 +1,7 @@ +Thu Dec 7 15:22:38 WST 2006 John Darrington + + * variable.c variable.h (value_dup): New function. + Mon Dec 4 22:20:17 2006 Ben Pfaff Start converting struct variable to an opaque. In this phase, we diff --git a/src/data/variable.c b/src/data/variable.c index 8582308d..b43fce7e 100644 --- a/src/data/variable.c +++ b/src/data/variable.c @@ -123,6 +123,21 @@ var_dtor_free (struct variable *v) free (v->aux); } +/* Duplicate a value. + The caller is responsible for freeing the returned value +*/ +union value * +value_dup (const union value *val, int width) +{ + size_t bytes = MAX(width, sizeof *val); + + union value *v = xmalloc (bytes); + memcpy (v, val, bytes); + return v; +} + + + /* Compares A and B, which both have the given WIDTH, and returns a strcmp()-type result. */ int diff --git a/src/data/variable.h b/src/data/variable.h index 04305aa7..685c5104 100644 --- a/src/data/variable.h +++ b/src/data/variable.h @@ -221,5 +221,7 @@ const char * var_to_string(const struct variable *var); int width_to_bytes(int width); +union value * value_dup (const union value *val, int width); + #endif /* !variable.h */ diff --git a/src/language/stats/ChangeLog b/src/language/stats/ChangeLog index c0c9708a..fdfa62d7 100644 --- a/src/language/stats/ChangeLog +++ b/src/language/stats/ChangeLog @@ -1,3 +1,8 @@ +Thu Dec 7 15:26:25 WST 2006 John Darrington + + * examine.q: Allocated the categorical values for the dependent and + independent variables, on the heap. Hence they can be of any width. + Wed Dec 6 21:14:26 2006 Ben Pfaff * regression.q (reg_inserted): Compare variable pointers instead diff --git a/src/language/stats/examine.q b/src/language/stats/examine.q index 988ef779..b337131c 100644 --- a/src/language/stats/examine.q +++ b/src/language/stats/examine.q @@ -1,7 +1,7 @@ /* PSPP - EXAMINE data for normality . -*-c-*- Copyright (C) 2004 Free Software Foundation, Inc. -Author: John Darrington 2004 +Author: John Darrington 2004, 2006 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -90,7 +90,7 @@ static struct variable **dependent_vars; static size_t n_dependent_vars; -struct factor +struct factor { /* The independent variable */ struct variable *indep_var[2]; @@ -107,9 +107,9 @@ struct factor }; /* Linked list of factors */ -static struct factor *factors=0; +static struct factor *factors = 0; -static struct metrics *totals=0; +static struct metrics *totals = 0; /* Parse the clause specifying the factors */ static int examine_parse_independent_vars (struct lexer *lexer, const struct dictionary *dict, struct cmd_examine *cmd); @@ -117,20 +117,20 @@ static int examine_parse_independent_vars (struct lexer *lexer, const struct dic /* Output functions */ -static void show_summary (struct variable **dependent_var, int n_dep_var, +static void show_summary (struct variable **dependent_var, int n_dep_var, const struct factor *f); -static void show_extremes (struct variable **dependent_var, - int n_dep_var, +static void show_extremes (struct variable **dependent_var, + int n_dep_var, const struct factor *factor, int n_extremities); -static void show_descriptives (struct variable **dependent_var, - int n_dep_var, +static void show_descriptives (struct variable **dependent_var, + int n_dep_var, struct factor *factor); -static void show_percentiles (struct variable **dependent_var, - int n_dep_var, +static void show_percentiles (struct variable **dependent_var, + int n_dep_var, struct factor *factor); @@ -139,14 +139,14 @@ static void show_percentiles (struct variable **dependent_var, void np_plot (const struct metrics *m, const char *factorname); -void box_plot_group (const struct factor *fctr, +void box_plot_group (const struct factor *fctr, const struct variable **vars, int n_vars, const struct variable *id ) ; -void box_plot_variables (const struct factor *fctr, - const struct variable **vars, int n_vars, +void box_plot_variables (const struct factor *fctr, + const struct variable **vars, int n_vars, const struct variable *id ); @@ -159,21 +159,21 @@ static bool run_examine (const struct ccase *, static void output_examine (void); -void factor_calc (struct ccase *c, int case_no, - double weight, int case_missing); +void factor_calc (const struct ccase *c, int case_no, + double weight, int case_missing); /* Represent a factor as a string, so it can be printed in a human readable fashion */ -const char * factor_to_string (const struct factor *fctr, - struct factor_statistics *fs, - const struct variable *var); +const char * factor_to_string (const struct factor *fctr, + const struct factor_statistics *fs, + const struct variable *var); /* Represent a factor as a string, so it can be printed in a human readable fashion, but sacrificing some readablility for the sake of brevity */ -const char *factor_to_string_concise (const struct factor *fctr, +const char *factor_to_string_concise (const struct factor *fctr, struct factor_statistics *fs); @@ -207,18 +207,18 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } /* If /MISSING=INCLUDE is set, then user missing values are ignored */ - if (cmd.incl == XMN_INCLUDE ) + if (cmd.incl == XMN_INCLUDE ) value_is_missing = var_is_value_system_missing; else value_is_missing = var_is_value_missing; - if ( cmd.st_n == SYSMIS ) + if ( cmd.st_n == SYSMIS ) cmd.st_n = 5; - if ( ! cmd.sbc_cinterval) + if ( ! cmd.sbc_cinterval) cmd.n_cinterval[0] = 95.0; - /* If descriptives have been requested, make sure the + /* If descriptives have been requested, make sure the quartiles are calculated */ if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] ) { @@ -229,17 +229,17 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) ok = multipass_procedure_with_splits (ds, run_examine, &cmd); - if ( totals ) + if ( totals ) { free ( totals ); } - - if ( dependent_vars ) + + if ( dependent_vars ) free (dependent_vars); { struct factor *f = factors ; - while ( f ) + while ( f ) { struct factor *ff = f; @@ -269,36 +269,36 @@ output_examine (void) { show_summary (dependent_vars, n_dependent_vars, 0); - if ( cmd.sbc_statistics ) + if ( cmd.sbc_statistics ) { - if ( cmd.a_statistics[XMN_ST_EXTREME]) + if ( cmd.a_statistics[XMN_ST_EXTREME]) show_extremes (dependent_vars, n_dependent_vars, 0, cmd.st_n); - if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES]) + if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES]) show_descriptives (dependent_vars, n_dependent_vars, 0); } - if ( sbc_percentile ) + if ( sbc_percentile ) show_percentiles (dependent_vars, n_dependent_vars, 0); - if ( cmd.sbc_plot) + if ( cmd.sbc_plot) { int v; - if ( cmd.a_plot[XMN_PLT_STEMLEAF] ) + if ( cmd.a_plot[XMN_PLT_STEMLEAF] ) msg (SW, _ ("%s is not currently supported."), "STEMLEAF"); - if ( cmd.a_plot[XMN_PLT_SPREADLEVEL] ) + if ( cmd.a_plot[XMN_PLT_SPREADLEVEL] ) msg (SW, _ ("%s is not currently supported."), "SPREADLEVEL"); - if ( cmd.a_plot[XMN_PLT_NPPLOT] ) + if ( cmd.a_plot[XMN_PLT_NPPLOT] ) { - for ( v = 0 ; v < n_dependent_vars; ++v ) + for ( v = 0 ; v < n_dependent_vars; ++v ) np_plot (&totals[v], var_to_string (dependent_vars[v])); } - if ( cmd.a_plot[XMN_PLT_BOXPLOT] ) + if ( cmd.a_plot[XMN_PLT_BOXPLOT] ) { - if ( cmd.cmp == XMN_GROUPS ) + if ( cmd.cmp == XMN_GROUPS ) { box_plot_group (0, (const struct variable **) dependent_vars, n_dependent_vars, cmd.v_id); @@ -309,17 +309,17 @@ output_examine (void) n_dependent_vars, cmd.v_id); } - if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) + if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) { - for ( v = 0 ; v < n_dependent_vars; ++v ) + for ( v = 0 ; v < n_dependent_vars; ++v ) { struct normal_curve normal; normal.N = totals[v].n; normal.mean = totals[v].mean; normal.stddev = totals[v].stddev; - - histogram_plot (totals[v].histogram, + + histogram_plot (totals[v].histogram, var_to_string (dependent_vars[v]), &normal, 0); } @@ -332,24 +332,24 @@ output_examine (void) /* Show grouped statistics as appropriate */ fctr = factors; - while ( fctr ) + while ( fctr ) { show_summary (dependent_vars, n_dependent_vars, fctr); - if ( cmd.sbc_statistics ) + if ( cmd.sbc_statistics ) { - if ( cmd.a_statistics[XMN_ST_EXTREME]) + if ( cmd.a_statistics[XMN_ST_EXTREME]) show_extremes (dependent_vars, n_dependent_vars, fctr, cmd.st_n); - if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES]) + if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES]) show_descriptives (dependent_vars, n_dependent_vars, fctr); } - if ( sbc_percentile ) + if ( sbc_percentile ) show_percentiles (dependent_vars, n_dependent_vars, fctr); - if ( cmd.sbc_plot) + if ( cmd.sbc_plot) { size_t v; @@ -357,7 +357,7 @@ output_examine (void) if ( cmd.a_plot[XMN_PLT_BOXPLOT] ) { - if ( cmd.cmp == XMN_VARIABLES ) + if ( cmd.cmp == XMN_VARIABLES ) box_plot_variables (fctr, (const struct variable **) dependent_vars, n_dependent_vars, cmd.v_id); @@ -370,25 +370,25 @@ output_examine (void) for ( v = 0 ; v < n_dependent_vars; ++v ) { - for ( fs = fctr->fs ; *fs ; ++fs ) + for ( fs = fctr->fs ; *fs ; ++fs ) { const char *s = factor_to_string (fctr, *fs, dependent_vars[v]); - if ( cmd.a_plot[XMN_PLT_NPPLOT] ) + if ( cmd.a_plot[XMN_PLT_NPPLOT] ) np_plot (& (*fs)->m[v], s); - if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) + if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) { struct normal_curve normal; normal.N = (*fs)->m[v].n; normal.mean = (*fs)->m[v].mean; normal.stddev = (*fs)->m[v].stddev; - - histogram_plot ((*fs)->m[v].histogram, + + histogram_plot ((*fs)->m[v].histogram, s, &normal, 0); } - + } /* for ( fs .... */ } /* for ( v = 0 ..... */ @@ -407,12 +407,12 @@ static struct hsh_table * list_to_ptile_hash (const subc_list_double *l) { int i; - - struct hsh_table *h ; - h = hsh_create (subc_list_double_count (l), + struct hsh_table *h ; + + h = hsh_create (subc_list_double_count (l), (hsh_compare_func *) ptile_compare, - (hsh_hash_func *) ptile_hash, + (hsh_hash_func *) ptile_hash, (hsh_free_func *) free, 0); @@ -420,7 +420,7 @@ list_to_ptile_hash (const subc_list_double *l) for ( i = 0 ; i < subc_list_double_count (l) ; ++i ) { struct percentile *p = xmalloc (sizeof *p); - + p->p = subc_list_double_at (l,i); p->v = SYSMIS; @@ -434,7 +434,7 @@ list_to_ptile_hash (const subc_list_double *l) /* Parse the PERCENTILES subcommand */ static int -xmn_custom_percentiles (struct lexer *lexer, struct dataset *ds UNUSED, +xmn_custom_percentiles (struct lexer *lexer, struct dataset *ds UNUSED, struct cmd_examine *p UNUSED, void *aux UNUSED) { sbc_percentile = 1; @@ -443,7 +443,7 @@ xmn_custom_percentiles (struct lexer *lexer, struct dataset *ds UNUSED, lex_match (lexer, '('); - while ( lex_is_number (lexer) ) + while ( lex_is_number (lexer) ) { subc_list_double_push (&percentile_list, lex_number (lexer)); @@ -456,10 +456,10 @@ xmn_custom_percentiles (struct lexer *lexer, struct dataset *ds UNUSED, lex_match (lexer, '='); if ( lex_match_id (lexer, "HAVERAGE")) - percentile_algorithm = PC_HAVERAGE; + percentile_algorithm = PC_HAVERAGE; else if ( lex_match_id (lexer, "WAVERAGE")) - percentile_algorithm = PC_WAVERAGE; + percentile_algorithm = PC_WAVERAGE; else if ( lex_match_id (lexer, "ROUND")) percentile_algorithm = PC_ROUND; @@ -468,10 +468,10 @@ xmn_custom_percentiles (struct lexer *lexer, struct dataset *ds UNUSED, percentile_algorithm = PC_EMPIRICAL; else if ( lex_match_id (lexer, "AEMPIRICAL")) - percentile_algorithm = PC_AEMPIRICAL; + percentile_algorithm = PC_AEMPIRICAL; else if ( lex_match_id (lexer, "NONE")) - percentile_algorithm = PC_NONE; + percentile_algorithm = PC_NONE; if ( 0 == subc_list_double_count (&percentile_list)) @@ -492,7 +492,7 @@ xmn_custom_percentiles (struct lexer *lexer, struct dataset *ds UNUSED, static int xmn_custom_total (struct lexer *lexer UNUSED, struct dataset *ds UNUSED, struct cmd_examine *p, void *aux UNUSED) { - if ( p->sbc_nototal ) + if ( p->sbc_nototal ) { msg (SE, _ ("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL"); return 0; @@ -502,10 +502,10 @@ xmn_custom_total (struct lexer *lexer UNUSED, struct dataset *ds UNUSED, struct } static int -xmn_custom_nototal (struct lexer *lexer UNUSED, struct dataset *ds UNUSED, +xmn_custom_nototal (struct lexer *lexer UNUSED, struct dataset *ds UNUSED, struct cmd_examine *p, void *aux UNUSED) { - if ( p->sbc_total ) + if ( p->sbc_total ) { msg (SE, _ ("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL"); return 0; @@ -516,7 +516,7 @@ xmn_custom_nototal (struct lexer *lexer UNUSED, struct dataset *ds UNUSED, -/* Parser for the variables sub command +/* Parser for the variables sub command Returns 1 on success */ static int xmn_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_examine *cmd, void *aux UNUSED) @@ -529,7 +529,7 @@ xmn_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_examin { return 2; } - + if (!parse_variables (lexer, dict, &dependent_vars, &n_dependent_vars, PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) ) { @@ -543,11 +543,11 @@ xmn_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_examin if ( lex_match (lexer, T_BY)) { - int success ; + int success ; success = examine_parse_independent_vars (lexer, dict, cmd); if ( success != 1 ) { free (dependent_vars); - free (totals) ; + free (totals) ; } return success; } @@ -575,7 +575,7 @@ examine_parse_independent_vars (struct lexer *lexer, const struct dictionary *di sf->indep_var[0] = parse_variable (lexer, dict); sf->indep_var[1] = 0; - if ( lex_token (lexer) == T_BY ) + if ( lex_token (lexer) == T_BY ) { lex_match (lexer, T_BY); @@ -600,16 +600,16 @@ examine_parse_independent_vars (struct lexer *lexer, const struct dictionary *di sf->next = factors; factors = sf; - + lex_match (lexer, ','); - if ( lex_token (lexer) == '.' || lex_token (lexer) == '/' ) + if ( lex_token (lexer) == '.' || lex_token (lexer) == '/' ) return 1; success = examine_parse_independent_vars (lexer, dict, cmd); - - if ( success != 1 ) - free ( sf ) ; + + if ( success != 1 ) + free ( sf ) ; return success; } @@ -617,13 +617,13 @@ examine_parse_independent_vars (struct lexer *lexer, const struct dictionary *di -void populate_percentiles (struct tab_table *tbl, int col, int row, +void populate_percentiles (struct tab_table *tbl, int col, int row, const struct metrics *m); -void populate_descriptives (struct tab_table *t, int col, int row, +void populate_descriptives (struct tab_table *t, int col, int row, const struct metrics *fs); -void populate_extremes (struct tab_table *t, int col, int row, int n, +void populate_extremes (struct tab_table *t, int col, int row, int n, const struct metrics *m); void populate_summary (struct tab_table *t, int col, int row, @@ -637,62 +637,82 @@ static bool bad_weight_warn = true; /* Perform calculations for the sub factors */ void -factor_calc (struct ccase *c, int case_no, double weight, int case_missing) +factor_calc (const struct ccase *c, int case_no, double weight, + int case_missing) { size_t v; struct factor *fctr = factors; - while ( fctr) + while ( fctr) { struct factor_statistics **foo ; - union value indep_vals[2] ; + union value *indep_vals[2] ; - indep_vals[0] = * case_data (c, fctr->indep_var[0]->fv); + indep_vals[0] = value_dup ( + case_data (c, fctr->indep_var[0]->fv), + var_get_width (fctr->indep_var[0]) + ); - if ( fctr->indep_var[1] ) - indep_vals[1] = * case_data (c, fctr->indep_var[1]->fv); + if ( fctr->indep_var[1] ) + indep_vals[1] = value_dup ( + case_data (c, fctr->indep_var[1]->fv), + var_get_width (fctr->indep_var[1]) + ); else - indep_vals[1].f = SYSMIS; + { + const union value sm = {SYSMIS}; + indep_vals[1] = value_dup (&sm, 0); + } assert (fctr->fstats); - foo = ( struct factor_statistics ** ) + foo = ( struct factor_statistics ** ) hsh_probe (fctr->fstats, (void *) &indep_vals); - if ( !*foo ) + if ( !*foo ) { - *foo = create_factor_statistics (n_dependent_vars, - &indep_vals[0], - &indep_vals[1]); + *foo = create_factor_statistics (n_dependent_vars, + indep_vals[0], + indep_vals[1]); - for ( v = 0 ; v < n_dependent_vars ; ++v ) + for ( v = 0 ; v < n_dependent_vars ; ++v ) { metrics_precalc ( & (*foo)->m[v] ); } } + else + { + free (indep_vals[0]); + free (indep_vals[1]); + } - for ( v = 0 ; v < n_dependent_vars ; ++v ) + for ( v = 0 ; v < n_dependent_vars ; ++v ) { const struct variable *var = dependent_vars[v]; - const union value *val = case_data (c, var->fv); + union value *val = value_dup ( + case_data (c, var->fv), + var_get_width (var) + ); + + if ( value_is_missing (var, val) || case_missing ) + { + free (val); + continue; + } - if ( value_is_missing (var, val) || case_missing ) - val = 0; - metrics_calc ( & (*foo)->m[v], val, weight, case_no); - + + free (val); } fctr = fctr->next; } - - } -static bool -run_examine (const struct ccase *first, const struct casefile *cf, +static bool +run_examine (const struct ccase *first, const struct casefile *cf, void *cmd_, const struct dataset *ds) { struct dictionary *dict = dataset_dict (ds); @@ -706,10 +726,10 @@ run_examine (const struct ccase *first, const struct casefile *cf, output_split_file_values (ds, first); - /* Make sure we haven't got rubbish left over from a + /* Make sure we haven't got rubbish left over from a previous split */ fctr = factors; - while (fctr) + while (fctr) { struct factor *next = fctr->next; @@ -720,53 +740,62 @@ run_examine (const struct ccase *first, const struct casefile *cf, fctr = next; } - for ( v = 0 ; v < n_dependent_vars ; ++v ) + for ( v = 0 ; v < n_dependent_vars ; ++v ) metrics_precalc (&totals[v]); for (r = casefile_get_reader (cf, NULL); casereader_read (r, &c) ; - case_destroy (&c) ) + case_destroy (&c) ) { int case_missing=0; const int case_no = casereader_cnum (r); - const double weight = + const double weight = dict_get_case_weight (dict, &c, &bad_weight_warn); - if ( cmd->miss == XMN_LISTWISE ) + if ( cmd->miss == XMN_LISTWISE ) { - for ( v = 0 ; v < n_dependent_vars ; ++v ) + for ( v = 0 ; v < n_dependent_vars ; ++v ) { const struct variable *var = dependent_vars[v]; - const union value *val = case_data (&c, var->fv); + union value *val = value_dup ( + case_data (&c, var->fv), + var_get_width (var) + ); if ( value_is_missing (var, val)) case_missing = 1; - + + free (val); } } - for ( v = 0 ; v < n_dependent_vars ; ++v ) + for ( v = 0 ; v < n_dependent_vars ; ++v ) { const struct variable *var = dependent_vars[v]; - const union value *val = case_data (&c, var->fv); + union value *val = value_dup ( + case_data (&c, var->fv), + var_get_width (var) + ); - if ( value_is_missing (var, val) || case_missing ) - val = 0; + if ( value_is_missing (var, val) || case_missing ) + { + free (val) ; + continue ; + } metrics_calc (&totals[v], val, weight, case_no); - + + free (val); } factor_calc (&c, case_no, weight, case_missing); - } - for ( v = 0 ; v < n_dependent_vars ; ++v) { fctr = factors; - while ( fctr ) + while ( fctr ) { struct hsh_iterator hi; struct factor_statistics *fs; @@ -775,7 +804,7 @@ run_examine (const struct ccase *first, const struct casefile *cf, fs != 0 ; fs = hsh_next (fctr->fstats, &hi)) { - + fs->m[v].ptile_hash = list_to_ptile_hash (&percentile_list); fs->m[v].ptile_alg = percentile_algorithm; metrics_postcalc (&fs->m[v]); @@ -793,7 +822,7 @@ run_examine (const struct ccase *first, const struct casefile *cf, /* Make sure that the combination of factors are complete */ fctr = factors; - while ( fctr ) + while ( fctr ) { struct hsh_iterator hi; struct hsh_iterator hi0; @@ -804,7 +833,7 @@ run_examine (const struct ccase *first, const struct casefile *cf, struct hsh_table *idh1=0; union value *val0; union value *val1; - + idh0 = hsh_create (4, (hsh_compare_func *) compare_values, (hsh_hash_func *) hash_value, 0,0); @@ -835,15 +864,15 @@ run_examine (const struct ccase *first, const struct casefile *cf, union value key[2]; key[0] = *val0; key[1] = *val1; - - ffs = (struct factor_statistics **) + + ffs = (struct factor_statistics **) hsh_probe (fctr->fstats, (void *) &key ); if ( !*ffs ) { size_t i; (*ffs) = create_factor_statistics (n_dependent_vars, &key[0], &key[1]); - for ( i = 0 ; i < n_dependent_vars ; ++i ) + for ( i = 0 ; i < n_dependent_vars ; ++i ) metrics_precalc ( & (*ffs)->m[i]); } } @@ -860,10 +889,10 @@ run_examine (const struct ccase *first, const struct casefile *cf, output_examine (); - if ( totals ) + if ( totals ) { size_t i; - for ( i = 0 ; i < n_dependent_vars ; ++i ) + for ( i = 0 ; i < n_dependent_vars ; ++i ) { metrics_destroy (&totals[i]); } @@ -874,7 +903,7 @@ run_examine (const struct ccase *first, const struct casefile *cf, static void -show_summary (struct variable **dependent_var, int n_dep_var, +show_summary (struct variable **dependent_var, int n_dep_var, const struct factor *fctr) { static const char *subtitle[]= @@ -916,16 +945,16 @@ show_summary (struct variable **dependent_var, int n_dep_var, tab_headers (tbl, heading_columns, 0, heading_rows, 0); tab_dim (tbl, tab_natural_dimensions); - + /* Outline the box */ - tab_box (tbl, + tab_box (tbl, TAL_2, TAL_2, -1, -1, 0, 0, n_cols - 1, n_rows - 1); /* Vertical lines for the data only */ - tab_box (tbl, + tab_box (tbl, -1, -1, -1, TAL_1, heading_columns, 0, @@ -940,26 +969,26 @@ show_summary (struct variable **dependent_var, int n_dep_var, tab_title (tbl, _ ("Case Processing Summary")); - - tab_joint_text (tbl, heading_columns, 0, + + tab_joint_text (tbl, heading_columns, 0, n_cols -1, 0, TAB_CENTER | TAT_TITLE, _ ("Cases")); /* Remove lines ... */ - tab_box (tbl, + tab_box (tbl, -1, -1, TAL_0, TAL_0, heading_columns, 0, n_cols - 1, 0); - for ( i = 0 ; i < 3 ; ++i ) + for ( i = 0 ; i < 3 ; ++i ) { - tab_text (tbl, heading_columns + i*2 , 2, TAB_CENTER | TAT_TITLE, + tab_text (tbl, heading_columns + i*2 , 2, TAB_CENTER | TAT_TITLE, _ ("N")); - tab_text (tbl, heading_columns + i*2 + 1, 2, TAB_CENTER | TAT_TITLE, + tab_text (tbl, heading_columns + i*2 + 1, 2, TAB_CENTER | TAT_TITLE, _ ("Percent")); tab_joint_text (tbl, heading_columns + i*2 , 1, @@ -976,39 +1005,39 @@ show_summary (struct variable **dependent_var, int n_dep_var, /* Titles for the independent variables */ - if ( fctr ) + if ( fctr ) { - tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, + tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, var_to_string (fctr->indep_var[0])); - if ( fctr->indep_var[1] ) + if ( fctr->indep_var[1] ) { - tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, + tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, var_to_string (fctr->indep_var[1])); } - + } - for ( i = 0 ; i < n_dep_var ; ++i ) + for ( i = 0 ; i < n_dep_var ; ++i ) { int n_factors = 1; - if ( fctr ) + if ( fctr ) n_factors = hsh_count (fctr->fstats); - - if ( i > 0 ) + + if ( i > 0 ) tab_hline (tbl, TAL_1, 0, n_cols -1 , i * n_factors + heading_rows); - - tab_text (tbl, + + tab_text (tbl, 0, i * n_factors + heading_rows, - TAB_LEFT | TAT_TITLE, + TAB_LEFT | TAT_TITLE, var_to_string (dependent_var[i]) ); - if ( !fctr ) - populate_summary (tbl, heading_columns, + if ( !fctr ) + populate_summary (tbl, heading_columns, (i * n_factors) + heading_rows, &totals[i]); @@ -1017,46 +1046,47 @@ show_summary (struct variable **dependent_var, int n_dep_var, { struct factor_statistics **fs = fctr->fs; int count = 0 ; + const union value *prev = NULL; - while (*fs) + while (*fs) { - static union value prev; - - if ( 0 != compare_values (&prev, & (*fs)->id[0], - var_get_width (fctr->indep_var[0]))) + if ( !prev || + 0 != compare_values (prev, (*fs)->id[0], + var_get_width (fctr->indep_var[0]))) { - tab_text (tbl, + tab_text (tbl, 1, - (i * n_factors ) + count + + (i * n_factors ) + count + heading_rows, - TAB_LEFT | TAT_TITLE, - value_to_string (& (*fs)->id[0], fctr->indep_var[0]) + TAB_LEFT | TAT_TITLE, + value_to_string ((*fs)->id[0], + fctr->indep_var[0]) ); - if (fctr->indep_var[1] && count > 0 ) - tab_hline (tbl, TAL_1, 1, n_cols - 1, + if (fctr->indep_var[1] && count > 0 ) + tab_hline (tbl, TAL_1, 1, n_cols - 1, (i * n_factors ) + count + heading_rows); } - + prev = (*fs)->id[0]; - if ( fctr->indep_var[1]) - tab_text (tbl, + if ( fctr->indep_var[1]) + tab_text (tbl, 2, - (i * n_factors ) + count + + (i * n_factors ) + count + heading_rows, - TAB_LEFT | TAT_TITLE, - value_to_string (& (*fs)->id[1], fctr->indep_var[1]) + TAB_LEFT | TAT_TITLE, + value_to_string ((*fs)->id[1], fctr->indep_var[1]) ); - populate_summary (tbl, heading_columns, - (i * n_factors) + count + populate_summary (tbl, heading_columns, + (i * n_factors) + count + heading_rows, & (*fs)->m[i]); - count++ ; + count++ ; fs++; } } @@ -1066,12 +1096,12 @@ show_summary (struct variable **dependent_var, int n_dep_var, } -void +void populate_summary (struct tab_table *t, int col, int row, const struct metrics *m) { - const double total = m->n + m->n_missing ; + const double total = m->n + m->n_missing ; tab_float (t, col + 0, row + 0, TAB_RIGHT, m->n, 8, 0); tab_float (t, col + 2, row + 0, TAB_RIGHT, m->n_missing, 8, 0); @@ -1079,26 +1109,26 @@ populate_summary (struct tab_table *t, int col, int row, if ( total > 0 ) { - tab_text (t, col + 1, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", + tab_text (t, col + 1, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", 100.0 * m->n / total ); - tab_text (t, col + 3, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", + tab_text (t, col + 3, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", 100.0 * m->n_missing / total ); /* This seems a bit pointless !!! */ - tab_text (t, col + 5, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", + tab_text (t, col + 5, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", 100.0 * total / total ); } -} +} -static void -show_extremes (struct variable **dependent_var, int n_dep_var, +static void +show_extremes (struct variable **dependent_var, int n_dep_var, const struct factor *fctr, int n_extremities) { int i; @@ -1135,9 +1165,9 @@ show_extremes (struct variable **dependent_var, int n_dep_var, tab_headers (tbl, heading_columns, 0, heading_rows, 0); tab_dim (tbl, tab_natural_dimensions); - + /* Outline the box, No internal lines*/ - tab_box (tbl, + tab_box (tbl, TAL_2, TAL_2, -1, -1, 0, 0, @@ -1150,35 +1180,35 @@ show_extremes (struct variable **dependent_var, int n_dep_var, tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows -1); tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows -1); - if ( fctr ) + if ( fctr ) { - tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, + tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, var_to_string (fctr->indep_var[0])); - if ( fctr->indep_var[1] ) - tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, + if ( fctr->indep_var[1] ) + tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, var_to_string (fctr->indep_var[1])); } tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _ ("Value")); tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _ ("Case Number")); - for ( i = 0 ; i < n_dep_var ; ++i ) + for ( i = 0 ; i < n_dep_var ; ++i ) { - if ( i > 0 ) - tab_hline (tbl, TAL_1, 0, n_cols -1 , + if ( i > 0 ) + tab_hline (tbl, TAL_1, 0, n_cols -1 , i * 2 * n_extremities * n_factors + heading_rows); - + tab_text (tbl, 0, i * 2 * n_extremities * n_factors + heading_rows, - TAB_LEFT | TAT_TITLE, + TAB_LEFT | TAT_TITLE, var_to_string (dependent_var[i]) ); - if ( !fctr ) - populate_extremes (tbl, heading_columns - 2, + if ( !fctr ) + populate_extremes (tbl, heading_columns - 2, i * 2 * n_extremities * n_factors + heading_rows, n_extremities, &totals[i]); @@ -1186,45 +1216,44 @@ show_extremes (struct variable **dependent_var, int n_dep_var, { struct factor_statistics **fs = fctr->fs; int count = 0 ; + const union value *prev = NULL; - while (*fs) + while (*fs) { - static union value prev ; - - const int row = heading_rows + ( 2 * n_extremities ) * + const int row = heading_rows + ( 2 * n_extremities ) * ( ( i * n_factors ) + count ); - if ( 0 != compare_values (&prev, & (*fs)->id[0], + if ( !prev || 0 != compare_values (prev, (*fs)->id[0], var_get_width (fctr->indep_var[0]))) { - - if ( count > 0 ) + + if ( count > 0 ) tab_hline (tbl, TAL_1, 1, n_cols - 1, row); - tab_text (tbl, + tab_text (tbl, 1, row, - TAB_LEFT | TAT_TITLE, - value_to_string (& (*fs)->id[0], fctr->indep_var[0]) + TAB_LEFT | TAT_TITLE, + value_to_string ((*fs)->id[0], fctr->indep_var[0]) ); } prev = (*fs)->id[0]; - if (fctr->indep_var[1] && count > 0 ) + if (fctr->indep_var[1] && count > 0 ) tab_hline (tbl, TAL_1, 2, n_cols - 1, row); - if ( fctr->indep_var[1]) + if ( fctr->indep_var[1]) tab_text (tbl, 2, row, - TAB_LEFT | TAT_TITLE, - value_to_string (& (*fs)->id[1], fctr->indep_var[1]) + TAB_LEFT | TAT_TITLE, + value_to_string ((*fs)->id[1], fctr->indep_var[1]) ); - populate_extremes (tbl, heading_columns - 2, + populate_extremes (tbl, heading_columns - 2, row, n_extremities, & (*fs)->m[i]); - count++ ; + count++ ; fs++; } } @@ -1236,8 +1265,8 @@ show_extremes (struct variable **dependent_var, int n_dep_var, /* Fill in the extremities table */ -void -populate_extremes (struct tab_table *t, +void +populate_extremes (struct tab_table *t, int col, int row, int n, const struct metrics *m) { int extremity; @@ -1256,8 +1285,8 @@ populate_extremes (struct tab_table *t, tab_hline (t, TAL_1, col, col + 3, row + n ); - - for (extremity = 0; extremity < n ; ++extremity ) + + for (extremity = 0; extremity < n ; ++extremity ) { /* Highest */ tab_float (t, col + 1, row + extremity, @@ -1274,16 +1303,16 @@ populate_extremes (struct tab_table *t, /* Lowest */ - for (idx = 0, extremity = 0; extremity < n && idx < m->n_data ; ++idx ) + for (idx = 0, extremity = 0; extremity < n && idx < m->n_data ; ++idx ) { int j; const struct weighted_value *wv = m->wvp[idx]; struct case_node *cn = wv->case_nos; - + for (j = 0 ; j < wv->w ; ++j ) { - if ( extremity + j >= n ) + if ( extremity + j >= n ) break ; tab_float (t, col + 3, row + extremity + j + n, @@ -1294,7 +1323,7 @@ populate_extremes (struct tab_table *t, TAB_RIGHT, cn->num, 8, 0); - if ( cn->next ) + if ( cn->next ) cn = cn->next; } @@ -1304,7 +1333,7 @@ populate_extremes (struct tab_table *t, /* Highest */ - for (idx = m->n_data - 1, extremity = 0; extremity < n && idx >= 0; --idx ) + for (idx = m->n_data - 1, extremity = 0; extremity < n && idx >= 0; --idx ) { int j; const struct weighted_value *wv = m->wvp[idx]; @@ -1312,7 +1341,7 @@ populate_extremes (struct tab_table *t, for (j = 0 ; j < wv->w ; ++j ) { - if ( extremity + j >= n ) + if ( extremity + j >= n ) break ; tab_float (t, col + 3, row + extremity + j, @@ -1323,7 +1352,7 @@ populate_extremes (struct tab_table *t, TAB_RIGHT, cn->num, 8, 0); - if ( cn->next ) + if ( cn->next ) cn = cn->next; } @@ -1335,8 +1364,8 @@ populate_extremes (struct tab_table *t, /* Show the descriptives table */ void -show_descriptives (struct variable **dependent_var, - int n_dep_var, +show_descriptives (struct variable **dependent_var, + int n_dep_var, struct factor *fctr) { int i; @@ -1379,7 +1408,7 @@ show_descriptives (struct variable **dependent_var, tab_dim (tbl, tab_natural_dimensions); /* Outline the box and have no internal lines*/ - tab_box (tbl, + tab_box (tbl, TAL_2, TAL_2, -1, -1, 0, 0, @@ -1397,7 +1426,7 @@ show_descriptives (struct variable **dependent_var, tab_title (tbl, _ ("Descriptives")); - for ( i = 0 ; i < n_dep_var ; ++i ) + for ( i = 0 ; i < n_dep_var ; ++i ) { const int row = heading_rows + i * n_stat_rows * n_factors ; @@ -1406,71 +1435,70 @@ show_descriptives (struct variable **dependent_var, tab_text (tbl, 0, i * n_stat_rows * n_factors + heading_rows, - TAB_LEFT | TAT_TITLE, + TAB_LEFT | TAT_TITLE, var_to_string (dependent_var[i]) ); if ( fctr ) { + const union value *prev = NULL; + struct factor_statistics **fs = fctr->fs; int count = 0; - tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, + tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, var_to_string (fctr->indep_var[0])); if ( fctr->indep_var[1]) - tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, + tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, var_to_string (fctr->indep_var[1])); - while ( *fs ) + while ( *fs ) { - - static union value prev ; - - const int row = heading_rows + n_stat_rows * + const int row = heading_rows + n_stat_rows * ( ( i * n_factors ) + count ); - if ( 0 != compare_values (&prev, & (*fs)->id[0], + if ( !prev || 0 != compare_values (prev, (*fs)->id[0], var_get_width (fctr->indep_var[0]))) { - - if ( count > 0 ) + + if ( count > 0 ) tab_hline (tbl, TAL_1, 1, n_cols - 1, row); - tab_text (tbl, + tab_text (tbl, 1, row, - TAB_LEFT | TAT_TITLE, - value_to_string (& (*fs)->id[0], fctr->indep_var[0]) + TAB_LEFT | TAT_TITLE, + value_to_string ((*fs)->id[0], fctr->indep_var[0]) ); } prev = (*fs)->id[0]; - if (fctr->indep_var[1] && count > 0 ) + if (fctr->indep_var[1] && count > 0 ) tab_hline (tbl, TAL_1, 2, n_cols - 1, row); - if ( fctr->indep_var[1]) + if ( fctr->indep_var[1]) tab_text (tbl, 2, row, - TAB_LEFT | TAT_TITLE, - value_to_string (& (*fs)->id[1], fctr->indep_var[1]) + TAB_LEFT | TAT_TITLE, + value_to_string ((*fs)->id[1], fctr->indep_var[1]) ); - populate_descriptives (tbl, heading_columns - 2, + populate_descriptives (tbl, heading_columns - 2, row, & (*fs)->m[i]); - count++ ; + count++ ; fs++; } } - else + else { - - populate_descriptives (tbl, heading_columns - 2, + + populate_descriptives (tbl, heading_columns - 2, i * n_stat_rows * n_factors + heading_rows, &totals[i]); } @@ -1485,7 +1513,7 @@ show_descriptives (struct variable **dependent_var, /* Fill in the descriptives data */ void -populate_descriptives (struct tab_table *tbl, int col, int row, +populate_descriptives (struct tab_table *tbl, int col, int row, const struct metrics *m) { @@ -1493,7 +1521,7 @@ populate_descriptives (struct tab_table *tbl, int col, int row, m->n -1); - tab_text (tbl, col, + tab_text (tbl, col, row, TAB_LEFT | TAT_TITLE, _ ("Mean")); @@ -1503,21 +1531,21 @@ populate_descriptives (struct tab_table *tbl, int col, int row, TAB_CENTER, m->mean, 8,2); - + tab_float (tbl, col + 3, row, TAB_CENTER, m->se_mean, 8,3); - - tab_text (tbl, col, + + tab_text (tbl, col, row + 1, TAB_LEFT | TAT_TITLE | TAT_PRINTF, _ ("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]); - tab_text (tbl, col + 1, + tab_text (tbl, col + 1, row + 1, TAB_LEFT | TAT_TITLE, _ ("Lower Bound")); @@ -1525,10 +1553,10 @@ populate_descriptives (struct tab_table *tbl, int col, int row, tab_float (tbl, col + 2, row + 1, TAB_CENTER, - m->mean - t * m->se_mean, + m->mean - t * m->se_mean, 8,3); - tab_text (tbl, col + 1, + tab_text (tbl, col + 1, row + 2, TAB_LEFT | TAT_TITLE, _ ("Upper Bound")); @@ -1537,21 +1565,21 @@ populate_descriptives (struct tab_table *tbl, int col, int row, tab_float (tbl, col + 2, row + 2, TAB_CENTER, - m->mean + t * m->se_mean, + m->mean + t * m->se_mean, 8,3); - tab_text (tbl, col, + tab_text (tbl, col, row + 3, TAB_LEFT | TAT_TITLE | TAT_PRINTF, _ ("5%% Trimmed Mean")); - tab_float (tbl, col + 2, + tab_float (tbl, col + 2, row + 3, TAB_CENTER, m->trimmed_mean, 8,2); - tab_text (tbl, col, + tab_text (tbl, col, row + 4, TAB_LEFT | TAT_TITLE, _ ("Median")); @@ -1559,21 +1587,21 @@ populate_descriptives (struct tab_table *tbl, int col, int row, { struct percentile *p; double d = 50; - + p = hsh_find (m->ptile_hash, &d); - + assert (p); - tab_float (tbl, col + 2, + tab_float (tbl, col + 2, row + 4, TAB_CENTER, p->v, 8, 2); } - - tab_text (tbl, col, + + tab_text (tbl, col, row + 5, TAB_LEFT | TAT_TITLE, _ ("Variance")); @@ -1585,7 +1613,7 @@ populate_descriptives (struct tab_table *tbl, int col, int row, 8,3); - tab_text (tbl, col, + tab_text (tbl, col, row + 6, TAB_LEFT | TAT_TITLE, _ ("Std. Deviation")); @@ -1597,8 +1625,8 @@ populate_descriptives (struct tab_table *tbl, int col, int row, m->stddev, 8,3); - - tab_text (tbl, col, + + tab_text (tbl, col, row + 7, TAB_LEFT | TAT_TITLE, _ ("Minimum")); @@ -1609,7 +1637,7 @@ populate_descriptives (struct tab_table *tbl, int col, int row, m->min, 8,3); - tab_text (tbl, col, + tab_text (tbl, col, row + 8, TAB_LEFT | TAT_TITLE, _ ("Maximum")); @@ -1621,7 +1649,7 @@ populate_descriptives (struct tab_table *tbl, int col, int row, 8,3); - tab_text (tbl, col, + tab_text (tbl, col, row + 9, TAB_LEFT | TAT_TITLE, _ ("Range")); @@ -1633,7 +1661,7 @@ populate_descriptives (struct tab_table *tbl, int col, int row, m->max - m->min, 8,3); - tab_text (tbl, col, + tab_text (tbl, col, row + 10, TAB_LEFT | TAT_TITLE, _ ("Interquartile Range")); @@ -1651,7 +1679,7 @@ populate_descriptives (struct tab_table *tbl, int col, int row, assert (p1); assert (p2); - tab_float (tbl, col + 2, + tab_float (tbl, col + 2, row + 10, TAB_CENTER, p1->v - p2->v, @@ -1660,7 +1688,7 @@ populate_descriptives (struct tab_table *tbl, int col, int row, - tab_text (tbl, col, + tab_text (tbl, col, row + 11, TAB_LEFT | TAT_TITLE, _ ("Skewness")); @@ -1680,7 +1708,7 @@ populate_descriptives (struct tab_table *tbl, int col, int row, 8,3); - tab_text (tbl, col, + tab_text (tbl, col, row + 12, TAB_LEFT | TAT_TITLE, _ ("Kurtosis")); @@ -1705,21 +1733,21 @@ populate_descriptives (struct tab_table *tbl, int col, int row, void -box_plot_variables (const struct factor *fctr, - const struct variable **vars, int n_vars, +box_plot_variables (const struct factor *fctr, + const struct variable **vars, int n_vars, const struct variable *id) { int i; struct factor_statistics **fs ; - if ( ! fctr ) + if ( ! fctr ) { box_plot_group (fctr, vars, n_vars, id); return; } - for ( fs = fctr->fs ; *fs ; ++fs ) + for ( fs = fctr->fs ; *fs ; ++fs ) { double y_min = DBL_MAX; double y_max = -DBL_MAX; @@ -1728,23 +1756,23 @@ box_plot_variables (const struct factor *fctr, chart_write_title (ch, s); - for ( i = 0 ; i < n_vars ; ++i ) + for ( i = 0 ; i < n_vars ; ++i ) { y_max = MAX (y_max, (*fs)->m[i].max); y_min = MIN (y_min, (*fs)->m[i].min); } - + boxplot_draw_yscale (ch, y_max, y_min); - - for ( i = 0 ; i < n_vars ; ++i ) + + for ( i = 0 ; i < n_vars ; ++i ) { - const double box_width = (ch->data_right - ch->data_left) + const double box_width = (ch->data_right - ch->data_left) / (n_vars * 2.0 ) ; - const double box_centre = ( i * 2 + 1) * box_width + const double box_centre = ( i * 2 + 1) * box_width + ch->data_left; - + boxplot_draw_boxplot (ch, box_centre, box_width, & (*fs)->m[i], @@ -1764,15 +1792,15 @@ box_plot_variables (const struct factor *fctr, each dependent variable has its own plot. */ void -box_plot_group (const struct factor *fctr, - const struct variable **vars, +box_plot_group (const struct factor *fctr, + const struct variable **vars, int n_vars, const struct variable *id UNUSED) { int i; - for ( i = 0 ; i < n_vars ; ++i ) + for ( i = 0 ; i < n_vars ; ++i ) { struct factor_statistics **fs ; struct chart *ch; @@ -1781,27 +1809,27 @@ box_plot_group (const struct factor *fctr, boxplot_draw_yscale (ch, totals[i].max, totals[i].min); - if ( fctr ) + if ( fctr ) { int n_factors = 0; int f=0; - for ( fs = fctr->fs ; *fs ; ++fs ) + for ( fs = fctr->fs ; *fs ; ++fs ) ++n_factors; - chart_write_title (ch, _ ("Boxplot of %s vs. %s"), + chart_write_title (ch, _ ("Boxplot of %s vs. %s"), var_to_string (vars[i]), var_to_string (fctr->indep_var[0]) ); - for ( fs = fctr->fs ; *fs ; ++fs ) + for ( fs = fctr->fs ; *fs ; ++fs ) { - + const char *s = factor_to_string_concise (fctr, *fs); - const double box_width = (ch->data_right - ch->data_left) + const double box_width = (ch->data_right - ch->data_left) / (n_factors * 2.0 ) ; - const double box_centre = ( f++ * 2 + 1) * box_width + const double box_centre = ( f++ * 2 + 1) * box_width + ch->data_left; - + boxplot_draw_boxplot (ch, box_centre, box_width, & (*fs)->m[i], @@ -1816,10 +1844,10 @@ box_plot_group (const struct factor *fctr, chart_write_title (ch, _ ("Boxplot")); boxplot_draw_boxplot (ch, - box_centre, box_width, + box_centre, box_width, &totals[i], var_to_string (vars[i]) ); - + } chart_submit (ch); @@ -1846,13 +1874,13 @@ np_plot (const struct metrics *m, const char *factorname) const double intercept = - m->mean / m->stddev; /* Cowardly refuse to plot an empty data set */ - if ( m->n_data == 0 ) - return ; + if ( m->n_data == 0 ) + return ; np_chart = chart_create (); dnp_chart = chart_create (); - if ( !np_chart || ! dnp_chart ) + if ( !np_chart || ! dnp_chart ) return ; chart_write_title (np_chart, _ ("Normal Q-Q Plot of %s"), factorname); @@ -1860,7 +1888,7 @@ np_plot (const struct metrics *m, const char *factorname) chart_write_ylabel (np_chart, _ ("Expected Normal")); - chart_write_title (dnp_chart, _ ("Detrended Normal Q-Q Plot of %s"), + chart_write_title (dnp_chart, _ ("Detrended Normal Q-Q Plot of %s"), factorname); chart_write_xlabel (dnp_chart, _ ("Observed Value")); chart_write_ylabel (dnp_chart, _ ("Dev from Normal")); @@ -1885,25 +1913,25 @@ np_plot (const struct metrics *m, const char *factorname) chart_write_yscale (np_chart, yfirst, ylast, 5); { - /* We have to cache the detrended data, beacause we need to + /* We have to cache the detrended data, beacause we need to find its limits before we can plot it */ double *d_data = xnmalloc (m->n_data, sizeof *d_data); double d_max = -DBL_MAX; double d_min = DBL_MAX; - for ( i = 0 ; i < m->n_data; ++i ) + for ( i = 0 ; i < m->n_data; ++i ) { const double ns = gsl_cdf_ugaussian_Pinv (m->wvp[i]->rank / ( m->n + 1)); chart_datum (np_chart, 0, m->wvp[i]->v.f, ns); d_data[i] = (m->wvp[i]->v.f - m->mean) / m->stddev - ns; - + if ( d_data[i] < d_min ) d_min = d_data[i]; if ( d_data[i] > d_max ) d_max = d_data[i]; } chart_write_yscale (dnp_chart, d_min, d_max, 5); - for ( i = 0 ; i < m->n_data; ++i ) + for ( i = 0 ; i < m->n_data; ++i ) chart_datum (dnp_chart, 0, m->wvp[i]->v.f, d_data[i]); free (d_data); @@ -1921,13 +1949,13 @@ np_plot (const struct metrics *m, const char *factorname) /* Show the percentiles */ void -show_percentiles (struct variable **dependent_var, - int n_dep_var, +show_percentiles (struct variable **dependent_var, + int n_dep_var, struct factor *fctr) { struct tab_table *tbl; int i; - + int n_cols, n_rows; int n_factors; @@ -1941,7 +1969,7 @@ show_percentiles (struct variable **dependent_var, if ( fctr ) { - struct factor_statistics **fs = fctr->fs ; + struct factor_statistics **fs = fctr->fs ; n_heading_columns = 3; n_factors = hsh_count (fctr->fstats); @@ -1962,7 +1990,7 @@ show_percentiles (struct variable **dependent_var, n_rows = n_heading_rows + n_dep_var * n_stat_rows * n_factors; - n_cols = n_heading_columns + n_ptiles ; + n_cols = n_heading_columns + n_ptiles ; tbl = tab_create (n_cols, n_rows, 0); @@ -1971,7 +1999,7 @@ show_percentiles (struct variable **dependent_var, tab_dim (tbl, tab_natural_dimensions); /* Outline the box and have no internal lines*/ - tab_box (tbl, + tab_box (tbl, TAL_2, TAL_2, -1, -1, 0, 0, @@ -1988,14 +2016,14 @@ show_percentiles (struct variable **dependent_var, tab_hline (tbl, TAL_1, n_heading_columns, n_cols - 1, 1 ); - tab_box (tbl, + tab_box (tbl, -1, -1, -1, TAL_1, 0, n_heading_rows, n_heading_columns - 1, n_rows - 1); - tab_box (tbl, + tab_box (tbl, -1, -1, -1, TAL_1, n_heading_columns, n_heading_rows - 1, @@ -2013,18 +2041,18 @@ show_percentiles (struct variable **dependent_var, struct percentile **p = (struct percentile **) hsh_sort (ptiles); i = 0; - while ( (*p) ) + while ( (*p) ) { - tab_float (tbl, n_heading_columns + i++ , 1, + tab_float (tbl, n_heading_columns + i++ , 1, TAB_CENTER, (*p)->p, 8, 0); - + p++; } } - for ( i = 0 ; i < n_dep_var ; ++i ) + for ( i = 0 ; i < n_dep_var ; ++i ) { const int n_stat_rows = 2; const int row = n_heading_rows + i * n_stat_rows * n_factors ; @@ -2034,44 +2062,42 @@ show_percentiles (struct variable **dependent_var, tab_text (tbl, 0, i * n_stat_rows * n_factors + n_heading_rows, - TAB_LEFT | TAT_TITLE, + TAB_LEFT | TAT_TITLE, var_to_string (dependent_var[i]) ); if ( fctr ) { + const union value *prev = NULL ; struct factor_statistics **fs = fctr->fs; int count = 0; - tab_text (tbl, 1, n_heading_rows - 1, - TAB_CENTER | TAT_TITLE, + tab_text (tbl, 1, n_heading_rows - 1, + TAB_CENTER | TAT_TITLE, var_to_string (fctr->indep_var[0])); if ( fctr->indep_var[1]) - tab_text (tbl, 2, n_heading_rows - 1, TAB_CENTER | TAT_TITLE, + tab_text (tbl, 2, n_heading_rows - 1, TAB_CENTER | TAT_TITLE, var_to_string (fctr->indep_var[1])); - while ( *fs ) + while ( *fs ) { - - static union value prev ; - - const int row = n_heading_rows + n_stat_rows * + const int row = n_heading_rows + n_stat_rows * ( ( i * n_factors ) + count ); - if ( 0 != compare_values (&prev, & (*fs)->id[0], + if ( !prev || 0 != compare_values (prev, (*fs)->id[0], var_get_width (fctr->indep_var[0]))) { - - if ( count > 0 ) + + if ( count > 0 ) tab_hline (tbl, TAL_1, 1, n_cols - 1, row); - tab_text (tbl, + tab_text (tbl, 1, row, - TAB_LEFT | TAT_TITLE, - value_to_string (& (*fs)->id[0], fctr->indep_var[0]) + TAB_LEFT | TAT_TITLE, + value_to_string ((*fs)->id[0], fctr->indep_var[0]) ); @@ -2079,29 +2105,29 @@ show_percentiles (struct variable **dependent_var, prev = (*fs)->id[0]; - if (fctr->indep_var[1] && count > 0 ) + if (fctr->indep_var[1] && count > 0 ) tab_hline (tbl, TAL_1, 2, n_cols - 1, row); - if ( fctr->indep_var[1]) + if ( fctr->indep_var[1]) tab_text (tbl, 2, row, - TAB_LEFT | TAT_TITLE, - value_to_string (& (*fs)->id[1], fctr->indep_var[1]) + TAB_LEFT | TAT_TITLE, + value_to_string ((*fs)->id[1], fctr->indep_var[1]) ); - populate_percentiles (tbl, n_heading_columns - 1, + populate_percentiles (tbl, n_heading_columns - 1, row, & (*fs)->m[i]); - count++ ; + count++ ; fs++; } } - else + else { - populate_percentiles (tbl, n_heading_columns - 1, + populate_percentiles (tbl, n_heading_columns - 1, i * n_stat_rows * n_factors + n_heading_rows, &totals[i]); } @@ -2119,44 +2145,44 @@ show_percentiles (struct variable **dependent_var, void -populate_percentiles (struct tab_table *tbl, int col, int row, +populate_percentiles (struct tab_table *tbl, int col, int row, const struct metrics *m) { int i; struct percentile **p = (struct percentile **) hsh_sort (m->ptile_hash); - - tab_text (tbl, + + tab_text (tbl, col, row + 1, - TAB_LEFT | TAT_TITLE, + TAB_LEFT | TAT_TITLE, _ ("Tukey\'s Hinges") ); - tab_text (tbl, - col, row, - TAB_LEFT | TAT_TITLE, + tab_text (tbl, + col, row, + TAB_LEFT | TAT_TITLE, ptile_alg_desc[m->ptile_alg] ); i = 0; - while ( (*p) ) + while ( (*p) ) { - tab_float (tbl, col + i + 1 , row, + tab_float (tbl, col + i + 1 , row, TAB_CENTER, (*p)->v, 8, 2); - if ( (*p)->p == 25 ) - tab_float (tbl, col + i + 1 , row + 1, + if ( (*p)->p == 25 ) + tab_float (tbl, col + i + 1 , row + 1, TAB_CENTER, m->hinge[0], 8, 2); - if ( (*p)->p == 50 ) - tab_float (tbl, col + i + 1 , row + 1, + if ( (*p)->p == 50 ) + tab_float (tbl, col + i + 1 , row + 1, TAB_CENTER, m->hinge[1], 8, 2); - if ( (*p)->p == 75 ) - tab_float (tbl, col + i + 1 , row + 1, + if ( (*p)->p == 75 ) + tab_float (tbl, col + i + 1 , row + 1, TAB_CENTER, m->hinge[2], 8, 2); @@ -2171,9 +2197,9 @@ populate_percentiles (struct tab_table *tbl, int col, int row, const char * -factor_to_string (const struct factor *fctr, - struct factor_statistics *fs, - const struct variable *var) +factor_to_string (const struct factor *fctr, + const struct factor_statistics *fs, + const struct variable *var) { static char buf1[100]; @@ -2184,24 +2210,24 @@ factor_to_string (const struct factor *fctr, if (var) sprintf (buf1, "%s (",var_to_string (var) ); - + snprintf (buf2, 100, "%s = %s", var_to_string (fctr->indep_var[0]), - value_to_string (&fs->id[0],fctr->indep_var[0])); - + value_to_string (fs->id[0], fctr->indep_var[0])); + strcat (buf1, buf2); - - if ( fctr->indep_var[1] ) + + if ( fctr->indep_var[1] ) { sprintf (buf2, "; %s = %s)", var_to_string (fctr->indep_var[1]), - value_to_string (&fs->id[1], + value_to_string (fs->id[1], fctr->indep_var[1])); strcat (buf1, buf2); } else { - if ( var ) + if ( var ) strcat (buf1, ")"); } @@ -2211,7 +2237,7 @@ factor_to_string (const struct factor *fctr, const char * -factor_to_string_concise (const struct factor *fctr, +factor_to_string_concise (const struct factor *fctr, struct factor_statistics *fs) { @@ -2221,11 +2247,11 @@ factor_to_string_concise (const struct factor *fctr, char buf2[100]; snprintf (buf, 100, "%s", - value_to_string (&fs->id[0], fctr->indep_var[0])); - - if ( fctr->indep_var[1] ) + value_to_string (fs->id[0], fctr->indep_var[0])); + + if ( fctr->indep_var[1] ) { - sprintf (buf2, ",%s)", value_to_string (&fs->id[1], fctr->indep_var[1]) ); + sprintf (buf2, ",%s)", value_to_string (fs->id[1], fctr->indep_var[1]) ); strcat (buf, buf2); } diff --git a/src/math/ChangeLog b/src/math/ChangeLog index 3b01849f..d3db1d0d 100644 --- a/src/math/ChangeLog +++ b/src/math/ChangeLog @@ -1,3 +1,8 @@ +Thu Dec 7 15:27:49 WST 2006 John Darrington + + * factor-stats.c factor-stats.h: Changed independent values to + be pointers rather than copies. + Wed Dec 6 21:14:59 2006 Ben Pfaff * coefficient.c: (pspp_coeff_get_value) Compare variable pointers diff --git a/src/math/factor-stats.c b/src/math/factor-stats.c index fde6abe1..fd4d91d0 100644 --- a/src/math/factor-stats.c +++ b/src/math/factor-stats.c @@ -60,13 +60,13 @@ metrics_precalc(struct metrics *m) If val is null, then treat it as MISSING */ void -metrics_calc(struct metrics *fs, const union value *val, - double weight, int case_no) +metrics_calc (struct metrics *fs, const union value *val, + double weight, int case_no) { struct weighted_value **wv; double x; - - if ( ! val ) + + if ( ! val ) { fs->n_missing += weight; return ; @@ -83,15 +83,15 @@ metrics_calc(struct metrics *fs, const union value *val, wv = (struct weighted_value **) hsh_probe (fs->ordered_data,(void *) val ); - if ( *wv ) + if ( *wv ) { - /* If this value has already been seen, then simply + /* If this value has already been seen, then simply increase its weight and push a new case number */ struct case_node *cn; assert( (*wv)->v.f == val->f ); - (*wv)->w += weight; + (*wv)->w += weight; cn = xmalloc ( sizeof *cn); cn->next = (*wv)->case_nos ; @@ -106,7 +106,7 @@ metrics_calc(struct metrics *fs, const union value *val, *wv = weighted_value_create(); (*wv)->v = *val; (*wv)->w = weight; - + cn = xmalloc (sizeof *cn); cn->next=0; cn->num = case_no; @@ -123,9 +123,9 @@ metrics_postcalc(struct metrics *m) double tc ; int k1, k2 ; int i; - int j = 1; + int j = 1; - moments1_calculate (m->moments, &m->n, &m->mean, &m->var, + moments1_calculate (m->moments, &m->n, &m->mean, &m->var, &m->skewness, &m->kurtosis); moments1_destroy (m->moments); @@ -143,7 +143,7 @@ metrics_postcalc(struct metrics *m) m->n_data = hsh_count(m->ordered_data); /* Trimmed mean calculation */ - if ( m->n_data <= 1 ) + if ( m->n_data <= 1 ) { m->trimmed_mean = m->mean; return; @@ -151,7 +151,7 @@ metrics_postcalc(struct metrics *m) m->histogram = histogram_create(10, m->min, m->max); - for ( i = 0 ; i < m->n_data ; ++i ) + for ( i = 0 ; i < m->n_data ; ++i ) { struct weighted_value **wv = (m->wvp) ; gsl_histogram_accumulate(m->histogram, wv[i]->v.f, wv[i]->w); @@ -161,25 +161,25 @@ metrics_postcalc(struct metrics *m) k1 = -1; k2 = -1; - for ( i = 0 ; i < m->n_data ; ++i ) + for ( i = 0 ; i < m->n_data ; ++i ) { cc += m->wvp[i]->w; m->wvp[i]->cc = cc; m->wvp[i]->rank = j + (m->wvp[i]->w - 1) / 2.0 ; - + j += m->wvp[i]->w; - - if ( cc < tc ) + + if ( cc < tc ) k1 = i; } - + k2 = m->n_data; - for ( i = m->n_data -1 ; i >= 0; --i ) + for ( i = m->n_data -1 ; i >= 0; --i ) { - if ( tc > m->n - m->wvp[i]->cc) + if ( tc > m->n - m->wvp[i]->cc) k2 = i; } @@ -192,14 +192,14 @@ metrics_postcalc(struct metrics *m) m->n_data, m->n, m->hinge); /* Special case here */ - if ( k1 + 1 == k2 ) + if ( k1 + 1 == k2 ) { m->trimmed_mean = m->wvp[k2]->v.f; return; } m->trimmed_mean = 0; - for ( i = k1 + 2 ; i <= k2 - 1 ; ++i ) + for ( i = k1 + 2 ; i <= k2 - 1 ; ++i ) { m->trimmed_mean += m->wvp[i]->v.f * m->wvp[i]->w; } @@ -225,12 +225,12 @@ weighted_value_create(void) return wv; } -void +void weighted_value_free(struct weighted_value *wv) { struct case_node *cn ; - if ( !wv ) + if ( !wv ) return ; cn = wv->case_nos; @@ -238,7 +238,7 @@ weighted_value_free(struct weighted_value *wv) while(cn) { struct case_node *next = cn->next; - + free(cn); cn = next; } @@ -252,16 +252,18 @@ weighted_value_free(struct weighted_value *wv) /* Create a factor statistics object with for N dependent vars - and ID as the value of the independent variable */ -struct factor_statistics * -create_factor_statistics (int n, union value *id0, union value *id1) + and ID0 and ID1 as the values of the independent variable */ +struct factor_statistics * +create_factor_statistics (int n, + union value *id0, + union value *id1) { struct factor_statistics *f; f = xmalloc (sizeof *f); - f->id[0] = *id0; - f->id[1] = *id1; + f->id[0] = id0; + f->id[1] = id1; f->m = xnmalloc (n, sizeof *f->m); memset (f->m, 0, sizeof(struct metrics) * n); f->n_var = n; @@ -269,13 +271,12 @@ create_factor_statistics (int n, union value *id0, union value *id1) return f; } - -void +void metrics_destroy(struct metrics *m) { hsh_destroy(m->ordered_data); hsh_destroy(m->ptile_hash); - if ( m-> histogram ) + if ( m-> histogram ) gsl_histogram_free(m->histogram); } @@ -283,17 +284,17 @@ void factor_statistics_free(struct factor_statistics *f) { - int i; - for ( i = 0 ; i < f->n_var; ++i ) + int i; + free (f->id[0]); + free (f->id[1]); + for ( i = 0 ; i < f->n_var; ++i ) metrics_destroy(&f->m[i]); - free(f->m) ; + free(f->m) ; free(f); } - - -int +int factor_statistics_compare(const struct factor_statistics *f0, const struct factor_statistics *f1, int width) { @@ -303,33 +304,30 @@ factor_statistics_compare(const struct factor_statistics *f0, assert(f0); assert(f1); - cmp0 = compare_values(&f0->id[0], &f1->id[0], width); + cmp0 = compare_values(f0->id[0], f1->id[0], width); - if ( cmp0 != 0 ) + if ( cmp0 != 0 ) return cmp0; - if ( ( f0->id[1].f == SYSMIS ) && (f1->id[1].f != SYSMIS) ) + if ( ( f0->id[1]->f == SYSMIS ) && (f1->id[1]->f != SYSMIS) ) return 1; - if ( ( f0->id[1].f != SYSMIS ) && (f1->id[1].f == SYSMIS) ) + if ( ( f0->id[1]->f != SYSMIS ) && (f1->id[1]->f == SYSMIS) ) return -1; - return compare_values(&f0->id[1], &f1->id[1], width); - + return compare_values (f0->id[1], f1->id[1], width); } -unsigned int -factor_statistics_hash(const struct factor_statistics *f, int width) +unsigned int +factor_statistics_hash (const struct factor_statistics *f, int width) { - unsigned int h; - h = hash_value(&f->id[0], width); - - if ( f->id[1].f != SYSMIS ) - h += hash_value(&f->id[1], width); + h = hash_value (f->id[0], width); + + if ( f->id[1]->f != SYSMIS ) + h += hash_value(f->id[1], width); return h; } - diff --git a/src/math/factor-stats.h b/src/math/factor-stats.h index 720ea017..98909705 100644 --- a/src/math/factor-stats.h +++ b/src/math/factor-stats.h @@ -1,7 +1,7 @@ /* PSPP - A program for statistical analysis . -*-c-*- Copyright (C) 2004 Free Software Foundation, Inc. -Author: John Darrington 2004 +Author: John Darrington 2004, 2006 This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -22,7 +22,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA #define FACTOR_STATS -/* FIXME: These things should probably be amalgamated with the +/* FIXME: These things should probably be amalgamated with the group_statistics struct */ #include @@ -39,13 +39,13 @@ struct metrics double n; double n_missing; - + double min; double max; double mean; - + double se_mean; double var; @@ -90,7 +90,7 @@ struct metrics * metrics_create(void); void metrics_precalc(struct metrics *m); -void metrics_calc(struct metrics *m, const union value *f, double weight, +void metrics_calc(struct metrics *m, const union value *f, double weight, int case_no); void metrics_postcalc(struct metrics *m); @@ -106,7 +106,7 @@ struct case_node struct case_node *next; }; -struct weighted_value +struct weighted_value { union value v; @@ -114,14 +114,14 @@ struct weighted_value double w; /* The cumulative weight */ - double cc; + double cc; /* The rank */ double rank; /* Linked list of cases nos which have this value */ struct case_node *case_nos; - + }; @@ -134,7 +134,7 @@ void weighted_value_free(struct weighted_value *wv); struct factor_statistics { /* The values of the independent variables */ - union value id[2]; + union value *id[2]; /* The an array stats for this factor, one for each dependent var */ struct metrics *m; @@ -146,8 +146,9 @@ struct factor_statistics { /* Create a factor statistics object with for N dependent vars and ID as the value of the independent variable */ -struct factor_statistics * -create_factor_statistics (int n, union value *id0, union value *id1); +struct factor_statistics * create_factor_statistics (int n, + union value *id0, + union value *id1); void factor_statistics_free(struct factor_statistics *f); @@ -155,13 +156,11 @@ void factor_statistics_free(struct factor_statistics *f); /* Compare f0 and f1. width is the width of the independent variable */ -int +int factor_statistics_compare(const struct factor_statistics *f0, const struct factor_statistics *f1, int width); - - -unsigned int +unsigned int factor_statistics_hash(const struct factor_statistics *f, int width); #endif diff --git a/src/math/sort.c b/src/math/sort.c index 827c2314..7d2469a8 100644 --- a/src/math/sort.c +++ b/src/math/sort.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include #include -- 2.30.2