From cd7b08ad5e6bbec75e778acf008f84e1eb548154 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sat, 20 Nov 2004 12:26:20 +0000 Subject: [PATCH] Rewrote most of the examine command. --- doc/ChangeLog | 4 +- doc/statistics.texi | 30 +- src/ChangeLog | 17 + src/casefile.c | 7 + src/casefile.h | 2 + src/examine.q | 1742 ++++++++++++++++++++------------------ src/factor_stats.c | 204 ++++- src/factor_stats.h | 91 +- src/glob.c | 2 +- src/hash.c | 5 + src/value-labels.c | 8 +- src/var-labs.c | 3 + tests/ChangeLog | 2 + tests/Makefile.am | 1 + tests/command/examine.sh | 255 ++++++ 15 files changed, 1472 insertions(+), 901 deletions(-) create mode 100755 tests/command/examine.sh diff --git a/doc/ChangeLog b/doc/ChangeLog index c674017b..7618916e 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,5 @@ + * statistics.texi Added documentation about the EXAMINE cmd + Tue Nov 16 13:18:53 WST 2004 John Darrington * utilities.texi Added documentation for the PERMISSIONS command. @@ -11,7 +13,7 @@ Tue Nov 9 09:38:43 WST 2004 John Darrington Fri Nov 5 17:46:46 WST 2004 John Darrington - * Added a note to the about SPLIT requiring adjecent cases. + * Added a note to the about SPLIT requiring adjacent cases. Sat Oct 30 17:32:53 WST 2004 John Darrington diff --git a/doc/statistics.texi b/doc/statistics.texi index 6014e670..08d66d79 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -10,6 +10,7 @@ far. @menu * DESCRIPTIVES:: Descriptive statistics. * FREQUENCIES:: Frequency tables. +* EXAMINE:: Testing data for normality. * CROSSTABS:: Crosstabulation tables. * T-TEST:: Test hypotheses about means. * ONEWAY:: One way analysis of variance. @@ -105,7 +106,7 @@ in the order that they are specified on the VARIABLES subcommand. The A and D settings request an ascending or descending sort order, respectively. -@node FREQUENCIES, CROSSTABS, DESCRIPTIVES, Statistics +@node FREQUENCIES, EXAMINE, DESCRIPTIVES, Statistics @section FREQUENCIES @vindex FREQUENCIES @@ -212,7 +213,32 @@ boundaries of the data set divided into the specified number of ranges. For instance, @code{/NTILES=4} would cause quartiles to be reported. -@node CROSSTABS, T-TEST, FREQUENCIES, Statistics +@node EXAMINE, CROSSTABS, FREQUENCIES, Statistics +@comment node-name, next, previous, up +@section EXAMINE +@vindex EXAMINE + +@cindex Normality, testing for + +@display +EXAMINE + VARIABLES=var_list [[BY var_list] [BY var_list]] + /STATISTICS=@{DESCRIPTIVES, EXTREME[(n)], ALL, NONE@} + /PLOT=@{STEMLEAF, BOXPLOT, NPPLOT, SPREADLEVEL(n), HISTOGRAM, + ALL, NONE@} + /CINTERVAL n + /COMPARE=@{GROUPS,VARIABLES@} + /ID=@{case_number, var_name@} + /@{TOTAL,NOTOTAL@} + /MISSING=@{LISTWISE, PAIRWISE@} [@{EXCLUDE, INCLUDE@}] + [@{NOREPORT,REPORT@}] +@end display + +The @cmd{EXAMINE} command is used to test how closely a distribution is to a +normal distribution. It also shows you outliers and extreme values. + + +@node CROSSTABS, T-TEST, EXAMINE, Statistics @section CROSSTABS @vindex CROSSTABS diff --git a/src/ChangeLog b/src/ChangeLog index dbf1f89c..07513dc2 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,18 @@ + * var-labs.c (var_to_string) Now returns null if the variable is null + + * value-labels.c (value_to_string) Made it return null if either the + value or the variable is null. + + * hash.c (hsh_clear) Fixed a buglet. + + * examine.q factor_stats.[ch] Largely rewrote, because I'd started + with the wrong model. + + * casefile.[ch] Added a function to return the casereader.case_idx + member + + * examine.q Implemented the extreme values results. + John Darrington * settings.h set.c glob.[ch] frequencies.q q2c.c error.c lexer.[ch] @@ -47,6 +62,7 @@ Tue Nov 16 13:19:18 WST 2004 John Darrington * permissions.c command.def Added the PERMISSIONS command +>>>>>>> 1.110 Mon Nov 15 01:33:32 2004 Ben Pfaff * q2c.c: (dump_header) Don't try to emit #includes at very top of @@ -273,6 +289,7 @@ Mon Nov 15 00:30:33 2004 Ben Pfaff (var_dtor_free) New function. (discard_variables) Use NULL instead of inline_file. +>>>>>>> 1.106 Fri Nov 12 10:07:11 WST 2004 John Darrington * value-labs.c Fixed the implmentation of value_to_string, so diff --git a/src/casefile.c b/src/casefile.c index 6d74e8e2..6033427c 100644 --- a/src/casefile.c +++ b/src/casefile.c @@ -90,6 +90,13 @@ struct casereader struct ccase c; /* Current case. */ }; +/* Return the case number of the current case */ +unsigned long +casereader_cnum(const struct casereader *r) +{ + return r->case_idx; +} + /* Doubly linked list of all casefiles. */ static struct casefile *casefiles; diff --git a/src/casefile.h b/src/casefile.h index 5674de0a..a074cb36 100644 --- a/src/casefile.h +++ b/src/casefile.h @@ -48,4 +48,6 @@ int casereader_read (struct casereader *, struct ccase *); int casereader_read_xfer (struct casereader *, struct ccase *); void casereader_destroy (struct casereader *); +unsigned long casereader_cnum(const struct casereader *); + #endif /* casefile.h */ diff --git a/src/examine.q b/src/examine.q index e9e0ca7e..697176f3 100644 --- a/src/examine.q +++ b/src/examine.q @@ -70,71 +70,107 @@ static struct variable **dependent_vars; static int n_dependent_vars; -static struct hsh_table *hash_table_factors=0; - - - struct factor { - /* The independent variable for this factor */ - struct variable *indep_var; + /* The independent variable */ + struct variable *indep_var[2]; + - /* The factor statistics for each value of the independent variable */ - struct hsh_table *hash_table_val; + /* Hash table of factor stats indexed by 2 values */ + struct hsh_table *fstats; - /* The subfactor (if any) */ - struct factor *subfactor; + /* The hash table after it's been crunched */ + struct factor_statistics **fs; + + struct factor *next; }; +/* Linked list of factors */ +static struct factor *factors=0; +static struct metrics *totals=0; +void +print_factors(void) +{ + struct factor *f = factors; -/* Parse the clause specifying the factors */ -static int examine_parse_independent_vars(struct cmd_examine *cmd, - struct hsh_table *hash_factors ); + while (f) + { + struct factor_statistics **fs = f->fs; + + printf("Factor: %s BY %s\n", + var_to_string(f->indep_var[0]), + var_to_string(f->indep_var[1]) ); + + + printf("Contains %d entries\n", hsh_count(f->fstats)); + + while (*fs) + { + printf("Factor %g; %g\n", (*fs)->id[0].f, (*fs)->id[1].f); + + /* + printf("Factor %s; %s\n", + value_to_string(&(*fs)->id[0], f->indep_var[0]), + value_to_string(&(*fs)->id[1], f->indep_var[1])); + */ + + + printf("Sum is %g; ",(*fs)->m[0].sum); + printf("N is %g; ",(*fs)->m[0].n); + printf("Mean is %g\n",(*fs)->m[0].mean); + + fs++ ; + } + f = f->next; + } + +} -/* Functions to support hashes of factors */ -int compare_factors(const struct factor *f1, const struct factor *f2, - void *aux); -unsigned hash_factor(const struct factor *f, void *aux); +/* Parse the clause specifying the factors */ +static int examine_parse_independent_vars(struct cmd_examine *cmd); -void free_factor(struct factor *f, void *aux UNUSED); /* Output functions */ static void show_summary(struct variable **dependent_var, int n_dep_var, - struct factor *f); + const struct factor *f); + +static void show_extremes(struct variable **dependent_var, + int n_dep_var, + const struct factor *factor, + int n_extremities); static void show_descriptives(struct variable **dependent_var, int n_dep_var, struct factor *factor); -static void show_extremes(struct variable **dependent_var, - int n_dep_var, - struct factor *factor, - int n_extremities); - +void np_plot(const struct metrics *m, const char *factorname); -void np_plot(const struct metrics *m, const char *varname); /* Per Split function */ -static void run_examine(const struct casefile *cf, void *); +static void run_examine(const struct casefile *cf, void *cmd_); static void output_examine(void); -static struct factor_statistics *totals = 0; +void factor_calc(struct ccase *c, int case_no, + double weight, int case_missing); +/* Function to use for testing for missing values */ +static is_missing_func value_is_missing; + int cmd_examine(void) @@ -142,48 +178,45 @@ cmd_examine(void) if ( !parse_examine(&cmd) ) return CMD_FAILURE; - + + /* If /MISSING=INCLUDE is set, then user missing values are ignored */ + if (cmd.incl == XMN_INCLUDE ) + value_is_missing = is_system_missing; + else + value_is_missing = is_missing; + if ( cmd.st_n == SYSMIS ) cmd.st_n = 5; if ( ! cmd.sbc_cinterval) cmd.n_cinterval[0] = 95.0; - - totals = xmalloc ( sizeof (struct factor_statistics *) ); - - totals->stats = xmalloc(sizeof ( struct metrics ) * n_dependent_vars); - - multipass_procedure_with_splits (run_examine, NULL); - - - hsh_destroy(hash_table_factors); - - free(totals->stats); - free(totals); + multipass_procedure_with_splits (run_examine, &cmd); return CMD_SUCCESS; }; + /* Show all the appropriate tables */ static void output_examine(void) { + struct factor *fctr; /* Show totals if appropriate */ - if ( ! cmd.sbc_nototal || - ! hash_table_factors || 0 == hsh_count (hash_table_factors)) + if ( ! cmd.sbc_nototal ) { - show_summary(dependent_vars, n_dependent_vars,0); + show_summary(dependent_vars, n_dependent_vars, 0); if ( cmd.sbc_statistics ) { - if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES]) - show_descriptives(dependent_vars, n_dependent_vars, 0); - if ( cmd.a_statistics[XMN_ST_EXTREME]) show_extremes(dependent_vars, n_dependent_vars, 0, cmd.st_n); + + if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES]) + show_descriptives(dependent_vars, n_dependent_vars, 0); + } if ( cmd.sbc_plot) @@ -193,62 +226,75 @@ output_examine(void) int v; for ( v = 0 ; v < n_dependent_vars; ++v ) - { - np_plot(&totals->stats[v], var_to_string(dependent_vars[v])); - } - + np_plot(&totals[v], var_to_string(dependent_vars[v])); } } + } - /* Show grouped statistics if appropriate */ - if ( hash_table_factors && 0 != hsh_count (hash_table_factors)) + /* Show grouped statistics as appropriate */ + fctr = factors; + while ( fctr ) { - struct hsh_iterator hi; - struct factor *f; + show_summary(dependent_vars, n_dependent_vars, fctr); - for(f = hsh_first(hash_table_factors,&hi); - f != 0; - f = hsh_next(hash_table_factors,&hi)) + if ( cmd.sbc_statistics ) { - show_summary(dependent_vars, n_dependent_vars,f); - - if ( cmd.sbc_statistics ) - { - if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES]) - show_descriptives(dependent_vars, n_dependent_vars, f); - - if ( cmd.a_statistics[XMN_ST_EXTREME]) - show_extremes(dependent_vars, n_dependent_vars, f, cmd.st_n); - } + if ( cmd.a_statistics[XMN_ST_EXTREME]) + show_extremes(dependent_vars, n_dependent_vars, fctr, cmd.st_n); + if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES]) + show_descriptives(dependent_vars, n_dependent_vars, fctr); + } - if ( cmd.sbc_plot) + if ( cmd.sbc_plot) + { + if ( cmd.a_plot[XMN_PLT_NPPLOT] ) { - if ( cmd.a_plot[XMN_PLT_NPPLOT] ) + int v; + for ( v = 0 ; v < n_dependent_vars; ++ v) { - struct hsh_iterator h2; - struct factor_statistics *foo ; - for (foo = hsh_first(f->hash_table_val,&h2); - foo != 0 ; - foo = hsh_next(f->hash_table_val,&h2)) + + struct factor_statistics **fs = fctr->fs ; + for ( fs = fctr->fs ; *fs ; ++fs ) { - int v; - for ( v = 0 ; v < n_dependent_vars; ++ v) + char buf1[100]; + char buf2[100]; + sprintf(buf1, "%s (", + var_to_string(dependent_vars[v])); + + sprintf(buf2, "%s = %s", + var_to_string(fctr->indep_var[0]), + value_to_string(&(*fs)->id[0],fctr->indep_var[0])); + + strcat(buf1, buf2); + + + if ( fctr->indep_var[1] ) + { + sprintf(buf2, "; %s = %s)", + var_to_string(fctr->indep_var[1]), + value_to_string(&(*fs)->id[1], + fctr->indep_var[1])); + strcat(buf1, buf2); + } + else { - char buf[100]; - sprintf(buf, "%s (%s = %s)", - var_to_string(dependent_vars[v]), - var_to_string(f->indep_var), - value_to_string(foo->id,f->indep_var)); - np_plot(&foo->stats[v], buf); + strcat(buf1, ")"); } + + np_plot(&(*fs)->m[v],buf1); + } + } + } } + + fctr = fctr->next; } } @@ -281,54 +327,6 @@ xmn_custom_nototal(struct cmd_examine *p) } -/* Compare two factors */ -int -compare_factors (const struct factor *f1, - const struct factor *f2, - void *aux) -{ - int indep_var_cmp = strcmp(f1->indep_var->name, f2->indep_var->name); - - if ( 0 != indep_var_cmp ) - return indep_var_cmp; - - /* If the names are identical, and there are no subfactors then - the factors are identical */ - if ( ! f1->subfactor && ! f2->subfactor ) - return 0; - - /* ... otherwise we must compare the subfactors */ - - return compare_factors(f1->subfactor, f2->subfactor, aux); - -} - -/* Create a hash of a factor */ -unsigned -hash_factor( const struct factor *f, void *aux) -{ - unsigned h; - h = hsh_hash_string(f->indep_var->name); - - if ( f->subfactor ) - h += hash_factor(f->subfactor, aux); - - return h; -} - - -/* Free up a factor */ -void -free_factor(struct factor *f, void *aux) -{ - hsh_destroy(f->hash_table_val); - - if ( f->subfactor ) - free_factor(f->subfactor, aux); - - free(f); -} - /* Parser for the variables sub command */ static int @@ -350,981 +348,1075 @@ xmn_custom_variables(struct cmd_examine *cmd ) assert(n_dependent_vars); + totals = xmalloc( sizeof(struct metrics) * n_dependent_vars); + if ( lex_match(T_BY)) { - hash_table_factors = hsh_create(4, - (hsh_compare_func *) compare_factors, - (hsh_hash_func *) hash_factor, - (hsh_free_func *) free_factor, 0); - - return examine_parse_independent_vars(cmd, hash_table_factors); + return examine_parse_independent_vars(cmd); } - - return 1; } + /* Parse the clause specifying the factors */ static int -examine_parse_independent_vars(struct cmd_examine *cmd, - struct hsh_table *hash_table_factors ) +examine_parse_independent_vars(struct cmd_examine *cmd) { - struct factor *f = 0; + + struct factor *sf = xmalloc(sizeof(struct factor)); if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) && token != T_ALL) return 2; - if ( !f ) - { - f = xmalloc(sizeof(struct factor)); - f->indep_var = 0; - f->hash_table_val = 0; - f->subfactor = 0; - } - - f->indep_var = parse_variable(); - - if ( ! f->hash_table_val ) - f->hash_table_val = hsh_create(4,(hsh_compare_func *) compare_indep_values, - (hsh_hash_func *) hash_indep_value, - (hsh_free_func *) free_factor_stats, - (void *) f->indep_var->width); + + sf->indep_var[0] = parse_variable(); + sf->indep_var[1] = 0; if ( token == T_BY ) { + lex_match(T_BY); if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) && token != T_ALL) return 2; - f->subfactor = xmalloc(sizeof(struct factor)); + sf->indep_var[1] = parse_variable(); - f->subfactor->indep_var = parse_variable(); - - f->subfactor->subfactor = 0; - - f->subfactor->hash_table_val = - hsh_create(4, - (hsh_compare_func *) compare_indep_values, - (hsh_hash_func *) hash_indep_value, - (hsh_free_func *) free_factor_stats, - (void *) f->subfactor->indep_var->width); } - hsh_insert(hash_table_factors, f); + + sf->fstats = hsh_create(4, + (hsh_compare_func *) factor_statistics_compare, + (hsh_hash_func *) factor_statistics_hash, + (hsh_free_func *) factor_statistics_free, + 0); + + sf->next = factors; + factors = sf; lex_match(','); if ( token == '.' || token == '/' ) return 1; - return examine_parse_independent_vars(cmd, hash_table_factors); + return examine_parse_independent_vars(cmd); } + + void populate_descriptives(struct tab_table *t, int col, int row, const struct metrics *fs); +void populate_extremes(struct tab_table *t, int col, int row, int n, + const struct metrics *m); -void populate_extremities(struct tab_table *t, int col, int row, int n); +void populate_summary(struct tab_table *t, int col, int row, + const struct metrics *m); -/* Show the descriptives table */ + + +static int bad_weight_warn = 1; + + +/* Perform calculations for the sub factors */ void -show_descriptives(struct variable **dependent_var, - int n_dep_var, - struct factor *factor) +factor_calc(struct ccase *c, int case_no, double weight, int case_missing) { - int i; - int heading_columns ; - int n_cols; - const int n_stat_rows = 13; + int v; + struct factor *fctr = factors; - const int heading_rows = 1; - int n_rows = heading_rows ; + while ( fctr) + { + union value indep_vals[2] ; - struct tab_table *t; + indep_vals[0] = * case_data(c, fctr->indep_var[0]->fv); + if ( fctr->indep_var[1] ) + indep_vals[1] = * case_data(c, fctr->indep_var[1]->fv); + else + indep_vals[1].f = SYSMIS; - if ( !factor ) - { - heading_columns = 1; - n_rows += n_dep_var * n_stat_rows; - } - else - { - assert(factor->indep_var); - if ( factor->subfactor == 0 ) + assert(fctr->fstats); + + struct factor_statistics **foo = ( struct factor_statistics ** ) + hsh_probe(fctr->fstats, (void *) &indep_vals); + + if ( !*foo ) { - heading_columns = 2; - n_rows += n_dep_var * hsh_count(factor->hash_table_val) * n_stat_rows; + + *foo = create_factor_statistics(n_dependent_vars, + &indep_vals[0], + &indep_vals[1]); + + for ( v = 0 ; v < n_dependent_vars ; ++v ) + { + metrics_precalc( &(*foo)->m[v] ); + } + } - else + + for ( v = 0 ; v < n_dependent_vars ; ++v ) { - heading_columns = 3; - n_rows += n_dep_var * hsh_count(factor->hash_table_val) * - hsh_count(factor->subfactor->hash_table_val) * n_stat_rows ; + const struct variable *var = dependent_vars[v]; + const union value *val = case_data (c, var->fv); + + if ( value_is_missing(val,var) || case_missing ) + val = 0; + + metrics_calc( &(*foo)->m[v], val, weight, case_no ); } + + fctr = fctr->next; } - n_cols = heading_columns + 4; - t = tab_create (n_cols, n_rows, 0); +} - tab_headers (t, heading_columns + 1, 0, heading_rows, 0); - tab_dim (t, tab_natural_dimensions); - /* Outline the box and have no internal lines*/ - tab_box (t, - TAL_2, TAL_2, - -1, -1, - 0, 0, - n_cols - 1, n_rows - 1); - tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows ); +static void +run_examine(const struct casefile *cf, void *cmd_ ) +{ + struct casereader *r; + struct ccase c; + int v; - tab_vline (t, TAL_1, heading_columns, 0, n_rows - 1); - tab_vline (t, TAL_2, n_cols - 2, 0, n_rows - 1); - tab_vline (t, TAL_1, n_cols - 1, 0, n_rows - 1); + const struct cmd_examine *cmd = (struct cmd_examine *) cmd_; - tab_text (t, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Statistic")); - tab_text (t, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Std. Error")); + /* Make sure we haven't got rubbish left over from a + previous split */ + struct factor *fctr = factors; + while (fctr) + { + struct factor *next = fctr->next; - for ( i = 0 ; i < n_dep_var ; ++i ) + hsh_clear(fctr->fstats); + + fctr->fs = 0; + + fctr = next; + } + + + + for ( v = 0 ; v < n_dependent_vars ; ++v ) + metrics_precalc(&totals[v]); + + for(r = casefile_get_reader (cf); + casereader_read (r, &c) ; + case_destroy (&c) ) { - int row; - int n_subfactors = 1; - int n_factors = 1; - - if ( factor ) + int case_missing=0; + const int case_no = casereader_cnum(r); + + const double weight = + dict_get_case_weight(default_dict, &c, &bad_weight_warn); + + if ( cmd->miss == XMN_LISTWISE ) { - n_factors = hsh_count(factor->hash_table_val); - if ( factor->subfactor ) - n_subfactors = hsh_count(factor->subfactor->hash_table_val); + for ( v = 0 ; v < n_dependent_vars ; ++v ) + { + const struct variable *var = dependent_vars[v]; + const union value *val = case_data (&c, var->fv); + + if ( value_is_missing(val,var)) + case_missing = 1; + + } } + for ( v = 0 ; v < n_dependent_vars ; ++v ) + { + const struct variable *var = dependent_vars[v]; + const union value *val = case_data (&c, var->fv); - row = heading_rows + i * n_stat_rows * n_factors * n_subfactors; + if ( value_is_missing(val,var) || case_missing ) + val = 0; + + metrics_calc(&totals[v], val, weight, case_no ); + + } + + factor_calc(&c, case_no, weight, case_missing); + + } - if ( i > 0 ) - tab_hline(t, TAL_1, 0, n_cols - 1, row ); - if ( factor ) + for ( v = 0 ; v < n_dependent_vars ; ++v) + { + fctr = factors; + while ( fctr ) { struct hsh_iterator hi; - const struct factor_statistics *fs; - int count = 0; + struct factor_statistics *fs; - tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string(factor->indep_var)); + for ( fs = hsh_first(fctr->fstats, &hi); + fs != 0 ; + fs = hsh_next(fctr->fstats, &hi)) + { + metrics_postcalc(&fs->m[v]); + } + fctr = fctr->next; + } + metrics_postcalc(&totals[v]); + } - for (fs = hsh_first(factor->hash_table_val, &hi); - fs != 0; - fs = hsh_next(factor->hash_table_val, &hi)) - { - tab_text (t, 1, - row + count * n_subfactors * n_stat_rows, - TAB_RIGHT | TAT_TITLE, - value_to_string(fs->id, factor->indep_var) - ); + /* Make sure that the combination of factors are complete */ - if ( count > 0 ) - tab_hline (t, TAL_1, 1, n_cols - 1, - row + count * n_subfactors * n_stat_rows); + fctr = factors; + while ( fctr ) + { + struct hsh_iterator hi; + struct hsh_iterator hi0; + struct hsh_iterator hi1; + struct factor_statistics *fs; + + struct hsh_table *idh0=0; + struct hsh_table *idh1=0; + union value *val0; + union value *val1; + + idh0 = hsh_create(4, (hsh_compare_func *) compare_values, + (hsh_hash_func *) hash_value, + 0,0); - if ( factor->subfactor ) - { - int count2=0; - struct hsh_iterator h2; - const struct factor_statistics *sub_fs; - - tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string(factor->subfactor->indep_var)); - - for ( sub_fs = hsh_first(factor->subfactor->hash_table_val, - &h2); - sub_fs != 0; - sub_fs = hsh_next(factor->subfactor->hash_table_val, - &h2)) - { - - tab_text(t, 2, - row - + count * n_subfactors * n_stat_rows - + count2 * n_stat_rows, - TAB_RIGHT | TAT_TITLE , - value_to_string(sub_fs->id, factor->subfactor->indep_var) - ); - - if ( count2 > 0 ) - tab_hline (t, TAL_1, 2, n_cols - 1, - row - + count * n_subfactors * n_stat_rows - + count2 * n_stat_rows); - - populate_descriptives(t, heading_columns, - row - + count * n_subfactors - * n_stat_rows - + count2 * n_stat_rows, - &sub_fs->stats[i]); - - - count2++; - } - } - else - { - - populate_descriptives(t, heading_columns, - row - + count * n_subfactors * n_stat_rows, - &fs->stats[i]); - } + idh1 = hsh_create(4, (hsh_compare_func *) compare_values, + (hsh_hash_func *) hash_value, + 0,0); - count ++; - } + + for ( fs = hsh_first(fctr->fstats, &hi); + fs != 0 ; + fs = hsh_next(fctr->fstats, &hi)) + { + hsh_insert(idh0,(void *) &fs->id[0]); + hsh_insert(idh1,(void *) &fs->id[1]); } - else + + /* Ensure that the factors combination is complete */ + for ( val0 = hsh_first(idh0, &hi0); + val0 != 0 ; + val0 = hsh_next(idh0, &hi0)) { - populate_descriptives(t, heading_columns, - row, &totals->stats[i]); + for ( val1 = hsh_first(idh1, &hi1); + val1 != 0 ; + val1 = hsh_next(idh1, &hi1)) + { + struct factor_statistics **ffs; + union value key[2]; + key[0] = *val0; + key[1] = *val1; + + ffs = (struct factor_statistics **) + hsh_probe(fctr->fstats, (void *) &key ); + + if ( !*ffs ) { + int i; + (*ffs) = create_factor_statistics (n_dependent_vars, + &key[0], &key[1]); + for ( i = 0 ; i < n_dependent_vars ; ++i ) + metrics_precalc( &(*ffs)->m[i]); + } + } } - tab_text (t, - 0, row, - TAB_LEFT | TAT_TITLE, - var_to_string(dependent_var[i]) - ); + hsh_destroy(idh0); + hsh_destroy(idh1); + + fctr->fs = (struct factor_statistics **) hsh_sort_copy(fctr->fstats); + fctr = fctr->next; } - tab_title (t, 0, _("Descriptives")); + /* + print_factors(); + */ - tab_submit(t); + output_examine(); } - -/* Fill in the descriptives data */ -void -populate_descriptives(struct tab_table *tbl, int col, int row, - const struct metrics *m) +static void +show_summary(struct variable **dependent_var, int n_dep_var, + const struct factor *fctr) { + static const char *subtitle[]= + { + N_("Valid"), + N_("Missing"), + N_("Total") + }; - const double t = gsl_cdf_tdist_Qinv(1 - cmd.n_cinterval[0]/100.0/2.0, \ - m->n -1); + int i; + int heading_columns ; + int n_cols; + const int heading_rows = 3; + struct tab_table *tbl; + int n_rows ; + int n_factors = 1; - tab_text (tbl, col, - row, - TAB_LEFT | TAT_TITLE, - _("Mean")); + if ( fctr ) + { + heading_columns = 2; + n_factors = hsh_count(fctr->fstats); + n_rows = n_dep_var * n_factors ; - tab_float (tbl, col + 2, - row, - TAB_CENTER, - m->mean, - 8,2); - - tab_float (tbl, col + 3, - row, - TAB_CENTER, - m->stderr, - 8,3); + if ( fctr->indep_var[1] ) + heading_columns = 3; + } + else + { + heading_columns = 1; + n_rows = n_dep_var; + } + + n_rows += heading_rows; + + n_cols = heading_columns + 6; + + tbl = tab_create (n_cols,n_rows,0); + tab_headers (tbl, heading_columns, 0, heading_rows, 0); + + tab_dim (tbl, tab_natural_dimensions); + /* Outline the box */ + tab_box (tbl, + TAL_2, TAL_2, + -1, -1, + 0, 0, + n_cols - 1, n_rows - 1); - tab_text (tbl, col, - row + 1, - TAB_LEFT | TAT_TITLE | TAT_PRINTF, - _("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]); + /* Vertical lines for the data only */ + tab_box (tbl, + -1, -1, + -1, TAL_1, + heading_columns, 0, + n_cols - 1, n_rows - 1); - tab_text (tbl, col + 1, - row + 1, - TAB_LEFT | TAT_TITLE, - _("Lower Bound")); + tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); + tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, 1 ); + tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, heading_rows -1 ); - tab_float (tbl, col + 2, - row + 1, - TAB_CENTER, - m->mean - t * m->stderr, - 8,3); + tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1); - tab_text (tbl, col + 1, - row + 2, - TAB_LEFT | TAT_TITLE, - _("Upper Bound")); + tab_title (tbl, 0, _("Case Processing Summary")); + - tab_float (tbl, col + 2, - row + 2, - TAB_CENTER, - m->mean + t * m->stderr, - 8,3); + tab_joint_text(tbl, heading_columns, 0, + n_cols -1, 0, + TAB_CENTER | TAT_TITLE, + _("Cases")); - tab_text (tbl, col, - row + 3, - TAB_LEFT | TAT_TITLE, - _("5% Trimmed Mean")); + /* Remove lines ... */ + tab_box (tbl, + -1, -1, + TAL_0, TAL_0, + heading_columns, 0, + n_cols - 1, 0); - tab_float (tbl, col + 2, - row + 3, - TAB_CENTER, - m->trimmed_mean, - 8,2); + for ( i = 0 ; i < 3 ; ++i ) + { + tab_text (tbl, heading_columns + i*2 , 2, TAB_CENTER | TAT_TITLE, + _("N")); - tab_text (tbl, col, - row + 4, - TAB_LEFT | TAT_TITLE, - _("Median")); + tab_text (tbl, heading_columns + i*2 + 1, 2, TAB_CENTER | TAT_TITLE, + _("Percent")); - tab_text (tbl, col, - row + 5, - TAB_LEFT | TAT_TITLE, - _("Variance")); + tab_joint_text(tbl, heading_columns + i*2 , 1, + heading_columns + i*2 + 1, 1, + TAB_CENTER | TAT_TITLE, + subtitle[i]); - tab_float (tbl, col + 2, - row + 5, - TAB_CENTER, - m->var, - 8,3); + tab_box (tbl, -1, -1, + TAL_0, TAL_0, + heading_columns + i*2, 1, + heading_columns + i*2 + 1, 1); + } - tab_text (tbl, col, - row + 6, - TAB_LEFT | TAT_TITLE, - _("Std. Deviation")); + /* Titles for the independent variables */ + if ( fctr ) + { + tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, + var_to_string(fctr->indep_var[0])); - tab_float (tbl, col + 2, - row + 6, - TAB_CENTER, - m->stddev, - 8,3); + if ( fctr->indep_var[1] ) + { + tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, + var_to_string(fctr->indep_var[1])); + } + + } - - tab_text (tbl, col, - row + 7, - TAB_LEFT | TAT_TITLE, - _("Minimum")); - tab_float (tbl, col + 2, - row + 7, - TAB_CENTER, - m->min, - 8,3); + for ( i = 0 ; i < n_dep_var ; ++i ) + { + int n_factors = 1; + if ( fctr ) + n_factors = hsh_count(fctr->fstats); + - tab_text (tbl, col, - row + 8, - TAB_LEFT | TAT_TITLE, - _("Maximum")); + if ( i > 0 ) + tab_hline(tbl, TAL_1, 0, n_cols -1 , i * n_factors + heading_rows); + + tab_text (tbl, + 0, i * n_factors + heading_rows, + TAB_LEFT | TAT_TITLE, + var_to_string(dependent_var[i]) + ); - tab_float (tbl, col + 2, - row + 8, - TAB_CENTER, - m->max, - 8,3); + if ( !fctr ) + populate_summary(tbl, heading_columns, + (i * n_factors) + heading_rows, + &totals[i]); - tab_text (tbl, col, - row + 9, - TAB_LEFT | TAT_TITLE, - _("Range")); + else + { + struct factor_statistics **fs = fctr->fs; + int count = 0 ; - tab_float (tbl, col + 2, - row + 9, - TAB_CENTER, - m->max - m->min, - 8,3); + while (*fs) + { + static union value prev; + + if ( 0 != compare_values(&prev, &(*fs)->id[0], + fctr->indep_var[0]->width)) + { + tab_text (tbl, + 1, + (i * n_factors ) + count + + heading_rows, + TAB_LEFT | TAT_TITLE, + value_to_string(&(*fs)->id[0], fctr->indep_var[0]) + ); + + if (fctr->indep_var[1] && count > 0 ) + tab_hline(tbl, TAL_1, 1, n_cols - 1, + (i * n_factors ) + count + heading_rows); - tab_text (tbl, col, - row + 10, - TAB_LEFT | TAT_TITLE, - _("Interquartile Range")); + } + + prev = (*fs)->id[0]; - tab_text (tbl, col, - row + 11, - TAB_LEFT | TAT_TITLE, - _("Skewness")); - tab_text (tbl, col, - row + 12, - TAB_LEFT | TAT_TITLE, - _("Kurtosis")); + if ( fctr->indep_var[1]) + tab_text (tbl, + 2, + (i * n_factors ) + count + + heading_rows, + TAB_LEFT | TAT_TITLE, + value_to_string(&(*fs)->id[1], fctr->indep_var[1]) + ); + + populate_summary(tbl, heading_columns, + (i * n_factors) + count + + heading_rows, + &(*fs)->m[i]); + + count++ ; + fs++; + } + } + } + + tab_submit (tbl); } -void -show_summary(struct variable **dependent_var, - int n_dep_var, - struct factor *factor) +void +populate_summary(struct tab_table *t, int col, int row, + const struct metrics *m) + { - static const char *subtitle[]= - { - N_("Valid"), - N_("Missing"), - N_("Total") - }; + const double total = m->n + m->n_missing ; + + tab_float(t, col + 0, row + 0, TAB_RIGHT, m->n, 8, 0); + tab_float(t, col + 2, row + 0, TAB_RIGHT, m->n_missing, 8, 0); + tab_float(t, col + 4, row + 0, TAB_RIGHT, total, 8, 0); + + + if ( total > 0 ) { + tab_text (t, col + 1, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", + 100.0 * m->n / total ); + + tab_text (t, col + 3, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", + 100.0 * m->n_missing / total ); + + /* This seems a bit pointless !!! */ + tab_text (t, col + 5, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", + 100.0 * total / total ); + + + } + + +} + + +static void +show_extremes(struct variable **dependent_var, int n_dep_var, + const struct factor *fctr, int n_extremities) +{ int i; int heading_columns ; int n_cols; - const int heading_rows = 3; + const int heading_rows = 1; struct tab_table *tbl; - int n_rows = heading_rows; + int n_factors = 1; + int n_rows ; - if ( !factor ) + if ( fctr ) { - heading_columns = 1; - n_rows += n_dep_var; + heading_columns = 2; + n_factors = hsh_count(fctr->fstats); + + n_rows = n_dep_var * 2 * n_extremities * n_factors; + + if ( fctr->indep_var[1] ) + heading_columns = 3; } else { - assert(factor->indep_var); - if ( factor->subfactor == 0 ) - { - heading_columns = 2; - n_rows += n_dep_var * hsh_count(factor->hash_table_val); - } - else - { - heading_columns = 3; - n_rows += n_dep_var * hsh_count(factor->hash_table_val) * - hsh_count(factor->subfactor->hash_table_val) ; - } + heading_columns = 1; + n_rows = n_dep_var * 2 * n_extremities; } + n_rows += heading_rows; - n_cols = heading_columns + 6; + heading_columns += 2; + n_cols = heading_columns + 2; tbl = tab_create (n_cols,n_rows,0); tab_headers (tbl, heading_columns, 0, heading_rows, 0); tab_dim (tbl, tab_natural_dimensions); - /* Outline the box and have vertical internal lines*/ + /* Outline the box, No internal lines*/ tab_box (tbl, TAL_2, TAL_2, - -1, TAL_1, + -1, -1, 0, 0, n_cols - 1, n_rows - 1); tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); - tab_hline (tbl, TAL_1, heading_columns, n_cols - 1, 1 ); - tab_hline (tbl, TAL_1, 0, n_cols - 1, heading_rows -1 ); - - tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1); + tab_title (tbl, 0, _("Extreme Values")); - tab_title (tbl, 0, _("Case Processing Summary")); - - tab_joint_text(tbl, heading_columns, 0, - n_cols -1, 0, - TAB_CENTER | TAT_TITLE, - _("Cases")); + tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows -1); + tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows -1); - /* Remove lines ... */ - tab_box (tbl, - -1, -1, - TAL_0, TAL_0, - heading_columns, 0, - n_cols - 1, 0); - - if ( factor ) + if ( fctr ) { tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string(factor->indep_var)); + var_to_string(fctr->indep_var[0])); - if ( factor->subfactor ) + if ( fctr->indep_var[1] ) tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string(factor->subfactor->indep_var)); + var_to_string(fctr->indep_var[1])); } - for ( i = 0 ; i < 3 ; ++i ) - { - tab_text (tbl, heading_columns + i*2 , 2, TAB_CENTER | TAT_TITLE, _("N")); - tab_text (tbl, heading_columns + i*2 + 1, 2, TAB_CENTER | TAT_TITLE, - _("Percent")); - - tab_joint_text(tbl, heading_columns + i*2 , 1, - heading_columns + i*2 + 1, 1, - TAB_CENTER | TAT_TITLE, - subtitle[i]); + tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value")); + tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number")); - tab_box (tbl, -1, -1, - TAL_0, TAL_0, - heading_columns + i*2, 1, - heading_columns + i*2 + 1, 1); - } for ( i = 0 ; i < n_dep_var ; ++i ) { - int n_subfactors = 1; - int n_factors = 1; - - if ( factor ) - { - n_factors = hsh_count(factor->hash_table_val); - if ( factor->subfactor ) - n_subfactors = hsh_count(factor->subfactor->hash_table_val); - } - tab_text (tbl, - 0, i * n_factors * n_subfactors + heading_rows, + if ( i > 0 ) + tab_hline(tbl, TAL_1, 0, n_cols -1 , + i * 2 * n_extremities * n_factors + heading_rows); + + tab_text (tbl, 0, + i * 2 * n_extremities * n_factors + heading_rows, TAB_LEFT | TAT_TITLE, var_to_string(dependent_var[i]) ); - if ( factor ) + + if ( !fctr ) + populate_extremes(tbl, heading_columns - 2, + i * 2 * n_extremities * n_factors + heading_rows, + n_extremities, &totals[i]); + + else { - struct hsh_iterator hi; - const struct factor_statistics *fs; - int count = 0; + struct factor_statistics **fs = fctr->fs; + int count = 0 ; - for (fs = hsh_first(factor->hash_table_val, &hi); - fs != 0; - fs = hsh_next(factor->hash_table_val, &hi)) + while (*fs) { - tab_text (tbl, 1, - i * n_factors * n_subfactors + heading_rows - + count * n_subfactors, - TAB_RIGHT | TAT_TITLE, - value_to_string(fs->id, factor->indep_var) - ); - - if ( factor->subfactor ) - { - int count2=0; - struct hsh_iterator h2; - const struct factor_statistics *sub_fs; - - for ( sub_fs = hsh_first(factor->subfactor->hash_table_val, - &h2); - sub_fs != 0; - sub_fs = hsh_next(factor->subfactor->hash_table_val, - &h2)) - { - - tab_text(tbl, 2, - i * n_factors * n_subfactors + heading_rows - + count * n_subfactors + count2, - TAB_RIGHT | TAT_TITLE , - value_to_string(sub_fs->id, factor->subfactor->indep_var) - ); - - count2++; - } - } - count ++; - } - } - } + static union value prev ; + const int row = heading_rows + ( 2 * n_extremities ) * + ( ( i * n_factors ) + count ); - tab_submit (tbl); - -} -static int bad_weight_warn = 1; + if ( 0 != compare_values(&prev, &(*fs)->id[0], + fctr->indep_var[0]->width)) + { + + if ( count > 0 ) + tab_hline (tbl, TAL_1, 1, n_cols - 1, row); + + tab_text (tbl, + 1, row, + TAB_LEFT | TAT_TITLE, + value_to_string(&(*fs)->id[0], fctr->indep_var[0]) + ); + } + prev = (*fs)->id[0]; -static void -run_examine(const struct casefile *cf, void *aux UNUSED) -{ - struct hsh_iterator hi; - struct factor *fctr; + if (fctr->indep_var[1] && count > 0 ) + tab_hline(tbl, TAL_1, 2, n_cols - 1, row); - struct casereader *r; - struct ccase c; - int v; + if ( fctr->indep_var[1]) + tab_text (tbl, 2, row, + TAB_LEFT | TAT_TITLE, + value_to_string(&(*fs)->id[1], fctr->indep_var[1]) + ); - /* Make sure we haven't got rubbish left over from a - previous split */ - if ( hash_table_factors ) - { - for ( fctr = hsh_first(hash_table_factors, &hi); - fctr != 0; - fctr = hsh_next (hash_table_factors, &hi) ) - { - hsh_clear(fctr->hash_table_val); + populate_extremes(tbl, heading_columns - 2, + row, n_extremities, + &(*fs)->m[i]); - while ( (fctr = fctr->subfactor) ) - hsh_clear(fctr->hash_table_val); + count++ ; + fs++; + } } } - for ( v = 0 ; v < n_dependent_vars ; ++v ) - metrics_precalc(&totals->stats[v]); + tab_submit(tbl); +} - for(r = casefile_get_reader (cf); - casereader_read (r, &c) ; - case_destroy (&c) ) - { - const double weight = - dict_get_case_weight(default_dict, &c, &bad_weight_warn); - for ( v = 0 ; v < n_dependent_vars ; ++v ) - { - const struct variable *var = dependent_vars[v]; - const union value *val = case_data (&c, var->fv); - metrics_calc(&totals->stats[v], val, weight); - } +/* Fill in the extremities table */ +void +populate_extremes(struct tab_table *t, + int col, int row, int n, const struct metrics *m) +{ + int extremity; + int idx=0; - if ( hash_table_factors ) - { - for ( fctr = hsh_first(hash_table_factors, &hi); - fctr != 0; - fctr = hsh_next (hash_table_factors, &hi) ) - { - const union value *indep_val = - case_data(&c, fctr->indep_var->fv); + const int n_data = hsh_count(m->ordered_data); - struct factor_statistics **foo = ( struct factor_statistics ** ) - hsh_probe(fctr->hash_table_val, (void *) &indep_val); + tab_text(t, col, row, + TAB_RIGHT | TAT_TITLE , + _("Highest") + ); - if ( !*foo ) - { - *foo = xmalloc ( sizeof ( struct factor_statistics)); - (*foo)->id = indep_val; - (*foo)->stats = xmalloc ( sizeof ( struct metrics ) - * n_dependent_vars); + tab_text(t, col, row + n , + TAB_RIGHT | TAT_TITLE , + _("Lowest") + ); - for ( v = 0 ; v < n_dependent_vars ; ++v ) - metrics_precalc( &(*foo)->stats[v] ); - hsh_insert(fctr->hash_table_val, (void *) *foo); - } + tab_hline(t, TAL_1, col, col + 3, row + n ); + + for (extremity = 0; extremity < n ; ++extremity ) + { + /* Highest */ + tab_float(t, col + 1, row + extremity, + TAB_RIGHT, + extremity + 1, 8, 0); + - for ( v = 0 ; v < n_dependent_vars ; ++v ) - { - const struct variable *var = dependent_vars[v]; - const union value *val = case_data (&c, var->fv); + /* Lowest */ + tab_float(t, col + 1, row + extremity + n, + TAB_RIGHT, + extremity + 1, 8, 0); - metrics_calc( &(*foo)->stats[v], val, weight ); - } + } - if ( fctr->subfactor ) - { - struct factor *sfctr = fctr->subfactor; - const union value *ii_val = - case_data (&c, sfctr->indep_var->fv); + /* Lowest */ + for (idx = 0, extremity = 0; extremity < n && idx < n_data ; ++idx ) + { + int j; + const struct weighted_value *wv = &m->wv[idx]; + struct case_node *cn = wv->case_nos; - struct factor_statistics **bar = - (struct factor_statistics **) - hsh_probe(sfctr->hash_table_val, (void *) &ii_val); + + for (j = 0 ; j < wv->w ; ++j ) + { + if ( extremity + j >= n ) + break ; - if ( !*bar ) - { - *bar = xmalloc ( sizeof ( struct factor_statistics)); - (*bar)->id = ii_val; - (*bar)->stats = xmalloc ( sizeof ( struct metrics ) - * n_dependent_vars); - - for ( v = 0 ; v < n_dependent_vars ; ++v ) - metrics_precalc( &(*bar)->stats[v] ); + tab_float(t, col + 3, row + extremity + j + n, + TAB_RIGHT, + wv->v.f, 8, 2); - hsh_insert(sfctr->hash_table_val, - (void *) *bar); - } + tab_float(t, col + 2, row + extremity + j + n, + TAB_RIGHT, + cn->num, 8, 0); - for ( v = 0 ; v < n_dependent_vars ; ++v ) - { - const struct variable *var = dependent_vars[v]; - const union value *val = case_data (&c, var->fv); + if ( cn->next ) + cn = cn->next; - metrics_calc( &(*bar)->stats[v], val, weight ); - } - } - } } + extremity += wv->w ; } - for ( v = 0 ; v < n_dependent_vars ; ++v) + + /* Highest */ + for (idx = n_data - 1, extremity = 0; extremity < n && idx >= 0; --idx ) { - if ( hash_table_factors ) + int j; + const struct weighted_value *wv = &m->wv[idx]; + struct case_node *cn = wv->case_nos; + + for (j = 0 ; j < wv->w ; ++j ) { - for ( fctr = hsh_first(hash_table_factors, &hi); - fctr != 0; - fctr = hsh_next (hash_table_factors, &hi) ) - { - struct hsh_iterator h2; - struct factor_statistics *fs; - - for ( fs = hsh_first(fctr->hash_table_val,&h2); - fs != 0; - fs = hsh_next(fctr->hash_table_val,&h2)) - { - metrics_postcalc( &fs->stats[v] ); - } + if ( extremity + j >= n ) + break ; - if ( fctr->subfactor) - { - struct hsh_iterator hsf; - struct factor_statistics *fss; - - for ( fss = hsh_first(fctr->subfactor->hash_table_val,&hsf); - fss != 0; - fss = hsh_next(fctr->subfactor->hash_table_val,&hsf)) - { - metrics_postcalc( &fss->stats[v] ); - } - } - } - } + tab_float(t, col + 3, row + extremity + j, + TAB_RIGHT, + wv->v.f, 8, 2); - metrics_postcalc(&totals->stats[v]); - } + tab_float(t, col + 2, row + extremity + j, + TAB_RIGHT, + cn->num, 8, 0); - output_examine(); + if ( cn->next ) + cn = cn->next; + + } + extremity += wv->w ; + } } -static void -show_extremes(struct variable **dependent_var, - int n_dep_var, - struct factor *factor, - int n_extremities) +/* Show the descriptives table */ +void +show_descriptives(struct variable **dependent_var, + int n_dep_var, + struct factor *fctr) { int i; int heading_columns ; int n_cols; + const int n_stat_rows = 13; + const int heading_rows = 1; - struct tab_table *t; - int n_rows = heading_rows; + struct tab_table *tbl; + + int n_factors = 1; + int n_rows ; - if ( !factor ) + if ( fctr ) { - heading_columns = 1 + 1; - n_rows += n_dep_var * 2 * n_extremities; + heading_columns = 4; + n_factors = hsh_count(fctr->fstats); + + n_rows = n_dep_var * n_stat_rows * n_factors; + + if ( fctr->indep_var[1] ) + heading_columns = 5; } else { - assert(factor->indep_var); - if ( factor->subfactor == 0 ) - { - heading_columns = 2 + 1; - n_rows += n_dep_var * 2 * n_extremities - * hsh_count(factor->hash_table_val); - } - else - { - heading_columns = 3 + 1; - n_rows += n_dep_var * 2 * n_extremities - * hsh_count(factor->hash_table_val) - * hsh_count(factor->subfactor->hash_table_val) ; - } + heading_columns = 3; + n_rows = n_dep_var * n_stat_rows; } + n_rows += heading_rows; - n_cols = heading_columns + 3; - - t = tab_create (n_cols,n_rows,0); - tab_headers (t, heading_columns, 0, heading_rows, 0); - - tab_dim (t, tab_natural_dimensions); - - /* Outline the box and have vertical internal lines*/ - tab_box (t, - TAL_2, TAL_2, - -1, TAL_1, - 0, 0, - n_cols - 1, n_rows - 1); - - - - tab_hline (t, TAL_2, 0, n_cols - 1, heading_rows ); + n_cols = heading_columns + 2; - tab_title (t, 0, _("Extreme Values")); + tbl = tab_create (n_cols, n_rows, 0); + tab_headers (tbl, heading_columns + 1, 0, heading_rows, 0); + tab_dim (tbl, tab_natural_dimensions); - /* Remove lines ... */ - tab_box (t, + /* Outline the box and have no internal lines*/ + tab_box (tbl, + TAL_2, TAL_2, -1, -1, - TAL_0, TAL_0, - heading_columns, 0, - n_cols - 1, 0); + 0, 0, + n_cols - 1, n_rows - 1); - if ( factor ) - { - tab_text (t, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string(factor->indep_var)); + tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); - if ( factor->subfactor ) - tab_text (t, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string(factor->subfactor->indep_var)); - } + tab_vline (tbl, TAL_1, heading_columns, 0, n_rows - 1); + tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows - 1); + tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1); + + tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Statistic")); + tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Std. Error")); - tab_text (t, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value")); - tab_text (t, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number")); + tab_title (tbl, 0, _("Descriptives")); for ( i = 0 ; i < n_dep_var ; ++i ) { - int n_subfactors = 1; - int n_factors = 1; - - if ( factor ) - { - n_factors = hsh_count(factor->hash_table_val); - if ( factor->subfactor ) - n_subfactors = hsh_count(factor->subfactor->hash_table_val); - } + const int row = heading_rows + i * n_stat_rows * n_factors ; + + if ( i > 0 ) + tab_hline(tbl, TAL_1, 0, n_cols - 1, row ); - tab_text (t, - 0, i * 2 * n_extremities * n_factors * - n_subfactors + heading_rows, + tab_text (tbl, 0, + i * n_stat_rows * n_factors + heading_rows, TAB_LEFT | TAT_TITLE, var_to_string(dependent_var[i]) ); - if ( i > 0 ) - tab_hline (t, - TAL_1, 0, n_cols - 1, - heading_rows + 2 * n_extremities * - (i * n_factors * n_subfactors ) - ); - - if ( factor ) + if ( fctr ) { - struct hsh_iterator hi; - const struct factor_statistics *fs; + struct factor_statistics **fs = fctr->fs; int count = 0; - for ( fs = hsh_first(factor->hash_table_val, &hi); - fs != 0; - fs = hsh_next(factor->hash_table_val, &hi)) + tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, + var_to_string(fctr->indep_var[0])); + + + if ( fctr->indep_var[1]) + tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, + var_to_string(fctr->indep_var[1])); + + while( *fs ) { - tab_text (t, 1, heading_rows + 2 * n_extremities * - (i * n_factors * n_subfactors - + count * n_subfactors), - TAB_RIGHT | TAT_TITLE, - value_to_string(fs->id, factor->indep_var) - ); - if ( count > 0 ) - tab_hline (t, TAL_1, 1, n_cols - 1, - heading_rows + 2 * n_extremities * - (i * n_factors * n_subfactors - + count * n_subfactors)); + static union value prev ; + const int row = heading_rows + n_stat_rows * + ( ( i * n_factors ) + count ); - if ( factor->subfactor ) - { - struct hsh_iterator h2; - const struct factor_statistics *sub_fs; - int count2=0; - - for ( sub_fs = hsh_first(factor->subfactor->hash_table_val, - &h2); - sub_fs != 0; - sub_fs = hsh_next(factor->subfactor->hash_table_val, - &h2)) - { - - tab_text(t, 2, heading_rows + 2 * n_extremities * - (i * n_factors * n_subfactors - + count * n_subfactors + count2 ), - TAB_RIGHT | TAT_TITLE , - value_to_string(sub_fs->id, - factor->subfactor->indep_var) - ); - - - if ( count2 > 0 ) - tab_hline (t, TAL_1, 2, n_cols - 1, - heading_rows + 2 * n_extremities * - (i * n_factors * n_subfactors - + count * n_subfactors + count2 )); - - populate_extremities(t,3, - heading_rows + 2 * n_extremities * - (i * n_factors * n_subfactors - + count * n_subfactors + count2), - n_extremities ); - - count2++; - } - } - else + + if ( 0 != compare_values(&prev, &(*fs)->id[0], + fctr->indep_var[0]->width)) { - populate_extremities(t,2, - heading_rows + 2 * n_extremities * - (i * n_factors * n_subfactors - + count * n_subfactors), - n_extremities); + + if ( count > 0 ) + tab_hline (tbl, TAL_1, 1, n_cols - 1, row); + + tab_text (tbl, + 1, row, + TAB_LEFT | TAT_TITLE, + value_to_string(&(*fs)->id[0], fctr->indep_var[0]) + ); } - count ++; + prev = (*fs)->id[0]; + + if (fctr->indep_var[1] && count > 0 ) + tab_hline(tbl, TAL_1, 2, n_cols - 1, row); + + if ( fctr->indep_var[1]) + tab_text (tbl, 2, row, + TAB_LEFT | TAT_TITLE, + value_to_string(&(*fs)->id[1], fctr->indep_var[1]) + ); + + populate_descriptives(tbl, heading_columns - 2, + row, &(*fs)->m[i]); + + count++ ; + fs++; } + } - else - { - populate_extremities(t, 1, - heading_rows + 2 * n_extremities * - (i * n_factors * n_subfactors ), - n_extremities); + else + { + + populate_descriptives(tbl, heading_columns - 2, + i * n_stat_rows * n_factors + heading_rows, + &totals[i]); } } - tab_submit (t); + tab_submit(tbl); + } -/* Fill in the extremities table */ -void -populate_extremities(struct tab_table *t, int col, int row, int n) + + + +/* Fill in the descriptives data */ +void +populate_descriptives(struct tab_table *tbl, int col, int row, + const struct metrics *m) { - int i; - tab_text(t, col, row, - TAB_RIGHT | TAT_TITLE , - _("Highest") - ); + const double t = gsl_cdf_tdist_Qinv(1 - cmd.n_cinterval[0]/100.0/2.0, \ + m->n -1); - tab_text(t, col, row + n , - TAB_RIGHT | TAT_TITLE , - _("Lowest") - ); - for (i = 0; i < n ; ++i ) - { - tab_float(t, col + 1, row + i, - TAB_RIGHT, - i + 1, 8, 0); + tab_text (tbl, col, + row, + TAB_LEFT | TAT_TITLE, + _("Mean")); - tab_float(t, col + 1, row + i + n, - TAB_RIGHT, - i + 1, 8, 0); - } + tab_float (tbl, col + 2, + row, + TAB_CENTER, + m->mean, + 8,2); + + tab_float (tbl, col + 3, + row, + TAB_CENTER, + m->stderr, + 8,3); + + + tab_text (tbl, col, + row + 1, + TAB_LEFT | TAT_TITLE | TAT_PRINTF, + _("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]); + + + tab_text (tbl, col + 1, + row + 1, + TAB_LEFT | TAT_TITLE, + _("Lower Bound")); + + tab_float (tbl, col + 2, + row + 1, + TAB_CENTER, + m->mean - t * m->stderr, + 8,3); + + tab_text (tbl, col + 1, + row + 2, + TAB_LEFT | TAT_TITLE, + _("Upper Bound")); + + + tab_float (tbl, col + 2, + row + 2, + TAB_CENTER, + m->mean + t * m->stderr, + 8,3); + + tab_text (tbl, col, + row + 3, + TAB_LEFT | TAT_TITLE, + _("5% Trimmed Mean")); + + tab_float (tbl, col + 2, + row + 3, + TAB_CENTER, + m->trimmed_mean, + 8,2); + + tab_text (tbl, col, + row + 4, + TAB_LEFT | TAT_TITLE, + _("Median")); + + tab_text (tbl, col, + row + 5, + TAB_LEFT | TAT_TITLE, + _("Variance")); + + tab_float (tbl, col + 2, + row + 5, + TAB_CENTER, + m->var, + 8,3); + + + tab_text (tbl, col, + row + 6, + TAB_LEFT | TAT_TITLE, + _("Std. Deviation")); + + + tab_float (tbl, col + 2, + row + 6, + TAB_CENTER, + m->stddev, + 8,3); + + + tab_text (tbl, col, + row + 7, + TAB_LEFT | TAT_TITLE, + _("Minimum")); + + tab_float (tbl, col + 2, + row + 7, + TAB_CENTER, + m->min, + 8,3); + + tab_text (tbl, col, + row + 8, + TAB_LEFT | TAT_TITLE, + _("Maximum")); + + tab_float (tbl, col + 2, + row + 8, + TAB_CENTER, + m->max, + 8,3); + + + tab_text (tbl, col, + row + 9, + TAB_LEFT | TAT_TITLE, + _("Range")); + + + tab_float (tbl, col + 2, + row + 9, + TAB_CENTER, + m->max - m->min, + 8,3); + + tab_text (tbl, col, + row + 10, + TAB_LEFT | TAT_TITLE, + _("Interquartile Range")); + + tab_text (tbl, col, + row + 11, + TAB_LEFT | TAT_TITLE, + _("Skewness")); + + tab_text (tbl, col, + row + 12, + TAB_LEFT | TAT_TITLE, + _("Kurtosis")); } + + /* Plot the normal and detrended normal plots for m Label the plots with factorname */ void @@ -1346,6 +1438,10 @@ np_plot(const struct metrics *m, const char *factorname) const double slope = 1.0 / m->stddev; const double intercept = - m->mean / m->stddev; + /* Cowardly refuse to plot an empty data set */ + if ( n_data == 0 ) + return ; + chart_initialise(&np_chart); chart_write_title(&np_chart, _("Normal Q-Q Plot of %s"), factorname); chart_write_xlabel(&np_chart, _("Observed Value")); diff --git a/src/factor_stats.c b/src/factor_stats.c index cb2197ad..16e1930d 100644 --- a/src/factor_stats.c +++ b/src/factor_stats.c @@ -35,7 +35,10 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA void metrics_precalc(struct metrics *fs) { + assert (fs) ; + fs->n = 0; + fs->n_missing = 0; fs->ssq = 0; fs->sum = 0; fs->min = DBL_MAX; @@ -44,18 +47,29 @@ metrics_precalc(struct metrics *fs) fs->ordered_data = hsh_create(20, (hsh_compare_func *) compare_values, (hsh_hash_func *) hash_value, - 0, + (hsh_free_func *) weighted_value_free, (void *) 0); + } + +/* Include val in the calculation for the metrics. + If val is null, then treat it as MISSING +*/ void -metrics_calc(struct metrics *fs, const union value *val, double weight) +metrics_calc(struct metrics *fs, const union value *val, + double weight, int case_no) { - - struct weighted_value **wv; - const double x = val->f; + double x; + if ( ! val ) + { + fs->n_missing += weight; + return ; + } + + x = val->f; fs->n += weight; fs->ssq += x * x * weight; fs->sum += x * weight; @@ -69,23 +83,38 @@ metrics_calc(struct metrics *fs, const union value *val, double weight) if ( *wv ) { /* If this value has already been seen, then simply - increase its weight */ + increase its weight and push a new case number */ + + struct case_node *cn; assert( (*wv)->v.f == val->f ); (*wv)->w += weight; + + cn = xmalloc( sizeof (struct case_node) ) ; + cn->next = (*wv)->case_nos ; + cn->num = case_no; + + (*wv)->case_nos = cn; } else { - *wv = xmalloc( sizeof (struct weighted_value) ); + struct case_node *cn; + + *wv = weighted_value_create(); (*wv)->v = *val; (*wv)->w = weight; - hsh_insert(fs->ordered_data,(void *) *wv); + + cn = xmalloc( sizeof (struct case_node) ) ; + cn->next=0; + cn->num = case_no; + (*wv)->case_nos = cn; + } } void -metrics_postcalc(struct metrics *fs) +metrics_postcalc(struct metrics *m) { double sample_var; double cc = 0.0; @@ -99,44 +128,52 @@ metrics_postcalc(struct metrics *fs) int n_data; - fs->mean = fs->sum / fs->n; + m->mean = m->sum / m->n; - sample_var = ( fs->ssq / fs->n - fs->mean * fs->mean ); + sample_var = ( m->ssq / m->n - m->mean * m->mean ); - fs->var = fs->n * sample_var / ( fs->n - 1) ; - fs->stddev = sqrt(fs->var); + m->var = m->n * sample_var / ( m->n - 1) ; + m->stddev = sqrt(m->var); /* FIXME: Check this is correct ??? Shouldn't we use the sample variance ??? */ - fs->stderr = sqrt (fs->var / fs->n) ; + m->stderr = sqrt (m->var / m->n) ; + + data = (struct weighted_value **) hsh_data(m->ordered_data); + n_data = hsh_count(m->ordered_data); - data = (struct weighted_value **) hsh_data(fs->ordered_data); - n_data = hsh_count(fs->ordered_data); + if ( n_data == 0 ) + { + m->trimmed_mean = m->mean; + return; + } - fs->wv = xmalloc ( sizeof (struct weighted_value) * n_data); + + m->wv = xmalloc(sizeof(struct weighted_value ) * n_data); for ( i = 0 ; i < n_data ; ++i ) - fs->wv[i] = *(data[i]); + m->wv[i] = *(data[i]); - sort (fs->wv, n_data, sizeof (struct weighted_value) , + sort (m->wv, n_data, sizeof (struct weighted_value) , (algo_compare_func *) compare_values, 0); - - tc = fs->n * 0.05 ; + /* Trimmed mean calculation */ + + tc = m->n * 0.05 ; k1 = -1; k2 = -1; for ( i = 0 ; i < n_data ; ++i ) { - cc += fs->wv[i].w; - fs->wv[i].cc = cc; + cc += m->wv[i].w; + m->wv[i].cc = cc; - fs->wv[i].rank = j + (fs->wv[i].w - 1) / 2.0 ; + m->wv[i].rank = j + (m->wv[i].w - 1) / 2.0 ; - j += fs->wv[i].w; + j += m->wv[i].w; if ( cc < tc ) k1 = i; @@ -146,44 +183,127 @@ metrics_postcalc(struct metrics *fs) k2 = n_data; for ( i = n_data -1 ; i >= 0; --i ) { - if ( tc > fs->n - fs->wv[i].cc) + if ( tc > m->n - m->wv[i].cc) k2 = i; } - fs->trimmed_mean = 0; + m->trimmed_mean = 0; for ( i = k1 + 2 ; i <= k2 - 1 ; ++i ) { - fs->trimmed_mean += fs->wv[i].v.f * fs->wv[i].w; + m->trimmed_mean += m->wv[i].v.f * m->wv[i].w; } - fs->trimmed_mean += (fs->n - fs->wv[k2 - 1].cc - tc) * fs->wv[k2].v.f ; - fs->trimmed_mean += (fs->wv[k1 + 1].cc - tc) * fs->wv[k1 + 1].v.f ; - fs->trimmed_mean /= 0.9 * fs->n ; + m->trimmed_mean += (m->n - m->wv[k2 - 1].cc - tc) * m->wv[k2].v.f ; + m->trimmed_mean += (m->wv[k1 + 1].cc - tc) * m->wv[k1 + 1].v.f ; + m->trimmed_mean /= 0.9 * m->n ; } -/* Functions for hashes */ +struct weighted_value * +weighted_value_create(void) +{ + struct weighted_value *wv; + wv = xmalloc (sizeof (struct weighted_value )); + + wv->cc = 0; + wv->case_nos = 0; + + return wv; +} void -free_factor_stats(struct factor_statistics *f, int width UNUSED) +weighted_value_free(struct weighted_value *wv) { - free (f); + struct case_node *cn = wv->case_nos; + + while(cn) + { + struct case_node *next = cn->next; + + free(cn); + cn = next; + } + + free(wv); + } -int -compare_indep_values(const struct factor_statistics *f1, - const struct factor_statistics *f2, - int width) + + + + +/* Create a factor statistics object with for N dependent vars + and ID as the value of the independent variable */ +struct factor_statistics * +create_factor_statistics (int n, union value *id0, union value *id1) +{ + struct factor_statistics *f; + + f = xmalloc( sizeof ( struct factor_statistics )); + + f->id[0] = *id0; + f->id[1] = *id1; + f->m = xmalloc( sizeof ( struct metrics ) * n ) ; + + return f; +} + + +void +factor_statistics_free(struct factor_statistics *f) { - return compare_values(f1->id, f2->id, width); + free(f->m) ; + + free(f); } -unsigned -hash_indep_value(const struct factor_statistics *f, int width) + + + + +int +factor_statistics_compare(const struct factor_statistics *f0, + const struct factor_statistics *f1, void *aux) +{ + + int cmp0; + + assert(f0); + assert(f1); + + cmp0 = compare_values(&f0->id[0], &f1->id[0], aux); + + if ( cmp0 != 0 ) + return cmp0; + + + if ( ( f0->id[1].f == SYSMIS ) && (f1->id[1].f != SYSMIS) ) + return 1; + + if ( ( f0->id[1].f != SYSMIS ) && (f1->id[1].f == SYSMIS) ) + return -1; + + return compare_values(&f0->id[1], &f1->id[1], aux); + +} + +unsigned int +factor_statistics_hash(const struct factor_statistics *f, void *aux) { - return hash_value(f->id, width); + + unsigned int h; + + h = hash_value(&f->id[0], aux); + + if ( f->id[1].f != SYSMIS ) + h += hash_value(&f->id[1], aux); + + + return h; + } + diff --git a/src/factor_stats.h b/src/factor_stats.h index c7f12162..cf660c6a 100644 --- a/src/factor_stats.h +++ b/src/factor_stats.h @@ -28,25 +28,11 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "hash.h" #include "val.h" -struct weighted_value -{ - union value v; - - /* The weight */ - double w; - - /* The cumulative weight */ - double cc; - - /* The rank */ - double rank; -}; - - - struct metrics { double n; + + double n_missing; double ssq; @@ -66,45 +52,88 @@ struct metrics double trimmed_mean; - /* An ordered arary of data for this factor */ + /* A hash of data for this factor */ struct hsh_table *ordered_data; - /* An SORTED array of weighted values */ + /* A SORTED array of weighted values */ struct weighted_value *wv; }; + +void metrics_precalc(struct metrics *m); + +void metrics_calc(struct metrics *m, const union value *f, double weight, + int case_no); + +void metrics_postcalc(struct metrics *m); + + +/* Linked list of case nos */ +struct case_node +{ + int num; + struct case_node *next; +}; + +struct weighted_value +{ + union value v; + + /* The weight */ + double w; + + /* The cumulative weight */ + double cc; + + /* The rank */ + double rank; + + /* Linked list of cases nos which have this value */ + struct case_node *case_nos; + +}; + + +struct weighted_value *weighted_value_create(void); + +void weighted_value_free(struct weighted_value *wv); + + + struct factor_statistics { - /* The value of the independent variable for this factor */ - const union value *id; + /* The value of the independent variable */ + union value id[2]; - /* An array of metrics indexed by dependent variable */ - struct metrics *stats; + /* The an array stats for this factor, one for each dependent var */ + struct metrics *m; }; +/* Create a factor statistics object with for N dependent vars + and ID as the value of the independent variable */ +struct factor_statistics * +create_factor_statistics (int n, union value *id0, union value *id1); -void metrics_precalc(struct metrics *fs); -void metrics_calc(struct metrics *fs, const union value *f, double weight); +void factor_statistics_free(struct factor_statistics *f); -void metrics_postcalc(struct metrics *fs); +int +factor_statistics_compare(const struct factor_statistics *f0, + const struct factor_statistics *f1, void *aux); + +unsigned int +factor_statistics_hash(const struct factor_statistics *f, void *aux); -/* These functions are necessary for creating hashes */ -int compare_indep_values(const struct factor_statistics *f1, - const struct factor_statistics *f2, - int width); -unsigned hash_indep_value(const struct factor_statistics *f, int width) ; -void free_factor_stats(struct factor_statistics *f, int width ); #endif diff --git a/src/glob.c b/src/glob.c index 457be8ac..5b35a9f1 100644 --- a/src/glob.c +++ b/src/glob.c @@ -185,7 +185,7 @@ done_glob(void) { dict_destroy(default_dict); free(logfn); - done_settings(); + /* done_settings(); */ ds_destroy (&tokstr); } diff --git a/src/hash.c b/src/hash.c index 588b311b..a7325658 100644 --- a/src/hash.c +++ b/src/hash.c @@ -175,6 +175,8 @@ hsh_clear (struct hsh_table *h) for (i = 0; i < h->size; i++) h->entries[i] = NULL; + + h->used = 0; } /* Destroys table H and all its contents. */ @@ -259,6 +261,9 @@ comparison_helper (const void *a_, const void *b_, void *h_) void *const *b = b_; struct hsh_table *h = h_; + assert(a); + assert(b); + return h->compare (*a, *b, h->aux); } diff --git a/src/value-labels.c b/src/value-labels.c index 8b1d3286..16e64968 100644 --- a/src/value-labels.c +++ b/src/value-labels.c @@ -500,7 +500,13 @@ value_to_string(const union value *val, const struct variable *var) { static char buf[100]; char *s; - const struct val_labs *val_labs = var->val_labs; + const struct val_labs *val_labs ; + + if ( !val || ! var ) + return 0; + + val_labs = var->val_labs; + s = val_labs_find (val_labs, *val); diff --git a/src/var-labs.c b/src/var-labs.c index 4c9781bf..7d78027e 100644 --- a/src/var-labs.c +++ b/src/var-labs.c @@ -74,5 +74,8 @@ cmd_variable_labels (void) const char * var_to_string(const struct variable *var) { + if ( !var ) + return 0; + return ( var->label ? var->label : var->name); } diff --git a/tests/ChangeLog b/tests/ChangeLog index 9bc159a3..7dd403f8 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,5 @@ + * command/examine.sh Added + Mon Nov 15 23:52:55 2004 Ben Pfaff * bugs/random.sh: Update expected random values to reflect the GSL diff --git a/tests/Makefile.am b/tests/Makefile.am index 2736d5f2..9786d328 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -9,6 +9,7 @@ TESTS = \ command/count.sh \ command/data-list.sh \ command/erase.sh \ + command/examine.sh \ command/file-label.sh \ command/filter.sh \ command/flip.sh \ diff --git a/tests/command/examine.sh b/tests/command/examine.sh new file mode 100755 index 00000000..53064965 --- /dev/null +++ b/tests/command/examine.sh @@ -0,0 +1,255 @@ +#!/bin/sh + +# This program tests the EXAMINE command. + +TEMPDIR=/tmp/pspp-tst-$$ + +here=`pwd`; + +# ensure that top_srcdir is absolute +cd $top_srcdir; top_srcdir=`pwd` + +export STAT_CONFIG_PATH=$top_srcdir/config + + +cleanup() +{ + rm -rf $TEMPDIR +} + + +fail() +{ + echo $activity + echo FAILED + cleanup; + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + cleanup; + exit 2; +} + +pass() +{ + cleanup; + exit 0; +} + +mkdir -p $TEMPDIR + +cd $TEMPDIR + +activity="create program" +cat > $TEMPDIR/out.stat <