X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fexamine.q;h=43ecc13037d3d6a2faf3ab0ae762a08cd78022ae;hb=d5fd364b203a2a84e5034b6ff5ac5d6c4412edb7;hp=d426ecca1e549c5a83dd4d512bfd9b73bcbf0ef9;hpb=5501903810bcbae487b12bc44d9cbedf29644d96;p=pspp-builds.git diff --git a/src/examine.q b/src/examine.q index d426ecca..43ecc130 100644 --- a/src/examine.q +++ b/src/examine.q @@ -15,8 +15,8 @@ General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. */ +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +02110-1301, USA. */ #include #include @@ -42,19 +42,26 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "casefile.h" #include "factor_stats.h" #include "moments.h" +#include "percentiles.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) msgid /* (headers) */ #include "chart.h" /* (specification) "EXAMINE" (xmn_): - *variables=custom; + *^variables=custom; +total=custom; +nototal=custom; +missing=miss:pairwise/!listwise, rep:report/!noreport, incl:include/!exclude; +compare=cmp:variables/!groups; + +percentiles=custom; + +id=var; +plot[plt_]=stemleaf,boxplot,npplot,:spreadlevel(*d:n),histogram,all,none; +cinterval=double; +statistics[st_]=descriptives,:extreme(*d:n),all,none. @@ -70,7 +77,7 @@ static struct cmd_examine cmd; static struct variable **dependent_vars; -static int n_dependent_vars; +static size_t n_dependent_vars; struct factor @@ -112,10 +119,28 @@ static void show_descriptives(struct variable **dependent_var, int n_dep_var, struct factor *factor); +static void show_percentiles(struct variable **dependent_var, + int n_dep_var, + struct factor *factor); + + + void np_plot(const struct metrics *m, const char *factorname); +void box_plot_group(const struct factor *fctr, + const struct variable **vars, int n_vars, + const struct variable *id + ) ; + + +void box_plot_variables(const struct factor *fctr, + const struct variable **vars, int n_vars, + const struct variable *id + ); + + /* Per Split function */ static void run_examine(const struct casefile *cf, void *cmd_); @@ -127,22 +152,50 @@ void factor_calc(struct ccase *c, int case_no, double weight, int case_missing); +/* Represent a factor as a string, so it can be + printed in a human readable fashion */ +const char * factor_to_string(const struct factor *fctr, + struct factor_statistics *fs, + const struct variable *var); + + +/* Represent a factor as a string, so it can be + printed in a human readable fashion, + but sacrificing some readablility for the sake of brevity */ +const char *factor_to_string_concise(const struct factor *fctr, + struct factor_statistics *fs); + + + + /* Function to use for testing for missing values */ -static is_missing_func value_is_missing; +static is_missing_func *value_is_missing; + + +/* PERCENTILES */ + +static subc_list_double percentile_list; + +static enum pc_alg percentile_algorithm; + +static short sbc_percentile; int cmd_examine(void) { + subc_list_double_create(&percentile_list); + percentile_algorithm = PC_HAVERAGE; + if ( !parse_examine(&cmd) ) return CMD_FAILURE; /* If /MISSING=INCLUDE is set, then user missing values are ignored */ if (cmd.incl == XMN_INCLUDE ) - value_is_missing = is_system_missing; + value_is_missing = mv_is_value_system_missing; else - value_is_missing = is_missing; + value_is_missing = mv_is_value_missing; if ( cmd.st_n == SYSMIS ) cmd.st_n = 5; @@ -150,10 +203,40 @@ cmd_examine(void) if ( ! cmd.sbc_cinterval) cmd.n_cinterval[0] = 95.0; + /* If descriptives have been requested, make sure the + quartiles are calculated */ + if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] ) + { + subc_list_double_push(&percentile_list, 25); + subc_list_double_push(&percentile_list, 50); + subc_list_double_push(&percentile_list, 75); + } + multipass_procedure_with_splits (run_examine, &cmd); if ( totals ) - free(totals); + { + free( totals ); + } + + if ( dependent_vars ) + free (dependent_vars); + + { + struct factor *f = factors ; + while ( f ) + { + struct factor *ff = f; + + f = f->next; + free ( ff->fs ); + hsh_destroy ( ff->fstats ) ; + free ( ff ) ; + } + factors = 0; + } + + subc_list_double_destroy(&percentile_list); return CMD_SUCCESS; }; @@ -180,6 +263,8 @@ output_examine(void) show_descriptives(dependent_vars, n_dependent_vars, 0); } + if ( sbc_percentile ) + show_percentiles(dependent_vars, n_dependent_vars, 0); if ( cmd.sbc_plot) { @@ -190,6 +275,18 @@ output_examine(void) np_plot(&totals[v], var_to_string(dependent_vars[v])); } + if ( cmd.a_plot[XMN_PLT_BOXPLOT] ) + { + if ( cmd.cmp == XMN_GROUPS ) + { + box_plot_group(0, dependent_vars, n_dependent_vars, + cmd.v_id); + } + else + box_plot_variables(0, dependent_vars, n_dependent_vars, + cmd.v_id); + } + if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) { for ( v = 0 ; v < n_dependent_vars; ++v ) @@ -208,7 +305,6 @@ output_examine(void) } - } @@ -227,45 +323,36 @@ output_examine(void) show_descriptives(dependent_vars, n_dependent_vars, fctr); } + if ( sbc_percentile ) + show_percentiles(dependent_vars, n_dependent_vars, fctr); + + if ( cmd.sbc_plot) { - int v; + size_t v; struct factor_statistics **fs = fctr->fs ; + if ( cmd.a_plot[XMN_PLT_BOXPLOT] ) + { + if ( cmd.cmp == XMN_VARIABLES ) + box_plot_variables(fctr, dependent_vars, n_dependent_vars, + cmd.v_id); + else + box_plot_group(fctr, dependent_vars, n_dependent_vars, + cmd.v_id); + } + for ( v = 0 ; v < n_dependent_vars; ++v ) { for ( fs = fctr->fs ; *fs ; ++fs ) { - char buf1[100]; - char buf2[100]; - sprintf(buf1, "%s (", - var_to_string(dependent_vars[v])); - - snprintf(buf2, 100, "%s = %s", - var_to_string(fctr->indep_var[0]), - value_to_string(&(*fs)->id[0],fctr->indep_var[0])); - - strcat(buf1, buf2); - - if ( fctr->indep_var[1] ) - { - sprintf(buf2, "; %s = %s)", - var_to_string(fctr->indep_var[1]), - value_to_string(&(*fs)->id[1], - fctr->indep_var[1])); - strcat(buf1, buf2); - } - else - { - strcat(buf1, ")"); - } + const char *s = factor_to_string(fctr, *fs, dependent_vars[v]); if ( cmd.a_plot[XMN_PLT_NPPLOT] ) - np_plot(&(*fs)->m[v],buf1); + np_plot(&(*fs)->m[v], s); - if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) { struct normal_curve normal; @@ -275,7 +362,7 @@ output_examine(void) normal.stddev = (*fs)->m[v].stddev; histogram_plot((*fs)->m[v].histogram, - buf1, &normal, 0); + s, &normal, 0); } } /* for ( fs .... */ @@ -290,6 +377,91 @@ output_examine(void) } +/* Create a hash table of percentiles and their values from the list of + percentiles */ +static struct hsh_table * +list_to_ptile_hash(const subc_list_double *l) +{ + int i; + + struct hsh_table *h ; + + h = hsh_create(subc_list_double_count(l), + (hsh_compare_func *) ptile_compare, + (hsh_hash_func *) ptile_hash, + (hsh_free_func *) free, + 0); + + + for ( i = 0 ; i < subc_list_double_count(l) ; ++i ) + { + struct percentile *p = xmalloc (sizeof *p); + + p->p = subc_list_double_at(l,i); + p->v = SYSMIS; + + hsh_insert(h, p); + + } + + return h; + +} + +/* Parse the PERCENTILES subcommand */ +static int +xmn_custom_percentiles(struct cmd_examine *p UNUSED) +{ + sbc_percentile = 1; + + lex_match('='); + + lex_match('('); + + while ( lex_is_number() ) + { + subc_list_double_push(&percentile_list,lex_number()); + + lex_get(); + + lex_match(',') ; + } + lex_match(')'); + + lex_match('='); + + if ( lex_match_id("HAVERAGE")) + percentile_algorithm = PC_HAVERAGE; + + else if ( lex_match_id("WAVERAGE")) + percentile_algorithm = PC_WAVERAGE; + + else if ( lex_match_id("ROUND")) + percentile_algorithm = PC_ROUND; + + else if ( lex_match_id("EMPIRICAL")) + percentile_algorithm = PC_EMPIRICAL; + + else if ( lex_match_id("AEMPIRICAL")) + percentile_algorithm = PC_AEMPIRICAL; + + else if ( lex_match_id("NONE")) + percentile_algorithm = PC_NONE; + + + if ( 0 == subc_list_double_count(&percentile_list)) + { + subc_list_double_push(&percentile_list, 5); + subc_list_double_push(&percentile_list, 10); + subc_list_double_push(&percentile_list, 25); + subc_list_double_push(&percentile_list, 50); + subc_list_double_push(&percentile_list, 75); + subc_list_double_push(&percentile_list, 90); + subc_list_double_push(&percentile_list, 95); + } + + return 1; +} /* TOTAL and NOTOTAL are simple, mutually exclusive flags */ static int @@ -318,16 +490,18 @@ xmn_custom_nototal(struct cmd_examine *p) -/* Parser for the variables sub command */ +/* Parser for the variables sub command + Returns 1 on success */ static int xmn_custom_variables(struct cmd_examine *cmd ) { - lex_match('='); if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) && token != T_ALL) - return 2; + { + return 2; + } if (!parse_variables (default_dict, &dependent_vars, &n_dependent_vars, PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) ) @@ -338,11 +512,17 @@ xmn_custom_variables(struct cmd_examine *cmd ) assert(n_dependent_vars); - totals = xmalloc( sizeof(struct metrics) * n_dependent_vars); + totals = xnmalloc (n_dependent_vars, sizeof *totals); if ( lex_match(T_BY)) { - return examine_parse_independent_vars(cmd); + int success ; + success = examine_parse_independent_vars(cmd); + if ( success != 1 ) { + free (dependent_vars); + free (totals) ; + } + return success; } return 1; @@ -354,12 +534,15 @@ xmn_custom_variables(struct cmd_examine *cmd ) static int examine_parse_independent_vars(struct cmd_examine *cmd) { - - struct factor *sf = xmalloc(sizeof(struct factor)); + int success; + struct factor *sf = xmalloc (sizeof *sf); if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) && token != T_ALL) - return 2; + { + free ( sf ) ; + return 2; + } sf->indep_var[0] = parse_variable(); @@ -372,7 +555,10 @@ examine_parse_independent_vars(struct cmd_examine *cmd) if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) && token != T_ALL) - return 2; + { + free ( sf ) ; + return 2; + } sf->indep_var[1] = parse_variable(); @@ -393,12 +579,20 @@ examine_parse_independent_vars(struct cmd_examine *cmd) if ( token == '.' || token == '/' ) return 1; - return examine_parse_independent_vars(cmd); + success = examine_parse_independent_vars(cmd); + + if ( success != 1 ) + free ( sf ) ; + + return success; } +void populate_percentiles(struct tab_table *tbl, int col, int row, + const struct metrics *m); + void populate_descriptives(struct tab_table *t, int col, int row, const struct metrics *fs); @@ -418,11 +612,12 @@ static int bad_weight_warn = 1; void factor_calc(struct ccase *c, int case_no, double weight, int case_missing) { - int v; + size_t v; struct factor *fctr = factors; while ( fctr) { + struct factor_statistics **foo ; union value indep_vals[2] ; indep_vals[0] = * case_data(c, fctr->indep_var[0]->fv); @@ -434,7 +629,7 @@ factor_calc(struct ccase *c, int case_no, double weight, int case_missing) assert(fctr->fstats); - struct factor_statistics **foo = ( struct factor_statistics ** ) + foo = ( struct factor_statistics ** ) hsh_probe(fctr->fstats, (void *) &indep_vals); if ( !*foo ) @@ -456,10 +651,11 @@ factor_calc(struct ccase *c, int case_no, double weight, int case_missing) const struct variable *var = dependent_vars[v]; const union value *val = case_data (c, var->fv); - if ( value_is_missing(val,var) || case_missing ) + if ( value_is_missing (&var->miss, val) || case_missing ) val = 0; - - metrics_calc( &(*foo)->m[v], val, weight, case_no ); + + metrics_calc( &(*foo)->m[v], val, weight, case_no); + } fctr = fctr->next; @@ -468,9 +664,6 @@ factor_calc(struct ccase *c, int case_no, double weight, int case_missing) } - - - static void run_examine(const struct casefile *cf, void *cmd_ ) { @@ -482,7 +675,6 @@ run_examine(const struct casefile *cf, void *cmd_ ) /* Make sure we haven't got rubbish left over from a previous split */ - struct factor *fctr = factors; while (fctr) { @@ -517,7 +709,7 @@ run_examine(const struct casefile *cf, void *cmd_ ) const struct variable *var = dependent_vars[v]; const union value *val = case_data (&c, var->fv); - if ( value_is_missing(val,var)) + if ( value_is_missing(&var->miss, val)) case_missing = 1; } @@ -528,10 +720,10 @@ run_examine(const struct casefile *cf, void *cmd_ ) const struct variable *var = dependent_vars[v]; const union value *val = case_data (&c, var->fv); - if ( value_is_missing(val,var) || case_missing ) + if ( value_is_missing(&var->miss, val) || case_missing ) val = 0; - metrics_calc(&totals[v], val, weight, case_no ); + metrics_calc(&totals[v], val, weight, case_no); } @@ -552,11 +744,17 @@ run_examine(const struct casefile *cf, void *cmd_ ) fs != 0 ; fs = hsh_next(fctr->fstats, &hi)) { + + fs->m[v].ptile_hash = list_to_ptile_hash(&percentile_list); + fs->m[v].ptile_alg = percentile_algorithm; metrics_postcalc(&fs->m[v]); } fctr = fctr->next; } + + totals[v].ptile_hash = list_to_ptile_hash(&percentile_list); + totals[v].ptile_alg = percentile_algorithm; metrics_postcalc(&totals[v]); } @@ -611,7 +809,7 @@ run_examine(const struct casefile *cf, void *cmd_ ) hsh_probe(fctr->fstats, (void *) &key ); if ( !*ffs ) { - int i; + size_t i; (*ffs) = create_factor_statistics (n_dependent_vars, &key[0], &key[1]); for ( i = 0 ; i < n_dependent_vars ; ++i ) @@ -630,8 +828,15 @@ run_examine(const struct casefile *cf, void *cmd_ ) output_examine(); - for ( v = 0 ; v < n_dependent_vars ; ++v ) - hsh_destroy(totals[v].ordered_data); + + if ( totals ) + { + size_t i; + for ( i = 0 ; i < n_dependent_vars ; ++i ) + { + metrics_destroy(&totals[i]); + } + } } @@ -663,7 +868,7 @@ show_summary(struct variable **dependent_var, int n_dep_var, n_rows = n_dep_var * n_factors ; if ( fctr->indep_var[1] ) - heading_columns = 3; + heading_columns = 3; } else { @@ -788,17 +993,17 @@ show_summary(struct variable **dependent_var, int n_dep_var, if ( 0 != compare_values(&prev, &(*fs)->id[0], fctr->indep_var[0]->width)) { - tab_text (tbl, - 1, - (i * n_factors ) + count + - heading_rows, - TAB_LEFT | TAT_TITLE, - value_to_string(&(*fs)->id[0], fctr->indep_var[0]) - ); - - if (fctr->indep_var[1] && count > 0 ) - tab_hline(tbl, TAL_1, 1, n_cols - 1, - (i * n_factors ) + count + heading_rows); + tab_text (tbl, + 1, + (i * n_factors ) + count + + heading_rows, + TAB_LEFT | TAT_TITLE, + value_to_string(&(*fs)->id[0], fctr->indep_var[0]) + ); + + if (fctr->indep_var[1] && count > 0 ) + tab_hline(tbl, TAL_1, 1, n_cols - 1, + (i * n_factors ) + count + heading_rows); } @@ -881,7 +1086,7 @@ show_extremes(struct variable **dependent_var, int n_dep_var, n_rows = n_dep_var * 2 * n_extremities * n_factors; if ( fctr->indep_var[1] ) - heading_columns = 3; + heading_columns = 3; } else { @@ -910,7 +1115,6 @@ show_extremes(struct variable **dependent_var, int n_dep_var, tab_title (tbl, 0, _("Extreme Values")); - tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows -1); tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows -1); @@ -927,9 +1131,6 @@ show_extremes(struct variable **dependent_var, int n_dep_var, tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value")); tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number")); - - - for ( i = 0 ; i < n_dep_var ; ++i ) { @@ -1062,7 +1263,7 @@ populate_extremes(struct tab_table *t, cn->num, 8, 0); if ( cn->next ) - cn = cn->next; + cn = cn->next; } @@ -1091,7 +1292,7 @@ populate_extremes(struct tab_table *t, cn->num, 8, 0); if ( cn->next ) - cn = cn->next; + cn = cn->next; } @@ -1126,7 +1327,7 @@ show_descriptives(struct variable **dependent_var, n_rows = n_dep_var * n_stat_rows * n_factors; if ( fctr->indep_var[1] ) - heading_columns = 5; + heading_columns = 5; } else { @@ -1226,7 +1427,7 @@ show_descriptives(struct variable **dependent_var, ); populate_descriptives(tbl, heading_columns - 2, - row, &(*fs)->m[i]); + row, &(*fs)->m[i]); count++ ; fs++; @@ -1250,8 +1451,6 @@ show_descriptives(struct variable **dependent_var, - - /* Fill in the descriptives data */ void populate_descriptives(struct tab_table *tbl, int col, int row, @@ -1311,8 +1510,8 @@ populate_descriptives(struct tab_table *tbl, int col, int row, tab_text (tbl, col, row + 3, - TAB_LEFT | TAT_TITLE, - _("5% Trimmed Mean")); + TAB_LEFT | TAT_TITLE | TAT_PRINTF, + _("5%% Trimmed Mean")); tab_float (tbl, col + 2, row + 3, @@ -1325,6 +1524,23 @@ populate_descriptives(struct tab_table *tbl, int col, int row, TAB_LEFT | TAT_TITLE, _("Median")); + { + struct percentile *p; + double d = 50; + + p = hsh_find(m->ptile_hash, &d); + + assert(p); + + + tab_float (tbl, col + 2, + row + 4, + TAB_CENTER, + p->v, + 8, 2); + } + + tab_text (tbl, col, row + 5, TAB_LEFT | TAT_TITLE, @@ -1390,6 +1606,28 @@ populate_descriptives(struct tab_table *tbl, int col, int row, TAB_LEFT | TAT_TITLE, _("Interquartile Range")); + { + struct percentile *p1; + struct percentile *p2; + + double d = 75; + p1 = hsh_find(m->ptile_hash, &d); + + d = 25; + p2 = hsh_find(m->ptile_hash, &d); + + assert(p1); + assert(p2); + + tab_float (tbl, col + 2, + row + 10, + TAB_CENTER, + p1->v - p2->v, + 8, 2); + } + + + tab_text (tbl, col, row + 11, TAB_LEFT | TAT_TITLE, @@ -1433,6 +1671,130 @@ populate_descriptives(struct tab_table *tbl, int col, int row, } + +void +box_plot_variables(const struct factor *fctr, + const struct variable **vars, int n_vars, + const struct variable *id) +{ + + int i; + struct factor_statistics **fs ; + + if ( ! fctr ) + { + box_plot_group(fctr, vars, n_vars, id); + return; + } + + for ( fs = fctr->fs ; *fs ; ++fs ) + { + double y_min = DBL_MAX; + double y_max = -DBL_MAX; + struct chart *ch = chart_create(); + const char *s = factor_to_string(fctr, *fs, 0 ); + + chart_write_title(ch, s); + + for ( i = 0 ; i < n_vars ; ++i ) + { + y_max = max(y_max, (*fs)->m[i].max); + y_min = min(y_min, (*fs)->m[i].min); + } + + boxplot_draw_yscale(ch, y_max, y_min); + + for ( i = 0 ; i < n_vars ; ++i ) + { + + const double box_width = (ch->data_right - ch->data_left) + / (n_vars * 2.0 ) ; + + const double box_centre = ( i * 2 + 1) * box_width + + ch->data_left; + + boxplot_draw_boxplot(ch, + box_centre, box_width, + &(*fs)->m[i], + var_to_string(vars[i])); + + + } + + chart_submit(ch); + + } +} + + + +/* Do a box plot, grouping all factors into one plot ; + each dependent variable has its own plot. +*/ +void +box_plot_group(const struct factor *fctr, + const struct variable **vars, + int n_vars, + const struct variable *id UNUSED) +{ + + int i; + + for ( i = 0 ; i < n_vars ; ++i ) + { + struct factor_statistics **fs ; + struct chart *ch; + + ch = chart_create(); + + boxplot_draw_yscale(ch, totals[i].max, totals[i].min); + + if ( fctr ) + { + int n_factors = 0; + int f=0; + for ( fs = fctr->fs ; *fs ; ++fs ) + ++n_factors; + + chart_write_title(ch, _("Boxplot of %s vs. %s"), + var_to_string(vars[i]), var_to_string(fctr->indep_var[0]) ); + + for ( fs = fctr->fs ; *fs ; ++fs ) + { + + const char *s = factor_to_string_concise(fctr, *fs); + + const double box_width = (ch->data_right - ch->data_left) + / (n_factors * 2.0 ) ; + + const double box_centre = ( f++ * 2 + 1) * box_width + + ch->data_left; + + boxplot_draw_boxplot(ch, + box_centre, box_width, + &(*fs)->m[i], + s); + } + } + else if ( ch ) + { + const double box_width = (ch->data_right - ch->data_left) / 3.0; + const double box_centre = (ch->data_right + ch->data_left) / 2.0; + + chart_write_title(ch, _("Boxplot")); + + boxplot_draw_boxplot(ch, + box_centre, box_width, + &totals[i], + var_to_string(vars[i]) ); + + } + + chart_submit(ch); + } +} + + /* Plot the normal and detrended normal plots for m Label the plots with factorname */ void @@ -1442,10 +1804,10 @@ np_plot(const struct metrics *m, const char *factorname) double yfirst=0, ylast=0; /* Normal Plot */ - struct chart np_chart; + struct chart *np_chart; /* Detrended Normal Plot */ - struct chart dnp_chart; + struct chart *dnp_chart; /* The slope and intercept of the ideal normal probability line */ const double slope = 1.0 / m->stddev; @@ -1455,16 +1817,21 @@ np_plot(const struct metrics *m, const char *factorname) if ( m->n_data == 0 ) return ; - chart_initialise(&np_chart); - chart_write_title(&np_chart, _("Normal Q-Q Plot of %s"), factorname); - chart_write_xlabel(&np_chart, _("Observed Value")); - chart_write_ylabel(&np_chart, _("Expected Normal")); + np_chart = chart_create(); + dnp_chart = chart_create(); + + if ( !np_chart || ! dnp_chart ) + return ; - chart_initialise(&dnp_chart); - chart_write_title(&dnp_chart, _("Detrended Normal Q-Q Plot of %s"), + chart_write_title(np_chart, _("Normal Q-Q Plot of %s"), factorname); + chart_write_xlabel(np_chart, _("Observed Value")); + chart_write_ylabel(np_chart, _("Expected Normal")); + + + chart_write_title(dnp_chart, _("Detrended Normal Q-Q Plot of %s"), factorname); - chart_write_xlabel(&dnp_chart, _("Observed Value")); - chart_write_ylabel(&dnp_chart, _("Dev from Normal")); + chart_write_xlabel(dnp_chart, _("Observed Value")); + chart_write_ylabel(dnp_chart, _("Dev from Normal")); yfirst = gsl_cdf_ugaussian_Pinv (m->wvp[0]->rank / ( m->n + 1)); ylast = gsl_cdf_ugaussian_Pinv (m->wvp[m->n_data-1]->rank / ( m->n + 1)); @@ -1477,44 +1844,359 @@ np_plot(const struct metrics *m, const char *factorname) double x_upper = max(m->max, (ylast - intercept) / slope) ; double slack = (x_upper - x_lower) * 0.05 ; - chart_write_xscale(&np_chart, x_lower - slack, x_upper + slack, 5); + chart_write_xscale(np_chart, x_lower - slack, x_upper + slack, 5); - chart_write_xscale(&dnp_chart, m->min, m->max, 5); + chart_write_xscale(dnp_chart, m->min, m->max, 5); } - chart_write_yscale(&np_chart, yfirst, ylast, 5); + chart_write_yscale(np_chart, yfirst, ylast, 5); { - /* We have to cache the detrended data, beacause we need to - find its limits before we can plot it */ - double *d_data; - d_data = xmalloc (m->n_data * sizeof(double)); - double d_max = -DBL_MAX; - double d_min = DBL_MAX; - for ( i = 0 ; i < m->n_data; ++i ) + /* We have to cache the detrended data, beacause we need to + find its limits before we can plot it */ + double *d_data = xnmalloc (m->n_data, sizeof *d_data); + double d_max = -DBL_MAX; + double d_min = DBL_MAX; + for ( i = 0 ; i < m->n_data; ++i ) + { + const double ns = gsl_cdf_ugaussian_Pinv (m->wvp[i]->rank / ( m->n + 1)); + + chart_datum(np_chart, 0, m->wvp[i]->v.f, ns); + + d_data[i] = (m->wvp[i]->v.f - m->mean) / m->stddev - ns; + + if ( d_data[i] < d_min ) d_min = d_data[i]; + if ( d_data[i] > d_max ) d_max = d_data[i]; + } + chart_write_yscale(dnp_chart, d_min, d_max, 5); + + for ( i = 0 ; i < m->n_data; ++i ) + chart_datum(dnp_chart, 0, m->wvp[i]->v.f, d_data[i]); + + free(d_data); + } + + chart_line(np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y); + chart_line(dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X); + + chart_submit(np_chart); + chart_submit(dnp_chart); +} + + + + +/* Show the percentiles */ +void +show_percentiles(struct variable **dependent_var, + int n_dep_var, + struct factor *fctr) +{ + struct tab_table *tbl; + int i; + + int n_cols, n_rows; + int n_factors; + + struct hsh_table *ptiles ; + + int n_heading_columns; + const int n_heading_rows = 2; + const int n_stat_rows = 2; + + int n_ptiles ; + + if ( fctr ) { - const double ns = gsl_cdf_ugaussian_Pinv (m->wvp[i]->rank / ( m->n + 1)); + struct factor_statistics **fs = fctr->fs ; + n_heading_columns = 3; + n_factors = hsh_count(fctr->fstats); - chart_datum(&np_chart, 0, m->wvp[i]->v.f, ns); + ptiles = (*fs)->m[0].ptile_hash; - d_data[i] = (m->wvp[i]->v.f - m->mean) / m->stddev - ns; - - if ( d_data[i] < d_min ) d_min = d_data[i]; - if ( d_data[i] > d_max ) d_max = d_data[i]; + if ( fctr->indep_var[1] ) + n_heading_columns = 4; } - chart_write_yscale(&dnp_chart, d_min, d_max, 5); + else + { + n_factors = 1; + n_heading_columns = 2; + + ptiles = totals[0].ptile_hash; + } + + n_ptiles = hsh_count(ptiles); + + n_rows = n_heading_rows + n_dep_var * n_stat_rows * n_factors; - for ( i = 0 ; i < m->n_data; ++i ) - chart_datum(&dnp_chart, 0, m->wvp[i]->v.f, d_data[i]); + n_cols = n_heading_columns + n_ptiles ; + + tbl = tab_create (n_cols, n_rows, 0); + + tab_headers (tbl, n_heading_columns + 1, 0, n_heading_rows, 0); + + tab_dim (tbl, tab_natural_dimensions); + + /* Outline the box and have no internal lines*/ + tab_box (tbl, + TAL_2, TAL_2, + -1, -1, + 0, 0, + n_cols - 1, n_rows - 1); + + tab_hline (tbl, TAL_2, 0, n_cols - 1, n_heading_rows ); + + tab_vline (tbl, TAL_2, n_heading_columns, 0, n_rows - 1); + + + tab_title (tbl, 0, _("Percentiles")); + + + tab_hline (tbl, TAL_1, n_heading_columns, n_cols - 1, 1 ); + + + tab_box (tbl, + -1, -1, + -1, TAL_1, + 0, n_heading_rows, + n_heading_columns - 1, n_rows - 1); + + + tab_box (tbl, + -1, -1, + -1, TAL_1, + n_heading_columns, n_heading_rows - 1, + n_cols - 1, n_rows - 1); + + tab_joint_text(tbl, n_heading_columns + 1, 0, + n_cols - 1 , 0, + TAB_CENTER | TAT_TITLE , + _("Percentiles")); + + + { + /* Put in the percentile break points as headings */ + + struct percentile **p = (struct percentile **) hsh_sort(ptiles); + + i = 0; + while ( (*p) ) + { + tab_float(tbl, n_heading_columns + i++ , 1, + TAB_CENTER, + (*p)->p, 8, 0); + + p++; + } - free(d_data); } - chart_line(&np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y); - chart_line(&dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X); + for ( i = 0 ; i < n_dep_var ; ++i ) + { + const int n_stat_rows = 2; + const int row = n_heading_rows + i * n_stat_rows * n_factors ; + + if ( i > 0 ) + tab_hline(tbl, TAL_1, 0, n_cols - 1, row ); + + tab_text (tbl, 0, + i * n_stat_rows * n_factors + n_heading_rows, + TAB_LEFT | TAT_TITLE, + var_to_string(dependent_var[i]) + ); + + if ( fctr ) + { + struct factor_statistics **fs = fctr->fs; + int count = 0; + + tab_text (tbl, 1, n_heading_rows - 1, + TAB_CENTER | TAT_TITLE, + var_to_string(fctr->indep_var[0])); + + + if ( fctr->indep_var[1]) + tab_text (tbl, 2, n_heading_rows - 1, TAB_CENTER | TAT_TITLE, + var_to_string(fctr->indep_var[1])); + + while( *fs ) + { + + static union value prev ; + + const int row = n_heading_rows + n_stat_rows * + ( ( i * n_factors ) + count ); + + + if ( 0 != compare_values(&prev, &(*fs)->id[0], + fctr->indep_var[0]->width)) + { + + if ( count > 0 ) + tab_hline (tbl, TAL_1, 1, n_cols - 1, row); + + tab_text (tbl, + 1, row, + TAB_LEFT | TAT_TITLE, + value_to_string(&(*fs)->id[0], fctr->indep_var[0]) + ); + + + } + + prev = (*fs)->id[0]; + + if (fctr->indep_var[1] && count > 0 ) + tab_hline(tbl, TAL_1, 2, n_cols - 1, row); + + if ( fctr->indep_var[1]) + tab_text (tbl, 2, row, + TAB_LEFT | TAT_TITLE, + value_to_string(&(*fs)->id[1], fctr->indep_var[1]) + ); + + + populate_percentiles(tbl, n_heading_columns - 1, + row, &(*fs)->m[i]); + + + count++ ; + fs++; + } + + + } + else + { + populate_percentiles(tbl, n_heading_columns - 1, + i * n_stat_rows * n_factors + n_heading_rows, + &totals[i]); + } + + + } + + + tab_submit(tbl); + + +} + + + + +void +populate_percentiles(struct tab_table *tbl, int col, int row, + const struct metrics *m) +{ + int i; + + struct percentile **p = (struct percentile **) hsh_sort(m->ptile_hash); + + tab_text (tbl, + col, row + 1, + TAB_LEFT | TAT_TITLE, + _("Tukey\'s Hinges") + ); + + tab_text (tbl, + col, row, + TAB_LEFT | TAT_TITLE, + ptile_alg_desc[m->ptile_alg] + ); + + + i = 0; + while ( (*p) ) + { + tab_float(tbl, col + i + 1 , row, + TAB_CENTER, + (*p)->v, 8, 2); + if ( (*p)->p == 25 ) + tab_float(tbl, col + i + 1 , row + 1, + TAB_CENTER, + m->hinge[0], 8, 2); + + if ( (*p)->p == 50 ) + tab_float(tbl, col + i + 1 , row + 1, + TAB_CENTER, + m->hinge[1], 8, 2); + + if ( (*p)->p == 75 ) + tab_float(tbl, col + i + 1 , row + 1, + TAB_CENTER, + m->hinge[2], 8, 2); + + + i++; + + p++; + } + +} + + + +const char * +factor_to_string(const struct factor *fctr, + struct factor_statistics *fs, + const struct variable *var) +{ + + static char buf1[100]; + char buf2[100]; + + strcpy(buf1,""); + + if (var) + sprintf(buf1, "%s (",var_to_string(var) ); + + + snprintf(buf2, 100, "%s = %s", + var_to_string(fctr->indep_var[0]), + value_to_string(&fs->id[0],fctr->indep_var[0])); + + strcat(buf1, buf2); + + if ( fctr->indep_var[1] ) + { + sprintf(buf2, "; %s = %s)", + var_to_string(fctr->indep_var[1]), + value_to_string(&fs->id[1], + fctr->indep_var[1])); + strcat(buf1, buf2); + } + else + { + if ( var ) + strcat(buf1, ")"); + } + + return buf1; +} + + + +const char * +factor_to_string_concise(const struct factor *fctr, + struct factor_statistics *fs) + +{ + + static char buf[100]; + + char buf2[100]; + + snprintf(buf, 100, "%s", + value_to_string(&fs->id[0], fctr->indep_var[0])); + + if ( fctr->indep_var[1] ) + { + sprintf(buf2, ",%s)", value_to_string(&fs->id[1], fctr->indep_var[1]) ); + strcat(buf, buf2); + } - chart_finalise(&np_chart); - chart_finalise(&dnp_chart); + return buf; }