X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fexamine.q;h=e35c7049c800fc3698db43168ecc50d80afcd50a;hb=1d985886f778e35f8d89c4e3c897b79fde8de6ed;hp=6859146f5d25b246782060da9d4a7700461a982f;hpb=e78e2dcbb0afa6ec060ddd00ffe3231c1aac3907;p=pspp-builds.git diff --git a/src/examine.q b/src/examine.q index 6859146f..e35c7049 100644 --- a/src/examine.q +++ b/src/examine.q @@ -42,19 +42,22 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "casefile.h" #include "factor_stats.h" #include "moments.h" +#include "percentiles.h" /* (headers) */ #include "chart.h" /* (specification) "EXAMINE" (xmn_): - *variables=custom; + *^variables=custom; +total=custom; +nototal=custom; +missing=miss:pairwise/!listwise, rep:report/!noreport, incl:include/!exclude; +compare=cmp:variables/!groups; + +percentiles=custom; + +id=var; +plot[plt_]=stemleaf,boxplot,npplot,:spreadlevel(*d:n),histogram,all,none; +cinterval=double; +statistics[st_]=descriptives,:extreme(*d:n),all,none. @@ -94,46 +97,6 @@ static struct factor *factors=0; static struct metrics *totals=0; -void -print_factors(void) -{ - struct factor *f = factors; - - while (f) - { - struct factor_statistics **fs = f->fs; - - printf("Factor: %s BY %s\n", - var_to_string(f->indep_var[0]), - var_to_string(f->indep_var[1]) ); - - - printf("Contains %d entries\n", hsh_count(f->fstats)); - - - while (*fs) - { - printf("Factor %g; %g\n", (*fs)->id[0].f, (*fs)->id[1].f); - - /* - printf("Factor %s; %s\n", - value_to_string(&(*fs)->id[0], f->indep_var[0]), - value_to_string(&(*fs)->id[1], f->indep_var[1])); - */ - - - printf("Mean is %g\n",(*fs)->m[0].mean); - - fs++ ; - } - - f = f->next; - } - - -} - - /* Parse the clause specifying the factors */ static int examine_parse_independent_vars(struct cmd_examine *cmd); @@ -152,10 +115,27 @@ static void show_descriptives(struct variable **dependent_var, int n_dep_var, struct factor *factor); +static void show_percentiles(struct variable **dependent_var, + int n_dep_var, + struct factor *factor); + + + void np_plot(const struct metrics *m, const char *factorname); +void box_plot_group(const struct factor *fctr, + const struct variable **vars, int n_vars, + const struct variable *id + ) ; + + +void box_plot_variables(const struct factor *fctr, + const struct variable **vars, int n_vars, + const struct variable *id + ); + /* Per Split function */ @@ -168,14 +148,42 @@ void factor_calc(struct ccase *c, int case_no, double weight, int case_missing); +/* Represent a factor as a string, so it can be + printed in a human readable fashion */ +const char * factor_to_string(const struct factor *fctr, + struct factor_statistics *fs, + const struct variable *var); + + +/* Represent a factor as a string, so it can be + printed in a human readable fashion, + but sacrificing some readablility for the sake of brevity */ +const char *factor_to_string_concise(const struct factor *fctr, + struct factor_statistics *fs); + + + + /* Function to use for testing for missing values */ static is_missing_func value_is_missing; +/* PERCENTILES */ + +static subc_list_double percentile_list; + +static enum pc_alg percentile_algorithm; + +static short sbc_percentile; + + int cmd_examine(void) { + subc_list_double_create(&percentile_list); + percentile_algorithm = PC_HAVERAGE; + if ( !parse_examine(&cmd) ) return CMD_FAILURE; @@ -191,10 +199,39 @@ cmd_examine(void) if ( ! cmd.sbc_cinterval) cmd.n_cinterval[0] = 95.0; + /* If descriptives have been requested, make sure the + quartiles are calculated */ + if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] ) + { + subc_list_double_push(&percentile_list, 25); + subc_list_double_push(&percentile_list, 50); + subc_list_double_push(&percentile_list, 75); + } + multipass_procedure_with_splits (run_examine, &cmd); if ( totals ) - free(totals); + { + free( totals ); + } + + if ( dependent_vars ) + free (dependent_vars); + + { + struct factor *f = factors ; + while ( f ) + { + struct factor *ff = f; + + f = f->next; + free ( ff->fs ); + hsh_destroy ( ff->fstats ) ; + free ( ff ) ; + } + } + + subc_list_double_destroy(&percentile_list); return CMD_SUCCESS; }; @@ -221,18 +258,47 @@ output_examine(void) show_descriptives(dependent_vars, n_dependent_vars, 0); } + if ( sbc_percentile ) + show_percentiles(dependent_vars, n_dependent_vars, 0); if ( cmd.sbc_plot) { + int v; if ( cmd.a_plot[XMN_PLT_NPPLOT] ) { - int v; + for ( v = 0 ; v < n_dependent_vars; ++v ) + np_plot(&totals[v], var_to_string(dependent_vars[v])); + } + if ( cmd.a_plot[XMN_PLT_BOXPLOT] ) + { + if ( cmd.cmp == XMN_GROUPS ) + { + box_plot_group(0, dependent_vars, n_dependent_vars, + cmd.v_id); + } + else + box_plot_variables(0, dependent_vars, n_dependent_vars, + cmd.v_id); + } + + if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) + { for ( v = 0 ; v < n_dependent_vars; ++v ) - np_plot(&totals[v], var_to_string(dependent_vars[v])); + { + struct normal_curve normal; + + normal.N = totals[v].n; + normal.mean = totals[v].mean; + normal.stddev = totals[v].stddev; + + histogram_plot(totals[v].histogram, + var_to_string(dependent_vars[v]), + &normal, 0); + } } - } + } } @@ -252,49 +318,52 @@ output_examine(void) show_descriptives(dependent_vars, n_dependent_vars, fctr); } + if ( sbc_percentile ) + show_percentiles(dependent_vars, n_dependent_vars, fctr); + + if ( cmd.sbc_plot) { - if ( cmd.a_plot[XMN_PLT_NPPLOT] ) + int v; + + struct factor_statistics **fs = fctr->fs ; + + if ( cmd.a_plot[XMN_PLT_BOXPLOT] ) { - int v; - for ( v = 0 ; v < n_dependent_vars; ++ v) + if ( cmd.cmp == XMN_VARIABLES ) + box_plot_variables(fctr, dependent_vars, n_dependent_vars, + cmd.v_id); + else + box_plot_group(fctr, dependent_vars, n_dependent_vars, + cmd.v_id); + } + + for ( v = 0 ; v < n_dependent_vars; ++v ) + { + + for ( fs = fctr->fs ; *fs ; ++fs ) { - - struct factor_statistics **fs = fctr->fs ; - for ( fs = fctr->fs ; *fs ; ++fs ) - { - char buf1[100]; - char buf2[100]; - sprintf(buf1, "%s (", - var_to_string(dependent_vars[v])); - - sprintf(buf2, "%s = %s", - var_to_string(fctr->indep_var[0]), - value_to_string(&(*fs)->id[0],fctr->indep_var[0])); - - strcat(buf1, buf2); + const char *s = factor_to_string(fctr, *fs, dependent_vars[v]); - - if ( fctr->indep_var[1] ) - { - sprintf(buf2, "; %s = %s)", - var_to_string(fctr->indep_var[1]), - value_to_string(&(*fs)->id[1], - fctr->indep_var[1])); - strcat(buf1, buf2); - } - else - { - strcat(buf1, ")"); - } - - np_plot(&(*fs)->m[v],buf1); + if ( cmd.a_plot[XMN_PLT_NPPLOT] ) + np_plot(&(*fs)->m[v], s); + + if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) + { + struct normal_curve normal; + normal.N = (*fs)->m[v].n; + normal.mean = (*fs)->m[v].mean; + normal.stddev = (*fs)->m[v].stddev; + + histogram_plot((*fs)->m[v].histogram, + s, &normal, 0); } - } + } /* for ( fs .... */ + + } /* for ( v = 0 ..... */ - } } fctr = fctr->next; @@ -303,6 +372,91 @@ output_examine(void) } +/* Create a hash table of percentiles and their values from the list of + percentiles */ +static struct hsh_table * +list_to_ptile_hash(const subc_list_double *l) +{ + int i; + + struct hsh_table *h ; + + h = hsh_create(subc_list_double_count(l), + (hsh_compare_func *) ptile_compare, + (hsh_hash_func *) ptile_hash, + (hsh_free_func *) free, + 0); + + + for ( i = 0 ; i < subc_list_double_count(l) ; ++i ) + { + struct percentile *p = xmalloc (sizeof (struct percentile)); + + p->p = subc_list_double_at(l,i); + p->v = SYSMIS; + + hsh_insert(h, p); + + } + + return h; + +} + +/* Parse the PERCENTILES subcommand */ +static int +xmn_custom_percentiles(struct cmd_examine *p UNUSED) +{ + sbc_percentile = 1; + + lex_match('='); + + lex_match('('); + + while ( lex_is_number() ) + { + subc_list_double_push(&percentile_list,lex_number()); + + lex_get(); + + lex_match(',') ; + } + lex_match(')'); + + lex_match('='); + + if ( lex_match_id("HAVERAGE")) + percentile_algorithm = PC_HAVERAGE; + + else if ( lex_match_id("WAVERAGE")) + percentile_algorithm = PC_WAVERAGE; + + else if ( lex_match_id("ROUND")) + percentile_algorithm = PC_ROUND; + + else if ( lex_match_id("EMPIRICAL")) + percentile_algorithm = PC_EMPIRICAL; + + else if ( lex_match_id("AEMPIRICAL")) + percentile_algorithm = PC_AEMPIRICAL; + + else if ( lex_match_id("NONE")) + percentile_algorithm = PC_NONE; + + + if ( 0 == subc_list_double_count(&percentile_list)) + { + subc_list_double_push(&percentile_list, 5); + subc_list_double_push(&percentile_list, 10); + subc_list_double_push(&percentile_list, 25); + subc_list_double_push(&percentile_list, 50); + subc_list_double_push(&percentile_list, 75); + subc_list_double_push(&percentile_list, 90); + subc_list_double_push(&percentile_list, 95); + } + + return 1; +} /* TOTAL and NOTOTAL are simple, mutually exclusive flags */ static int @@ -331,16 +485,18 @@ xmn_custom_nototal(struct cmd_examine *p) -/* Parser for the variables sub command */ +/* Parser for the variables sub command + Returns 1 on success */ static int xmn_custom_variables(struct cmd_examine *cmd ) { - lex_match('='); if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) && token != T_ALL) - return 2; + { + return 2; + } if (!parse_variables (default_dict, &dependent_vars, &n_dependent_vars, PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) ) @@ -355,7 +511,13 @@ xmn_custom_variables(struct cmd_examine *cmd ) if ( lex_match(T_BY)) { - return examine_parse_independent_vars(cmd); + int success ; + success = examine_parse_independent_vars(cmd); + if ( success != 1 ) { + free (dependent_vars); + free (totals) ; + } + return success; } return 1; @@ -367,12 +529,15 @@ xmn_custom_variables(struct cmd_examine *cmd ) static int examine_parse_independent_vars(struct cmd_examine *cmd) { - + int success; struct factor *sf = xmalloc(sizeof(struct factor)); if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) && token != T_ALL) - return 2; + { + free ( sf ) ; + return 2; + } sf->indep_var[0] = parse_variable(); @@ -385,7 +550,10 @@ examine_parse_independent_vars(struct cmd_examine *cmd) if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) && token != T_ALL) - return 2; + { + free ( sf ) ; + return 2; + } sf->indep_var[1] = parse_variable(); @@ -406,12 +574,20 @@ examine_parse_independent_vars(struct cmd_examine *cmd) if ( token == '.' || token == '/' ) return 1; - return examine_parse_independent_vars(cmd); + success = examine_parse_independent_vars(cmd); + + if ( success != 1 ) + free ( sf ) ; + + return success; } +void populate_percentiles(struct tab_table *tbl, int col, int row, + const struct metrics *m); + void populate_descriptives(struct tab_table *t, int col, int row, const struct metrics *fs); @@ -471,8 +647,9 @@ factor_calc(struct ccase *c, int case_no, double weight, int case_missing) if ( value_is_missing(val,var) || case_missing ) val = 0; - - metrics_calc( &(*foo)->m[v], val, weight, case_no ); + + metrics_calc( &(*foo)->m[v], val, weight, case_no); + } fctr = fctr->next; @@ -544,7 +721,7 @@ run_examine(const struct casefile *cf, void *cmd_ ) if ( value_is_missing(val,var) || case_missing ) val = 0; - metrics_calc(&totals[v], val, weight, case_no ); + metrics_calc(&totals[v], val, weight, case_no); } @@ -565,11 +742,17 @@ run_examine(const struct casefile *cf, void *cmd_ ) fs != 0 ; fs = hsh_next(fctr->fstats, &hi)) { + + fs->m[v].ptile_hash = list_to_ptile_hash(&percentile_list); + fs->m[v].ptile_alg = percentile_algorithm; metrics_postcalc(&fs->m[v]); } fctr = fctr->next; } + + totals[v].ptile_hash = list_to_ptile_hash(&percentile_list); + totals[v].ptile_alg = percentile_algorithm; metrics_postcalc(&totals[v]); } @@ -643,8 +826,15 @@ run_examine(const struct casefile *cf, void *cmd_ ) output_examine(); - for ( v = 0 ; v < n_dependent_vars ; ++v ) - hsh_destroy(totals[v].ordered_data); + + if ( totals ) + { + int i; + for ( i = 0 ; i < n_dependent_vars ; ++i ) + { + metrics_destroy(&totals[i]); + } + } } @@ -676,7 +866,7 @@ show_summary(struct variable **dependent_var, int n_dep_var, n_rows = n_dep_var * n_factors ; if ( fctr->indep_var[1] ) - heading_columns = 3; + heading_columns = 3; } else { @@ -801,17 +991,17 @@ show_summary(struct variable **dependent_var, int n_dep_var, if ( 0 != compare_values(&prev, &(*fs)->id[0], fctr->indep_var[0]->width)) { - tab_text (tbl, - 1, - (i * n_factors ) + count + - heading_rows, - TAB_LEFT | TAT_TITLE, - value_to_string(&(*fs)->id[0], fctr->indep_var[0]) - ); - - if (fctr->indep_var[1] && count > 0 ) - tab_hline(tbl, TAL_1, 1, n_cols - 1, - (i * n_factors ) + count + heading_rows); + tab_text (tbl, + 1, + (i * n_factors ) + count + + heading_rows, + TAB_LEFT | TAT_TITLE, + value_to_string(&(*fs)->id[0], fctr->indep_var[0]) + ); + + if (fctr->indep_var[1] && count > 0 ) + tab_hline(tbl, TAL_1, 1, n_cols - 1, + (i * n_factors ) + count + heading_rows); } @@ -894,7 +1084,7 @@ show_extremes(struct variable **dependent_var, int n_dep_var, n_rows = n_dep_var * 2 * n_extremities * n_factors; if ( fctr->indep_var[1] ) - heading_columns = 3; + heading_columns = 3; } else { @@ -923,7 +1113,6 @@ show_extremes(struct variable **dependent_var, int n_dep_var, tab_title (tbl, 0, _("Extreme Values")); - tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows -1); tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows -1); @@ -940,9 +1129,6 @@ show_extremes(struct variable **dependent_var, int n_dep_var, tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value")); tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number")); - - - for ( i = 0 ; i < n_dep_var ; ++i ) { @@ -1075,7 +1261,7 @@ populate_extremes(struct tab_table *t, cn->num, 8, 0); if ( cn->next ) - cn = cn->next; + cn = cn->next; } @@ -1104,7 +1290,7 @@ populate_extremes(struct tab_table *t, cn->num, 8, 0); if ( cn->next ) - cn = cn->next; + cn = cn->next; } @@ -1139,7 +1325,7 @@ show_descriptives(struct variable **dependent_var, n_rows = n_dep_var * n_stat_rows * n_factors; if ( fctr->indep_var[1] ) - heading_columns = 5; + heading_columns = 5; } else { @@ -1239,7 +1425,7 @@ show_descriptives(struct variable **dependent_var, ); populate_descriptives(tbl, heading_columns - 2, - row, &(*fs)->m[i]); + row, &(*fs)->m[i]); count++ ; fs++; @@ -1263,8 +1449,6 @@ show_descriptives(struct variable **dependent_var, - - /* Fill in the descriptives data */ void populate_descriptives(struct tab_table *tbl, int col, int row, @@ -1338,6 +1522,23 @@ populate_descriptives(struct tab_table *tbl, int col, int row, TAB_LEFT | TAT_TITLE, _("Median")); + { + struct percentile *p; + double d = 50; + + p = hsh_find(m->ptile_hash, &d); + + assert(p); + + + tab_float (tbl, col + 2, + row + 4, + TAB_CENTER, + p->v, + 8, 2); + } + + tab_text (tbl, col, row + 5, TAB_LEFT | TAT_TITLE, @@ -1403,6 +1604,28 @@ populate_descriptives(struct tab_table *tbl, int col, int row, TAB_LEFT | TAT_TITLE, _("Interquartile Range")); + { + struct percentile *p1; + struct percentile *p2; + + double d = 75; + p1 = hsh_find(m->ptile_hash, &d); + + d = 25; + p2 = hsh_find(m->ptile_hash, &d); + + assert(p1); + assert(p2); + + tab_float (tbl, col + 2, + row + 10, + TAB_CENTER, + p1->v - p2->v, + 8, 2); + } + + + tab_text (tbl, col, row + 11, TAB_LEFT | TAT_TITLE, @@ -1447,6 +1670,130 @@ populate_descriptives(struct tab_table *tbl, int col, int row, +void +box_plot_variables(const struct factor *fctr, + const struct variable **vars, int n_vars, + const struct variable *id) +{ + + int i; + struct factor_statistics **fs ; + + if ( ! fctr ) + { + box_plot_group(fctr, vars, n_vars, id); + return; + } + + for ( fs = fctr->fs ; *fs ; ++fs ) + { + double y_min = DBL_MAX; + double y_max = -DBL_MAX; + struct chart *ch; + + ch = chart_create(); + + const char *s = factor_to_string(fctr, *fs, 0 ); + + chart_write_title(ch, s); + + for ( i = 0 ; i < n_vars ; ++i ) + { + y_max = max(y_max, (*fs)->m[i].max); + y_min = min(y_min, (*fs)->m[i].min); + } + + boxplot_draw_yscale(ch, y_max, y_min); + + for ( i = 0 ; i < n_vars ; ++i ) + { + + const double box_width = (ch->data_right - ch->data_left) + / (n_vars * 2.0 ) ; + + const double box_centre = ( i * 2 + 1) * box_width + + ch->data_left; + + boxplot_draw_boxplot(ch, + box_centre, box_width, + &(*fs)->m[i], + var_to_string(vars[i])); + + + } + + chart_submit(ch); + + } +} + + + +/* Do a box plot, grouping all factors into one plot ; + each dependent variable has its own plot. +*/ +void +box_plot_group(const struct factor *fctr, + const struct variable **vars, + int n_vars, + const struct variable *id UNUSED) +{ + + int i; + + for ( i = 0 ; i < n_vars ; ++i ) + { + struct factor_statistics **fs ; + struct chart *ch; + + ch = chart_create(); + + boxplot_draw_yscale(ch, totals[i].max, totals[i].min); + + if ( fctr ) + { + int n_factors = 0; + int f=0; + for ( fs = fctr->fs ; *fs ; ++fs ) + ++n_factors; + + chart_write_title(ch, _("Boxplot of %s vs. %s"), + var_to_string(vars[i]), var_to_string(fctr->indep_var[0]) ); + + for ( fs = fctr->fs ; *fs ; ++fs ) + { + + const char *s = factor_to_string_concise(fctr, *fs); + + const double box_width = (ch->data_right - ch->data_left) + / (n_factors * 2.0 ) ; + + const double box_centre = ( f++ * 2 + 1) * box_width + + ch->data_left; + + boxplot_draw_boxplot(ch, + box_centre, box_width, + &(*fs)->m[i], + s); + } + } + else if ( ch ) + { + const double box_width = (ch->data_right - ch->data_left) / 3.0; + const double box_centre = (ch->data_right + ch->data_left) / 2.0; + + chart_write_title(ch, _("Boxplot")); + + boxplot_draw_boxplot(ch, + box_centre, box_width, + &totals[i], + var_to_string(vars[i]) ); + + } + + chart_submit(ch); + } +} /* Plot the normal and detrended normal plots for m @@ -1458,13 +1805,10 @@ np_plot(const struct metrics *m, const char *factorname) double yfirst=0, ylast=0; /* Normal Plot */ - struct chart np_chart; + struct chart *np_chart; /* Detrended Normal Plot */ - struct chart dnp_chart; - - const struct weighted_value *wv = *(m->wvp); - + struct chart *dnp_chart; /* The slope and intercept of the ideal normal probability line */ const double slope = 1.0 / m->stddev; @@ -1474,19 +1818,25 @@ np_plot(const struct metrics *m, const char *factorname) if ( m->n_data == 0 ) return ; - chart_initialise(&np_chart); - chart_write_title(&np_chart, _("Normal Q-Q Plot of %s"), factorname); - chart_write_xlabel(&np_chart, _("Observed Value")); - chart_write_ylabel(&np_chart, _("Expected Normal")); + np_chart = chart_create(); + dnp_chart = chart_create(); + + if ( !np_chart || ! dnp_chart ) + return ; + + chart_write_title(np_chart, _("Normal Q-Q Plot of %s"), factorname); + chart_write_xlabel(np_chart, _("Observed Value")); + chart_write_ylabel(np_chart, _("Expected Normal")); - chart_initialise(&dnp_chart); - chart_write_title(&dnp_chart, _("Detrended Normal Q-Q Plot of %s"), + + chart_write_title(dnp_chart, _("Detrended Normal Q-Q Plot of %s"), factorname); - chart_write_xlabel(&dnp_chart, _("Observed Value")); - chart_write_ylabel(&dnp_chart, _("Dev from Normal")); + chart_write_xlabel(dnp_chart, _("Observed Value")); + chart_write_ylabel(dnp_chart, _("Dev from Normal")); + + yfirst = gsl_cdf_ugaussian_Pinv (m->wvp[0]->rank / ( m->n + 1)); + ylast = gsl_cdf_ugaussian_Pinv (m->wvp[m->n_data-1]->rank / ( m->n + 1)); - yfirst = gsl_cdf_ugaussian_Pinv (wv[0].rank / ( m->n + 1)); - ylast = gsl_cdf_ugaussian_Pinv (wv[m->n_data-1].rank / ( m->n + 1)); { /* Need to make sure that both the scatter plot and the ideal fit into the @@ -1495,50 +1845,360 @@ np_plot(const struct metrics *m, const char *factorname) double x_upper = max(m->max, (ylast - intercept) / slope) ; double slack = (x_upper - x_lower) * 0.05 ; - chart_write_xscale(&np_chart, x_lower - slack, x_upper + slack, - chart_rounded_tick((m->max - m->min) / 5.0)); - + chart_write_xscale(np_chart, x_lower - slack, x_upper + slack, 5); - chart_write_xscale(&dnp_chart, m->min, m->max, - chart_rounded_tick((m->max - m->min) / 5.0)); + chart_write_xscale(dnp_chart, m->min, m->max, 5); } - chart_write_yscale(&np_chart, yfirst, ylast, - chart_rounded_tick((ylast - yfirst)/5.0) ); + chart_write_yscale(np_chart, yfirst, ylast, 5); { - /* We have to cache the detrended data, beacause we need to - find its limits before we can plot it */ - double *d_data; - d_data = xmalloc (m->n_data * sizeof(double)); - double d_max = -DBL_MAX; - double d_min = DBL_MAX; - for ( i = 0 ; i < m->n_data; ++i ) + /* We have to cache the detrended data, beacause we need to + find its limits before we can plot it */ + double *d_data; + d_data = xmalloc (m->n_data * sizeof(double)); + double d_max = -DBL_MAX; + double d_min = DBL_MAX; + for ( i = 0 ; i < m->n_data; ++i ) + { + const double ns = gsl_cdf_ugaussian_Pinv (m->wvp[i]->rank / ( m->n + 1)); + + chart_datum(np_chart, 0, m->wvp[i]->v.f, ns); + + d_data[i] = (m->wvp[i]->v.f - m->mean) / m->stddev - ns; + + if ( d_data[i] < d_min ) d_min = d_data[i]; + if ( d_data[i] > d_max ) d_max = d_data[i]; + } + chart_write_yscale(dnp_chart, d_min, d_max, 5); + + for ( i = 0 ; i < m->n_data; ++i ) + chart_datum(dnp_chart, 0, m->wvp[i]->v.f, d_data[i]); + + free(d_data); + } + + chart_line(np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y); + chart_line(dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X); + + chart_submit(np_chart); + chart_submit(dnp_chart); +} + + + + +/* Show the percentiles */ +void +show_percentiles(struct variable **dependent_var, + int n_dep_var, + struct factor *fctr) +{ + struct tab_table *tbl; + int i; + + int n_cols, n_rows; + int n_factors; + + struct hsh_table *ptiles ; + + int n_heading_columns; + const int n_heading_rows = 2; + const int n_stat_rows = 2; + + int n_ptiles ; + + if ( fctr ) { - const double ns = gsl_cdf_ugaussian_Pinv (wv[i].rank / ( m->n + 1)); + struct factor_statistics **fs = fctr->fs ; + n_heading_columns = 3; + n_factors = hsh_count(fctr->fstats); - chart_datum(&np_chart, 0, wv[i].v.f, ns); + ptiles = (*fs)->m[0].ptile_hash; - d_data[i] = (wv[i].v.f - m->mean) / m->stddev - ns; - - if ( d_data[i] < d_min ) d_min = d_data[i]; - if ( d_data[i] > d_max ) d_max = d_data[i]; + if ( fctr->indep_var[1] ) + n_heading_columns = 4; } + else + { + n_factors = 1; + n_heading_columns = 2; + + ptiles = totals[0].ptile_hash; + } + + n_ptiles = hsh_count(ptiles); + + n_rows = n_heading_rows + n_dep_var * n_stat_rows * n_factors; - chart_write_yscale(&dnp_chart, d_min, d_max, - chart_rounded_tick((d_max - d_min) / 5.0)); + n_cols = n_heading_columns + n_ptiles ; - for ( i = 0 ; i < m->n_data; ++i ) - chart_datum(&dnp_chart, 0, wv[i].v.f, d_data[i]); + tbl = tab_create (n_cols, n_rows, 0); + + tab_headers (tbl, n_heading_columns + 1, 0, n_heading_rows, 0); + + tab_dim (tbl, tab_natural_dimensions); + + /* Outline the box and have no internal lines*/ + tab_box (tbl, + TAL_2, TAL_2, + -1, -1, + 0, 0, + n_cols - 1, n_rows - 1); + + tab_hline (tbl, TAL_2, 0, n_cols - 1, n_heading_rows ); + + tab_vline (tbl, TAL_2, n_heading_columns, 0, n_rows - 1); + + + tab_title (tbl, 0, _("Percentiles")); + + + tab_hline (tbl, TAL_1, n_heading_columns, n_cols - 1, 1 ); + + + tab_box (tbl, + -1, -1, + -1, TAL_1, + 0, n_heading_rows, + n_heading_columns - 1, n_rows - 1); + + + tab_box (tbl, + -1, -1, + -1, TAL_1, + n_heading_columns, n_heading_rows - 1, + n_cols - 1, n_rows - 1); + + tab_joint_text(tbl, n_heading_columns + 1, 0, + n_cols - 1 , 0, + TAB_CENTER | TAT_TITLE , + _("Percentiles")); + + + { + /* Put in the percentile break points as headings */ + + struct percentile **p = (struct percentile **) hsh_sort(ptiles); + + i = 0; + while ( (*p) ) + { + tab_float(tbl, n_heading_columns + i++ , 1, + TAB_CENTER, + (*p)->p, 8, 0); + + p++; + } - free(d_data); } - chart_line(&np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y); - chart_line(&dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X); + for ( i = 0 ; i < n_dep_var ; ++i ) + { + const int n_stat_rows = 2; + const int row = n_heading_rows + i * n_stat_rows * n_factors ; + + if ( i > 0 ) + tab_hline(tbl, TAL_1, 0, n_cols - 1, row ); + + tab_text (tbl, 0, + i * n_stat_rows * n_factors + n_heading_rows, + TAB_LEFT | TAT_TITLE, + var_to_string(dependent_var[i]) + ); + + if ( fctr ) + { + struct factor_statistics **fs = fctr->fs; + int count = 0; + + tab_text (tbl, 1, n_heading_rows - 1, + TAB_CENTER | TAT_TITLE, + var_to_string(fctr->indep_var[0])); + + + if ( fctr->indep_var[1]) + tab_text (tbl, 2, n_heading_rows - 1, TAB_CENTER | TAT_TITLE, + var_to_string(fctr->indep_var[1])); + + while( *fs ) + { + + static union value prev ; + + const int row = n_heading_rows + n_stat_rows * + ( ( i * n_factors ) + count ); + + + if ( 0 != compare_values(&prev, &(*fs)->id[0], + fctr->indep_var[0]->width)) + { + + if ( count > 0 ) + tab_hline (tbl, TAL_1, 1, n_cols - 1, row); + + tab_text (tbl, + 1, row, + TAB_LEFT | TAT_TITLE, + value_to_string(&(*fs)->id[0], fctr->indep_var[0]) + ); + + + } + + prev = (*fs)->id[0]; + + if (fctr->indep_var[1] && count > 0 ) + tab_hline(tbl, TAL_1, 2, n_cols - 1, row); + + if ( fctr->indep_var[1]) + tab_text (tbl, 2, row, + TAB_LEFT | TAT_TITLE, + value_to_string(&(*fs)->id[1], fctr->indep_var[1]) + ); + + + populate_percentiles(tbl, n_heading_columns - 1, + row, &(*fs)->m[i]); + + + count++ ; + fs++; + } + + + } + else + { + populate_percentiles(tbl, n_heading_columns - 1, + i * n_stat_rows * n_factors + n_heading_rows, + &totals[i]); + } + + + } + + + tab_submit(tbl); + + +} + + + + +void +populate_percentiles(struct tab_table *tbl, int col, int row, + const struct metrics *m) +{ + int i; + + struct percentile **p = (struct percentile **) hsh_sort(m->ptile_hash); + + tab_text (tbl, + col, row + 1, + TAB_LEFT | TAT_TITLE, + _("Tukey\'s Hinges") + ); + + tab_text (tbl, + col, row, + TAB_LEFT | TAT_TITLE, + ptile_alg_desc[m->ptile_alg] + ); + + + i = 0; + while ( (*p) ) + { + tab_float(tbl, col + i + 1 , row, + TAB_CENTER, + (*p)->v, 8, 2); + if ( (*p)->p == 25 ) + tab_float(tbl, col + i + 1 , row + 1, + TAB_CENTER, + m->hinge[0], 8, 2); + + if ( (*p)->p == 50 ) + tab_float(tbl, col + i + 1 , row + 1, + TAB_CENTER, + m->hinge[1], 8, 2); + + if ( (*p)->p == 75 ) + tab_float(tbl, col + i + 1 , row + 1, + TAB_CENTER, + m->hinge[2], 8, 2); + + + i++; + + p++; + } + +} + + + +const char * +factor_to_string(const struct factor *fctr, + struct factor_statistics *fs, + const struct variable *var) +{ + + static char buf1[100]; + char buf2[100]; + + strcpy(buf1,""); + + if (var) + sprintf(buf1, "%s (",var_to_string(var) ); + + + snprintf(buf2, 100, "%s = %s", + var_to_string(fctr->indep_var[0]), + value_to_string(&fs->id[0],fctr->indep_var[0])); + + strcat(buf1, buf2); + + if ( fctr->indep_var[1] ) + { + sprintf(buf2, "; %s = %s)", + var_to_string(fctr->indep_var[1]), + value_to_string(&fs->id[1], + fctr->indep_var[1])); + strcat(buf1, buf2); + } + else + { + if ( var ) + strcat(buf1, ")"); + } + + return buf1; +} + + + +const char * +factor_to_string_concise(const struct factor *fctr, + struct factor_statistics *fs) + +{ + + static char buf[100]; + + char buf2[100]; + + snprintf(buf, 100, "%s", + value_to_string(&fs->id[0], fctr->indep_var[0])); + + if ( fctr->indep_var[1] ) + { + sprintf(buf2, ",%s)", value_to_string(&fs->id[1], fctr->indep_var[1]) ); + strcat(buf, buf2); + } - chart_finalise(&np_chart); - chart_finalise(&dnp_chart); + return buf; }