X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fexamine.q;h=e35c7049c800fc3698db43168ecc50d80afcd50a;hb=1d985886f778e35f8d89c4e3c897b79fde8de6ed;hp=97a63a902642aa9d69386034dc9c6c6f395d4156;hpb=4239c455e7b1061b7c960b793f9080e113123845;p=pspp-builds.git diff --git a/src/examine.q b/src/examine.q index 97a63a90..e35c7049 100644 --- a/src/examine.q +++ b/src/examine.q @@ -49,7 +49,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA /* (specification) "EXAMINE" (xmn_): - *variables=custom; + *^variables=custom; +total=custom; +nototal=custom; +missing=miss:pairwise/!listwise, @@ -57,6 +57,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA incl:include/!exclude; +compare=cmp:variables/!groups; +percentiles=custom; + +id=var; +plot[plt_]=stemleaf,boxplot,npplot,:spreadlevel(*d:n),histogram,all,none; +cinterval=double; +statistics[st_]=descriptives,:extreme(*d:n),all,none. @@ -115,14 +116,27 @@ static void show_descriptives(struct variable **dependent_var, struct factor *factor); static void show_percentiles(struct variable **dependent_var, - int n_dep_var, - struct factor *factor); + int n_dep_var, + struct factor *factor); + void np_plot(const struct metrics *m, const char *factorname); +void box_plot_group(const struct factor *fctr, + const struct variable **vars, int n_vars, + const struct variable *id + ) ; + + +void box_plot_variables(const struct factor *fctr, + const struct variable **vars, int n_vars, + const struct variable *id + ); + + /* Per Split function */ static void run_examine(const struct casefile *cf, void *cmd_); @@ -134,6 +148,22 @@ void factor_calc(struct ccase *c, int case_no, double weight, int case_missing); +/* Represent a factor as a string, so it can be + printed in a human readable fashion */ +const char * factor_to_string(const struct factor *fctr, + struct factor_statistics *fs, + const struct variable *var); + + +/* Represent a factor as a string, so it can be + printed in a human readable fashion, + but sacrificing some readablility for the sake of brevity */ +const char *factor_to_string_concise(const struct factor *fctr, + struct factor_statistics *fs); + + + + /* Function to use for testing for missing values */ static is_missing_func value_is_missing; @@ -169,7 +199,6 @@ cmd_examine(void) if ( ! cmd.sbc_cinterval) cmd.n_cinterval[0] = 95.0; - /* If descriptives have been requested, make sure the quartiles are calculated */ if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] ) @@ -182,7 +211,25 @@ cmd_examine(void) multipass_procedure_with_splits (run_examine, &cmd); if ( totals ) - free(totals); + { + free( totals ); + } + + if ( dependent_vars ) + free (dependent_vars); + + { + struct factor *f = factors ; + while ( f ) + { + struct factor *ff = f; + + f = f->next; + free ( ff->fs ); + hsh_destroy ( ff->fstats ) ; + free ( ff ) ; + } + } subc_list_double_destroy(&percentile_list); @@ -223,6 +270,18 @@ output_examine(void) np_plot(&totals[v], var_to_string(dependent_vars[v])); } + if ( cmd.a_plot[XMN_PLT_BOXPLOT] ) + { + if ( cmd.cmp == XMN_GROUPS ) + { + box_plot_group(0, dependent_vars, n_dependent_vars, + cmd.v_id); + } + else + box_plot_variables(0, dependent_vars, n_dependent_vars, + cmd.v_id); + } + if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) { for ( v = 0 ; v < n_dependent_vars; ++v ) @@ -269,39 +328,26 @@ output_examine(void) struct factor_statistics **fs = fctr->fs ; + if ( cmd.a_plot[XMN_PLT_BOXPLOT] ) + { + if ( cmd.cmp == XMN_VARIABLES ) + box_plot_variables(fctr, dependent_vars, n_dependent_vars, + cmd.v_id); + else + box_plot_group(fctr, dependent_vars, n_dependent_vars, + cmd.v_id); + } + for ( v = 0 ; v < n_dependent_vars; ++v ) { for ( fs = fctr->fs ; *fs ; ++fs ) { - char buf1[100]; - char buf2[100]; - sprintf(buf1, "%s (", - var_to_string(dependent_vars[v])); - - snprintf(buf2, 100, "%s = %s", - var_to_string(fctr->indep_var[0]), - value_to_string(&(*fs)->id[0],fctr->indep_var[0])); - - strcat(buf1, buf2); - - if ( fctr->indep_var[1] ) - { - sprintf(buf2, "; %s = %s)", - var_to_string(fctr->indep_var[1]), - value_to_string(&(*fs)->id[1], - fctr->indep_var[1])); - strcat(buf1, buf2); - } - else - { - strcat(buf1, ")"); - } + const char *s = factor_to_string(fctr, *fs, dependent_vars[v]); if ( cmd.a_plot[XMN_PLT_NPPLOT] ) - np_plot(&(*fs)->m[v],buf1); + np_plot(&(*fs)->m[v], s); - if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) { struct normal_curve normal; @@ -311,7 +357,7 @@ output_examine(void) normal.stddev = (*fs)->m[v].stddev; histogram_plot((*fs)->m[v].histogram, - buf1, &normal, 0); + s, &normal, 0); } } /* for ( fs .... */ @@ -326,6 +372,8 @@ output_examine(void) } +/* Create a hash table of percentiles and their values from the list of + percentiles */ static struct hsh_table * list_to_ptile_hash(const subc_list_double *l) { @@ -345,6 +393,7 @@ list_to_ptile_hash(const subc_list_double *l) struct percentile *p = xmalloc (sizeof (struct percentile)); p->p = subc_list_double_at(l,i); + p->v = SYSMIS; hsh_insert(h, p); @@ -364,9 +413,9 @@ xmn_custom_percentiles(struct cmd_examine *p UNUSED) lex_match('('); - while ( lex_double_p() ) + while ( lex_is_number() ) { - subc_list_double_push(&percentile_list,lex_double()); + subc_list_double_push(&percentile_list,lex_number()); lex_get(); @@ -436,16 +485,18 @@ xmn_custom_nototal(struct cmd_examine *p) -/* Parser for the variables sub command */ +/* Parser for the variables sub command + Returns 1 on success */ static int xmn_custom_variables(struct cmd_examine *cmd ) { - lex_match('='); if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) && token != T_ALL) - return 2; + { + return 2; + } if (!parse_variables (default_dict, &dependent_vars, &n_dependent_vars, PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) ) @@ -460,7 +511,13 @@ xmn_custom_variables(struct cmd_examine *cmd ) if ( lex_match(T_BY)) { - return examine_parse_independent_vars(cmd); + int success ; + success = examine_parse_independent_vars(cmd); + if ( success != 1 ) { + free (dependent_vars); + free (totals) ; + } + return success; } return 1; @@ -472,12 +529,15 @@ xmn_custom_variables(struct cmd_examine *cmd ) static int examine_parse_independent_vars(struct cmd_examine *cmd) { - + int success; struct factor *sf = xmalloc(sizeof(struct factor)); if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) && token != T_ALL) - return 2; + { + free ( sf ) ; + return 2; + } sf->indep_var[0] = parse_variable(); @@ -490,7 +550,10 @@ examine_parse_independent_vars(struct cmd_examine *cmd) if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) && token != T_ALL) - return 2; + { + free ( sf ) ; + return 2; + } sf->indep_var[1] = parse_variable(); @@ -511,7 +574,12 @@ examine_parse_independent_vars(struct cmd_examine *cmd) if ( token == '.' || token == '/' ) return 1; - return examine_parse_independent_vars(cmd); + success = examine_parse_independent_vars(cmd); + + if ( success != 1 ) + free ( sf ) ; + + return success; } @@ -579,8 +647,9 @@ factor_calc(struct ccase *c, int case_no, double weight, int case_missing) if ( value_is_missing(val,var) || case_missing ) val = 0; - - metrics_calc( &(*foo)->m[v], val, weight, case_no ); + + metrics_calc( &(*foo)->m[v], val, weight, case_no); + } fctr = fctr->next; @@ -652,7 +721,7 @@ run_examine(const struct casefile *cf, void *cmd_ ) if ( value_is_missing(val,var) || case_missing ) val = 0; - metrics_calc(&totals[v], val, weight, case_no ); + metrics_calc(&totals[v], val, weight, case_no); } @@ -757,8 +826,15 @@ run_examine(const struct casefile *cf, void *cmd_ ) output_examine(); - for ( v = 0 ; v < n_dependent_vars ; ++v ) - hsh_destroy(totals[v].ordered_data); + + if ( totals ) + { + int i; + for ( i = 0 ; i < n_dependent_vars ; ++i ) + { + metrics_destroy(&totals[i]); + } + } } @@ -790,7 +866,7 @@ show_summary(struct variable **dependent_var, int n_dep_var, n_rows = n_dep_var * n_factors ; if ( fctr->indep_var[1] ) - heading_columns = 3; + heading_columns = 3; } else { @@ -915,17 +991,17 @@ show_summary(struct variable **dependent_var, int n_dep_var, if ( 0 != compare_values(&prev, &(*fs)->id[0], fctr->indep_var[0]->width)) { - tab_text (tbl, - 1, - (i * n_factors ) + count + - heading_rows, - TAB_LEFT | TAT_TITLE, - value_to_string(&(*fs)->id[0], fctr->indep_var[0]) - ); - - if (fctr->indep_var[1] && count > 0 ) - tab_hline(tbl, TAL_1, 1, n_cols - 1, - (i * n_factors ) + count + heading_rows); + tab_text (tbl, + 1, + (i * n_factors ) + count + + heading_rows, + TAB_LEFT | TAT_TITLE, + value_to_string(&(*fs)->id[0], fctr->indep_var[0]) + ); + + if (fctr->indep_var[1] && count > 0 ) + tab_hline(tbl, TAL_1, 1, n_cols - 1, + (i * n_factors ) + count + heading_rows); } @@ -1008,7 +1084,7 @@ show_extremes(struct variable **dependent_var, int n_dep_var, n_rows = n_dep_var * 2 * n_extremities * n_factors; if ( fctr->indep_var[1] ) - heading_columns = 3; + heading_columns = 3; } else { @@ -1037,7 +1113,6 @@ show_extremes(struct variable **dependent_var, int n_dep_var, tab_title (tbl, 0, _("Extreme Values")); - tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows -1); tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows -1); @@ -1054,9 +1129,6 @@ show_extremes(struct variable **dependent_var, int n_dep_var, tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value")); tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number")); - - - for ( i = 0 ; i < n_dep_var ; ++i ) { @@ -1189,7 +1261,7 @@ populate_extremes(struct tab_table *t, cn->num, 8, 0); if ( cn->next ) - cn = cn->next; + cn = cn->next; } @@ -1218,7 +1290,7 @@ populate_extremes(struct tab_table *t, cn->num, 8, 0); if ( cn->next ) - cn = cn->next; + cn = cn->next; } @@ -1253,7 +1325,7 @@ show_descriptives(struct variable **dependent_var, n_rows = n_dep_var * n_stat_rows * n_factors; if ( fctr->indep_var[1] ) - heading_columns = 5; + heading_columns = 5; } else { @@ -1353,7 +1425,7 @@ show_descriptives(struct variable **dependent_var, ); populate_descriptives(tbl, heading_columns - 2, - row, &(*fs)->m[i]); + row, &(*fs)->m[i]); count++ ; fs++; @@ -1377,13 +1449,6 @@ show_descriptives(struct variable **dependent_var, - - - - - - - /* Fill in the descriptives data */ void populate_descriptives(struct tab_table *tbl, int col, int row, @@ -1465,12 +1530,14 @@ populate_descriptives(struct tab_table *tbl, int col, int row, assert(p); + tab_float (tbl, col + 2, row + 4, TAB_CENTER, p->v, 8, 2); } + tab_text (tbl, col, row + 5, @@ -1602,6 +1669,133 @@ populate_descriptives(struct tab_table *tbl, int col, int row, } + +void +box_plot_variables(const struct factor *fctr, + const struct variable **vars, int n_vars, + const struct variable *id) +{ + + int i; + struct factor_statistics **fs ; + + if ( ! fctr ) + { + box_plot_group(fctr, vars, n_vars, id); + return; + } + + for ( fs = fctr->fs ; *fs ; ++fs ) + { + double y_min = DBL_MAX; + double y_max = -DBL_MAX; + struct chart *ch; + + ch = chart_create(); + + const char *s = factor_to_string(fctr, *fs, 0 ); + + chart_write_title(ch, s); + + for ( i = 0 ; i < n_vars ; ++i ) + { + y_max = max(y_max, (*fs)->m[i].max); + y_min = min(y_min, (*fs)->m[i].min); + } + + boxplot_draw_yscale(ch, y_max, y_min); + + for ( i = 0 ; i < n_vars ; ++i ) + { + + const double box_width = (ch->data_right - ch->data_left) + / (n_vars * 2.0 ) ; + + const double box_centre = ( i * 2 + 1) * box_width + + ch->data_left; + + boxplot_draw_boxplot(ch, + box_centre, box_width, + &(*fs)->m[i], + var_to_string(vars[i])); + + + } + + chart_submit(ch); + + } +} + + + +/* Do a box plot, grouping all factors into one plot ; + each dependent variable has its own plot. +*/ +void +box_plot_group(const struct factor *fctr, + const struct variable **vars, + int n_vars, + const struct variable *id UNUSED) +{ + + int i; + + for ( i = 0 ; i < n_vars ; ++i ) + { + struct factor_statistics **fs ; + struct chart *ch; + + ch = chart_create(); + + boxplot_draw_yscale(ch, totals[i].max, totals[i].min); + + if ( fctr ) + { + int n_factors = 0; + int f=0; + for ( fs = fctr->fs ; *fs ; ++fs ) + ++n_factors; + + chart_write_title(ch, _("Boxplot of %s vs. %s"), + var_to_string(vars[i]), var_to_string(fctr->indep_var[0]) ); + + for ( fs = fctr->fs ; *fs ; ++fs ) + { + + const char *s = factor_to_string_concise(fctr, *fs); + + const double box_width = (ch->data_right - ch->data_left) + / (n_factors * 2.0 ) ; + + const double box_centre = ( f++ * 2 + 1) * box_width + + ch->data_left; + + boxplot_draw_boxplot(ch, + box_centre, box_width, + &(*fs)->m[i], + s); + } + } + else if ( ch ) + { + const double box_width = (ch->data_right - ch->data_left) / 3.0; + const double box_centre = (ch->data_right + ch->data_left) / 2.0; + + chart_write_title(ch, _("Boxplot")); + + boxplot_draw_boxplot(ch, + box_centre, box_width, + &totals[i], + var_to_string(vars[i]) ); + + } + + chart_submit(ch); + } +} + + /* Plot the normal and detrended normal plots for m Label the plots with factorname */ void @@ -1611,10 +1805,10 @@ np_plot(const struct metrics *m, const char *factorname) double yfirst=0, ylast=0; /* Normal Plot */ - struct chart np_chart; + struct chart *np_chart; /* Detrended Normal Plot */ - struct chart dnp_chart; + struct chart *dnp_chart; /* The slope and intercept of the ideal normal probability line */ const double slope = 1.0 / m->stddev; @@ -1624,16 +1818,21 @@ np_plot(const struct metrics *m, const char *factorname) if ( m->n_data == 0 ) return ; - chart_initialise(&np_chart); - chart_write_title(&np_chart, _("Normal Q-Q Plot of %s"), factorname); - chart_write_xlabel(&np_chart, _("Observed Value")); - chart_write_ylabel(&np_chart, _("Expected Normal")); + np_chart = chart_create(); + dnp_chart = chart_create(); - chart_initialise(&dnp_chart); - chart_write_title(&dnp_chart, _("Detrended Normal Q-Q Plot of %s"), + if ( !np_chart || ! dnp_chart ) + return ; + + chart_write_title(np_chart, _("Normal Q-Q Plot of %s"), factorname); + chart_write_xlabel(np_chart, _("Observed Value")); + chart_write_ylabel(np_chart, _("Expected Normal")); + + + chart_write_title(dnp_chart, _("Detrended Normal Q-Q Plot of %s"), factorname); - chart_write_xlabel(&dnp_chart, _("Observed Value")); - chart_write_ylabel(&dnp_chart, _("Dev from Normal")); + chart_write_xlabel(dnp_chart, _("Observed Value")); + chart_write_ylabel(dnp_chart, _("Dev from Normal")); yfirst = gsl_cdf_ugaussian_Pinv (m->wvp[0]->rank / ( m->n + 1)); ylast = gsl_cdf_ugaussian_Pinv (m->wvp[m->n_data-1]->rank / ( m->n + 1)); @@ -1646,46 +1845,45 @@ np_plot(const struct metrics *m, const char *factorname) double x_upper = max(m->max, (ylast - intercept) / slope) ; double slack = (x_upper - x_lower) * 0.05 ; - chart_write_xscale(&np_chart, x_lower - slack, x_upper + slack, 5); + chart_write_xscale(np_chart, x_lower - slack, x_upper + slack, 5); - chart_write_xscale(&dnp_chart, m->min, m->max, 5); + chart_write_xscale(dnp_chart, m->min, m->max, 5); } - chart_write_yscale(&np_chart, yfirst, ylast, 5); + chart_write_yscale(np_chart, yfirst, ylast, 5); { - /* We have to cache the detrended data, beacause we need to - find its limits before we can plot it */ - double *d_data; - d_data = xmalloc (m->n_data * sizeof(double)); - double d_max = -DBL_MAX; - double d_min = DBL_MAX; - for ( i = 0 ; i < m->n_data; ++i ) - { - const double ns = gsl_cdf_ugaussian_Pinv (m->wvp[i]->rank / ( m->n + 1)); + /* We have to cache the detrended data, beacause we need to + find its limits before we can plot it */ + double *d_data; + d_data = xmalloc (m->n_data * sizeof(double)); + double d_max = -DBL_MAX; + double d_min = DBL_MAX; + for ( i = 0 ; i < m->n_data; ++i ) + { + const double ns = gsl_cdf_ugaussian_Pinv (m->wvp[i]->rank / ( m->n + 1)); - chart_datum(&np_chart, 0, m->wvp[i]->v.f, ns); + chart_datum(np_chart, 0, m->wvp[i]->v.f, ns); - d_data[i] = (m->wvp[i]->v.f - m->mean) / m->stddev - ns; + d_data[i] = (m->wvp[i]->v.f - m->mean) / m->stddev - ns; - if ( d_data[i] < d_min ) d_min = d_data[i]; - if ( d_data[i] > d_max ) d_max = d_data[i]; - } - chart_write_yscale(&dnp_chart, d_min, d_max, 5); + if ( d_data[i] < d_min ) d_min = d_data[i]; + if ( d_data[i] > d_max ) d_max = d_data[i]; + } + chart_write_yscale(dnp_chart, d_min, d_max, 5); - for ( i = 0 ; i < m->n_data; ++i ) - chart_datum(&dnp_chart, 0, m->wvp[i]->v.f, d_data[i]); + for ( i = 0 ; i < m->n_data; ++i ) + chart_datum(dnp_chart, 0, m->wvp[i]->v.f, d_data[i]); - free(d_data); + free(d_data); } - chart_line(&np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y); - chart_line(&dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X); - - chart_finalise(&np_chart); - chart_finalise(&dnp_chart); + chart_line(np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y); + chart_line(dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X); + chart_submit(np_chart); + chart_submit(dnp_chart); } @@ -1694,8 +1892,8 @@ np_plot(const struct metrics *m, const char *factorname) /* Show the percentiles */ void show_percentiles(struct variable **dependent_var, - int n_dep_var, - struct factor *fctr) + int n_dep_var, + struct factor *fctr) { struct tab_table *tbl; int i; @@ -1720,7 +1918,7 @@ show_percentiles(struct variable **dependent_var, ptiles = (*fs)->m[0].ptile_hash; if ( fctr->indep_var[1] ) - n_heading_columns = 4; + n_heading_columns = 4; } else { @@ -1862,7 +2060,7 @@ show_percentiles(struct variable **dependent_var, populate_percentiles(tbl, n_heading_columns - 1, - row, &(*fs)->m[i]); + row, &(*fs)->m[i]); count++ ; @@ -1874,8 +2072,8 @@ show_percentiles(struct variable **dependent_var, else { populate_percentiles(tbl, n_heading_columns - 1, - i * n_stat_rows * n_factors + n_heading_rows, - &totals[i]); + i * n_stat_rows * n_factors + n_heading_rows, + &totals[i]); } @@ -1920,17 +2118,17 @@ populate_percentiles(struct tab_table *tbl, int col, int row, if ( (*p)->p == 25 ) tab_float(tbl, col + i + 1 , row + 1, TAB_CENTER, - m->hinges[0], 8, 2); + m->hinge[0], 8, 2); if ( (*p)->p == 50 ) tab_float(tbl, col + i + 1 , row + 1, TAB_CENTER, - m->hinges[1], 8, 2); + m->hinge[1], 8, 2); if ( (*p)->p == 75 ) tab_float(tbl, col + i + 1 , row + 1, TAB_CENTER, - m->hinges[2], 8, 2); + m->hinge[2], 8, 2); i++; @@ -1938,8 +2136,69 @@ populate_percentiles(struct tab_table *tbl, int col, int row, p++; } +} + + + +const char * +factor_to_string(const struct factor *fctr, + struct factor_statistics *fs, + const struct variable *var) +{ + + static char buf1[100]; + char buf2[100]; + strcpy(buf1,""); + if (var) + sprintf(buf1, "%s (",var_to_string(var) ); + + snprintf(buf2, 100, "%s = %s", + var_to_string(fctr->indep_var[0]), + value_to_string(&fs->id[0],fctr->indep_var[0])); + + strcat(buf1, buf2); + + if ( fctr->indep_var[1] ) + { + sprintf(buf2, "; %s = %s)", + var_to_string(fctr->indep_var[1]), + value_to_string(&fs->id[1], + fctr->indep_var[1])); + strcat(buf1, buf2); + } + else + { + if ( var ) + strcat(buf1, ")"); + } + + return buf1; } + + +const char * +factor_to_string_concise(const struct factor *fctr, + struct factor_statistics *fs) + +{ + + static char buf[100]; + + char buf2[100]; + + snprintf(buf, 100, "%s", + value_to_string(&fs->id[0], fctr->indep_var[0])); + + if ( fctr->indep_var[1] ) + { + sprintf(buf2, ",%s)", value_to_string(&fs->id[1], fctr->indep_var[1]) ); + strcat(buf, buf2); + } + + + return buf; +}