X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fexamine.q;h=d426ecca1e549c5a83dd4d512bfd9b73bcbf0ef9;hb=5156fa5a8323a16f6b4bbc8950221cdc1d0e023d;hp=697176f3269f23cb6af8c2e9b67efc023896a823;hpb=cd7b08ad5e6bbec75e778acf008f84e1eb548154;p=pspp-builds.git diff --git a/src/examine.q b/src/examine.q index 697176f3..d426ecca 100644 --- a/src/examine.q +++ b/src/examine.q @@ -41,6 +41,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include "hash.h" #include "casefile.h" #include "factor_stats.h" +#include "moments.h" + /* (headers) */ #include "chart.h" @@ -80,7 +82,7 @@ struct factor /* Hash table of factor stats indexed by 2 values */ struct hsh_table *fstats; - /* The hash table after it's been crunched */ + /* The hash table after it has been crunched */ struct factor_statistics **fs; struct factor *next; @@ -92,48 +94,6 @@ static struct factor *factors=0; static struct metrics *totals=0; -void -print_factors(void) -{ - struct factor *f = factors; - - while (f) - { - struct factor_statistics **fs = f->fs; - - printf("Factor: %s BY %s\n", - var_to_string(f->indep_var[0]), - var_to_string(f->indep_var[1]) ); - - - printf("Contains %d entries\n", hsh_count(f->fstats)); - - - while (*fs) - { - printf("Factor %g; %g\n", (*fs)->id[0].f, (*fs)->id[1].f); - - /* - printf("Factor %s; %s\n", - value_to_string(&(*fs)->id[0], f->indep_var[0]), - value_to_string(&(*fs)->id[1], f->indep_var[1])); - */ - - - printf("Sum is %g; ",(*fs)->m[0].sum); - printf("N is %g; ",(*fs)->m[0].n); - printf("Mean is %g\n",(*fs)->m[0].mean); - - fs++ ; - } - - f = f->next; - } - - -} - - /* Parse the clause specifying the factors */ static int examine_parse_independent_vars(struct cmd_examine *cmd); @@ -157,7 +117,6 @@ void np_plot(const struct metrics *m, const char *factorname); - /* Per Split function */ static void run_examine(const struct casefile *cf, void *cmd_); @@ -193,6 +152,9 @@ cmd_examine(void) multipass_procedure_with_splits (run_examine, &cmd); + if ( totals ) + free(totals); + return CMD_SUCCESS; }; @@ -205,7 +167,7 @@ output_examine(void) struct factor *fctr; /* Show totals if appropriate */ - if ( ! cmd.sbc_nototal ) + if ( ! cmd.sbc_nototal || factors == 0 ) { show_summary(dependent_vars, n_dependent_vars, 0); @@ -221,13 +183,29 @@ output_examine(void) if ( cmd.sbc_plot) { + int v; if ( cmd.a_plot[XMN_PLT_NPPLOT] ) { - int v; + for ( v = 0 ; v < n_dependent_vars; ++v ) + np_plot(&totals[v], var_to_string(dependent_vars[v])); + } + if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) + { for ( v = 0 ; v < n_dependent_vars; ++v ) - np_plot(&totals[v], var_to_string(dependent_vars[v])); + { + struct normal_curve normal; + + normal.N = totals[v].n; + normal.mean = totals[v].mean; + normal.stddev = totals[v].stddev; + + histogram_plot(totals[v].histogram, + var_to_string(dependent_vars[v]), + &normal, 0); + } } + } @@ -251,47 +229,59 @@ output_examine(void) if ( cmd.sbc_plot) { - if ( cmd.a_plot[XMN_PLT_NPPLOT] ) + int v; + + struct factor_statistics **fs = fctr->fs ; + + for ( v = 0 ; v < n_dependent_vars; ++v ) { - int v; - for ( v = 0 ; v < n_dependent_vars; ++ v) + + for ( fs = fctr->fs ; *fs ; ++fs ) { - - struct factor_statistics **fs = fctr->fs ; - for ( fs = fctr->fs ; *fs ; ++fs ) - { - char buf1[100]; - char buf2[100]; - sprintf(buf1, "%s (", - var_to_string(dependent_vars[v])); + char buf1[100]; + char buf2[100]; + sprintf(buf1, "%s (", + var_to_string(dependent_vars[v])); - sprintf(buf2, "%s = %s", - var_to_string(fctr->indep_var[0]), - value_to_string(&(*fs)->id[0],fctr->indep_var[0])); + snprintf(buf2, 100, "%s = %s", + var_to_string(fctr->indep_var[0]), + value_to_string(&(*fs)->id[0],fctr->indep_var[0])); + strcat(buf1, buf2); + + if ( fctr->indep_var[1] ) + { + sprintf(buf2, "; %s = %s)", + var_to_string(fctr->indep_var[1]), + value_to_string(&(*fs)->id[1], + fctr->indep_var[1])); strcat(buf1, buf2); + } + else + { + strcat(buf1, ")"); + } - - if ( fctr->indep_var[1] ) - { - sprintf(buf2, "; %s = %s)", - var_to_string(fctr->indep_var[1]), - value_to_string(&(*fs)->id[1], - fctr->indep_var[1])); - strcat(buf1, buf2); - } - else - { - strcat(buf1, ")"); - } - - np_plot(&(*fs)->m[v],buf1); + if ( cmd.a_plot[XMN_PLT_NPPLOT] ) + np_plot(&(*fs)->m[v],buf1); + + + if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) + { + struct normal_curve normal; + normal.N = (*fs)->m[v].n; + normal.mean = (*fs)->m[v].mean; + normal.stddev = (*fs)->m[v].stddev; + + histogram_plot((*fs)->m[v].histogram, + buf1, &normal, 0); } - } + } /* for ( fs .... */ + + } /* for ( v = 0 ..... */ - } } fctr = fctr->next; @@ -638,12 +628,11 @@ run_examine(const struct casefile *cf, void *cmd_ ) fctr = fctr->next; } - /* - print_factors(); - */ - output_examine(); + for ( v = 0 ; v < n_dependent_vars ; ++v ) + hsh_destroy(totals[v].ordered_data); + } @@ -1021,7 +1010,6 @@ populate_extremes(struct tab_table *t, int extremity; int idx=0; - const int n_data = hsh_count(m->ordered_data); tab_text(t, col, row, TAB_RIGHT | TAT_TITLE , @@ -1053,10 +1041,10 @@ populate_extremes(struct tab_table *t, /* Lowest */ - for (idx = 0, extremity = 0; extremity < n && idx < n_data ; ++idx ) + for (idx = 0, extremity = 0; extremity < n && idx < m->n_data ; ++idx ) { int j; - const struct weighted_value *wv = &m->wv[idx]; + const struct weighted_value *wv = m->wvp[idx]; struct case_node *cn = wv->case_nos; @@ -1083,10 +1071,10 @@ populate_extremes(struct tab_table *t, /* Highest */ - for (idx = n_data - 1, extremity = 0; extremity < n && idx >= 0; --idx ) + for (idx = m->n_data - 1, extremity = 0; extremity < n && idx >= 0; --idx ) { int j; - const struct weighted_value *wv = &m->wv[idx]; + const struct weighted_value *wv = m->wvp[idx]; struct case_node *cn = wv->case_nos; for (j = 0 ; j < wv->w ; ++j ) @@ -1288,7 +1276,7 @@ populate_descriptives(struct tab_table *tbl, int col, int row, tab_float (tbl, col + 3, row, TAB_CENTER, - m->stderr, + m->se_mean, 8,3); @@ -1306,7 +1294,7 @@ populate_descriptives(struct tab_table *tbl, int col, int row, tab_float (tbl, col + 2, row + 1, TAB_CENTER, - m->mean - t * m->stderr, + m->mean - t * m->se_mean, 8,3); tab_text (tbl, col + 1, @@ -1318,7 +1306,7 @@ populate_descriptives(struct tab_table *tbl, int col, int row, tab_float (tbl, col + 2, row + 2, TAB_CENTER, - m->mean + t * m->stderr, + m->mean + t * m->se_mean, 8,3); tab_text (tbl, col, @@ -1407,14 +1395,42 @@ populate_descriptives(struct tab_table *tbl, int col, int row, TAB_LEFT | TAT_TITLE, _("Skewness")); + + tab_float (tbl, col + 2, + row + 11, + TAB_CENTER, + m->skewness, + 8,3); + + /* stderr of skewness */ + tab_float (tbl, col + 3, + row + 11, + TAB_CENTER, + calc_seskew(m->n), + 8,3); + + tab_text (tbl, col, row + 12, TAB_LEFT | TAT_TITLE, _("Kurtosis")); -} + tab_float (tbl, col + 2, + row + 12, + TAB_CENTER, + m->kurtosis, + 8,3); + /* stderr of kurtosis */ + tab_float (tbl, col + 3, + row + 12, + TAB_CENTER, + calc_sekurt(m->n), + 8,3); + + +} /* Plot the normal and detrended normal plots for m @@ -1431,15 +1447,12 @@ np_plot(const struct metrics *m, const char *factorname) /* Detrended Normal Plot */ struct chart dnp_chart; - const struct weighted_value *wv = m->wv; - const int n_data = hsh_count(m->ordered_data) ; - /* The slope and intercept of the ideal normal probability line */ const double slope = 1.0 / m->stddev; const double intercept = - m->mean / m->stddev; /* Cowardly refuse to plot an empty data set */ - if ( n_data == 0 ) + if ( m->n_data == 0 ) return ; chart_initialise(&np_chart); @@ -1453,8 +1466,9 @@ np_plot(const struct metrics *m, const char *factorname) chart_write_xlabel(&dnp_chart, _("Observed Value")); chart_write_ylabel(&dnp_chart, _("Dev from Normal")); - yfirst = gsl_cdf_ugaussian_Pinv (wv[0].rank / ( m->n + 1)); - ylast = gsl_cdf_ugaussian_Pinv (wv[n_data-1].rank / ( m->n + 1)); + yfirst = gsl_cdf_ugaussian_Pinv (m->wvp[0]->rank / ( m->n + 1)); + ylast = gsl_cdf_ugaussian_Pinv (m->wvp[m->n_data-1]->rank / ( m->n + 1)); + { /* Need to make sure that both the scatter plot and the ideal fit into the @@ -1463,42 +1477,36 @@ np_plot(const struct metrics *m, const char *factorname) double x_upper = max(m->max, (ylast - intercept) / slope) ; double slack = (x_upper - x_lower) * 0.05 ; - chart_write_xscale(&np_chart, x_lower - slack, x_upper + slack, - chart_rounded_tick((m->max - m->min) / 5.0)); + chart_write_xscale(&np_chart, x_lower - slack, x_upper + slack, 5); - - chart_write_xscale(&dnp_chart, m->min, m->max, - chart_rounded_tick((m->max - m->min) / 5.0)); + chart_write_xscale(&dnp_chart, m->min, m->max, 5); } - chart_write_yscale(&np_chart, yfirst, ylast, - chart_rounded_tick((ylast - yfirst)/5.0) ); + chart_write_yscale(&np_chart, yfirst, ylast, 5); { /* We have to cache the detrended data, beacause we need to find its limits before we can plot it */ double *d_data; - d_data = xmalloc (n_data * sizeof(double)); + d_data = xmalloc (m->n_data * sizeof(double)); double d_max = -DBL_MAX; double d_min = DBL_MAX; - for ( i = 0 ; i < n_data; ++i ) + for ( i = 0 ; i < m->n_data; ++i ) { - const double ns = gsl_cdf_ugaussian_Pinv (wv[i].rank / ( m->n + 1)); + const double ns = gsl_cdf_ugaussian_Pinv (m->wvp[i]->rank / ( m->n + 1)); - chart_datum(&np_chart, 0, wv[i].v.f, ns); + chart_datum(&np_chart, 0, m->wvp[i]->v.f, ns); - d_data[i] = (wv[i].v.f - m->mean) / m->stddev - ns; + d_data[i] = (m->wvp[i]->v.f - m->mean) / m->stddev - ns; if ( d_data[i] < d_min ) d_min = d_data[i]; if ( d_data[i] > d_max ) d_max = d_data[i]; } + chart_write_yscale(&dnp_chart, d_min, d_max, 5); - chart_write_yscale(&dnp_chart, d_min, d_max, - chart_rounded_tick((d_max - d_min) / 5.0)); - - for ( i = 0 ; i < n_data; ++i ) - chart_datum(&dnp_chart, 0, wv[i].v.f, d_data[i]); + for ( i = 0 ; i < m->n_data; ++i ) + chart_datum(&dnp_chart, 0, m->wvp[i]->v.f, d_data[i]); free(d_data); }