X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fexamine.q;h=090d5835db9c10db80d9451f4902a935a253ee84;hb=52c400a0ac67e4a43c5cae5a0d40e1452326240c;hp=e39dc8d3f74d200046d3c4e287ced638703e88c5;hpb=ccbfcb82b48b6faf8cd8f8f5a635c9f53f05fd98;p=pspp-builds.git diff --git a/src/language/stats/examine.q b/src/language/stats/examine.q index e39dc8d3..090d5835 100644 --- a/src/language/stats/examine.q +++ b/src/language/stats/examine.q @@ -17,7 +17,6 @@ #include #include -#include #include #include #include @@ -43,16 +42,14 @@ #include #include #include -#include #include #include #include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include #include "minmax.h" #include "xalloc.h" @@ -62,9 +59,7 @@ #define N_(msgid) msgid /* (headers) */ -#include #include -#include #include /* (specification) @@ -105,11 +100,11 @@ struct factor_metrics struct percentile **ptl; size_t n_ptiles; - struct statistic *tukey_hinges; - struct statistic *box_whisker; - struct statistic *trimmed_mean; - struct statistic *histogram; - struct order_stats *np; + struct tukey_hinges *tukey_hinges; + struct box_whisker *box_whisker; + struct trimmed_mean *trimmed_mean; + struct histogram *histogram; + struct np *np; /* Three quartiles indexing into PTL */ struct percentile **quartiles; @@ -180,12 +175,12 @@ factor_destroy (struct xfactor *fctr) moments1_destroy (result->metrics[v].moments); extrema_destroy (result->metrics[v].minima); extrema_destroy (result->metrics[v].maxima); - statistic_destroy (result->metrics[v].trimmed_mean); - statistic_destroy (result->metrics[v].tukey_hinges); - statistic_destroy (result->metrics[v].box_whisker); - statistic_destroy (result->metrics[v].histogram); + statistic_destroy (&result->metrics[v].trimmed_mean->parent.parent); + statistic_destroy (&result->metrics[v].tukey_hinges->parent.parent); + statistic_destroy (&result->metrics[v].box_whisker->parent.parent); + statistic_destroy (&result->metrics[v].histogram->parent); for (i = 0 ; i < result->metrics[v].n_ptiles; ++i) - statistic_destroy ((struct statistic *) result->metrics[v].ptl[i]); + statistic_destroy (&result->metrics[v].ptl[i]->parent.parent); free (result->metrics[v].ptl); free (result->metrics[v].quartiles); casereader_destroy (result->metrics[v].up_reader); @@ -321,150 +316,6 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) }; -struct np_plot_chart - { - struct chart chart; - char *label; - struct casereader *data; - - /* Copied directly from struct np. */ - double y_min, y_max; - double dns_min, dns_max; - - /* Calculated. */ - double slope, intercept; - double y_first, y_last; - double x_lower, x_upper; - double slack; - }; - -static const struct chart_class np_plot_chart_class; -static const struct chart_class dnp_plot_chart_class; - -/* Plot the normal and detrended normal plots for RESULT. - Label the plots with LABEL */ -static void -np_plot (struct np *np, const char *label) -{ - struct np_plot_chart *np_plot, *dnp_plot; - - if ( np->n < 1.0 ) - { - msg (MW, _("Not creating plot because data set is empty.")); - return ; - } - - np_plot = xmalloc (sizeof *np_plot); - chart_init (&np_plot->chart, &np_plot_chart_class); - np_plot->label = xstrdup (label); - np_plot->data = casewriter_make_reader (np->writer); - np_plot->y_min = np->y_min; - np_plot->y_max = np->y_max; - np_plot->dns_min = np->dns_min; - np_plot->dns_max = np->dns_max; - - /* Slope and intercept of the ideal normal probability line. */ - np_plot->slope = 1.0 / np->stddev; - np_plot->intercept = -np->mean / np->stddev; - - np_plot->y_first = gsl_cdf_ugaussian_Pinv (1 / (np->n + 1)); - np_plot->y_last = gsl_cdf_ugaussian_Pinv (np->n / (np->n + 1)); - - /* Need to make sure that both the scatter plot and the ideal fit into the - plot */ - np_plot->x_lower = MIN ( - np->y_min, (np_plot->y_first - np_plot->intercept) / np_plot->slope); - np_plot->x_upper = MAX ( - np->y_max, (np_plot->y_last - np_plot->intercept) / np_plot->slope) ; - np_plot->slack = (np_plot->x_upper - np_plot->x_lower) * 0.05 ; - - dnp_plot = xmemdup (np_plot, sizeof *np_plot); - chart_init (&dnp_plot->chart, &dnp_plot_chart_class); - dnp_plot->label = xstrdup (dnp_plot->label); - dnp_plot->data = casereader_clone (dnp_plot->data); - - chart_submit (&np_plot->chart); - chart_submit (&dnp_plot->chart); -} - -static void -np_plot_chart_draw (const struct chart *chart, plPlotter *lp) -{ - const struct np_plot_chart *plot = (struct np_plot_chart *) chart; - struct chart_geometry geom; - struct casereader *data; - struct ccase *c; - - chart_geometry_init (lp, &geom); - chart_write_title (lp, &geom, _("Normal Q-Q Plot of %s"), plot->label); - chart_write_xlabel (lp, &geom, _("Observed Value")); - chart_write_ylabel (lp, &geom, _("Expected Normal")); - chart_write_xscale (lp, &geom, - plot->x_lower - plot->slack, - plot->x_upper + plot->slack, 5); - chart_write_yscale (lp, &geom, plot->y_first, plot->y_last, 5); - - data = casereader_clone (plot->data); - for (; (c = casereader_read (data)) != NULL; case_unref (c)) - chart_datum (lp, &geom, 0, - case_data_idx (c, NP_IDX_Y)->f, - case_data_idx (c, NP_IDX_NS)->f); - casereader_destroy (data); - - chart_line (lp, &geom, plot->slope, plot->intercept, - plot->y_first, plot->y_last, CHART_DIM_Y); - - chart_geometry_free (lp); -} - -static void -dnp_plot_chart_draw (const struct chart *chart, plPlotter *lp) -{ - const struct np_plot_chart *plot = (struct np_plot_chart *) chart; - struct chart_geometry geom; - struct casereader *data; - struct ccase *c; - - chart_geometry_init (lp, &geom); - chart_write_title (lp, &geom, _("Detrended Normal Q-Q Plot of %s"), - plot->label); - chart_write_xlabel (lp, &geom, _("Observed Value")); - chart_write_ylabel (lp, &geom, _("Dev from Normal")); - chart_write_xscale (lp, &geom, plot->y_min, plot->y_max, 5); - chart_write_yscale (lp, &geom, plot->dns_min, plot->dns_max, 5); - - data = casereader_clone (plot->data); - for (; (c = casereader_read (data)) != NULL; case_unref (c)) - chart_datum (lp, &geom, 0, case_data_idx (c, NP_IDX_Y)->f, - case_data_idx (c, NP_IDX_DNS)->f); - casereader_destroy (data); - - chart_line (lp, &geom, 0, 0, plot->y_min, plot->y_max, CHART_DIM_X); - - chart_geometry_free (lp); -} - -static void -np_plot_chart_destroy (struct chart *chart) -{ - struct np_plot_chart *plot = (struct np_plot_chart *) chart; - - casereader_destroy (plot->data); - free (plot->label); - free (plot); -} - -static const struct chart_class np_plot_chart_class = - { - np_plot_chart_draw, - np_plot_chart_destroy - }; - -static const struct chart_class dnp_plot_chart_class = - { - dnp_plot_chart_draw, - np_plot_chart_destroy - }; static void @@ -481,20 +332,38 @@ show_npplot (const struct variable **dependent_var, ll != ll_null (&fctr->result_list); ll = ll_next (ll)) { - struct string str; + struct string label; const struct factor_result *result = ll_data (ll, struct factor_result, ll); + struct chart_item *npp, *dnpp; + struct casereader *reader; + struct np *np; - ds_init_empty (&str); - ds_put_format (&str, "%s ", var_get_name (dependent_var[v])); + ds_init_empty (&label); + ds_put_format (&label, "%s ", var_get_name (dependent_var[v])); + factor_to_string (fctr, result, &label); - factor_to_string (fctr, result, &str); + np = result->metrics[v].np; + reader = casewriter_make_reader (np->writer); + npp = np_plot_create (np, reader, ds_cstr (&label)); + dnpp = dnp_plot_create (np, reader, ds_cstr (&label)); - np_plot ((struct np*) result->metrics[v].np, ds_cstr(&str)); + ds_destroy (&label); - statistic_destroy ((struct statistic *)result->metrics[v].np); + if (npp == NULL || dnpp == NULL) + { + msg (MW, _("Not creating NP plot because data set is empty.")); + chart_item_unref (npp); + chart_item_unref (dnpp); + } + else + { + chart_item_submit (npp); + chart_item_submit (dnpp); + } - ds_destroy (&str); + statistic_destroy (&np->parent.parent); + casereader_destroy (reader); } } } @@ -520,7 +389,7 @@ show_histogram (const struct variable **dependent_var, struct histogram *histogram; double mean, var, n; - histogram = (struct histogram *) result->metrics[v].histogram; + histogram = result->metrics[v].histogram; if (histogram == NULL) { /* Probably all values are SYSMIS. */ @@ -532,10 +401,11 @@ show_histogram (const struct variable **dependent_var, factor_to_string (fctr, result, &str); - moments1_calculate ((struct moments1 *) result->metrics[v].moments, + moments1_calculate (result->metrics[v].moments, &n, &mean, &var, NULL, NULL); - chart_submit (histogram_chart_create (histogram, ds_cstr (&str), - n, mean, sqrt (var), false)); + chart_item_submit (histogram_chart_create (histogram->gsl_hist, + ds_cstr (&str), n, mean, + sqrt (var), false)); ds_destroy (&str); } @@ -594,14 +464,12 @@ show_boxplot_groups (const struct variable **dependent_var, struct factor_metrics *metrics = &result->metrics[v]; struct string str = DS_EMPTY_INITIALIZER; factor_to_string_concise (fctr, result, &str); - boxplot_add_box (boxplot, - (struct box_whisker *) metrics->box_whisker, - ds_cstr (&str)); + boxplot_add_box (boxplot, metrics->box_whisker, ds_cstr (&str)); metrics->box_whisker = NULL; ds_destroy (&str); } - chart_submit (boxplot_get_chart (boxplot)); + boxplot_submit (boxplot); } } @@ -644,13 +512,12 @@ show_boxplot_variables (const struct variable **dependent_var, for (v = 0; v < n_dep_var; ++v) { struct factor_metrics *metrics = &result->metrics[v]; - boxplot_add_box (boxplot, - (struct box_whisker *) metrics->box_whisker, + boxplot_add_box (boxplot, metrics->box_whisker, var_get_name (dependent_var[v])); metrics->box_whisker = NULL; } - chart_submit (boxplot_get_chart (boxplot)); + boxplot_submit (boxplot); } } @@ -1012,15 +879,13 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, metric->n_ptiles = percentile_list.n_data; - metric->ptl = xcalloc (metric->n_ptiles, - sizeof (struct percentile *)); + metric->ptl = xcalloc (metric->n_ptiles, sizeof *metric->ptl); metric->quartiles = xcalloc (3, sizeof (*metric->quartiles)); for (i = 0 ; i < metric->n_ptiles; ++i) { - metric->ptl[i] = (struct percentile *) - percentile_create (percentile_list.data[i] / 100.0, metric->n_valid); + metric->ptl[i] = percentile_create (percentile_list.data[i] / 100.0, metric->n_valid); if ( percentile_list.data[i] == 25) metric->quartiles[0] = metric->ptl[i]; @@ -1041,18 +906,18 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, n_os ++; } - os = xcalloc (sizeof (struct order_stats *), n_os); + os = xcalloc (n_os, sizeof *os); for (i = 0 ; i < metric->n_ptiles ; ++i ) { - os[i] = (struct order_stats *) metric->ptl[i]; + os[i] = &metric->ptl[i]->parent; } - os[i] = (struct order_stats *) metric->tukey_hinges; - os[i+1] = (struct order_stats *) metric->trimmed_mean; + os[i] = &metric->tukey_hinges->parent; + os[i+1] = &metric->trimmed_mean->parent; if (cmd->a_plot[XMN_PLT_NPPLOT]) - os[i+2] = metric->np; + os[i+2] = &metric->np->parent; order_stats_accumulate (os, n_os, casereader_clone (metric->up_reader), @@ -1103,7 +968,7 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, { struct factor_metrics *metric = &result->metrics[v]; if ( metric->histogram) - histogram_add ((struct histogram *) metric->histogram, + histogram_add (metric->histogram, case_data (c, dependent_vars[v])->f, weight); } case_unref (c); @@ -1119,13 +984,13 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, struct factor_metrics *metric = &result->metrics[v]; int n_vals = caseproto_get_n_widths (casereader_get_proto ( metric->up_reader)); + struct order_stats *os; metric->box_whisker = - box_whisker_create ((struct tukey_hinges *) metric->tukey_hinges, - cmd->v_id, n_vals - 1); + box_whisker_create ( metric->tukey_hinges, cmd->v_id, n_vals - 1); - order_stats_accumulate ((struct order_stats **) &metric->box_whisker, - 1, + os = &metric->box_whisker->parent; + order_stats_accumulate ( &os, 1, casereader_clone (metric->up_reader), wv, dependent_vars[v], MV_ANY); } @@ -1265,11 +1130,9 @@ show_summary (const struct variable **dependent_var, int n_dep_var, n_cols = heading_columns + 6; - tbl = tab_create (n_cols, n_rows, 0); + tbl = tab_create (n_cols, n_rows); tab_headers (tbl, heading_columns, 0, heading_rows, 0); - tab_dim (tbl, tab_natural_dimensions, NULL, NULL); - /* Outline the box */ tab_box (tbl, TAL_2, TAL_2, @@ -1428,10 +1291,10 @@ show_summary (const struct variable **dependent_var, int n_dep_var, TAB_LEFT, n, wfmt); - tab_text (tbl, heading_columns + 1, - heading_rows + j + v * ll_count (&fctr->result_list), - TAB_RIGHT | TAT_PRINTF, - "%g%%", n * 100.0 / result->metrics[v].n); + tab_text_format (tbl, heading_columns + 1, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_RIGHT, + "%g%%", n * 100.0 / result->metrics[v].n); /* Total Missing */ tab_double (tbl, heading_columns + 2, @@ -1440,12 +1303,12 @@ show_summary (const struct variable **dependent_var, int n_dep_var, result->metrics[v].n - n, wfmt); - tab_text (tbl, heading_columns + 3, - heading_rows + j + v * ll_count (&fctr->result_list), - TAB_RIGHT | TAT_PRINTF, - "%g%%", - (result->metrics[v].n - n) * 100.0 / result->metrics[v].n - ); + tab_text_format (tbl, heading_columns + 3, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_RIGHT, + "%g%%", + (result->metrics[v].n - n) * 100.0 / result->metrics[v].n + ); /* Total Valid + Missing */ tab_double (tbl, heading_columns + 4, @@ -1454,12 +1317,12 @@ show_summary (const struct variable **dependent_var, int n_dep_var, result->metrics[v].n, wfmt); - tab_text (tbl, heading_columns + 5, - heading_rows + j + v * ll_count (&fctr->result_list), - TAB_RIGHT | TAT_PRINTF, - "%g%%", - (result->metrics[v].n) * 100.0 / result->metrics[v].n - ); + tab_text_format (tbl, heading_columns + 5, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_RIGHT, + "%g%%", + ((result->metrics[v].n) * 100.0 + / result->metrics[v].n)); ++j; } @@ -1502,11 +1365,9 @@ show_descriptives (const struct variable **dependent_var, n_cols = heading_columns + 2; - tbl = tab_create (n_cols, n_rows, 0); + tbl = tab_create (n_cols, n_rows); tab_headers (tbl, heading_columns, 0, heading_rows, 0); - tab_dim (tbl, tab_natural_dimensions, NULL, NULL); - /* Outline the box */ tab_box (tbl, TAL_2, TAL_2, @@ -1581,11 +1442,11 @@ show_descriptives (const struct variable **dependent_var, TAB_LEFT, _("Mean")); - tab_text (tbl, n_cols - 4, - heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS, - TAB_LEFT | TAT_PRINTF, - _("%g%% Confidence Interval for Mean"), - cmd.n_cinterval[0]); + tab_text_format (tbl, n_cols - 4, + heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("%g%% Confidence Interval for Mean"), + cmd.n_cinterval[0]); tab_text (tbl, n_cols - 3, heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS, @@ -1598,9 +1459,8 @@ show_descriptives (const struct variable **dependent_var, _("Upper Bound")); tab_text (tbl, n_cols - 4, - heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS, - TAB_LEFT | TAT_PRINTF, - _("5%% Trimmed Mean")); + heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, _("5% Trimmed Mean")); tab_text (tbl, n_cols - 4, heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS, @@ -1682,7 +1542,7 @@ show_descriptives (const struct variable **dependent_var, tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS, TAB_CENTER, - trimmed_mean_calculate ((struct trimmed_mean *) result->metrics[v].trimmed_mean), + trimmed_mean_calculate (result->metrics[v].trimmed_mean), NULL); @@ -1815,11 +1675,9 @@ show_extremes (const struct variable **dependent_var, n_cols = heading_columns + 2; - tbl = tab_create (n_cols, n_rows, 0); + tbl = tab_create (n_cols, n_rows); tab_headers (tbl, heading_columns, 0, heading_rows, 0); - tab_dim (tbl, tab_natural_dimensions, NULL, NULL); - /* Outline the box */ tab_box (tbl, TAL_2, TAL_2, @@ -1871,15 +1729,15 @@ show_extremes (const struct variable **dependent_var, for ( e = 1; e <= cmd.st_n; ++e ) { - tab_text (tbl, n_cols - 3, - heading_rows + row_var_start + row_result_start + e - 1, - TAB_RIGHT | TAT_PRINTF, - _("%d"), e); - - tab_text (tbl, n_cols - 3, - heading_rows + row_var_start + row_result_start + cmd.st_n + e - 1, - TAB_RIGHT | TAT_PRINTF, - _("%d"), e); + tab_text_format (tbl, n_cols - 3, + heading_rows + row_var_start + row_result_start + e - 1, + TAB_RIGHT, + "%d", e); + + tab_text_format (tbl, n_cols - 3, + heading_rows + row_var_start + row_result_start + cmd.st_n + e - 1, + TAB_RIGHT, + "%d", e); } @@ -1910,7 +1768,6 @@ show_extremes (const struct variable **dependent_var, min_ll = ll_next (min_ll); } - max_ll = ll_head (extrema_list (result->metrics[v].maxima)); for (e = 0; e < cmd.st_n;) { @@ -2020,11 +1877,9 @@ show_percentiles (const struct variable **dependent_var, n_cols = heading_columns + n_percentiles; - tbl = tab_create (n_cols, n_rows, 0); + tbl = tab_create (n_cols, n_rows); tab_headers (tbl, heading_columns, 0, heading_rows, 0); - tab_dim (tbl, tab_natural_dimensions, NULL, NULL); - /* Outline the box */ tab_box (tbl, TAL_2, TAL_2, @@ -2103,8 +1958,7 @@ show_percentiles (const struct variable **dependent_var, tab_vline (tbl, TAL_1, n_cols - n_percentiles -1, heading_rows, n_rows - 1); - tukey_hinges_calculate ((struct tukey_hinges *) result->metrics[v].tukey_hinges, - hinges); + tukey_hinges_calculate (result->metrics[v].tukey_hinges, hinges); for (j = 0; j < n_percentiles; ++j) { @@ -2143,11 +1997,10 @@ show_percentiles (const struct variable **dependent_var, for (i = 0 ; i < n_percentiles; ++i ) { - tab_text (tbl, n_cols - n_percentiles + i, 1, - TAB_CENTER | TAT_TITLE | TAT_PRINTF, - _("%g"), - subc_list_double_at (&percentile_list, i) - ); + tab_text_format (tbl, n_cols - n_percentiles + i, 1, + TAB_CENTER | TAT_TITLE, + _("%g"), + subc_list_double_at (&percentile_list, i)); }