X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fexamine.q;h=50d9525b255d16f1eae307f95d4915a9235425e6;hb=81579d9e9f994fb2908f50af41c3eb033d216e58;hp=d17aebf97efa8b839c27731aa4d2d3a73d469371;hpb=d844266ecd4aebd32f55ab22d6ca4266d4a0c4e1;p=pspp-builds.git diff --git a/src/language/stats/examine.q b/src/language/stats/examine.q index d17aebf9..50d9525b 100644 --- a/src/language/stats/examine.q +++ b/src/language/stats/examine.q @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2004, 2008, 2009 Free Software Foundation, Inc. + Copyright (C) 2004, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -17,54 +17,50 @@ #include #include -#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "minmax.h" -#include "xalloc.h" +#include "data/case.h" +#include "data/casegrouper.h" +#include "data/casereader.h" +#include "data/casewriter.h" +#include "data/dictionary.h" +#include "data/procedure.h" +#include "data/subcase.h" +#include "data/value-labels.h" +#include "data/variable.h" +#include "language/command.h" +#include "language/dictionary/split-file.h" +#include "language/lexer/lexer.h" +#include "libpspp/compiler.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "libpspp/str.h" +#include "math/box-whisker.h" +#include "math/extrema.h" +#include "math/histogram.h" +#include "math/moments.h" +#include "math/np.h" +#include "math/order-stats.h" +#include "math/percentiles.h" +#include "math/sort.h" +#include "math/trimmed-mean.h" +#include "math/tukey-hinges.h" +#include "output/chart-item.h" +#include "output/charts/boxplot.h" +#include "output/charts/np-plot.h" +#include "output/charts/plot-hist.h" +#include "output/tab.h" + +#include "gl/minmax.h" +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) #define N_(msgid) msgid /* (headers) */ -#include -#include -#include -#include /* (specification) "EXAMINE" (xmn_): @@ -104,11 +100,11 @@ struct factor_metrics struct percentile **ptl; size_t n_ptiles; - struct statistic *tukey_hinges; - struct statistic *box_whisker; - struct statistic *trimmed_mean; - struct statistic *histogram; - struct order_stats *np; + struct tukey_hinges *tukey_hinges; + struct box_whisker *box_whisker; + struct trimmed_mean *trimmed_mean; + struct histogram *histogram; + struct np *np; /* Three quartiles indexing into PTL */ struct percentile **quartiles; @@ -179,12 +175,12 @@ factor_destroy (struct xfactor *fctr) moments1_destroy (result->metrics[v].moments); extrema_destroy (result->metrics[v].minima); extrema_destroy (result->metrics[v].maxima); - statistic_destroy (result->metrics[v].trimmed_mean); - statistic_destroy (result->metrics[v].tukey_hinges); - statistic_destroy (result->metrics[v].box_whisker); - statistic_destroy (result->metrics[v].histogram); + statistic_destroy (&result->metrics[v].trimmed_mean->parent.parent); + statistic_destroy (&result->metrics[v].tukey_hinges->parent.parent); + statistic_destroy (&result->metrics[v].box_whisker->parent.parent); + statistic_destroy (&result->metrics[v].histogram->parent); for (i = 0 ; i < result->metrics[v].n_ptiles; ++i) - statistic_destroy ((struct statistic *) result->metrics[v].ptl[i]); + statistic_destroy (&result->metrics[v].ptl[i]->parent.parent); free (result->metrics[v].ptl); free (result->metrics[v].quartiles); casereader_destroy (result->metrics[v].up_reader); @@ -339,7 +335,7 @@ show_npplot (const struct variable **dependent_var, struct string label; const struct factor_result *result = ll_data (ll, struct factor_result, ll); - struct chart *npp, *dnpp; + struct chart_item *npp, *dnpp; struct casereader *reader; struct np *np; @@ -347,7 +343,7 @@ show_npplot (const struct variable **dependent_var, ds_put_format (&label, "%s ", var_get_name (dependent_var[v])); factor_to_string (fctr, result, &label); - np = (struct np *) result->metrics[v].np; + np = result->metrics[v].np; reader = casewriter_make_reader (np->writer); npp = np_plot_create (np, reader, ds_cstr (&label)); dnpp = dnp_plot_create (np, reader, ds_cstr (&label)); @@ -357,16 +353,17 @@ show_npplot (const struct variable **dependent_var, if (npp == NULL || dnpp == NULL) { msg (MW, _("Not creating NP plot because data set is empty.")); - chart_unref (npp); - chart_unref (dnpp); + chart_item_unref (npp); + chart_item_unref (dnpp); } else { - chart_submit (npp); - chart_submit (dnpp); + chart_item_submit (npp); + chart_item_submit (dnpp); } statistic_destroy (&np->parent.parent); + casereader_destroy (reader); } } } @@ -392,7 +389,7 @@ show_histogram (const struct variable **dependent_var, struct histogram *histogram; double mean, var, n; - histogram = (struct histogram *) result->metrics[v].histogram; + histogram = result->metrics[v].histogram; if (histogram == NULL) { /* Probably all values are SYSMIS. */ @@ -404,10 +401,11 @@ show_histogram (const struct variable **dependent_var, factor_to_string (fctr, result, &str); - moments1_calculate ((struct moments1 *) result->metrics[v].moments, + moments1_calculate (result->metrics[v].moments, &n, &mean, &var, NULL, NULL); - chart_submit (histogram_chart_create (histogram, ds_cstr (&str), - n, mean, sqrt (var), false)); + chart_item_submit (histogram_chart_create (histogram->gsl_hist, + ds_cstr (&str), n, mean, + sqrt (var), false)); ds_destroy (&str); } @@ -466,14 +464,12 @@ show_boxplot_groups (const struct variable **dependent_var, struct factor_metrics *metrics = &result->metrics[v]; struct string str = DS_EMPTY_INITIALIZER; factor_to_string_concise (fctr, result, &str); - boxplot_add_box (boxplot, - (struct box_whisker *) metrics->box_whisker, - ds_cstr (&str)); + boxplot_add_box (boxplot, metrics->box_whisker, ds_cstr (&str)); metrics->box_whisker = NULL; ds_destroy (&str); } - chart_submit (boxplot_get_chart (boxplot)); + boxplot_submit (boxplot); } } @@ -516,13 +512,12 @@ show_boxplot_variables (const struct variable **dependent_var, for (v = 0; v < n_dep_var; ++v) { struct factor_metrics *metrics = &result->metrics[v]; - boxplot_add_box (boxplot, - (struct box_whisker *) metrics->box_whisker, + boxplot_add_box (boxplot, metrics->box_whisker, var_get_name (dependent_var[v])); metrics->box_whisker = NULL; } - chart_submit (boxplot_get_chart (boxplot)); + boxplot_submit (boxplot); } } @@ -592,9 +587,9 @@ static int xmn_custom_percentiles (struct lexer *lexer, struct dataset *ds UNUSED, struct cmd_examine *p UNUSED, void *aux UNUSED) { - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); - lex_match (lexer, '('); + lex_match (lexer, T_LPAREN); while ( lex_is_number (lexer) ) { @@ -602,11 +597,11 @@ xmn_custom_percentiles (struct lexer *lexer, struct dataset *ds UNUSED, lex_get (lexer); - lex_match (lexer, ',') ; + lex_match (lexer, T_COMMA) ; } - lex_match (lexer, ')'); + lex_match (lexer, T_RPAREN); - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); if ( lex_match_id (lexer, "HAVERAGE")) percentile_algorithm = PC_HAVERAGE; @@ -678,9 +673,9 @@ xmn_custom_variables (struct lexer *lexer, struct dataset *ds, void *aux UNUSED) { const struct dictionary *dict = dataset_dict (ds); - lex_match (lexer, '='); + lex_match (lexer, T_EQUALS); - if ( (lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokid (lexer)) == NULL) + if ( (lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokcstr (lexer)) == NULL) && lex_token (lexer) != T_ALL) { return 2; @@ -724,7 +719,7 @@ examine_parse_independent_vars (struct lexer *lexer, ll_init (&sf->result_list); if ( (lex_token (lexer) != T_ID || - dict_lookup_var (dict, lex_tokid (lexer)) == NULL) + dict_lookup_var (dict, lex_tokcstr (lexer)) == NULL) && lex_token (lexer) != T_ALL) { free ( sf ) ; @@ -739,7 +734,7 @@ examine_parse_independent_vars (struct lexer *lexer, lex_match (lexer, T_BY); if ( (lex_token (lexer) != T_ID || - dict_lookup_var (dict, lex_tokid (lexer)) == NULL) + dict_lookup_var (dict, lex_tokcstr (lexer)) == NULL) && lex_token (lexer) != T_ALL) { free (sf); @@ -753,9 +748,9 @@ examine_parse_independent_vars (struct lexer *lexer, else ll_push_tail (&factor_list, &sf->ll); - lex_match (lexer, ','); + lex_match (lexer, T_COMMA); - if ( lex_token (lexer) == '.' || lex_token (lexer) == '/' ) + if ( lex_token (lexer) == T_ENDCMD || lex_token (lexer) == T_SLASH ) return 1; success = examine_parse_independent_vars (lexer, dict, cmd); @@ -884,15 +879,13 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, metric->n_ptiles = percentile_list.n_data; - metric->ptl = xcalloc (metric->n_ptiles, - sizeof (struct percentile *)); + metric->ptl = xcalloc (metric->n_ptiles, sizeof *metric->ptl); metric->quartiles = xcalloc (3, sizeof (*metric->quartiles)); for (i = 0 ; i < metric->n_ptiles; ++i) { - metric->ptl[i] = (struct percentile *) - percentile_create (percentile_list.data[i] / 100.0, metric->n_valid); + metric->ptl[i] = percentile_create (percentile_list.data[i] / 100.0, metric->n_valid); if ( percentile_list.data[i] == 25) metric->quartiles[0] = metric->ptl[i]; @@ -913,18 +906,18 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, n_os ++; } - os = xcalloc (sizeof (struct order_stats *), n_os); + os = xcalloc (n_os, sizeof *os); for (i = 0 ; i < metric->n_ptiles ; ++i ) { - os[i] = (struct order_stats *) metric->ptl[i]; + os[i] = &metric->ptl[i]->parent; } - os[i] = (struct order_stats *) metric->tukey_hinges; - os[i+1] = (struct order_stats *) metric->trimmed_mean; + os[i] = &metric->tukey_hinges->parent; + os[i+1] = &metric->trimmed_mean->parent; if (cmd->a_plot[XMN_PLT_NPPLOT]) - os[i+2] = metric->np; + os[i+2] = &metric->np->parent; order_stats_accumulate (os, n_os, casereader_clone (metric->up_reader), @@ -975,7 +968,7 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, { struct factor_metrics *metric = &result->metrics[v]; if ( metric->histogram) - histogram_add ((struct histogram *) metric->histogram, + histogram_add (metric->histogram, case_data (c, dependent_vars[v])->f, weight); } case_unref (c); @@ -991,13 +984,13 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, struct factor_metrics *metric = &result->metrics[v]; int n_vals = caseproto_get_n_widths (casereader_get_proto ( metric->up_reader)); + struct order_stats *os; metric->box_whisker = - box_whisker_create ((struct tukey_hinges *) metric->tukey_hinges, - cmd->v_id, n_vals - 1); + box_whisker_create ( metric->tukey_hinges, cmd->v_id, n_vals - 1); - order_stats_accumulate ((struct order_stats **) &metric->box_whisker, - 1, + os = &metric->box_whisker->parent; + order_stats_accumulate ( &os, 1, casereader_clone (metric->up_reader), wv, dependent_vars[v], MV_ANY); } @@ -1137,11 +1130,9 @@ show_summary (const struct variable **dependent_var, int n_dep_var, n_cols = heading_columns + 6; - tbl = tab_create (n_cols, n_rows, 0); + tbl = tab_create (n_cols, n_rows); tab_headers (tbl, heading_columns, 0, heading_rows, 0); - tab_dim (tbl, tab_natural_dimensions, NULL, NULL); - /* Outline the box */ tab_box (tbl, TAL_2, TAL_2, @@ -1300,10 +1291,10 @@ show_summary (const struct variable **dependent_var, int n_dep_var, TAB_LEFT, n, wfmt); - tab_text (tbl, heading_columns + 1, - heading_rows + j + v * ll_count (&fctr->result_list), - TAB_RIGHT | TAT_PRINTF, - "%g%%", n * 100.0 / result->metrics[v].n); + tab_text_format (tbl, heading_columns + 1, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_RIGHT, + "%g%%", n * 100.0 / result->metrics[v].n); /* Total Missing */ tab_double (tbl, heading_columns + 2, @@ -1312,12 +1303,12 @@ show_summary (const struct variable **dependent_var, int n_dep_var, result->metrics[v].n - n, wfmt); - tab_text (tbl, heading_columns + 3, - heading_rows + j + v * ll_count (&fctr->result_list), - TAB_RIGHT | TAT_PRINTF, - "%g%%", - (result->metrics[v].n - n) * 100.0 / result->metrics[v].n - ); + tab_text_format (tbl, heading_columns + 3, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_RIGHT, + "%g%%", + (result->metrics[v].n - n) * 100.0 / result->metrics[v].n + ); /* Total Valid + Missing */ tab_double (tbl, heading_columns + 4, @@ -1326,12 +1317,12 @@ show_summary (const struct variable **dependent_var, int n_dep_var, result->metrics[v].n, wfmt); - tab_text (tbl, heading_columns + 5, - heading_rows + j + v * ll_count (&fctr->result_list), - TAB_RIGHT | TAT_PRINTF, - "%g%%", - (result->metrics[v].n) * 100.0 / result->metrics[v].n - ); + tab_text_format (tbl, heading_columns + 5, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_RIGHT, + "%g%%", + ((result->metrics[v].n) * 100.0 + / result->metrics[v].n)); ++j; } @@ -1374,11 +1365,9 @@ show_descriptives (const struct variable **dependent_var, n_cols = heading_columns + 2; - tbl = tab_create (n_cols, n_rows, 0); + tbl = tab_create (n_cols, n_rows); tab_headers (tbl, heading_columns, 0, heading_rows, 0); - tab_dim (tbl, tab_natural_dimensions, NULL, NULL); - /* Outline the box */ tab_box (tbl, TAL_2, TAL_2, @@ -1453,11 +1442,11 @@ show_descriptives (const struct variable **dependent_var, TAB_LEFT, _("Mean")); - tab_text (tbl, n_cols - 4, - heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS, - TAB_LEFT | TAT_PRINTF, - _("%g%% Confidence Interval for Mean"), - cmd.n_cinterval[0]); + tab_text_format (tbl, n_cols - 4, + heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("%g%% Confidence Interval for Mean"), + cmd.n_cinterval[0]); tab_text (tbl, n_cols - 3, heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS, @@ -1470,9 +1459,8 @@ show_descriptives (const struct variable **dependent_var, _("Upper Bound")); tab_text (tbl, n_cols - 4, - heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS, - TAB_LEFT | TAT_PRINTF, - _("5%% Trimmed Mean")); + heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, _("5% Trimmed Mean")); tab_text (tbl, n_cols - 4, heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS, @@ -1554,7 +1542,7 @@ show_descriptives (const struct variable **dependent_var, tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS, TAB_CENTER, - trimmed_mean_calculate ((struct trimmed_mean *) result->metrics[v].trimmed_mean), + trimmed_mean_calculate (result->metrics[v].trimmed_mean), NULL); @@ -1687,11 +1675,9 @@ show_extremes (const struct variable **dependent_var, n_cols = heading_columns + 2; - tbl = tab_create (n_cols, n_rows, 0); + tbl = tab_create (n_cols, n_rows); tab_headers (tbl, heading_columns, 0, heading_rows, 0); - tab_dim (tbl, tab_natural_dimensions, NULL, NULL); - /* Outline the box */ tab_box (tbl, TAL_2, TAL_2, @@ -1743,15 +1729,15 @@ show_extremes (const struct variable **dependent_var, for ( e = 1; e <= cmd.st_n; ++e ) { - tab_text (tbl, n_cols - 3, - heading_rows + row_var_start + row_result_start + e - 1, - TAB_RIGHT | TAT_PRINTF, - _("%d"), e); - - tab_text (tbl, n_cols - 3, - heading_rows + row_var_start + row_result_start + cmd.st_n + e - 1, - TAB_RIGHT | TAT_PRINTF, - _("%d"), e); + tab_text_format (tbl, n_cols - 3, + heading_rows + row_var_start + row_result_start + e - 1, + TAB_RIGHT, + "%d", e); + + tab_text_format (tbl, n_cols - 3, + heading_rows + row_var_start + row_result_start + cmd.st_n + e - 1, + TAB_RIGHT, + "%d", e); } @@ -1782,7 +1768,6 @@ show_extremes (const struct variable **dependent_var, min_ll = ll_next (min_ll); } - max_ll = ll_head (extrema_list (result->metrics[v].maxima)); for (e = 0; e < cmd.st_n;) { @@ -1892,11 +1877,9 @@ show_percentiles (const struct variable **dependent_var, n_cols = heading_columns + n_percentiles; - tbl = tab_create (n_cols, n_rows, 0); + tbl = tab_create (n_cols, n_rows); tab_headers (tbl, heading_columns, 0, heading_rows, 0); - tab_dim (tbl, tab_natural_dimensions, NULL, NULL); - /* Outline the box */ tab_box (tbl, TAL_2, TAL_2, @@ -1975,8 +1958,7 @@ show_percentiles (const struct variable **dependent_var, tab_vline (tbl, TAL_1, n_cols - n_percentiles -1, heading_rows, n_rows - 1); - tukey_hinges_calculate ((struct tukey_hinges *) result->metrics[v].tukey_hinges, - hinges); + tukey_hinges_calculate (result->metrics[v].tukey_hinges, hinges); for (j = 0; j < n_percentiles; ++j) { @@ -2015,11 +1997,10 @@ show_percentiles (const struct variable **dependent_var, for (i = 0 ; i < n_percentiles; ++i ) { - tab_text (tbl, n_cols - n_percentiles + i, 1, - TAB_CENTER | TAT_TITLE | TAT_PRINTF, - _("%g"), - subc_list_double_at (&percentile_list, i) - ); + tab_text_format (tbl, n_cols - n_percentiles + i, 1, + TAB_CENTER | TAT_TITLE, + _("%g"), + subc_list_double_at (&percentile_list, i)); }