X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fexamine.q;h=fe762d3306afc8225af12f6d3b034033038cbfdb;hb=4a75ef1461b812818838247a95604ed0ab9696bf;hp=9a55c9f3e5915787f5d28a4e45542ddfe9205870;hpb=338fb2a2e84df6427a2fdee6769421f57d5666d8;p=pspp-builds.git diff --git a/src/language/stats/examine.q b/src/language/stats/examine.q index 9a55c9f3..fe762d33 100644 --- a/src/language/stats/examine.q +++ b/src/language/stats/examine.q @@ -1,22 +1,18 @@ -/* PSPP - EXAMINE data for normality . -*-c-*- +/* PSPP - a program for statistical analysis. + Copyright (C) 2004 Free Software Foundation, Inc. -Copyright (C) 2004 Free Software Foundation, Inc. -Author: John Darrington 2004, 2006 + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. -This program is free software; you can redistribute it and/or -modify it under the terms of the GNU General Public License as -published by the Free Software Foundation; either version 2 of the -License, or (at your option) any later version. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. -This program is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA -02110-1301, USA. 
*/ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include @@ -27,7 +23,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA #include #include -#include +#include +#include #include #include #include @@ -35,10 +32,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA #include #include #include -#include #include #include -#include #include #include #include @@ -51,6 +46,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA #include #include "minmax.h" +#include "xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -85,7 +81,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA static struct cmd_examine cmd; -static struct variable **dependent_vars; +static const struct variable **dependent_vars; static size_t n_dependent_vars; @@ -117,19 +113,19 @@ static int examine_parse_independent_vars (struct lexer *lexer, const struct dic /* Output functions */ -static void show_summary (struct variable **dependent_var, int n_dep_var, +static void show_summary (const struct variable **dependent_var, int n_dep_var, const struct factor *f); -static void show_extremes (struct variable **dependent_var, +static void show_extremes (const struct variable **dependent_var, int n_dep_var, const struct factor *factor, int n_extremities); -static void show_descriptives (struct variable **dependent_var, +static void show_descriptives (const struct variable **dependent_var, int n_dep_var, struct factor *factor); -static void show_percentiles (struct variable **dependent_var, +static void show_percentiles (const struct variable **dependent_var, int n_dep_var, struct factor *factor); @@ -153,8 +149,8 @@ void box_plot_variables (const struct factor *fctr, /* Per Split function */ -static bool run_examine (const struct ccase *, - const struct casefile *cf, void *cmd_, const struct dataset *); +static void run_examine (struct cmd_examine *, struct casereader *, + 
struct dataset *); static void output_examine (void); @@ -165,23 +161,24 @@ void factor_calc (const struct ccase *c, int case_no, /* Represent a factor as a string, so it can be printed in a human readable fashion */ -const char * factor_to_string (const struct factor *fctr, +static void factor_to_string (const struct factor *fctr, const struct factor_statistics *fs, - const struct variable *var); - + const struct variable *var, + struct string *str + ); /* Represent a factor as a string, so it can be printed in a human readable fashion, but sacrificing some readablility for the sake of brevity */ -const char *factor_to_string_concise (const struct factor *fctr, - struct factor_statistics *fs); +static void factor_to_string_concise (const struct factor *fctr, + const struct factor_statistics *fs, + struct string *); -/* Function to use for testing for missing values */ -static var_is_missing_func *value_is_missing; - +/* Categories of missing values to exclude. */ +static enum mv_class exclude_values; /* PERCENTILES */ @@ -195,6 +192,8 @@ static short sbc_percentile; int cmd_examine (struct lexer *lexer, struct dataset *ds) { + struct casegrouper *grouper; + struct casereader *group; bool ok; subc_list_double_create (&percentile_list); @@ -207,10 +206,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } /* If /MISSING=INCLUDE is set, then user missing values are ignored */ - if (cmd.incl == XMN_INCLUDE ) - value_is_missing = var_is_value_system_missing; - else - value_is_missing = var_is_value_missing; + exclude_values = cmd.incl == XMN_INCLUDE ? 
MV_SYSTEM : MV_ANY; if ( cmd.st_n == SYSMIS ) cmd.st_n = 5; @@ -227,7 +223,11 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) subc_list_double_push (&percentile_list, 75); } - ok = multipass_procedure_with_splits (ds, run_examine, &cmd); + grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); + while (casegrouper_get_next_group (grouper, &group)) + run_examine (&cmd, group, ds); + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; if ( totals ) { @@ -372,10 +372,12 @@ output_examine (void) for ( fs = fctr->fs ; *fs ; ++fs ) { - const char *s = factor_to_string (fctr, *fs, dependent_vars[v]); + struct string str; + ds_init_empty (&str); + factor_to_string (fctr, *fs, dependent_vars[v], &str); if ( cmd.a_plot[XMN_PLT_NPPLOT] ) - np_plot (& (*fs)->m[v], s); + np_plot (& (*fs)->m[v], ds_cstr (&str)); if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) { @@ -386,9 +388,11 @@ output_examine (void) normal.stddev = (*fs)->m[v].stddev; histogram_plot ((*fs)->m[v].histogram, - s, &normal, 0); + ds_cstr (&str) , &normal, 0); } + ds_destroy (&str); + } /* for ( fs .... */ } /* for ( v = 0 ..... 
*/ @@ -530,7 +534,7 @@ xmn_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_examin return 2; } - if (!parse_variables (lexer, dict, &dependent_vars, &n_dependent_vars, + if (!parse_variables_const (lexer, dict, &dependent_vars, &n_dependent_vars, PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) ) { free (dependent_vars); @@ -632,9 +636,6 @@ void populate_summary (struct tab_table *t, int col, int row, -static bool bad_weight_warn = true; - - /* Perform calculations for the sub factors */ void factor_calc (const struct ccase *c, int case_no, double weight, @@ -696,7 +697,7 @@ factor_calc (const struct ccase *c, int case_no, double weight, var_get_width (var) ); - if ( value_is_missing (var, val) || case_missing ) + if (case_missing || var_is_value_missing (var, val, exclude_values)) { free (val); continue; @@ -711,23 +712,31 @@ factor_calc (const struct ccase *c, int case_no, double weight, } } -static bool -run_examine (const struct ccase *first, const struct casefile *cf, - void *cmd_, const struct dataset *ds) +static void +run_examine (struct cmd_examine *cmd, struct casereader *input, + struct dataset *ds) { struct dictionary *dict = dataset_dict (ds); - struct casereader *r; + casenumber case_no; struct ccase c; int v; - - const struct cmd_examine *cmd = (struct cmd_examine *) cmd_; + bool ok; struct factor *fctr; - output_split_file_values (ds, first); + if (!casereader_peek (input, 0, &c)) + { + casereader_destroy (input); + return; + } + output_split_file_values (ds, &c); + case_destroy (&c); + + input = casereader_create_filter_weight (input, dict, NULL, NULL); + input = casereader_create_counter (input, &case_no, 0); /* Make sure we haven't got rubbish left over from a - previous split */ + previous split. 
*/ fctr = factors; while (fctr) { @@ -743,15 +752,10 @@ run_examine (const struct ccase *first, const struct casefile *cf, for ( v = 0 ; v < n_dependent_vars ; ++v ) metrics_precalc (&totals[v]); - for (r = casefile_get_reader (cf, NULL); - casereader_read (r, &c) ; - case_destroy (&c) ) + for (; casereader_read (input, &c); case_destroy (&c)) { - int case_missing=0; - const int case_no = casereader_cnum (r); - - const double weight = - dict_get_case_weight (dict, &c, &bad_weight_warn); + int case_missing = 0; + const double weight = dict_get_case_weight (dict, &c, NULL); if ( cmd->miss == XMN_LISTWISE ) { @@ -763,7 +767,7 @@ run_examine (const struct ccase *first, const struct casefile *cf, var_get_width (var) ); - if ( value_is_missing (var, val)) + if ( var_is_value_missing (var, val, exclude_values)) case_missing = 1; free (val); @@ -778,7 +782,8 @@ run_examine (const struct ccase *first, const struct casefile *cf, var_get_width (var) ); - if ( value_is_missing (var, val) || case_missing ) + if ( var_is_value_missing (var, val, exclude_values) + || case_missing ) { free (val) ; continue ; @@ -791,6 +796,7 @@ run_examine (const struct ccase *first, const struct casefile *cf, factor_calc (&c, case_no, weight, case_missing); } + ok = casereader_destroy (input); for ( v = 0 ; v < n_dependent_vars ; ++v) { @@ -886,7 +892,8 @@ run_examine (const struct ccase *first, const struct casefile *cf, fctr = fctr->next; } - output_examine (); + if (ok) + output_examine (); if ( totals ) @@ -897,13 +904,11 @@ run_examine (const struct ccase *first, const struct casefile *cf, metrics_destroy (&totals[i]); } } - - return true; } static void -show_summary (struct variable **dependent_var, int n_dep_var, +show_summary (const struct variable **dependent_var, int n_dep_var, const struct factor *fctr) { static const char *subtitle[]= @@ -1054,15 +1059,21 @@ show_summary (struct variable **dependent_var, int n_dep_var, 0 != compare_values (prev, (*fs)->id[0], var_get_width 
(fctr->indep_var[0]))) { + struct string vstr; + ds_init_empty (&vstr); + var_append_value_name (fctr->indep_var[0], + (*fs)->id[0], &vstr); + tab_text (tbl, 1, (i * n_factors ) + count + heading_rows, TAB_LEFT | TAT_TITLE, - var_get_value_name (fctr->indep_var[0], - (*fs)->id[0]) + ds_cstr (&vstr) ); + ds_destroy (&vstr); + if (fctr->indep_var[1] && count > 0 ) tab_hline (tbl, TAL_1, 1, n_cols - 1, (i * n_factors ) + count + heading_rows); @@ -1071,15 +1082,21 @@ show_summary (struct variable **dependent_var, int n_dep_var, prev = (*fs)->id[0]; - if ( fctr->indep_var[1]) + { + struct string vstr; + ds_init_empty (&vstr); + var_append_value_name (fctr->indep_var[1], + (*fs)->id[1], &vstr); tab_text (tbl, 2, (i * n_factors ) + count + heading_rows, TAB_LEFT | TAT_TITLE, - var_get_value_name (fctr->indep_var[1], (*fs)->id[1]) + ds_cstr (&vstr) ); + ds_destroy (&vstr); + } populate_summary (tbl, heading_columns, (i * n_factors) + count @@ -1128,7 +1145,7 @@ populate_summary (struct tab_table *t, int col, int row, static void -show_extremes (struct variable **dependent_var, int n_dep_var, +show_extremes (const struct variable **dependent_var, int n_dep_var, const struct factor *fctr, int n_extremities) { int i; @@ -1227,6 +1244,10 @@ show_extremes (struct variable **dependent_var, int n_dep_var, if ( !prev || 0 != compare_values (prev, (*fs)->id[0], var_get_width (fctr->indep_var[0]))) { + struct string vstr; + ds_init_empty (&vstr); + var_append_value_name (fctr->indep_var[0], + (*fs)->id[0], &vstr); if ( count > 0 ) tab_hline (tbl, TAL_1, 1, n_cols - 1, row); @@ -1234,9 +1255,10 @@ show_extremes (struct variable **dependent_var, int n_dep_var, tab_text (tbl, 1, row, TAB_LEFT | TAT_TITLE, - var_get_value_name (fctr->indep_var[0], - (*fs)->id[0]) + ds_cstr (&vstr) ); + + ds_destroy (&vstr); } prev = (*fs)->id[0]; @@ -1245,11 +1267,19 @@ show_extremes (struct variable **dependent_var, int n_dep_var, tab_hline (tbl, TAL_1, 2, n_cols - 1, row); if ( fctr->indep_var[1]) + 
{ + struct string vstr; + ds_init_empty (&vstr); + var_append_value_name (fctr->indep_var[1], (*fs)->id[1], &vstr); + tab_text (tbl, 2, row, TAB_LEFT | TAT_TITLE, - var_get_value_name (fctr->indep_var[1], (*fs)->id[1]) + ds_cstr (&vstr) ); + ds_destroy (&vstr); + } + populate_extremes (tbl, heading_columns - 2, row, n_extremities, & (*fs)->m[i]); @@ -1365,7 +1395,7 @@ populate_extremes (struct tab_table *t, /* Show the descriptives table */ void -show_descriptives (struct variable **dependent_var, +show_descriptives (const struct variable **dependent_var, int n_dep_var, struct factor *fctr) { @@ -1465,6 +1495,10 @@ show_descriptives (struct variable **dependent_var, if ( !prev || 0 != compare_values (prev, (*fs)->id[0], var_get_width (fctr->indep_var[0]))) { + struct string vstr; + ds_init_empty (&vstr); + var_append_value_name (fctr->indep_var[0], + (*fs)->id[0], &vstr); if ( count > 0 ) tab_hline (tbl, TAL_1, 1, n_cols - 1, row); @@ -1472,9 +1506,10 @@ show_descriptives (struct variable **dependent_var, tab_text (tbl, 1, row, TAB_LEFT | TAT_TITLE, - var_get_value_name (fctr->indep_var[0], - (*fs)->id[0]) + ds_cstr (&vstr) ); + + ds_destroy (&vstr); } prev = (*fs)->id[0]; @@ -1483,11 +1518,19 @@ show_descriptives (struct variable **dependent_var, tab_hline (tbl, TAL_1, 2, n_cols - 1, row); if ( fctr->indep_var[1]) + { + struct string vstr; + ds_init_empty (&vstr); + var_append_value_name (fctr->indep_var[1], (*fs)->id[1], &vstr); + tab_text (tbl, 2, row, TAB_LEFT | TAT_TITLE, - var_get_value_name (fctr->indep_var[1], (*fs)->id[1]) + ds_cstr (&vstr) ); + ds_destroy (&vstr); + } + populate_descriptives (tbl, heading_columns - 2, row, & (*fs)->m[i]); @@ -1511,18 +1554,14 @@ show_descriptives (struct variable **dependent_var, } - - /* Fill in the descriptives data */ void populate_descriptives (struct tab_table *tbl, int col, int row, const struct metrics *m) { - - const double t = gsl_cdf_tdist_Qinv (1 - cmd.n_cinterval[0]/100.0/2.0, \ + const double t = 
gsl_cdf_tdist_Qinv ((1 - cmd.n_cinterval[0] / 100.0)/2.0, m->n -1); - tab_text (tbl, col, row, TAB_LEFT | TAT_TITLE, @@ -1751,12 +1790,14 @@ box_plot_variables (const struct factor *fctr, for ( fs = fctr->fs ; *fs ; ++fs ) { + struct string str; double y_min = DBL_MAX; double y_max = -DBL_MAX; struct chart *ch = chart_create (); - const char *s = factor_to_string (fctr, *fs, 0 ); + ds_init_empty (&str); + factor_to_string (fctr, *fs, 0, &str ); - chart_write_title (ch, s); + chart_write_title (ch, ds_cstr (&str)); for ( i = 0 ; i < n_vars ; ++i ) { @@ -1784,7 +1825,7 @@ box_plot_variables (const struct factor *fctr, } chart_submit (ch); - + ds_destroy (&str); } } @@ -1823,19 +1864,21 @@ box_plot_group (const struct factor *fctr, for ( fs = fctr->fs ; *fs ; ++fs ) { - - const char *s = factor_to_string_concise (fctr, *fs); - + struct string str; const double box_width = (ch->data_right - ch->data_left) / (n_factors * 2.0 ) ; const double box_centre = ( f++ * 2 + 1) * box_width + ch->data_left; + ds_init_empty (&str); + factor_to_string_concise (fctr, *fs, &str); + boxplot_draw_boxplot (ch, box_centre, box_width, & (*fs)->m[i], - s); + ds_cstr (&str)); + ds_destroy (&str); } } else if ( ch ) @@ -1951,7 +1994,7 @@ np_plot (const struct metrics *m, const char *factorname) /* Show the percentiles */ void -show_percentiles (struct variable **dependent_var, +show_percentiles (const struct variable **dependent_var, int n_dep_var, struct factor *fctr) { @@ -2092,6 +2135,11 @@ show_percentiles (struct variable **dependent_var, if ( !prev || 0 != compare_values (prev, (*fs)->id[0], var_get_width (fctr->indep_var[0]))) { + struct string vstr; + ds_init_empty (&vstr); + var_append_value_name (fctr->indep_var[0], + (*fs)->id[0], &vstr); + if ( count > 0 ) tab_hline (tbl, TAL_1, 1, n_cols - 1, row); @@ -2099,11 +2147,10 @@ show_percentiles (struct variable **dependent_var, tab_text (tbl, 1, row, TAB_LEFT | TAT_TITLE, - var_get_value_name (fctr->indep_var[0], - (*fs)->id[0]) + 
ds_cstr (&vstr) ); - + ds_destroy (&vstr); } prev = (*fs)->id[0]; @@ -2112,11 +2159,19 @@ show_percentiles (struct variable **dependent_var, tab_hline (tbl, TAL_1, 2, n_cols - 1, row); if ( fctr->indep_var[1]) + { + struct string vstr; + ds_init_empty (&vstr); + var_append_value_name (fctr->indep_var[1], (*fs)->id[1], &vstr); + tab_text (tbl, 2, row, TAB_LEFT | TAT_TITLE, - var_get_value_name (fctr->indep_var[1], (*fs)->id[1]) + ds_cstr (&vstr) ); + ds_destroy (&vstr); + } + populate_percentiles (tbl, n_heading_columns - 1, row, & (*fs)->m[i]); @@ -2197,67 +2252,58 @@ populate_percentiles (struct tab_table *tbl, int col, int row, } - - -const char * +static void factor_to_string (const struct factor *fctr, const struct factor_statistics *fs, - const struct variable *var) + const struct variable *var, + struct string *str + ) { - - static char buf1[100]; - char buf2[100]; - - strcpy (buf1,""); - if (var) - sprintf (buf1, "%s (",var_to_string (var) ); + ds_put_format (str, "%s (",var_to_string (var) ); - snprintf (buf2, 100, "%s = %s", - var_to_string (fctr->indep_var[0]), - var_get_value_name (fctr->indep_var[0], fs->id[0])); + ds_put_format (str, "%s = ", + var_to_string (fctr->indep_var[0])); - strcat (buf1, buf2); + var_append_value_name (fctr->indep_var[0], fs->id[0], str); if ( fctr->indep_var[1] ) { - sprintf (buf2, "; %s = %s)", - var_to_string (fctr->indep_var[1]), - var_get_value_name (fctr->indep_var[1], fs->id[1])); - strcat (buf1, buf2); + ds_put_format (str, "; %s = )", + var_to_string (fctr->indep_var[1])); + + var_append_value_name (fctr->indep_var[1], fs->id[1], str); } else { if ( var ) - strcat (buf1, ")"); + ds_put_cstr (str, ")"); } - - return buf1; } - -const char * +static void factor_to_string_concise (const struct factor *fctr, - struct factor_statistics *fs) + const struct factor_statistics *fs, + struct string *str + ) { - - static char buf[100]; - - char buf2[100]; - - snprintf (buf, 100, "%s", - var_get_value_name (fctr->indep_var[0], 
fs->id[0])); + var_append_value_name (fctr->indep_var[0], fs->id[0], str); if ( fctr->indep_var[1] ) { - sprintf (buf2, ",%s)", var_get_value_name (fctr->indep_var[1], - fs->id[1]) ); - strcat (buf, buf2); - } + ds_put_cstr (str, ","); + var_append_value_name (fctr->indep_var[1],fs->id[1], str); - return buf; + ds_put_cstr (str, ")"); + } } + +/* + Local Variables: + mode: c + End: +*/