X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fexamine.c;h=46863cad9724f3b7899ab637ec8f3100a06df548;hb=912833f9331784d692ade66c76bffb5497b890e0;hp=37cf0c0e3f6e4464cebd08cefd5a70c220cd72eb;hpb=d31b04850f78d3732d318051e05529f361f0e9a5;p=pspp diff --git a/src/language/stats/examine.c b/src/language/stats/examine.c index 37cf0c0e3f..46863cad97 100644 --- a/src/language/stats/examine.c +++ b/src/language/stats/examine.c @@ -1,6 +1,6 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2012 Free Software Foundation, Inc. + Copyright (C) 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -66,6 +66,14 @@ #define _(msgid) gettext (msgid) #define N_(msgid) msgid +static void +append_value_name (const struct variable *var, const union value *val, struct string *str) +{ + var_append_value_name (var, val, str); + if ( var_is_value_missing (var, val, MV_ANY)) + ds_put_cstr (str, _(" (missing)")); +} + enum bp_mode { BP_GROUPS, @@ -96,7 +104,8 @@ struct examine size_t n_iacts; struct interaction **iacts; - enum mv_class exclude; + enum mv_class dep_excl; + enum mv_class fctr_excl; const struct dictionary *dict; @@ -120,9 +129,9 @@ struct examine size_t n_percentiles; bool npplot; - bool histogram; + bool histogramplot; bool boxplot; - bool spreadlevel; + bool spreadlevelplot; int sl_power; enum bp_mode boxplot_mode; @@ -295,13 +304,19 @@ show_boxplot_grouped (const struct examine *cmd, int iact_idx) ds_init_empty (&label); for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) { + struct string l; const struct variable *ivar = iact->vars[ivar_idx]; const union value *val = case_data (c, ivar); - - ds_put_cstr (&label, var_to_string (ivar)); - ds_put_cstr (&label, " = "); - var_append_value_name (ivar, val, &label); - ds_put_cstr (&label, "; "); + ds_init_empty (&l); + + append_value_name (ivar, val, &l); + ds_ltrim (&l, ss_cstr (" ")); + + ds_put_substring (&label, l.ss); + if (ivar_idx < iact->n_vars - 1) + ds_put_cstr (&label, "; "); + + ds_destroy (&l); } boxplot_add_box (boxplot, es[v].box_whisker, ds_cstr (&label)); @@ -359,7 +374,7 @@ show_boxplot_variabled (const struct examine *cmd, int iact_idx) ds_put_cstr (&label, var_to_string (ivar)); ds_put_cstr (&label, " = "); - var_append_value_name (ivar, val, &label); + append_value_name (ivar, val, &label); ds_put_cstr (&label, "; "); } @@ -426,7 +441,7 @@ show_npplot (const struct examine *cmd, int iact_idx) ds_put_cstr (&label, var_to_string (ivar)); ds_put_cstr (&label, " = "); - var_append_value_name (ivar, val, &label); + append_value_name (ivar, val, &label); ds_put_cstr (&label, "; "); } @@ -551,7 +566,7 @@ show_histogram (const struct examine *cmd, int iact_idx) ds_put_cstr (&label, var_to_string (ivar)); ds_put_cstr (&label, " = "); - var_append_value_name (ivar, val, &label); + append_value_name (ivar, val, &label); ds_put_cstr (&label, "; "); } @@ -590,6 +605,7 @@ percentiles_report (const struct examine *cmd, int iact_idx) const int nc = heading_columns + cmd->n_percentiles; t = tab_create (nc, nr); + tab_title (t, _("Percentiles")); tab_headers (t, heading_columns, 0, heading_rows, 0); @@ -676,7 +692,7 @@ percentiles_report (const struct examine *cmd, int iact_idx) { struct string str; ds_init_empty (&str); - var_append_value_name (ivar, val, &str); + append_value_name (ivar, val, &str); tab_text (t, 1 + ivar_idx, @@ -709,7 +725,7 @@ percentiles_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat, 0, percentile_calculate (es->percentiles[p], cmd->pc_alg), - 0); + NULL, RC_OTHER); if (cmd->ptiles[p] == 25.0) { @@ -717,7 +733,7 @@ percentiles_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + 1, 0, hinges[0], - 0); + NULL, RC_OTHER); } else if (cmd->ptiles[p] == 50.0) { @@ -725,7 +741,7 @@ percentiles_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + 1, 0, hinges[1], - 0); + NULL, RC_OTHER); } else if (cmd->ptiles[p] == 75.0) { @@ -733,7 +749,7 @@ percentiles_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + 1, 0, hinges[2], - 0); + NULL, RC_OTHER); } } @@ -769,6 +785,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) const int nc = 2 + heading_columns; t = tab_create (nc, nr); + tab_title (t, _("Descriptives")); tab_headers (t, heading_columns, 0, heading_rows, 0); @@ -844,7 +861,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) { struct string str; ds_init_empty (&str); - var_append_value_name (ivar, val, &str); + append_value_name (ivar, val, &str); tab_text (t, 1 + ivar_idx, @@ -874,12 +891,12 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat, - 0, m1, 0); + 0, m1, NULL, RC_OTHER); tab_double (t, 1 + iact->n_vars + 3, heading_rows + v * rows_per_var + i * rows_per_cat, - 0, calc_semean (m2, m0), 0); + 0, calc_semean (m2, m0), NULL, RC_OTHER); tab_text_format (t, 1 + iact->n_vars, @@ -899,7 +916,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 1, - 0, m1 - tval * calc_semean (m2, m0), 0); + 0, m1 - tval * calc_semean (m2, m0), NULL, RC_OTHER); tab_text (t, @@ -912,7 +929,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 2, - 0, m1 + tval * calc_semean (m2, m0), 0); + 0, m1 + tval * calc_semean (m2, m0), NULL, RC_OTHER); tab_text (t, @@ -927,7 +944,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + 3, 0, trimmed_mean_calculate (es->trimmed_mean), - 0); + NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -941,7 +958,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + 4, 0, percentile_calculate (es->quartiles[1], cmd->pc_alg), - 0); + NULL, RC_OTHER); tab_text (t, @@ -954,7 +971,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 5, - 0, m2, 0); + 0, m2, NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -966,7 +983,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 6, - 0, sqrt (m2), 0); + 0, sqrt (m2), NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -980,7 +997,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + 7, 0, es->minima[0].val, - 0); + NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -994,7 +1011,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + 8, 0, es->maxima[0].val, - 0); + NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -1008,7 +1025,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + 9, 0, es->maxima[0].val - es->minima[0].val, - 0); + NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -1024,7 +1041,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) 0, percentile_calculate (es->quartiles[2], cmd->pc_alg) - percentile_calculate (es->quartiles[0], cmd->pc_alg), - 0); + NULL, RC_OTHER); @@ -1039,12 +1056,12 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 11, - 0, m3, 0); + 0, m3, NULL, RC_OTHER); tab_double (t, 1 + iact->n_vars + 3, heading_rows + v * rows_per_var + i * rows_per_cat + 11, - 0, calc_seskew (m0), 0); + 0, calc_seskew (m0), NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -1056,12 +1073,12 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 12, - 0, m4, 0); + 0, m4, NULL, RC_OTHER); tab_double (t, 1 + iact->n_vars + 3, heading_rows + v * rows_per_var + i * rows_per_cat + 12, - 0, calc_sekurt (m0), 0); + 0, calc_sekurt (m0), NULL, RC_OTHER); } free (prev_val); @@ -1088,6 +1105,7 @@ extremes_report (const struct examine *cmd, int iact_idx) const int nc = 2 + heading_columns; t = tab_create (nc, nr); + tab_title (t, _("Extreme Values")); tab_headers (t, heading_columns, 0, heading_rows, 0); @@ -1160,7 +1178,7 @@ extremes_report (const struct examine *cmd, int iact_idx) { struct string str; ds_init_empty (&str); - var_append_value_name (ivar, val, &str); + append_value_name (ivar, val, &str); tab_text (t, 1 + ivar_idx, @@ -1204,7 +1222,7 @@ extremes_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + e, TAB_RIGHT, e + 1, - &F_8_0); + NULL, RC_INTEGER); /* The casenumber */ if (cmd->id_var) @@ -1221,14 +1239,14 @@ extremes_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + e, TAB_RIGHT, es->maxima[e].identity.f, - &F_8_0); + NULL, RC_INTEGER); tab_double (t, heading_columns + 1, heading_rows + v * rows_per_var + i * rows_per_cat + e, 0, es->maxima[e].val, - var_get_print_format (cmd->dep_vars[v])); + var_get_print_format (cmd->dep_vars[v]), RC_OTHER); tab_double (t, @@ -1236,7 +1254,7 @@ extremes_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, TAB_RIGHT, e + 1, - &F_8_0); + NULL, RC_INTEGER); /* The casenumber */ if (cmd->id_var) @@ -1253,14 +1271,14 @@ extremes_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, TAB_RIGHT, es->minima[e].identity.f, - &F_8_0); + NULL, RC_INTEGER); tab_double (t, heading_columns + 1, heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, 0, es->minima[e].val, - var_get_print_format (cmd->dep_vars[v])); + var_get_print_format (cmd->dep_vars[v]), RC_OTHER); } } free (prev_val); @@ -1287,6 +1305,7 @@ summary_report (const struct examine *cmd, int iact_idx) const int nc = 6 + heading_columns; t = tab_create (nc, nr); + tab_set_format (t, RC_WEIGHT, wfmt); tab_title (t, _("Case Processing Summary")); tab_headers (t, heading_columns, 0, heading_rows, 0); @@ -1378,7 +1397,7 @@ summary_report (const struct examine *cmd, int iact_idx) { struct string str; ds_init_empty (&str); - var_append_value_name (ivar, val, &str); + append_value_name (ivar, val, &str); tab_text (t, 1 + ivar_idx, heading_rows + n_cats * v + i, @@ -1401,7 +1420,7 @@ summary_report (const struct examine *cmd, int iact_idx) heading_rows + n_cats * v + i, 0, es[v].non_missing, - wfmt); + NULL, RC_WEIGHT); tab_text_format (t, @@ -1418,7 +1437,7 @@ summary_report (const struct examine *cmd, int iact_idx) heading_rows + n_cats * v + i, 0, es[v].missing, - wfmt); + NULL, RC_WEIGHT); tab_text_format (t, heading_columns + 3, @@ -1432,7 +1451,7 @@ summary_report (const struct examine *cmd, int iact_idx) heading_rows + n_cats * v + i, 0, total, - wfmt); + NULL, RC_WEIGHT); /* This can only be 100% can't it? */ tab_text_format (t, @@ -1452,25 +1471,6 @@ summary_report (const struct examine *cmd, int iact_idx) tab_submit (t); } - -/* Match a variable. - If the match succeeds, the variable will be placed in VAR. - Returns true if successful */ -static bool -lex_match_variable (struct lexer *lexer, - const struct dictionary *dict, const struct variable **var) -{ - if (lex_token (lexer) != T_ID) - - return false; - - *var = parse_variable_const (lexer, dict); - - if ( *var == NULL) - return false; - return true; -} - /* Attempt to parse an interaction from LEXER */ static struct interaction * parse_interaction (struct lexer *lexer, struct examine *ex) @@ -1531,14 +1531,14 @@ update_n (const void *aux1, void *aux2 UNUSED, void *user_data, int v; const struct examine *examine = aux1; struct exploratory_stats *es = user_data; - + for (v = 0; v < examine->n_dep_vars; v++) { struct ccase *outcase ; const struct variable *var = examine->dep_vars[v]; const double x = case_data (c, var)->f; - if (var_is_value_missing (var, case_data (c, var), examine->exclude)) + if (var_is_value_missing (var, case_data (c, var), examine->dep_excl)) { es[v].missing += weight; continue; @@ -1584,28 +1584,26 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) { int i; casenumber imin = 0; - double imax = es[v].cc; + casenumber imax; struct casereader *reader; struct ccase *c; - casenumber total_cases; - if (examine->histogram) + if (examine->histogramplot) { /* Sturges Rule */ double bin_width = fabs (es[v].minimum - es[v].maximum) / (1 + log2 (es[v].cc)) ; - bin_width = chart_rounded_tick (bin_width); - es[v].histogram = histogram_create (bin_width, es[v].minimum, es[v].maximum); } es[v].sorted_reader = casewriter_make_reader (es[v].sorted_writer); - total_cases = casereader_count_cases (es[v].sorted_reader); es[v].sorted_writer = NULL; + imax = casereader_get_case_cnt (es[v].sorted_reader); + es[v].maxima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].maxima)); es[v].minima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].minima)); for (i = 0; i < examine->calc_extremes; ++i) @@ -1618,7 +1616,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) (c = casereader_read (reader)) != NULL; case_unref (c)) { const double val = case_data_idx (c, EX_VAL)->f; - const double wt = case_data_idx (c, EX_WT)->f; /* FIXME: What about fractional weights ??? */ + const double wt = case_data_idx (c, EX_WT)->f; moments_pass_two (es[v].mom, val, wt); @@ -1634,15 +1632,15 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) min->val = val; value_copy (&min->identity, case_data_idx (c, EX_ID), examine->id_width); } - imin += wt; + imin ++; } - imax -= wt; + imax --; if (imax < examine->calc_extremes) { int x; - for (x = imax; x < imax + wt; ++x) + for (x = imax; x < imax + 1; ++x) { struct extremity *max; @@ -1660,7 +1658,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) if (examine->calc_extremes > 0) { assert (es[v].minima[0].val == es[v].minimum); - assert (es[v].maxima[0].val == es[v].maximum); + assert (es[v].maxima[0].val == es[v].maximum); } { @@ -1796,36 +1794,33 @@ run_examine (struct examine *cmd, struct casereader *input) struct payload payload; payload.create = create_n; payload.update = update_n; - payload.destroy = calculate_n; + payload.calculate = calculate_n; + payload.destroy = NULL; cmd->wv = dict_get_weight (cmd->dict); cmd->cats = categoricals_create (cmd->iacts, cmd->n_iacts, - cmd->wv, cmd->exclude, MV_ANY); + cmd->wv, cmd->dep_excl, cmd->fctr_excl); categoricals_set_payload (cmd->cats, &payload, cmd, NULL); - if (cmd->id_idx == -1) + if (cmd->id_var == NULL) { struct ccase *c = casereader_peek (input, 0); - assert (cmd->id_var == NULL); - cmd->id_idx = case_get_value_cnt (c); input = casereader_create_arithmetic_sequence (input, 1.0, 1.0); case_unref (c); } - /* FIXME: Filter out missing factor variables */ - /* Remove cases on a listwise basis if requested */ if ( cmd->missing_pw == false) input = casereader_create_filter_missing (input, cmd->dep_vars, cmd->n_dep_vars, - cmd->exclude, + cmd->dep_excl, NULL, NULL); @@ -1863,13 +1858,13 @@ run_examine (struct examine *cmd, struct casereader *input) } } - if (cmd->histogram) + if (cmd->histogramplot) show_histogram (cmd, i); if (cmd->npplot) show_npplot (cmd, i); - if (cmd->spreadlevel) + if (cmd->spreadlevelplot) show_spreadlevel (cmd, i); if (cmd->descriptives) @@ -1918,11 +1913,12 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) examine.iacts = iacts_mem = pool_zalloc (examine.pool, sizeof (struct interaction *)); examine.iacts[0] = interaction_create (NULL); - examine.exclude = MV_ANY; - examine.histogram = false; + examine.dep_excl = MV_ANY; + examine.fctr_excl = MV_ANY; + examine.histogramplot = false; examine.npplot = false; examine.boxplot = false; - examine.spreadlevel = false; + examine.spreadlevelplot = false; examine.sl_power = 0; examine.dict = dataset_dict (ds); @@ -2102,11 +2098,19 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "EXCLUDE")) { - examine.exclude = MV_ANY; + examine.dep_excl = MV_ANY; } else if (lex_match_id (lexer, "INCLUDE")) { - examine.exclude = MV_SYSTEM; + examine.dep_excl = MV_SYSTEM; + } + else if (lex_match_id (lexer, "REPORT")) + { + examine.fctr_excl = MV_NEVER; + } + else if (lex_match_id (lexer, "NOREPORT")) + { + examine.fctr_excl = MV_ANY; } else { @@ -2149,13 +2153,13 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "HISTOGRAM")) { - examine.histogram = true; + examine.histogramplot = true; } else if (lex_match_id (lexer, "SPREADLEVEL")) { - examine.spreadlevel = true; + examine.spreadlevelplot = true; examine.sl_power = 0; - if (lex_match (lexer, T_LPAREN)) + if (lex_match (lexer, T_LPAREN) && lex_force_int (lexer)) { examine.sl_power = lex_integer (lexer); @@ -2166,13 +2170,13 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "NONE")) { - examine.histogram = false; + examine.histogramplot = false; examine.npplot = false; examine.boxplot = false; } else if (lex_match (lexer, T_ALL)) { - examine.histogram = true; + examine.histogramplot = true; examine.npplot = true; examine.boxplot = true; } @@ -2222,6 +2226,7 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) { examine.n_iacts--; examine.iacts = &iacts_mem[1]; + interaction_destroy (iacts_mem[0]); } @@ -2279,7 +2284,6 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) for (i = 0; i < examine.n_iacts; ++i) interaction_destroy (examine.iacts[i]); - free (examine.ptiles); free (examine.dep_vars); pool_destroy (examine.pool);