X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fexamine.c;h=cfe380d5b52c0181bf8f392b1da82c4973f30bae;hb=c84078d8498785e9a52945cc63fb663cd48027af;hp=a20396397d74d87def5a0181dbc30502079b8ab6;hpb=b3fcf4b1644bf4af9b5eb7b0b0f8856c51118128;p=pspp diff --git a/src/language/stats/examine.c b/src/language/stats/examine.c index a20396397d..cfe380d5b5 100644 --- a/src/language/stats/examine.c +++ b/src/language/stats/examine.c @@ -1,6 +1,6 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2012 Free Software Foundation, Inc. + Copyright (C) 2012, 2013, 2016 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -129,9 +129,9 @@ struct examine size_t n_percentiles; bool npplot; - bool histogram; + bool histogramplot; bool boxplot; - bool spreadlevel; + bool spreadlevelplot; int sl_power; enum bp_mode boxplot_mode; @@ -298,22 +298,29 @@ show_boxplot_grouped (const struct examine *cmd, int iact_idx) const struct ccase *c = categoricals_get_case_by_category_real (cmd->cats, iact_idx, grp); - const struct exploratory_stats *es = + struct exploratory_stats *es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp); ds_init_empty (&label); for (ivar_idx = 0; ivar_idx < iact->n_vars; ++ivar_idx) { + struct string l; const struct variable *ivar = iact->vars[ivar_idx]; const union value *val = case_data (c, ivar); - - ds_put_cstr (&label, var_to_string (ivar)); - ds_put_cstr (&label, " = "); - append_value_name (ivar, val, &label); - ds_put_cstr (&label, "; "); + ds_init_empty (&l); + + append_value_name (ivar, val, &l); + ds_ltrim (&l, ss_cstr (" ")); + + ds_put_substring (&label, l.ss); + if (ivar_idx < iact->n_vars - 1) + ds_put_cstr (&label, "; "); + + ds_destroy (&l); } boxplot_add_box (boxplot, es[v].box_whisker, ds_cstr (&label)); + es[v].box_whisker = NULL; ds_destroy (&label); } @@ -384,11 +391,12 @@ show_boxplot_variabled (const struct examine *cmd, int iact_idx) for (v = 0; v < cmd->n_dep_vars; ++v) { - const struct exploratory_stats *es = + struct exploratory_stats *es = categoricals_get_user_data_by_category_real (cmd->cats, iact_idx, grp); boxplot_add_box (boxplot, es[v].box_whisker, var_to_string (cmd->dep_vars[v])); + es[v].box_whisker = NULL; } boxplot_submit (boxplot); @@ -599,6 +607,7 @@ percentiles_report (const struct examine *cmd, int iact_idx) const int nc = heading_columns + cmd->n_percentiles; t = tab_create (nc, nr); + tab_title (t, _("Percentiles")); tab_headers (t, heading_columns, 0, heading_rows, 0); @@ -718,7 +727,7 @@ percentiles_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat, 0, percentile_calculate (es->percentiles[p], cmd->pc_alg), - 0); + NULL, RC_OTHER); if (cmd->ptiles[p] == 25.0) { @@ -726,7 +735,7 @@ percentiles_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + 1, 0, hinges[0], - 0); + NULL, RC_OTHER); } else if (cmd->ptiles[p] == 50.0) { @@ -734,7 +743,7 @@ percentiles_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + 1, 0, hinges[1], - 0); + NULL, RC_OTHER); } else if (cmd->ptiles[p] == 75.0) { @@ -742,7 +751,7 @@ percentiles_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + 1, 0, hinges[2], - 0); + NULL, RC_OTHER); } } @@ -778,6 +787,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) const int nc = 2 + heading_columns; t = tab_create (nc, nr); + tab_title (t, _("Descriptives")); tab_headers (t, heading_columns, 0, heading_rows, 0); @@ -883,12 +893,12 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat, - 0, m1, 0); + 0, m1, NULL, RC_OTHER); tab_double (t, 1 + iact->n_vars + 3, heading_rows + v * rows_per_var + i * rows_per_cat, - 0, calc_semean (m2, m0), 0); + 0, calc_semean (m2, m0), NULL, RC_OTHER); tab_text_format (t, 1 + iact->n_vars, @@ -908,7 +918,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 1, - 0, m1 - tval * calc_semean (m2, m0), 0); + 0, m1 - tval * calc_semean (m2, m0), NULL, RC_OTHER); tab_text (t, @@ -921,7 +931,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 2, - 0, m1 + tval * calc_semean (m2, m0), 0); + 0, m1 + tval * calc_semean (m2, m0), NULL, RC_OTHER); tab_text (t, @@ -936,7 +946,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + 3, 0, trimmed_mean_calculate (es->trimmed_mean), - 0); + NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -950,7 +960,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + 4, 0, percentile_calculate (es->quartiles[1], cmd->pc_alg), - 0); + NULL, RC_OTHER); tab_text (t, @@ -963,7 +973,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 5, - 0, m2, 0); + 0, m2, NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -975,7 +985,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 6, - 0, sqrt (m2), 0); + 0, sqrt (m2), NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -989,7 +999,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + 7, 0, es->minima[0].val, - 0); + NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -1003,7 +1013,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + 8, 0, es->maxima[0].val, - 0); + NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -1017,7 +1027,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + 9, 0, es->maxima[0].val - es->minima[0].val, - 0); + NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -1033,7 +1043,7 @@ descriptives_report (const struct examine *cmd, int iact_idx) 0, percentile_calculate (es->quartiles[2], cmd->pc_alg) - percentile_calculate (es->quartiles[0], cmd->pc_alg), - 0); + NULL, RC_OTHER); @@ -1048,12 +1058,12 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 11, - 0, m3, 0); + 0, m3, NULL, RC_OTHER); tab_double (t, 1 + iact->n_vars + 3, heading_rows + v * rows_per_var + i * rows_per_cat + 11, - 0, calc_seskew (m0), 0); + 0, calc_seskew (m0), NULL, RC_OTHER); tab_text (t, 1 + iact->n_vars, @@ -1065,12 +1075,12 @@ descriptives_report (const struct examine *cmd, int iact_idx) tab_double (t, 1 + iact->n_vars + 2, heading_rows + v * rows_per_var + i * rows_per_cat + 12, - 0, m4, 0); + 0, m4, NULL, RC_OTHER); tab_double (t, 1 + iact->n_vars + 3, heading_rows + v * rows_per_var + i * rows_per_cat + 12, - 0, calc_sekurt (m0), 0); + 0, calc_sekurt (m0), NULL, RC_OTHER); } free (prev_val); @@ -1097,6 +1107,7 @@ extremes_report (const struct examine *cmd, int iact_idx) const int nc = 2 + heading_columns; t = tab_create (nc, nr); + tab_title (t, _("Extreme Values")); tab_headers (t, heading_columns, 0, heading_rows, 0); @@ -1213,7 +1224,7 @@ extremes_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + e, TAB_RIGHT, e + 1, - &F_8_0); + NULL, RC_INTEGER); /* The casenumber */ if (cmd->id_var) @@ -1230,14 +1241,14 @@ extremes_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + e, TAB_RIGHT, es->maxima[e].identity.f, - &F_8_0); + NULL, RC_INTEGER); tab_double (t, heading_columns + 1, heading_rows + v * rows_per_var + i * rows_per_cat + e, 0, es->maxima[e].val, - var_get_print_format (cmd->dep_vars[v])); + var_get_print_format (cmd->dep_vars[v]), RC_OTHER); tab_double (t, @@ -1245,7 +1256,7 @@ extremes_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, TAB_RIGHT, e + 1, - &F_8_0); + NULL, RC_INTEGER); /* The casenumber */ if (cmd->id_var) @@ -1262,14 +1273,14 @@ extremes_report (const struct examine *cmd, int iact_idx) heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, TAB_RIGHT, es->minima[e].identity.f, - &F_8_0); + NULL, RC_INTEGER); tab_double (t, heading_columns + 1, heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, 0, es->minima[e].val, - var_get_print_format (cmd->dep_vars[v])); + var_get_print_format (cmd->dep_vars[v]), RC_OTHER); } } free (prev_val); @@ -1296,6 +1307,7 @@ summary_report (const struct examine *cmd, int iact_idx) const int nc = 6 + heading_columns; t = tab_create (nc, nr); + tab_set_format (t, RC_WEIGHT, wfmt); tab_title (t, _("Case Processing Summary")); tab_headers (t, heading_columns, 0, heading_rows, 0); @@ -1410,7 +1422,7 @@ summary_report (const struct examine *cmd, int iact_idx) heading_rows + n_cats * v + i, 0, es[v].non_missing, - wfmt); + NULL, RC_WEIGHT); tab_text_format (t, @@ -1427,7 +1439,7 @@ summary_report (const struct examine *cmd, int iact_idx) heading_rows + n_cats * v + i, 0, es[v].missing, - wfmt); + NULL, RC_WEIGHT); tab_text_format (t, heading_columns + 3, @@ -1441,7 +1453,7 @@ summary_report (const struct examine *cmd, int iact_idx) heading_rows + n_cats * v + i, 0, total, - wfmt); + NULL, RC_WEIGHT); /* This can only be 100% can't it? */ tab_text_format (t, @@ -1522,6 +1534,25 @@ update_n (const void *aux1, void *aux2 UNUSED, void *user_data, const struct examine *examine = aux1; struct exploratory_stats *es = user_data; + bool this_case_is_missing = false; + /* LISTWISE missing must be dealt with here */ + if (!examine->missing_pw) + { + for (v = 0; v < examine->n_dep_vars; v++) + { + const struct variable *var = examine->dep_vars[v]; + + if (var_is_value_missing (var, case_data (c, var), examine->dep_excl)) + { + es[v].missing += weight; + this_case_is_missing = true; + } + } + } + + if (this_case_is_missing) + return; + for (v = 0; v < examine->n_dep_vars; v++) { struct ccase *outcase ; @@ -1578,7 +1609,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) struct casereader *reader; struct ccase *c; - if (examine->histogram) + if (examine->histogramplot && es[v].non_missing > 0) { /* Sturges Rule */ double bin_width = fabs (es[v].minimum - es[v].maximum) @@ -1601,13 +1632,15 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) value_init_pool (examine->pool, &es[v].maxima[i].identity, examine->id_width) ; value_init_pool (examine->pool, &es[v].minima[i].identity, examine->id_width) ; } - + + bool warn = true; for (reader = casereader_clone (es[v].sorted_reader); (c = casereader_read (reader)) != NULL; case_unref (c)) { const double val = case_data_idx (c, EX_VAL)->f; - const double wt = case_data_idx (c, EX_WT)->f; - + double wt = case_data_idx (c, EX_WT)->f; + wt = var_force_valid_weight (examine->wv, wt, &warn); + moments_pass_two (es[v].mom, val, wt); if (es[v].histogram) @@ -1645,7 +1678,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) } casereader_destroy (reader); - if (examine->calc_extremes > 0) + if (examine->calc_extremes > 0 && es[v].non_missing > 0) { assert (es[v].minima[0].val == es[v].minimum); assert (es[v].maxima[0].val == es[v].maximum); @@ -1767,6 +1800,12 @@ cleanup_exploratory_stats (struct examine *cmd) statistic_destroy (&es[v].histogram->parent); moments_destroy (es[v].mom); + if (es[v].box_whisker) + { + stat = &es[v].box_whisker->parent.parent; + stat->destroy (stat); + } + casereader_destroy (es[v].sorted_reader); } } @@ -1795,27 +1834,16 @@ run_examine (struct examine *cmd, struct casereader *input) categoricals_set_payload (cmd->cats, &payload, cmd, NULL); - if (cmd->id_idx == -1) + if (cmd->id_var == NULL) { struct ccase *c = casereader_peek (input, 0); - assert (cmd->id_var == NULL); - cmd->id_idx = case_get_value_cnt (c); input = casereader_create_arithmetic_sequence (input, 1.0, 1.0); case_unref (c); } - /* Remove cases on a listwise basis if requested */ - if ( cmd->missing_pw == false) - input = casereader_create_filter_missing (input, - cmd->dep_vars, - cmd->n_dep_vars, - cmd->dep_excl, - NULL, - NULL); - for (reader = input; (c = casereader_read (reader)) != NULL; case_unref (c)) { @@ -1828,6 +1856,10 @@ run_examine (struct examine *cmd, struct casereader *input) { summary_report (cmd, i); + const size_t n_cats = categoricals_n_count (cmd->cats, i); + if (n_cats == 0) + continue; + if (cmd->disp_extremes > 0) extremes_report (cmd, i); @@ -1850,13 +1882,13 @@ run_examine (struct examine *cmd, struct casereader *input) } } - if (cmd->histogram) + if (cmd->histogramplot) show_histogram (cmd, i); if (cmd->npplot) show_npplot (cmd, i); - if (cmd->spreadlevel) + if (cmd->spreadlevelplot) show_spreadlevel (cmd, i); if (cmd->descriptives) @@ -1907,12 +1939,13 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) examine.dep_excl = MV_ANY; examine.fctr_excl = MV_ANY; - examine.histogram = false; + examine.histogramplot = false; examine.npplot = false; examine.boxplot = false; - examine.spreadlevel = false; + examine.spreadlevelplot = false; examine.sl_power = 0; - + examine.dep_vars = NULL; + examine.n_dep_vars = 0; examine.dict = dataset_dict (ds); /* Accept an optional, completely pointless "/VARIABLES=" */ @@ -1969,6 +2002,8 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) int extr = 5; if (lex_match (lexer, T_LPAREN)) { + if (!lex_force_int (lexer)) + goto error; extr = lex_integer (lexer); if (extr < 0) @@ -2145,13 +2180,13 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "HISTOGRAM")) { - examine.histogram = true; + examine.histogramplot = true; } else if (lex_match_id (lexer, "SPREADLEVEL")) { - examine.spreadlevel = true; + examine.spreadlevelplot = true; examine.sl_power = 0; - if (lex_match (lexer, T_LPAREN)) + if (lex_match (lexer, T_LPAREN) && lex_force_int (lexer)) { examine.sl_power = lex_integer (lexer); @@ -2162,13 +2197,13 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "NONE")) { - examine.histogram = false; + examine.histogramplot = false; examine.npplot = false; examine.boxplot = false; } else if (lex_match (lexer, T_ALL)) { - examine.histogram = true; + examine.histogramplot = true; examine.npplot = true; examine.boxplot = true; } @@ -2274,6 +2309,8 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) caseproto_unref (examine.ex_proto); + for (i = 0; i < examine.n_iacts; ++i) + interaction_destroy (examine.iacts[i]); free (examine.ptiles); free (examine.dep_vars); pool_destroy (examine.pool);