X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fexamine.q;h=3d0b077c5fa9a04720163bac05e663d2d5680636;hb=0547b7cd533a294a8d4f80463298777403bedc1a;hp=312547f3f67132380f33d9a204d7049cfb1a670f;hpb=ed09f0f21add5e56e8395a5e8589cda6f96420bf;p=pspp-builds.git diff --git a/src/language/stats/examine.q b/src/language/stats/examine.q index 312547f3..3d0b077c 100644 --- a/src/language/stats/examine.q +++ b/src/language/stats/examine.q @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2004, 2008 Free Software Foundation, Inc. + Copyright (C) 2004, 2008, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -122,6 +122,9 @@ struct factor_metrics /* Sum of all weights, including those for missing values */ double n; + /* Sum of weights of non_missing values */ + double n_valid; + double mean; double variance; @@ -195,7 +198,7 @@ factor_destroy (struct xfactor *fctr) } static struct xfactor level0_factor; -static struct ll_list factor_list = LL_INITIALIZER (factor_list); +static struct ll_list factor_list; /* Parse the clause specifying the factors */ static int examine_parse_independent_vars (struct lexer *lexer, @@ -204,6 +207,7 @@ static int examine_parse_independent_vars (struct lexer *lexer, /* Output functions */ static void show_summary (const struct variable **dependent_var, int n_dep_var, + const struct dictionary *dict, const struct xfactor *f); @@ -228,7 +232,7 @@ static void show_extremes (const struct variable **dependent_var, static void run_examine (struct cmd_examine *, struct casereader *, struct dataset *); -static void output_examine (void); +static void output_examine (const struct dictionary *dict); void factor_calc (const struct ccase *c, int case_no, @@ -265,6 +269,8 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) subc_list_double_create (&percentile_list); percentile_algorithm = PC_HAVERAGE; + ll_init (&factor_list); + if ( !parse_examine (lexer, ds, &cmd, NULL) ) { subc_list_double_destroy (&percentile_list); @@ -369,14 +375,14 @@ np_plot (struct np *np, const char *label) chart_write_yscale (dnp_chart, np->dns_min, np->dns_max, 5); { - struct ccase c; struct casereader *reader = casewriter_make_reader (np->writer); - while (casereader_read (reader, &c)) + struct ccase *c; + while ((c = casereader_read (reader)) != NULL) { - chart_datum (np_chart, 0, case_data_idx (&c, NP_IDX_Y)->f, case_data_idx (&c, NP_IDX_NS)->f); - chart_datum (dnp_chart, 0, case_data_idx (&c, NP_IDX_Y)->f, case_data_idx (&c, NP_IDX_DNS)->f); + chart_datum (np_chart, 0, case_data_idx (c, NP_IDX_Y)->f, case_data_idx (c, NP_IDX_NS)->f); + chart_datum (dnp_chart, 0, case_data_idx (c, NP_IDX_Y)->f, case_data_idx (c, NP_IDX_DNS)->f); - case_destroy (&c); + case_unref (c); } casereader_destroy (reader); } @@ -623,11 +629,11 @@ show_boxplot_variables (const struct variable **dependent_var, /* Show all the appropriate tables */ static void -output_examine (void) +output_examine (const struct dictionary *dict) { struct ll *ll; - show_summary (dependent_vars, n_dependent_vars, &level0_factor); + show_summary (dependent_vars, n_dependent_vars, dict, &level0_factor); if ( cmd.a_statistics[XMN_ST_EXTREME] ) show_extremes (dependent_vars, n_dependent_vars, &level0_factor); @@ -654,7 +660,7 @@ output_examine (void) ll != ll_null (&factor_list); ll = ll_next (ll)) { struct xfactor *factor = ll_data (ll, struct xfactor, ll); - show_summary (dependent_vars, n_dependent_vars, factor); + show_summary (dependent_vars, n_dependent_vars, dict, factor); if ( cmd.a_statistics[XMN_ST_EXTREME] ) show_extremes (dependent_vars, n_dependent_vars, factor); @@ -866,7 +872,7 @@ static void examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, const struct dictionary *dict, struct xfactor *factor) { - struct ccase c; + struct ccase *c; const struct variable *wv = dict_get_weight (dict); int v; int n_extrema = 1; @@ -878,20 +884,21 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, n_extrema = cmd->st_n; - if (casereader_peek (reader, 0, &c)) + c = casereader_peek (reader, 0); + if (c != NULL) { if ( level > 0) { result->value[0] = - value_dup (case_data (&c, factor->indep_var[0]), + value_dup (case_data (c, factor->indep_var[0]), var_get_width (factor->indep_var[0])); if ( level > 1) result->value[1] = - value_dup (case_data (&c, factor->indep_var[1]), + value_dup (case_data (c, factor->indep_var[1]), var_get_width (factor->indep_var[1])); } - case_destroy (&c); + case_unref (c); } for (v = 0; v < n_dependent_vars; ++v) @@ -927,33 +934,37 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, /* Sort or just iterate, whilst calculating moments etc */ - while (casereader_read (input, &c)) + while ((c = casereader_read (input)) != NULL) { const casenumber loc = - case_data_idx (&c, casereader_get_value_cnt (reader) - 1)->f; + case_data_idx (c, casereader_get_value_cnt (reader) - 1)->f; - const double weight = wv ? case_data (&c, wv)->f : 1.0; + const double weight = wv ? case_data (c, wv)->f : 1.0; + const union value *value = case_data (c, dependent_vars[v]); if (weight != SYSMIS) minimize (&result->metrics[v].cmin, weight); moments1_add (result->metrics[v].moments, - case_data (&c, dependent_vars[v])->f, + value->f, weight); result->metrics[v].n += weight; + if ( ! var_is_value_missing (dependent_vars[v], value, MV_ANY) ) + result->metrics[v].n_valid += weight; + extrema_add (result->metrics[v].maxima, - case_data (&c, dependent_vars[v])->f, + value->f, weight, loc); extrema_add (result->metrics[v].minima, - case_data (&c, dependent_vars[v])->f, + value->f, weight, loc); - casewriter_write (writer, &c); + casewriter_write (writer, c); } casereader_destroy (input); result->metrics[v].up_reader = casewriter_make_reader (writer); @@ -984,7 +995,7 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, for (i = 0 ; i < metric->n_ptiles; ++i) { metric->ptl[i] = (struct percentile *) - percentile_create (percentile_list.data[i] / 100.0, metric->n); + percentile_create (percentile_list.data[i] / 100.0, metric->n_valid); if ( percentile_list.data[i] == 25) metric->quartiles[0] = metric->ptl[i]; @@ -994,8 +1005,8 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, metric->quartiles[2] = metric->ptl[i]; } - metric->tukey_hinges = tukey_hinges_create (metric->n, metric->cmin); - metric->trimmed_mean = trimmed_mean_create (metric->n, 0.05); + metric->tukey_hinges = tukey_hinges_create (metric->n_valid, metric->cmin); + metric->trimmed_mean = trimmed_mean_create (metric->n_valid, 0.05); n_os = metric->n_ptiles + 2; @@ -1028,7 +1039,7 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, /* FIXME: Do this in the above loop */ if ( cmd->a_plot[XMN_PLT_HISTOGRAM] ) { - struct ccase c; + struct ccase *c; struct casereader *input = casereader_clone (reader); for (v = 0; v < n_dependent_vars; ++v) @@ -1059,18 +1070,18 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, metric->histogram = histogram_create (10, min->value, max->value); } - while (casereader_read (input, &c)) + while ((c = casereader_read (input)) != NULL) { - const double weight = wv ? case_data (&c, wv)->f : 1.0; + const double weight = wv ? case_data (c, wv)->f : 1.0; for (v = 0; v < n_dependent_vars; ++v) { struct factor_metrics *metric = &result->metrics[v]; if ( metric->histogram) histogram_add ((struct histogram *) metric->histogram, - case_data (&c, dependent_vars[v])->f, weight); + case_data (c, dependent_vars[v])->f, weight); } - case_destroy (&c); + case_unref (c); } casereader_destroy (input); } @@ -1106,17 +1117,18 @@ run_examine (struct cmd_examine *cmd, struct casereader *input, { struct ll *ll; const struct dictionary *dict = dataset_dict (ds); - struct ccase c; + struct ccase *c; struct casereader *level0 = casereader_clone (input); - if (!casereader_peek (input, 0, &c)) + c = casereader_peek (input, 0); + if (c == NULL) { casereader_destroy (input); return; } - output_split_file_values (ds, &c); - case_destroy (&c); + output_split_file_values (ds, c); + case_unref (c); ll_init (&level0_factor.result_list); @@ -1169,7 +1181,7 @@ run_examine (struct cmd_examine *cmd, struct casereader *input, casereader_destroy (input); - output_examine (); + output_examine (dict); factor_destroy (&level0_factor); @@ -1189,8 +1201,12 @@ run_examine (struct cmd_examine *cmd, struct casereader *input, static void show_summary (const struct variable **dependent_var, int n_dep_var, + const struct dictionary *dict, const struct xfactor *fctr) { + const struct variable *wv = dict_get_weight (dict); + const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0; + static const char *subtitle[]= { N_("Valid"), @@ -1382,10 +1398,10 @@ show_summary (const struct variable **dependent_var, int n_dep_var, result->metrics[v].se_mean = sqrt (result->metrics[v].variance / n) ; /* Total Valid */ - tab_float (tbl, heading_columns, + tab_double (tbl, heading_columns, heading_rows + j + v * ll_count (&fctr->result_list), TAB_LEFT, - n, 8, 0); + n, wfmt); tab_text (tbl, heading_columns + 1, heading_rows + j + v * ll_count (&fctr->result_list), @@ -1393,11 +1409,11 @@ show_summary (const struct variable **dependent_var, int n_dep_var, "%g%%", n * 100.0 / result->metrics[v].n); /* Total Missing */ - tab_float (tbl, heading_columns + 2, + tab_double (tbl, heading_columns + 2, heading_rows + j + v * ll_count (&fctr->result_list), TAB_LEFT, result->metrics[v].n - n, - 8, 0); + wfmt); tab_text (tbl, heading_columns + 3, heading_rows + j + v * ll_count (&fctr->result_list), @@ -1407,11 +1423,11 @@ show_summary (const struct variable **dependent_var, int n_dep_var, ); /* Total Valid + Missing */ - tab_float (tbl, heading_columns + 4, + tab_double (tbl, heading_columns + 4, heading_rows + j + v * ll_count (&fctr->result_list), TAB_LEFT, result->metrics[v].n, - 8, 0); + wfmt); tab_text (tbl, heading_columns + 5, heading_rows + j + v * ll_count (&fctr->result_list), @@ -1610,93 +1626,93 @@ show_descriptives (const struct variable **dependent_var, /* Now the statistics ... */ - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + i * DESCRIPTIVE_ROWS, TAB_CENTER, result->metrics[v].mean, - 8, 2); + NULL); - tab_float (tbl, n_cols - 1, + tab_double (tbl, n_cols - 1, heading_rows + row_var_start + i * DESCRIPTIVE_ROWS, TAB_CENTER, result->metrics[v].se_mean, - 8, 3); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS, TAB_CENTER, result->metrics[v].mean - t * result->metrics[v].se_mean, - 8, 3); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS, TAB_CENTER, result->metrics[v].mean + t * result->metrics[v].se_mean, - 8, 3); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS, TAB_CENTER, trimmed_mean_calculate ((struct trimmed_mean *) result->metrics[v].trimmed_mean), - 8, 2); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS, TAB_CENTER, percentile_calculate (result->metrics[v].quartiles[1], percentile_algorithm), - 8, 2); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS, TAB_CENTER, result->metrics[v].variance, - 8, 3); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 6 + i * DESCRIPTIVE_ROWS, TAB_CENTER, sqrt (result->metrics[v].variance), - 8, 3); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS, TAB_CENTER, percentile_calculate (result->metrics[v].quartiles[2], percentile_algorithm) - percentile_calculate (result->metrics[v].quartiles[0], percentile_algorithm), - 8, 2); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS, TAB_CENTER, result->metrics[v].skewness, - 8, 3); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS, TAB_CENTER, result->metrics[v].kurtosis, - 8, 3); + NULL); - tab_float (tbl, n_cols - 1, + tab_double (tbl, n_cols - 1, heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS, TAB_CENTER, calc_seskew (result->metrics[v].n), - 8, 3); + NULL); - tab_float (tbl, n_cols - 1, + tab_double (tbl, n_cols - 1, heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS, TAB_CENTER, calc_sekurt (result->metrics[v].n), - 8, 3); + NULL); { struct extremum *minimum, *maximum ; @@ -1707,23 +1723,23 @@ show_descriptives (const struct variable **dependent_var, maximum = ll_data (max_ll, struct extremum, ll); minimum = ll_data (min_ll, struct extremum, ll); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS, TAB_CENTER, minimum->value, - 8, 3); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS, TAB_CENTER, maximum->value, - 8, 3); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS, TAB_CENTER, maximum->value - minimum->value, - 8, 3); + NULL); } } } @@ -1850,18 +1866,19 @@ show_extremes (const struct variable **dependent_var, while (weight-- > 0 && e < cmd.st_n) { - tab_float (tbl, n_cols - 1, + tab_double (tbl, n_cols - 1, heading_rows + row_var_start + row_result_start + cmd.st_n + e, TAB_RIGHT, minimum->value, - 8, 2); + NULL); - tab_float (tbl, n_cols - 2, - heading_rows + row_var_start + row_result_start + cmd.st_n + e, + tab_fixed (tbl, n_cols - 2, + heading_rows + row_var_start + + row_result_start + cmd.st_n + e, TAB_RIGHT, minimum->location, - 8, 0); + 10, 0); ++e; } @@ -1877,18 +1894,20 @@ show_extremes (const struct variable **dependent_var, while (weight-- > 0 && e < cmd.st_n) { - tab_float (tbl, n_cols - 1, - heading_rows + row_var_start + row_result_start + e, + tab_double (tbl, n_cols - 1, + heading_rows + row_var_start + + row_result_start + e, TAB_RIGHT, maximum->value, - 8, 2); + NULL); - tab_float (tbl, n_cols - 2, - heading_rows + row_var_start + row_result_start + e, + tab_fixed (tbl, n_cols - 2, + heading_rows + row_var_start + + row_result_start + e, TAB_RIGHT, maximum->location, - 8, 0); + 10, 0); ++e; } @@ -2065,12 +2084,12 @@ show_percentiles (const struct variable **dependent_var, for (j = 0; j < n_percentiles; ++j) { double hinge = SYSMIS; - tab_float (tbl, n_cols - n_percentiles + j, + tab_double (tbl, n_cols - n_percentiles + j, heading_rows + row_var_start + i * PERCENTILE_ROWS, TAB_CENTER, percentile_calculate (result->metrics[v].ptl[j], percentile_algorithm), - 8, 2 + NULL ); if ( result->metrics[v].ptl[j]->ptile == 0.5) @@ -2081,11 +2100,11 @@ show_percentiles (const struct variable **dependent_var, hinge = hinges[2]; if ( hinge != SYSMIS) - tab_float (tbl, n_cols - n_percentiles + j, + tab_double (tbl, n_cols - n_percentiles + j, heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS, TAB_CENTER, hinge, - 8, 2 + NULL ); }