X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fexamine.q;h=4a7f1f031562d948bb5c03d11f35bf7f82bb9b10;hb=c6fe58a22249f4f486b42f35fd8bd537c91e8e6e;hp=e341b18a352f294298a4fbb004eb9c2672412fd6;hpb=015e221b0f8578afee769528572c76387f26c629;p=pspp-builds.git diff --git a/src/language/stats/examine.q b/src/language/stats/examine.q index e341b18a..4a7f1f03 100644 --- a/src/language/stats/examine.q +++ b/src/language/stats/examine.q @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2004, 2008 Free Software Foundation, Inc. + Copyright (C) 2004, 2008, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -34,9 +34,9 @@ #include #include #include -#include #include #include +#include #include #include #include @@ -122,6 +122,9 @@ struct factor_metrics /* Sum of all weights, including those for missing values */ double n; + /* Sum of weights of non_missing values */ + double n_valid; + double mean; double variance; @@ -195,7 +198,7 @@ factor_destroy (struct xfactor *fctr) } static struct xfactor level0_factor; -static struct ll_list factor_list = LL_INITIALIZER (factor_list); +static struct ll_list factor_list; /* Parse the clause specifying the factors */ static int examine_parse_independent_vars (struct lexer *lexer, @@ -204,6 +207,7 @@ static int examine_parse_independent_vars (struct lexer *lexer, /* Output functions */ static void show_summary (const struct variable **dependent_var, int n_dep_var, + const struct dictionary *dict, const struct xfactor *f); @@ -228,7 +232,7 @@ static void show_extremes (const struct variable **dependent_var, static void run_examine (struct cmd_examine *, struct casereader *, struct dataset *); -static void output_examine (void); +static void output_examine (const struct dictionary *dict); void factor_calc (const struct ccase *c, int case_no, @@ -265,6 +269,8 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) subc_list_double_create (&percentile_list); percentile_algorithm = PC_HAVERAGE; + ll_init (&factor_list); + if ( !parse_examine (lexer, ds, &cmd, NULL) ) { subc_list_double_destroy (&percentile_list); @@ -369,14 +375,14 @@ np_plot (struct np *np, const char *label) chart_write_yscale (dnp_chart, np->dns_min, np->dns_max, 5); { - struct ccase c; struct casereader *reader = casewriter_make_reader (np->writer); - while (casereader_read (reader, &c)) + struct ccase *c; + while ((c = casereader_read (reader)) != NULL) { - chart_datum (np_chart, 0, case_data_idx (&c, NP_IDX_Y)->f, case_data_idx (&c, NP_IDX_NS)->f); - chart_datum (dnp_chart, 0, case_data_idx (&c, NP_IDX_Y)->f, case_data_idx (&c, NP_IDX_DNS)->f); + chart_datum (np_chart, 0, case_data_idx (c, NP_IDX_Y)->f, case_data_idx (c, NP_IDX_NS)->f); + chart_datum (dnp_chart, 0, case_data_idx (c, NP_IDX_Y)->f, case_data_idx (c, NP_IDX_DNS)->f); - case_destroy (&c); + case_unref (c); } casereader_destroy (reader); } @@ -623,11 +629,11 @@ show_boxplot_variables (const struct variable **dependent_var, /* Show all the appropriate tables */ static void -output_examine (void) +output_examine (const struct dictionary *dict) { struct ll *ll; - show_summary (dependent_vars, n_dependent_vars, &level0_factor); + show_summary (dependent_vars, n_dependent_vars, dict, &level0_factor); if ( cmd.a_statistics[XMN_ST_EXTREME] ) show_extremes (dependent_vars, n_dependent_vars, &level0_factor); @@ -654,7 +660,7 @@ output_examine (void) ll != ll_null (&factor_list); ll = ll_next (ll)) { struct xfactor *factor = ll_data (ll, struct xfactor, ll); - show_summary (dependent_vars, n_dependent_vars, factor); + show_summary (dependent_vars, n_dependent_vars, dict, factor); if ( cmd.a_statistics[XMN_ST_EXTREME] ) show_extremes (dependent_vars, n_dependent_vars, factor); @@ -866,7 +872,7 @@ static void examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, const struct dictionary *dict, struct xfactor *factor) { - struct ccase c; + struct ccase *c; const struct variable *wv = dict_get_weight (dict); int v; int n_extrema = 1; @@ -878,20 +884,21 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, n_extrema = cmd->st_n; - if (casereader_peek (reader, 0, &c)) + c = casereader_peek (reader, 0); + if (c != NULL) { if ( level > 0) { result->value[0] = - value_dup (case_data (&c, factor->indep_var[0]), + value_dup (case_data (c, factor->indep_var[0]), var_get_width (factor->indep_var[0])); if ( level > 1) result->value[1] = - value_dup (case_data (&c, factor->indep_var[1]), + value_dup (case_data (c, factor->indep_var[1]), var_get_width (factor->indep_var[1])); } - case_destroy (&c); + case_unref (c); } for (v = 0; v < n_dependent_vars; ++v) @@ -911,11 +918,11 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, { /* In this case, we need to sort the data, so we create a sorting casewriter */ - struct case_ordering *up_ordering = case_ordering_create (); - - case_ordering_add_var (up_ordering, dependent_vars[v], SRT_ASCEND); - writer = sort_create_writer (up_ordering, + struct subcase up_ordering; + subcase_init_var (&up_ordering, dependent_vars[v], SC_ASCEND); + writer = sort_create_writer (&up_ordering, casereader_get_value_cnt (reader)); + subcase_destroy (&up_ordering); } else { @@ -927,33 +934,37 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, /* Sort or just iterate, whilst calculating moments etc */ - while (casereader_read (input, &c)) + while ((c = casereader_read (input)) != NULL) { const casenumber loc = - case_data_idx (&c, casereader_get_value_cnt (reader) - 1)->f; + case_data_idx (c, casereader_get_value_cnt (reader) - 1)->f; - const double weight = wv ? case_data (&c, wv)->f : 1.0; + const double weight = wv ? case_data (c, wv)->f : 1.0; + const union value *value = case_data (c, dependent_vars[v]); if (weight != SYSMIS) minimize (&result->metrics[v].cmin, weight); moments1_add (result->metrics[v].moments, - case_data (&c, dependent_vars[v])->f, + value->f, weight); result->metrics[v].n += weight; + if ( ! var_is_value_missing (dependent_vars[v], value, MV_ANY) ) + result->metrics[v].n_valid += weight; + extrema_add (result->metrics[v].maxima, - case_data (&c, dependent_vars[v])->f, + value->f, weight, loc); extrema_add (result->metrics[v].minima, - case_data (&c, dependent_vars[v])->f, + value->f, weight, loc); - casewriter_write (writer, &c); + casewriter_write (writer, c); } casereader_destroy (input); result->metrics[v].up_reader = casewriter_make_reader (writer); @@ -984,7 +995,7 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, for (i = 0 ; i < metric->n_ptiles; ++i) { metric->ptl[i] = (struct percentile *) - percentile_create (percentile_list.data[i] / 100.0, metric->n); + percentile_create (percentile_list.data[i] / 100.0, metric->n_valid); if ( percentile_list.data[i] == 25) metric->quartiles[0] = metric->ptl[i]; @@ -994,8 +1005,8 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, metric->quartiles[2] = metric->ptl[i]; } - metric->tukey_hinges = tukey_hinges_create (metric->n, metric->cmin); - metric->trimmed_mean = trimmed_mean_create (metric->n, 0.05); + metric->tukey_hinges = tukey_hinges_create (metric->n_valid, metric->cmin); + metric->trimmed_mean = trimmed_mean_create (metric->n_valid, 0.05); n_os = metric->n_ptiles + 2; @@ -1028,7 +1039,7 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, /* FIXME: Do this in the above loop */ if ( cmd->a_plot[XMN_PLT_HISTOGRAM] ) { - struct ccase c; + struct ccase *c; struct casereader *input = casereader_clone (reader); for (v = 0; v < n_dependent_vars; ++v) @@ -1059,18 +1070,18 @@ examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, metric->histogram = histogram_create (10, min->value, max->value); } - while (casereader_read (input, &c)) + while ((c = casereader_read (input)) != NULL) { - const double weight = wv ? case_data (&c, wv)->f : 1.0; + const double weight = wv ? case_data (c, wv)->f : 1.0; for (v = 0; v < n_dependent_vars; ++v) { struct factor_metrics *metric = &result->metrics[v]; if ( metric->histogram) histogram_add ((struct histogram *) metric->histogram, - case_data (&c, dependent_vars[v])->f, weight); + case_data (c, dependent_vars[v])->f, weight); } - case_destroy (&c); + case_unref (c); } casereader_destroy (input); } @@ -1106,17 +1117,18 @@ run_examine (struct cmd_examine *cmd, struct casereader *input, { struct ll *ll; const struct dictionary *dict = dataset_dict (ds); - struct ccase c; + struct ccase *c; struct casereader *level0 = casereader_clone (input); - if (!casereader_peek (input, 0, &c)) + c = casereader_peek (input, 0); + if (c == NULL) { casereader_destroy (input); return; } - output_split_file_values (ds, &c); - case_destroy (&c); + output_split_file_values (ds, c); + case_unref (c); ll_init (&level0_factor.result_list); @@ -1131,15 +1143,10 @@ run_examine (struct cmd_examine *cmd, struct casereader *input, struct casereader *group = NULL; struct casereader *level1; struct casegrouper *grouper1 = NULL; - struct case_ordering *ordering1 = case_ordering_create (); - case_ordering_add_var (ordering1, factor->indep_var[0], SRT_ASCEND); level1 = casereader_clone (input); - - level1 = sort_execute (level1, - case_ordering_clone (ordering1)); - grouper1 = casegrouper_create_case_ordering (level1, ordering1); - case_ordering_destroy (ordering1); + level1 = sort_execute_1var (level1, factor->indep_var[0]); + grouper1 = casegrouper_create_vars (level1, &factor->indep_var[0], 1); while (casegrouper_get_next_group (grouper1, &group)) { @@ -1152,16 +1159,12 @@ run_examine (struct cmd_examine *cmd, struct casereader *input, int n_groups = 0; struct casereader *group2 = NULL; struct casegrouper *grouper2 = NULL; - struct case_ordering *ordering2 = case_ordering_create (); - case_ordering_add_var (ordering2, - factor->indep_var[1], SRT_ASCEND); - group_copy = sort_execute (group_copy, - case_ordering_clone (ordering2)); - grouper2 = - casegrouper_create_case_ordering (group_copy, ordering2); + group_copy = sort_execute_1var (group_copy, + factor->indep_var[1]); - case_ordering_destroy (ordering2); + grouper2 = casegrouper_create_vars (group_copy, + &factor->indep_var[1], 1); while (casegrouper_get_next_group (grouper2, &group2)) { @@ -1178,7 +1181,7 @@ run_examine (struct cmd_examine *cmd, struct casereader *input, casereader_destroy (input); - output_examine (); + output_examine (dict); factor_destroy (&level0_factor); @@ -1198,8 +1201,12 @@ run_examine (struct cmd_examine *cmd, struct casereader *input, static void show_summary (const struct variable **dependent_var, int n_dep_var, + const struct dictionary *dict, const struct xfactor *fctr) { + const struct variable *wv = dict_get_weight (dict); + const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0; + static const char *subtitle[]= { N_("Valid"), @@ -1236,7 +1243,7 @@ show_summary (const struct variable **dependent_var, int n_dep_var, tbl = tab_create (n_cols, n_rows, 0); tab_headers (tbl, heading_columns, 0, heading_rows, 0); - tab_dim (tbl, tab_natural_dimensions); + tab_dim (tbl, tab_natural_dimensions, NULL); /* Outline the box */ tab_box (tbl, @@ -1337,8 +1344,8 @@ show_summary (const struct variable **dependent_var, int n_dep_var, { if ( last_value == NULL || - compare_values (last_value, result->value[0], - fctr->indep_var[0])) + compare_values_short (last_value, result->value[0], + fctr->indep_var[0])) { struct string str; @@ -1391,10 +1398,10 @@ show_summary (const struct variable **dependent_var, int n_dep_var, result->metrics[v].se_mean = sqrt (result->metrics[v].variance / n) ; /* Total Valid */ - tab_float (tbl, heading_columns, + tab_double (tbl, heading_columns, heading_rows + j + v * ll_count (&fctr->result_list), TAB_LEFT, - n, 8, 0); + n, wfmt); tab_text (tbl, heading_columns + 1, heading_rows + j + v * ll_count (&fctr->result_list), @@ -1402,11 +1409,11 @@ show_summary (const struct variable **dependent_var, int n_dep_var, "%g%%", n * 100.0 / result->metrics[v].n); /* Total Missing */ - tab_float (tbl, heading_columns + 2, + tab_double (tbl, heading_columns + 2, heading_rows + j + v * ll_count (&fctr->result_list), TAB_LEFT, result->metrics[v].n - n, - 8, 0); + wfmt); tab_text (tbl, heading_columns + 3, heading_rows + j + v * ll_count (&fctr->result_list), @@ -1416,11 +1423,11 @@ show_summary (const struct variable **dependent_var, int n_dep_var, ); /* Total Valid + Missing */ - tab_float (tbl, heading_columns + 4, + tab_double (tbl, heading_columns + 4, heading_rows + j + v * ll_count (&fctr->result_list), TAB_LEFT, result->metrics[v].n, - 8, 0); + wfmt); tab_text (tbl, heading_columns + 5, heading_rows + j + v * ll_count (&fctr->result_list), @@ -1473,7 +1480,7 @@ show_descriptives (const struct variable **dependent_var, tbl = tab_create (n_cols, n_rows, 0); tab_headers (tbl, heading_columns, 0, heading_rows, 0); - tab_dim (tbl, tab_natural_dimensions); + tab_dim (tbl, tab_natural_dimensions, NULL); /* Outline the box */ tab_box (tbl, @@ -1619,93 +1626,93 @@ show_descriptives (const struct variable **dependent_var, /* Now the statistics ... */ - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + i * DESCRIPTIVE_ROWS, TAB_CENTER, result->metrics[v].mean, - 8, 2); + NULL); - tab_float (tbl, n_cols - 1, + tab_double (tbl, n_cols - 1, heading_rows + row_var_start + i * DESCRIPTIVE_ROWS, TAB_CENTER, result->metrics[v].se_mean, - 8, 3); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS, TAB_CENTER, result->metrics[v].mean - t * result->metrics[v].se_mean, - 8, 3); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS, TAB_CENTER, result->metrics[v].mean + t * result->metrics[v].se_mean, - 8, 3); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS, TAB_CENTER, trimmed_mean_calculate ((struct trimmed_mean *) result->metrics[v].trimmed_mean), - 8, 2); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS, TAB_CENTER, percentile_calculate (result->metrics[v].quartiles[1], percentile_algorithm), - 8, 2); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS, TAB_CENTER, result->metrics[v].variance, - 8, 3); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 6 + i * DESCRIPTIVE_ROWS, TAB_CENTER, sqrt (result->metrics[v].variance), - 8, 3); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS, TAB_CENTER, percentile_calculate (result->metrics[v].quartiles[2], percentile_algorithm) - percentile_calculate (result->metrics[v].quartiles[0], percentile_algorithm), - 8, 2); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS, TAB_CENTER, result->metrics[v].skewness, - 8, 3); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS, TAB_CENTER, result->metrics[v].kurtosis, - 8, 3); + NULL); - tab_float (tbl, n_cols - 1, + tab_double (tbl, n_cols - 1, heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS, TAB_CENTER, calc_seskew (result->metrics[v].n), - 8, 3); + NULL); - tab_float (tbl, n_cols - 1, + tab_double (tbl, n_cols - 1, heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS, TAB_CENTER, calc_sekurt (result->metrics[v].n), - 8, 3); + NULL); { struct extremum *minimum, *maximum ; @@ -1716,23 +1723,23 @@ show_descriptives (const struct variable **dependent_var, maximum = ll_data (max_ll, struct extremum, ll); minimum = ll_data (min_ll, struct extremum, ll); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS, TAB_CENTER, minimum->value, - 8, 3); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS, TAB_CENTER, maximum->value, - 8, 3); + NULL); - tab_float (tbl, n_cols - 2, + tab_double (tbl, n_cols - 2, heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS, TAB_CENTER, maximum->value - minimum->value, - 8, 3); + NULL); } } } @@ -1786,7 +1793,7 @@ show_extremes (const struct variable **dependent_var, tbl = tab_create (n_cols, n_rows, 0); tab_headers (tbl, heading_columns, 0, heading_rows, 0); - tab_dim (tbl, tab_natural_dimensions); + tab_dim (tbl, tab_natural_dimensions, NULL); /* Outline the box */ tab_box (tbl, @@ -1859,18 +1866,19 @@ show_extremes (const struct variable **dependent_var, while (weight-- > 0 && e < cmd.st_n) { - tab_float (tbl, n_cols - 1, + tab_double (tbl, n_cols - 1, heading_rows + row_var_start + row_result_start + cmd.st_n + e, TAB_RIGHT, minimum->value, - 8, 2); + NULL); - tab_float (tbl, n_cols - 2, - heading_rows + row_var_start + row_result_start + cmd.st_n + e, + tab_fixed (tbl, n_cols - 2, + heading_rows + row_var_start + + row_result_start + cmd.st_n + e, TAB_RIGHT, minimum->location, - 8, 0); + 10, 0); ++e; } @@ -1886,18 +1894,20 @@ show_extremes (const struct variable **dependent_var, while (weight-- > 0 && e < cmd.st_n) { - tab_float (tbl, n_cols - 1, - heading_rows + row_var_start + row_result_start + e, + tab_double (tbl, n_cols - 1, + heading_rows + row_var_start + + row_result_start + e, TAB_RIGHT, maximum->value, - 8, 2); + NULL); - tab_float (tbl, n_cols - 2, - heading_rows + row_var_start + row_result_start + e, + tab_fixed (tbl, n_cols - 2, + heading_rows + row_var_start + + row_result_start + e, TAB_RIGHT, maximum->location, - 8, 0); + 10, 0); ++e; } @@ -1988,7 +1998,7 @@ show_percentiles (const struct variable **dependent_var, tbl = tab_create (n_cols, n_rows, 0); tab_headers (tbl, heading_columns, 0, heading_rows, 0); - tab_dim (tbl, tab_natural_dimensions); + tab_dim (tbl, tab_natural_dimensions, NULL); /* Outline the box */ tab_box (tbl, @@ -2074,12 +2084,12 @@ show_percentiles (const struct variable **dependent_var, for (j = 0; j < n_percentiles; ++j) { double hinge = SYSMIS; - tab_float (tbl, n_cols - n_percentiles + j, + tab_double (tbl, n_cols - n_percentiles + j, heading_rows + row_var_start + i * PERCENTILE_ROWS, TAB_CENTER, percentile_calculate (result->metrics[v].ptl[j], percentile_algorithm), - 8, 2 + NULL ); if ( result->metrics[v].ptl[j]->ptile == 0.5) @@ -2090,11 +2100,11 @@ show_percentiles (const struct variable **dependent_var, hinge = hinges[2]; if ( hinge != SYSMIS) - tab_float (tbl, n_cols - n_percentiles + j, + tab_double (tbl, n_cols - n_percentiles + j, heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS, TAB_CENTER, hinge, - 8, 2 + NULL ); }