X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fmeans.c;h=8fa1ba81cd51f7326e228c6a2d2047a5804e74a1;hb=eff2701d5d282bfc0c1b52bd2af985705b11b307;hp=0ddadf8009de38a81a962f495120fad85b8a08e8;hpb=2c043e1bf0d36ae7b3900bf3c2b98196f67d5b7d;p=pspp diff --git a/src/language/stats/means.c b/src/language/stats/means.c index 0ddadf8009..8fa1ba81cd 100644 --- a/src/language/stats/means.c +++ b/src/language/stats/means.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -384,7 +384,7 @@ first_update (void *stat, double w UNUSED, double x) double *f = stat; if (*f == SYSMIS) - *f = x; + *f = x; } static double @@ -395,6 +395,13 @@ first_get (const struct per_var_data *pvd UNUSED, void *stat) return *f; } +enum + { + MEANS_MEAN = 0, + MEANS_N, + MEANS_STDDEV + }; + /* Table of cell_specs */ static const struct cell_spec cell_spec[] = { {N_("Mean"), "MEAN", NULL, NULL, arithmean_get}, @@ -434,23 +441,26 @@ struct summary }; +struct layer +{ + size_t n_factor_vars; + const struct variable **factor_vars; +}; + /* The thing parsed after TABLES= */ struct mtable { size_t n_dep_vars; const struct variable **dep_vars; - size_t n_interactions; + int n_layers; + struct layer *layers; + struct interaction **interactions; struct summary *summary; - size_t *n_factor_vars; - const struct variable ***factor_vars; - int ii; - int n_layers; - struct categoricals *cats; }; @@ -467,6 +477,8 @@ struct means /* Missing value class for dependent variables */ enum mv_class dep_exclude; + bool listwise_exclude; + /* an array indicating which statistics are to be calculated */ int *cells; @@ -482,75 +494,51 @@ static void run_means (struct means *cmd, struct casereader *input, const struct dataset *ds); -/* Append all the variables belonging to layer and all subsequent layers - to iact. And then append iact to the means->interaction. - This is a recursive function. - */ -static void -iact_append_factor (struct mtable *means, int layer, - const struct interaction *iact) -{ - int v; - const struct variable **fv; - - if (layer >= means->n_layers) - return; - - fv = means->factor_vars[layer]; - - for (v = 0; v < means->n_factor_vars[layer]; ++v) - { - struct interaction *nexti = interaction_clone (iact); - interaction_add_variable (nexti, fv[v]); - - iact_append_factor (means, layer + 1, nexti); - - if (layer == means->n_layers - 1) - { - means->interactions[means->ii++] = nexti; - } - } -} static bool parse_means_table_syntax (struct lexer *lexer, const struct means *cmd, struct mtable *table) { table->ii = 0; table->n_layers = 0; - table->factor_vars = NULL; - table->n_factor_vars = NULL; + table->layers = NULL; + table->interactions = NULL; /* Dependent variable (s) */ - if (!parse_variables_const (lexer, cmd->dict, - &table->dep_vars, &table->n_dep_vars, - PV_NO_DUPLICATE | PV_NUMERIC)) + if (!parse_variables_const_pool (lexer, cmd->pool, cmd->dict, + &table->dep_vars, &table->n_dep_vars, + PV_NO_DUPLICATE | PV_NUMERIC)) return false; /* Factor variable (s) */ - while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) + while (lex_match (lexer, T_BY)) { - if (lex_match (lexer, T_BY)) - { - table->n_layers++; - table->factor_vars = - xrealloc (table->factor_vars, - sizeof (*table->factor_vars) * table->n_layers); - - table->n_factor_vars = - xrealloc (table->n_factor_vars, - sizeof (*table->n_factor_vars) * table->n_layers); - - if (!parse_variables_const (lexer, cmd->dict, - &table->factor_vars[table->n_layers - 1], - &table->n_factor_vars[table->n_layers - - 1], - PV_NO_DUPLICATE)) - return false; - - } + table->n_layers++; + table->layers = + pool_realloc (cmd->pool, table->layers, + sizeof (*table->layers) * table->n_layers); + + if (!parse_variables_const_pool + (lexer, cmd->pool, cmd->dict, + &table->layers[table->n_layers - 1].factor_vars, + &table->layers[table->n_layers - 1].n_factor_vars, + PV_NO_DUPLICATE)) + return false; } + /* There is always at least one layer. + However the final layer is the total, and not + normally considered by the user as a + layer. + */ + + table->n_layers++; + table->layers = + pool_realloc (cmd->pool, table->layers, + sizeof (*table->layers) * table->n_layers); + table->layers[table->n_layers - 1].factor_vars = NULL; + table->layers[table->n_layers - 1].n_factor_vars = 0; + return true; } @@ -583,15 +571,18 @@ cmd_means (struct lexer *lexer, struct dataset *ds) struct means means; bool more_tables = true; + means.pool = pool_create (); + means.exclude = MV_ANY; means.dep_exclude = MV_ANY; + means.listwise_exclude = false; means.table = NULL; means.n_tables = 0; means.dict = dataset_dict (ds); means.n_cells = 3; - means.cells = xcalloc (means.n_cells, sizeof (*means.cells)); + means.cells = pool_calloc (means.pool, means.n_cells, sizeof (*means.cells)); /* The first three items (MEAN, COUNT, STDDEV) are the default */ @@ -602,7 +593,8 @@ cmd_means (struct lexer *lexer, struct dataset *ds) /* Optional TABLES = */ if (lex_match_id (lexer, "TABLES")) { - lex_force_match (lexer, T_EQUALS); + if (! lex_force_match (lexer, T_EQUALS)) + goto error; } @@ -611,9 +603,9 @@ cmd_means (struct lexer *lexer, struct dataset *ds) while (more_tables) { means.n_tables ++; - means.table = xrealloc (means.table, means.n_tables * sizeof (*means.table)); + means.table = pool_realloc (means.pool, means.table, means.n_tables * sizeof (*means.table)); - if (! parse_means_table_syntax (lexer, &means, + if (! parse_means_table_syntax (lexer, &means, &means.table[means.n_tables - 1])) { goto error; @@ -626,7 +618,7 @@ cmd_means (struct lexer *lexer, struct dataset *ds) if (lex_is_variable (lexer, means.dict, 1) ) { more_tables = true; - lex_force_match (lexer, T_SLASH); + lex_match (lexer, T_SLASH); } } } @@ -639,10 +631,10 @@ cmd_means (struct lexer *lexer, struct dataset *ds) if (lex_match_id (lexer, "MISSING")) { /* - If no MISSING subcommand is specified, each combination of - a dependent variable and categorical variables is handled - separately. - */ + If no MISSING subcommand is specified, each combination of + a dependent variable and categorical variables is handled + separately. + */ lex_match (lexer, T_EQUALS); if (lex_match_id (lexer, "INCLUDE")) { @@ -663,22 +655,21 @@ cmd_means (struct lexer *lexer, struct dataset *ds) be dropped FOR THAT TABLE ONLY. */ { - means.exclude = MV_ANY; - means.dep_exclude = MV_ANY; + means.listwise_exclude = true; } else if (lex_match_id (lexer, "DEPENDENT")) /* - Use the command "/MISSING=DEPENDENT" to - include user-missing values for the categorical variables, - while excluding them for the dependent variables. + Use the command "/MISSING=DEPENDENT" to + include user-missing values for the categorical variables, + while excluding them for the dependent variables. - Cases are dropped only when user-missing values - appear in dependent variables. User-missing - values for categorical variables are treated according to - their face value. + Cases are dropped only when user-missing values + appear in dependent variables. User-missing + values for categorical variables are treated according to + their face value. - Cases are ALWAYS dropped when System Missing values appear - in the categorical variables. + Cases are ALWAYS dropped when System Missing values appear + in the categorical variables. */ { means.dep_exclude = MV_ANY; @@ -699,17 +690,44 @@ cmd_means (struct lexer *lexer, struct dataset *ds) while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH) { - int k; - for (k = 0; k < n_C; ++k) + int k = 0; + if (lex_match (lexer, T_ALL)) { - if (lex_match_id (lexer, cell_spec[k].keyword)) - { - means.cells = - xrealloc (means.cells, - ++means.n_cells * sizeof (*means.cells)); + int x; + means.cells = + pool_realloc (means.pool, means.cells, + (means.n_cells += n_C) * sizeof (*means.cells)); - means.cells[means.n_cells - 1] = k; - break; + for (x = 0; x < n_C; ++x) + means.cells[means.n_cells - (n_C - 1 - x) - 1] = x; + } + else if (lex_match_id (lexer, "NONE")) + { + /* Do nothing */ + } + else if (lex_match_id (lexer, "DEFAULT")) + { + means.cells = + pool_realloc (means.pool, means.cells, + (means.n_cells += 3) * sizeof (*means.cells)); + + means.cells[means.n_cells - 2 - 1] = MEANS_MEAN; + means.cells[means.n_cells - 1 - 1] = MEANS_N; + means.cells[means.n_cells - 0 - 1] = MEANS_STDDEV; + } + else + { + for (; k < n_C; ++k) + { + if (lex_match_id (lexer, cell_spec[k].keyword)) + { + means.cells = + pool_realloc (means.pool, means.cells, + ++means.n_cells * sizeof (*means.cells)); + + means.cells[means.n_cells - 1] = k; + break; + } } } if (k >= n_C) @@ -726,33 +744,31 @@ cmd_means (struct lexer *lexer, struct dataset *ds) } } - means.pool = pool_create (); for (t = 0; t < means.n_tables; ++t) - { - struct mtable *table = &means.table[t]; - table->n_interactions = 1; - for (l = 0; l < table->n_layers; ++l) - { - const int n_vars = table->n_factor_vars[l]; - table->n_interactions *= n_vars; - } - - table->interactions = - xcalloc (table->n_interactions, sizeof (*table->interactions)); - - table->summary = - xcalloc (table->n_dep_vars * table->n_interactions, sizeof (*table->summary)); - + { + struct mtable *table = &means.table[t]; - if (table->n_layers > 0) - iact_append_factor (table, 0, interaction_create (NULL)); - else - table->interactions[0] = interaction_create (NULL); + table->interactions = + pool_calloc (means.pool, table->n_layers, sizeof (*table->interactions)); - } + table->summary = + pool_calloc (means.pool, table->n_dep_vars * table->n_layers, sizeof (*table->summary)); + for (l = 0; l < table->n_layers; ++l) + { + int v; + const struct layer *lyr = &table->layers[l]; + const int n_vars = lyr->n_factor_vars; + table->interactions[l] = interaction_create (NULL); + for (v = 0; v < n_vars ; ++v) + { + interaction_add_variable (table->interactions[l], + lyr->factor_vars[v]); + } + } + } { struct casegrouper *grouper; @@ -768,11 +784,34 @@ cmd_means (struct lexer *lexer, struct dataset *ds) ok = proc_commit (ds) && ok; } + for (t = 0; t < means.n_tables; ++t) + { + int l; + struct mtable *table = &means.table[t]; + if (table->interactions) + for (l = 0; l < table->n_layers; ++l) + { + interaction_destroy (table->interactions[l]); + } + } + pool_destroy (means.pool); return CMD_SUCCESS; -error: + error: + for (t = 0; t < means.n_tables; ++t) + { + int l; + struct mtable *table = &means.table[t]; + if (table->interactions) + for (l = 0; l < table->n_layers; ++l) + { + interaction_destroy (table->interactions[l]); + } + } + + pool_destroy (means.pool); return CMD_FAILURE; } @@ -807,23 +846,39 @@ struct per_cat_data bool warn; }; + +static void +destroy_n (const void *aux1 UNUSED, void *aux2, void *user_data) +{ + struct mtable *table = aux2; + int v; + struct per_cat_data *per_cat_data = user_data; + struct per_var_data *pvd = per_cat_data->pvd; + + for (v = 0; v < table->n_dep_vars; ++v) + { + struct per_var_data *pp = &pvd[v]; + moments1_destroy (pp->mom); + } +} + static void * create_n (const void *aux1, void *aux2) { int i, v; const struct means *means = aux1; struct mtable *table = aux2; - struct per_cat_data *per_cat_data = xmalloc (sizeof *per_cat_data); + struct per_cat_data *per_cat_data = pool_malloc (means->pool, sizeof *per_cat_data); - struct per_var_data *pvd = xcalloc (table->n_dep_vars, sizeof *pvd); + struct per_var_data *pvd = pool_calloc (means->pool, table->n_dep_vars, sizeof *pvd); for (v = 0; v < table->n_dep_vars; ++v) { enum moment maxmom = MOMENT_KURTOSIS; struct per_var_data *pp = &pvd[v]; - pp->cell_stats = xcalloc (means->n_cells, sizeof *pp->cell_stats); - + pp->cell_stats = pool_calloc (means->pool, means->n_cells, sizeof *pp->cell_stats); + for (i = 0; i < means->n_cells; ++i) { @@ -858,9 +913,10 @@ update_n (const void *aux1, void *aux2, void *user_data, const struct ccase *c, const double x = case_data (c, table->dep_vars[v])->f; - for (i = 0; i < table->n_interactions; ++i) + for (i = 0; i < table->n_layers; ++i) { - if ( is_missing (means, table->dep_vars[v], table->interactions[i], c)) + if ( is_missing (means, table->dep_vars[v], + table->interactions[i], c)) goto end; } @@ -905,12 +961,11 @@ calculate_n (const void *aux1, void *aux2, void *user_data) } } - static void run_means (struct means *cmd, struct casereader *input, const struct dataset *ds UNUSED) { - int i,t; + int t; const struct variable *wv = dict_get_weight (cmd->dict); struct ccase *c; struct casereader *reader; @@ -918,40 +973,48 @@ run_means (struct means *cmd, struct casereader *input, struct payload payload; payload.create = create_n; payload.update = update_n; - payload.destroy = calculate_n; - + payload.calculate = calculate_n; + payload.destroy = destroy_n; + for (t = 0; t < cmd->n_tables; ++t) - { - struct mtable *table = &cmd->table[t]; - table->cats - = categoricals_create (table->interactions, - table->n_interactions, wv, cmd->exclude); + { + struct mtable *table = &cmd->table[t]; + table->cats + = categoricals_create (table->interactions, + table->n_layers, wv, cmd->dep_exclude, cmd->exclude); - categoricals_set_payload (table->cats, &payload, cmd, table); - } + categoricals_set_payload (table->cats, &payload, cmd, table); + } - for (reader = casereader_clone (input); + for (reader = input; (c = casereader_read (reader)) != NULL; case_unref (c)) { for (t = 0; t < cmd->n_tables; ++t) { + bool something_missing = false; int v; struct mtable *table = &cmd->table[t]; for (v = 0; v < table->n_dep_vars; ++v) { int i; - for (i = 0; i < table->n_interactions; ++i) + for (i = 0; i < table->n_layers; ++i) { const bool missing = is_missing (cmd, table->dep_vars[v], table->interactions[i], c); if (missing) - table->summary[v * table->n_interactions + i].missing++; + { + something_missing = true; + table->summary[v * table->n_layers + i].missing++; + } else - table->summary[v * table->n_interactions + i].non_missing++; + table->summary[v * table->n_layers + i].non_missing++; } } + if ( something_missing && cmd->listwise_exclude) + continue; + categoricals_update (table->cats, c); } } @@ -967,20 +1030,22 @@ run_means (struct means *cmd, struct casereader *input, for (t = 0; t < cmd->n_tables; ++t) { + int i; const struct mtable *table = &cmd->table[t]; output_case_processing_summary (table); - for (i = 0; i < table->n_interactions; ++i) + for (i = 0; i < table->n_layers; ++i) { output_report (cmd, i, table); } - categoricals_destroy (table->cats); } + } + static void output_case_processing_summary (const struct mtable *table) { @@ -989,7 +1054,7 @@ output_case_processing_summary (const struct mtable *table) const int heading_rows = 3; struct tab_table *t; - const int nr = heading_rows + table->n_interactions * table->n_dep_vars; + const int nr = heading_rows + table->n_layers * table->n_dep_vars; const int nc = 7; t = tab_create (nc, nr); @@ -1026,9 +1091,9 @@ output_case_processing_summary (const struct mtable *table) { const struct variable *var = table->dep_vars[v]; const char *dv_name = var_to_string (var); - for (i = 0; i < table->n_interactions; ++i) + for (i = 0; i < table->n_layers; ++i) { - const int row = v * table->n_interactions + i; + const int row = v * table->n_layers + i; const struct interaction *iact = table->interactions[i]; casenumber n_total; @@ -1042,32 +1107,32 @@ output_case_processing_summary (const struct mtable *table) TAB_LEFT | TAT_TITLE, ds_cstr (&str)); - n_total = table->summary[row].missing + + n_total = table->summary[row].missing + table->summary[row].non_missing; tab_double (t, 1, row + heading_rows, - 0, table->summary[row].non_missing, &F_8_0); + 0, table->summary[row].non_missing, NULL, RC_INTEGER); tab_text_format (t, 2, row + heading_rows, - 0, _("%g%%"), + 0, _("%g%%"), table->summary[row].non_missing / (double) n_total * 100.0); tab_double (t, 3, row + heading_rows, - 0, table->summary[row].missing, &F_8_0); + 0, table->summary[row].missing, NULL, RC_INTEGER); tab_text_format (t, 4, row + heading_rows, - 0, _("%g%%"), + 0, _("%g%%"), table->summary[row].missing / (double) n_total * 100.0); tab_double (t, 5, row + heading_rows, - 0, table->summary[row].missing + - table->summary[row].non_missing, &F_8_0); + 0, table->summary[row].missing + + table->summary[row].non_missing, NULL, RC_INTEGER); tab_text_format (t, 6, row + heading_rows, - 0, _("%g%%"), + 0, _("%g%%"), n_total / (double) n_total * 100.0); @@ -1079,7 +1144,6 @@ output_case_processing_summary (const struct mtable *table) } - static void output_report (const struct means *cmd, int iact_idx, const struct mtable *table) @@ -1107,7 +1171,7 @@ output_report (const struct means *cmd, int iact_idx, tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1); tab_hline (t, TAL_2, 0, nc - 1, heading_rows); - tab_vline (t, TAL_2, iact->n_vars, 0, nr - 1); + tab_vline (t, TAL_2, heading_columns, 0, nr - 1); for (i = 0; i < iact->n_vars; ++i) { @@ -1134,7 +1198,7 @@ output_report (const struct means *cmd, int iact_idx, tab_text (t, 0, heading_rows + dv * n_cats, TAB_RIGHT | TAT_TITLE, - var_get_name (table->dep_vars[dv]) + var_to_string (table->dep_vars[dv]) ); if ( dv > 0) @@ -1174,7 +1238,7 @@ output_report (const struct means *cmd, int iact_idx, tab_double (t, heading_columns + i, heading_rows + grp + dv * n_cats, - 0, result, 0); + 0, result, NULL, RC_OTHER); } } }