X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fstats%2Fmeans-parser.c;fp=src%2Flanguage%2Fstats%2Fmeans-parser.c;h=73822ecce14c2588efbe7811c4fe278936ffde7a;hb=ad7ae8bb105c925b951e241fd6a3d1cb93d614a0;hp=0000000000000000000000000000000000000000;hpb=814952415c6322e9ab08376426fb99d851765e20;p=pspp diff --git a/src/language/stats/means-parser.c b/src/language/stats/means-parser.c new file mode 100644 index 0000000000..73822ecce1 --- /dev/null +++ b/src/language/stats/means-parser.c @@ -0,0 +1,313 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2011, 2012, 2013, 2019 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "data/case.h" +#include "data/casegrouper.h" +#include "data/casereader.h" +#include "data/dataset.h" +#include "data/dictionary.h" +#include "data/format.h" +#include "data/variable.h" + +#include "language/command.h" +#include "language/lexer/lexer.h" +#include "language/lexer/variable-parser.h" + +#include "libpspp/hmap.h" +#include "libpspp/bt.h" +#include "libpspp/misc.h" +#include "libpspp/pool.h" + +#include "means.h" + +/* Parse the /TABLES stanza of the command. */ +static bool +parse_means_table_syntax (struct lexer *lexer, const struct means *cmd, + struct mtable *table) +{ + table->n_layers = 0; + table->layers = NULL; + + /* Dependent variable (s) */ + if (!parse_variables_const_pool (lexer, cmd->pool, cmd->dict, + &table->dep_vars, &table->n_dep_vars, + PV_NO_DUPLICATE | PV_NUMERIC)) + return false; + + + /* Factor variable (s) */ + while (lex_match (lexer, T_BY)) + { + struct layer *layer = xzalloc (sizeof *layer); + hmap_init (&layer->instances.map); + + table->n_layers++; + table->layers = xrealloc (table->layers, + table->n_layers * sizeof *table->layers); + + table->layers[table->n_layers - 1] = layer; + + if (!parse_variables_const_pool + (lexer, cmd->pool, cmd->dict, + &layer->factor_vars, + &layer->n_factor_vars, + PV_NO_DUPLICATE)) + return false; + } + + return true; +} + +/* Match a variable. + If the match succeeds, the variable will be placed in VAR. + Returns true if successful */ +static bool +lex_is_variable (struct lexer *lexer, const struct dictionary *dict, + int n) +{ + const char *tstr; + if (lex_next_token (lexer, n) != T_ID) + return false; + + tstr = lex_next_tokcstr (lexer, n); + + if (NULL == dict_lookup_var (dict, tstr) ) + return false; + + return true; +} + +static bool +means_parse (struct lexer *lexer, struct means *means) +{ + /* Optional TABLES = */ + if (lex_match_id (lexer, "TABLES")) + { + if (! lex_force_match (lexer, T_EQUALS)) + return false; + } + + bool more_tables = true; + /* Parse the "tables" */ + while (more_tables) + { + means->n_tables ++; + means->table = pool_realloc (means->pool, means->table, means->n_tables * sizeof (*means->table)); + + if (! parse_means_table_syntax (lexer, means, + &means->table[means->n_tables - 1])) + { + return false; + } + + /* Look ahead to see if there are more tables to be parsed */ + more_tables = false; + if ( T_SLASH == lex_next_token (lexer, 0) ) + { + if (lex_is_variable (lexer, means->dict, 1) ) + { + more_tables = true; + lex_match (lexer, T_SLASH); + } + } + } + + /* /MISSING subcommand */ + while (lex_token (lexer) != T_ENDCMD) + { + lex_match (lexer, T_SLASH); + + if (lex_match_id (lexer, "MISSING")) + { + /* + If no MISSING subcommand is specified, each combination of + a dependent variable and categorical variables is handled + separately. + */ + lex_match (lexer, T_EQUALS); + if (lex_match_id (lexer, "INCLUDE")) + { + /* + Use the subcommand "/MISSING=INCLUDE" to include user-missing + values in the analysis. + */ + + means->exclude = MV_SYSTEM; + means->dep_exclude = MV_SYSTEM; + } + else if (lex_match_id (lexer, "TABLE")) + /* + This is the default. (I think). + Every case containing a complete set of variables for a given + table. If any variable, categorical or dependent for in a table + is missing (as defined by what?), then that variable will + be dropped FOR THAT TABLE ONLY. + */ + { + means->listwise_exclude = true; + } + else if (lex_match_id (lexer, "DEPENDENT")) + /* + Use the command "/MISSING=DEPENDENT" to + include user-missing values for the categorical variables, + while excluding them for the dependent variables. + + Cases are dropped only when user-missing values + appear in dependent variables. User-missing + values for categorical variables are treated according to + their face value. + + Cases are ALWAYS dropped when System Missing values appear + in the categorical variables. + */ + { + means->dep_exclude = MV_ANY; + means->exclude = MV_SYSTEM; + } + else + { + lex_error (lexer, NULL); + return false; + } + } + else if (lex_match_id (lexer, "CELLS")) + { + lex_match (lexer, T_EQUALS); + + /* The default values become overwritten */ + means->n_statistics = 0; + free (means->statistics); + means->statistics = 0; + while (lex_token (lexer) != T_ENDCMD + && lex_token (lexer) != T_SLASH) + { + if (lex_match (lexer, T_ALL)) + { + free (means->statistics); + means->statistics = xcalloc (n_MEANS_STATISTICS, sizeof (*means->statistics)); + means->n_statistics = n_MEANS_STATISTICS; + int i; + for (i = 0; i < n_MEANS_STATISTICS; ++i) + { + means->statistics[i] = i; + } + } + else if (lex_match_id (lexer, "NONE")) + { + means->n_statistics = 0; + free (means->statistics); + means->statistics = 0; + } + else if (lex_match_id (lexer, "DEFAULT")) + { + means->n_statistics = 3; + means->statistics = xcalloc (3, sizeof *means->statistics); + means->statistics[0] = MEANS_MEAN; + means->statistics[1] = MEANS_N; + means->statistics[2] = MEANS_STDDEV; + } + else + { + int i; + for (i = 0; i < n_MEANS_STATISTICS; ++i) + { + const struct cell_spec *cs = cell_spec + i; + if (lex_match_id (lexer, cs->keyword)) + { + means->statistics + = xrealloc (means->statistics, + (means->n_statistics + 1) + * sizeof (*means->statistics)); + + means->statistics[means->n_statistics] = i; + means->n_statistics++; + break; + } + } + + if (i >= n_MEANS_STATISTICS) + { + lex_error (lexer, NULL); + return false; + } + } + } + } + else + { + lex_error (lexer, NULL); + return false; + } + } + return true; +} + +int +cmd_means (struct lexer *lexer, struct dataset *ds) +{ + struct means means; + means.pool = pool_create (); + + means.exclude = MV_ANY; + means.dep_exclude = MV_ANY; + means.listwise_exclude = false; + means.table = NULL; + means.n_tables = 0; + + means.dict = dataset_dict (ds); + + means.n_statistics = 3; + means.statistics = xcalloc (3, sizeof *means.statistics); + means.statistics[0] = MEANS_MEAN; + means.statistics[1] = MEANS_N; + means.statistics[2] = MEANS_STDDEV; + + if (! means_parse (lexer, &means)) + goto error; + + { + struct casegrouper *grouper; + struct casereader *group; + bool ok; + + grouper = casegrouper_create_splits (proc_open (ds), means.dict); + while (casegrouper_get_next_group (grouper, &group)) + { + run_means (&means, group, ds); + } + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; + } + + for (int t = 0; t < means.n_tables; ++t) + { + const struct mtable *table = means.table + t; + means_shipout (table, &means); + } + + return CMD_SUCCESS; + + error: + + return CMD_FAILURE; +} + + + + +