6bbae8938579aea56b23d1e2be216b323b3a8601
[pspp] / src / language / stats / means-parser.c
1 /* PSPP - a program for statistical analysis.
2    Copyright (C) 2011, 2012, 2013, 2019 Free Software Foundation, Inc.
3
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation, either version 3 of the License, or
7    (at your option) any later version.
8
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
16
17 #include <config.h>
18
19 #include "data/case.h"
20 #include "data/casegrouper.h"
21 #include "data/casereader.h"
22 #include "data/dataset.h"
23 #include "data/dictionary.h"
24 #include "data/format.h"
25 #include "data/variable.h"
26
27 #include "language/lexer/lexer.h"
28 #include "language/lexer/variable-parser.h"
29
30 #include "libpspp/pool.h"
31
32 #include "means.h"
33
34 /* Parse the /TABLES stanza of the command.  */
35 static bool
36 parse_means_table_syntax (struct lexer *lexer, const struct means *cmd,
37                           struct mtable *table)
38 {
39   memset (table, 0, sizeof *table);
40
41   /* Dependent variable (s) */
42   if (!parse_variables_const_pool (lexer, cmd->pool, cmd->dict,
43                                    &table->dep_vars, &table->n_dep_vars,
44                                    PV_NO_DUPLICATE | PV_NUMERIC))
45     return false;
46
47   /* Factor variable (s) */
48   while (lex_match (lexer, T_BY))
49     {
50       struct layer *layer = pool_zalloc (cmd->pool, sizeof *layer);
51
52       table->layers =
53         pool_nrealloc (cmd->pool, table->layers, table->n_layers + 1,
54                        sizeof *table->layers);
55       table->layers[table->n_layers] = layer;
56       table->n_layers++;
57
58       if (!parse_variables_const_pool
59           (lexer, cmd->pool, cmd->dict,
60            &layer->factor_vars,
61            &layer->n_factor_vars,
62            PV_NO_DUPLICATE))
63         return false;
64     }
65
66   return true;
67 }
68
69 /* Match a variable.
70    If the match succeeds, the variable will be placed in VAR.
71    Returns true if successful */
72 static bool
73 lex_is_variable (struct lexer *lexer, const struct dictionary *dict,
74                  int n)
75 {
76   const char *tstr;
77   if (lex_next_token (lexer, n) !=  T_ID)
78     return false;
79
80   tstr = lex_next_tokcstr (lexer, n);
81
82   if (NULL == dict_lookup_var (dict, tstr))
83     return false;
84
85   return true;
86 }
87
88 bool
89 means_parse (struct lexer *lexer, struct means *means)
90 {
91   /*   Optional TABLES =   */
92   if (lex_match_id (lexer, "TABLES"))
93     {
94       if (! lex_force_match (lexer, T_EQUALS))
95         return false;
96     }
97
98   bool more_tables = true;
99   /* Parse the "tables" */
100   while (more_tables)
101     {
102       means->table = pool_realloc (means->pool, means->table,
103                                    (means->n_tables + 1) * sizeof (*means->table));
104
105       if (! parse_means_table_syntax (lexer, means,
106                                       &means->table[means->n_tables]))
107         {
108           return false;
109         }
110       means->n_tables ++;
111
112       /* Look ahead to see if there are more tables to be parsed */
113       more_tables = false;
114       if (T_SLASH == lex_next_token (lexer, 0))
115         {
116           if (lex_is_variable (lexer, means->dict, 1))
117             {
118               more_tables = true;
119               lex_match (lexer, T_SLASH);
120             }
121         }
122     }
123
124   /* /MISSING subcommand */
125   while (lex_token (lexer) != T_ENDCMD)
126     {
127       lex_match (lexer, T_SLASH);
128
129       if (lex_match_id (lexer, "MISSING"))
130         {
131           /*
132             If no MISSING subcommand is specified, each combination of
133             a dependent variable and categorical variables is handled
134             separately.
135           */
136           lex_match (lexer, T_EQUALS);
137           if (lex_match_id (lexer, "INCLUDE"))
138             {
139               /*
140                 Use the subcommand  "/MISSING=INCLUDE" to include user-missing
141                 values in the analysis.
142               */
143
144               means->ctrl_exclude = MV_SYSTEM;
145               means->dep_exclude = MV_SYSTEM;
146             }
147           else if (lex_match_id (lexer, "DEPENDENT"))
148             /*
149               Use the command "/MISSING=DEPENDENT" to
150               include user-missing values for the categorical variables,
151               while excluding them for the dependent variables.
152
153               Cases are dropped only when user-missing values
154               appear in dependent  variables.  User-missing
155               values for categorical variables are treated according to
156               their face value.
157
158               Cases are ALWAYS dropped when System Missing values appear
159               in the categorical variables.
160             */
161             {
162               means->dep_exclude = MV_ANY;
163               means->ctrl_exclude = MV_SYSTEM;
164             }
165           else
166             {
167               lex_error (lexer, NULL);
168               return false;
169             }
170         }
171       else if (lex_match_id (lexer, "CELLS"))
172         {
173           lex_match (lexer, T_EQUALS);
174
175           /* The default values become overwritten */
176           means->n_statistics = 0;
177           pool_free (means->pool, means->statistics);
178           means->statistics = 0;
179           while (lex_token (lexer) != T_ENDCMD
180                  && lex_token (lexer) != T_SLASH)
181             {
182               if (lex_match (lexer, T_ALL))
183                 {
184                   pool_free (means->pool, means->statistics);
185                   means->statistics = pool_calloc (means->pool,
186                                                    n_MEANS_STATISTICS,
187                                                    sizeof (*means->statistics));
188                   means->n_statistics = n_MEANS_STATISTICS;
189                   int i;
190                   for (i = 0; i < n_MEANS_STATISTICS; ++i)
191                     {
192                       means->statistics[i] = i;
193                     }
194                 }
195               else if (lex_match_id (lexer, "NONE"))
196                 {
197                   means->n_statistics = 0;
198                   pool_free (means->pool, means->statistics);
199                   means->statistics = 0;
200                 }
201               else if (lex_match_id (lexer, "DEFAULT"))
202                 {
203                   pool_free (means->pool, means->statistics);
204                   means->statistics = pool_calloc (means->pool,
205                                                    3,
206                                                    sizeof *means->statistics);
207                   means->statistics[0] = MEANS_MEAN;
208                   means->statistics[1] = MEANS_N;
209                   means->statistics[2] = MEANS_STDDEV;
210                 }
211               else
212                 {
213                   int i;
214                   for (i = 0; i < n_MEANS_STATISTICS; ++i)
215                     {
216                       const struct cell_spec *cs = cell_spec + i;
217                       if (lex_match_id (lexer, cs->keyword))
218                         {
219                           means->statistics
220                             = pool_realloc (means->pool,
221                                            means->statistics,
222                                            (means->n_statistics + 1)
223                                            * sizeof (*means->statistics));
224
225                           means->statistics[means->n_statistics] = i;
226                           means->n_statistics++;
227                           break;
228                         }
229                     }
230
231                   if (i >= n_MEANS_STATISTICS)
232                     {
233                       lex_error (lexer, NULL);
234                       return false;
235                     }
236                 }
237             }
238         }
239       else
240         {
241           lex_error (lexer, NULL);
242           return false;
243         }
244     }
245   return true;
246 }