1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 #include "data/case.h"
21 #include "data/casegrouper.h"
22 #include "data/casereader.h"
23 #include "data/dataset.h"
24 #include "data/dictionary.h"
25 #include "data/variable.h"
26 #include "language/command.h"
27 #include "language/lexer/lexer.h"
28 #include "language/lexer/variable-parser.h"
30 #include "math/categoricals.h"
31 #include "math/interaction.h"
33 #include "output/tab.h"
36 #define _(msgid) gettext (msgid)
37 #define N_(msgid) (msgid)
41 /* Printable title for output */
44 /* Keyword for syntax */
48 /* Table of cell_specs */
49 static const struct cell_spec cell_spec[] =
51 {N_("Means"), "MEANS"},
53 {N_("Std. Deviation"), "STDDEV"},
54 {N_("Median"), "MEDIAN"},
55 {N_("Group Median"), "GMEDIAN"},
56 {N_("S.E. Mean"), "SEMEAN"},
60 {N_("Range"), "RANGE"},
61 {N_("Variance"), "VARIANCE"},
62 {N_("Kurtosis"), "KURTOSIS"},
63 {N_("S.E. Kurt"), "SEKURT"},
64 {N_("Skewness"), "SKEW"},
65 {N_("S.E. Skew"), "SESKEW"},
66 {N_("First"), "FIRST"},
68 {N_("Percent N"), "NPCT"},
69 {N_("Percent Sum"), "SPCT"},
70 {N_("Harmonic Mean"), "HARMONIC"},
71 {N_("Geom. Mean"), "GEOMETRIC"}
74 #define n_C (sizeof (cell_spec) / sizeof (struct cell_spec))
79 const struct variable **dep_vars;
81 size_t n_interactions;
82 struct interaction **interactions;
84 size_t *n_factor_vars;
85 const struct variable ***factor_vars;
91 const struct dictionary *dict;
93 enum mv_class exclude;
95 /* an array indicating which statistics are to be calculated */
101 struct categoricals *cats;
106 run_means (struct means *cmd, struct casereader *input,
107 const struct dataset *ds);
109 /* Append all the variables belonging to layer and all subsequent layers
110 to iact. And then append iact to the means->interaction.
111 This is a recursive function.
114 iact_append_factor (struct means *means, int layer, const struct interaction *iact)
117 const struct variable **fv ;
119 if (layer >= means->n_layers)
122 fv = means->factor_vars[layer];
124 for (v = 0; v < means->n_factor_vars[layer]; ++v)
126 struct interaction *nexti = interaction_clone (iact);
128 interaction_add_variable (nexti, fv[v]);
130 iact_append_factor (means, layer + 1, nexti);
132 if (layer == means->n_layers - 1)
134 means->interactions[means->ii++] = nexti;
140 cmd_means (struct lexer *lexer, struct dataset *ds)
146 means.n_factor_vars = NULL;
147 means.factor_vars = NULL;
151 means.n_dep_vars = 0;
152 means.dict = dataset_dict (ds);
155 means.cells = xcalloc (means.n_cells, sizeof (*means.cells));
157 /* The first three items (MEANS, COUNT, STDDEV) are the default */
158 for (i = 0; i < 3 ; ++i)
162 /* Optional TABLES = */
163 if (lex_match_id (lexer, "TABLES"))
165 lex_force_match (lexer, T_EQUALS);
168 /* Dependent variable (s) */
169 if (!parse_variables_const (lexer, means.dict,
170 &means.dep_vars, &means.n_dep_vars,
171 PV_NO_DUPLICATE | PV_NUMERIC))
174 /* Factor variable (s) */
175 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
177 if (lex_match (lexer, T_BY))
181 xrealloc (means.factor_vars,
182 sizeof (*means.factor_vars) * means.n_layers);
183 means.n_factor_vars =
184 xrealloc (means.n_factor_vars,
185 sizeof (*means.n_factor_vars) * means.n_layers);
187 if (!parse_variables_const (lexer, means.dict,
188 &means.factor_vars[means.n_layers - 1],
189 &means.n_factor_vars[means.n_layers -
191 PV_NO_DUPLICATE | PV_NUMERIC))
197 /* /MISSING subcommand */
198 while (lex_token (lexer) != T_ENDCMD)
200 lex_match (lexer, T_SLASH);
202 if (lex_match_id (lexer, "MISSING"))
204 lex_match (lexer, T_EQUALS);
205 while (lex_token (lexer) != T_ENDCMD
206 && lex_token (lexer) != T_SLASH)
208 if (lex_match_id (lexer, "INCLUDE"))
210 means.exclude = MV_SYSTEM;
212 else if (lex_match_id (lexer, "EXCLUDE"))
214 means.exclude = MV_ANY;
218 lex_error (lexer, NULL);
223 else if (lex_match_id (lexer, "CELLS"))
225 lex_match (lexer, T_EQUALS);
227 /* The default values become overwritten */
229 while (lex_token (lexer) != T_ENDCMD
230 && lex_token (lexer) != T_SLASH)
233 for (k = 0; k < n_C; ++k)
235 if (lex_match_id (lexer, cell_spec[k].keyword))
238 xrealloc (means.cells,
239 ++means.n_cells * sizeof (*means.cells));
241 means.cells[means.n_cells - 1] = k;
247 lex_error (lexer, NULL);
254 lex_error (lexer, NULL);
260 means.n_interactions = 1;
261 for (l = 0; l < means.n_layers; ++l)
263 const int n_vars = means.n_factor_vars[l];
264 means.n_interactions *= n_vars;
268 xcalloc (means.n_interactions, sizeof (*means.interactions));
272 iact_append_factor (&means, 0, interaction_create (NULL));
275 struct casegrouper *grouper;
276 struct casereader *group;
279 grouper = casegrouper_create_splits (proc_open (ds), means.dict);
280 while (casegrouper_get_next_group (grouper, &group))
282 run_means (&means, group, ds);
284 ok = casegrouper_destroy (grouper);
285 ok = proc_commit (ds) && ok;
293 free (means.dep_vars);
298 static void output_case_processing_summary (const struct means *cmd);
299 static void output_report (const struct means *,
300 const struct interaction *);
303 run_means (struct means *cmd, struct casereader *input,
304 const struct dataset *ds)
307 const struct variable *wv = dict_get_weight (cmd->dict);
309 struct casereader *reader;
311 bool warn_bad_weight = true;
314 = categoricals_create (cmd->interactions,
315 cmd->n_interactions, wv, cmd->exclude, 0, 0, 0, 0);
318 for (reader = casereader_clone (input);
319 (c = casereader_read (reader)) != NULL; case_unref (c))
321 double weight = dict_get_case_weight (cmd->dict, c, &warn_bad_weight);
323 printf ("%g\n", case_data_idx (c, 0)->f);
324 categoricals_update (cmd->cats, c);
326 casereader_destroy (reader);
328 categoricals_done (cmd->cats);
330 output_case_processing_summary (cmd);
332 for (i = 0; i < cmd->n_interactions; ++i)
334 output_report (cmd, cmd->interactions[i]);
340 output_case_processing_summary (const struct means *cmd)
343 const int heading_columns = 1;
344 const int heading_rows = 3;
347 const int nr = heading_rows + cmd->n_interactions;
350 t = tab_create (nc, nr);
351 tab_title (t, _("Case Processing Summary"));
353 tab_headers (t, heading_columns, 0, heading_rows, 0);
355 tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1);
357 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
358 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
361 tab_joint_text (t, heading_columns, 0,
362 nc - 1, 0, TAB_CENTER | TAT_TITLE, _("Cases"));
364 tab_joint_text (t, 1, 1, 2, 1, TAB_CENTER | TAT_TITLE, _("Included"));
365 tab_joint_text (t, 3, 1, 4, 1, TAB_CENTER | TAT_TITLE, _("Excluded"));
366 tab_joint_text (t, 5, 1, 6, 1, TAB_CENTER | TAT_TITLE, _("Total"));
368 tab_hline (t, TAL_1, heading_columns, nc - 1, 1);
369 tab_hline (t, TAL_1, heading_columns, nc - 1, 2);
372 for (i = 0; i < 3; ++i)
374 tab_text (t, heading_columns + i * 2, 2, TAB_CENTER | TAT_TITLE,
376 tab_text (t, heading_columns + i * 2 + 1, 2, TAB_CENTER | TAT_TITLE,
380 for (i = 0; i < cmd->n_interactions; ++i)
382 const struct interaction *iact = cmd->interactions[i];
385 ds_init_empty (&str);
386 interaction_to_string (iact, &str);
388 size_t n = categoricals_n_count (cmd->cats, i);
390 tab_text (t, 0, i + heading_rows, TAB_LEFT | TAT_TITLE, ds_cstr (&str));
392 printf ("Count %d is %d\n", i, n);
404 output_report (const struct means *cmd, const struct interaction *iact)
407 const int heading_columns = 0;
408 const int heading_rows = 1;
412 const int nc = heading_columns + iact->n_vars + cmd->n_cells;
415 t = tab_create (nc, nr);
416 tab_title (t, _("Report"));
418 tab_headers (t, heading_columns, 0, heading_rows, 0);
420 tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1);
422 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
423 tab_vline (t, TAL_2, iact->n_vars, 0, nr - 1);
425 for (i = 0; i < iact->n_vars; ++i)
427 tab_text (t, heading_columns + i, 0, TAB_CENTER | TAT_TITLE,
428 var_to_string (iact->vars[i]));
431 for (i = 0; i < cmd->n_cells; ++i)
433 tab_text (t, heading_columns + iact->n_vars + i, 0,
434 TAB_CENTER | TAT_TITLE,
435 gettext (cell_spec[cmd->cells[i]].title));
438 tab_text (t, heading_columns + 1, 5, TAB_CENTER | TAT_TITLE, "data");