1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2010, 2011, 2012 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/data-out.h"
20 #include "data/dataset.h"
21 #include "data/dictionary.h"
22 #include "data/mrset.h"
23 #include "data/value-labels.h"
24 #include "data/variable.h"
25 #include "language/command.h"
26 #include "language/lexer/lexer.h"
27 #include "language/lexer/variable-parser.h"
28 #include "libpspp/assertion.h"
29 #include "libpspp/hmap.h"
30 #include "libpspp/i18n.h"
31 #include "libpspp/message.h"
32 #include "libpspp/str.h"
33 #include "libpspp/stringi-map.h"
34 #include "libpspp/stringi-set.h"
35 #include "output/pivot-table.h"
37 #include "gl/xalloc.h"
/* N_() expands to its argument unchanged, so it only tags a message ID;
   _() passes the message ID through gettext. */
40 #define N_(msgid) msgid
41 #define _(msgid) gettext (msgid)
/* Prototypes for the subcommand parsers that cmd_mrsets dispatches to.
   parse_group also takes the kind of set (MRSET_MD or MRSET_MC) to build. */
43 static bool parse_group (struct lexer *, struct dictionary *, enum mrset_type);
44 static bool parse_delete (struct lexer *, struct dictionary *);
45 static bool parse_display (struct lexer *, struct dictionary *);
/* Entry point for the MRSETS command.  Works on the dictionary of dataset DS
   and handles a sequence of slash-introduced subcommands, dispatching each by
   keyword: MDGROUP and MCGROUP go to parse_group, DELETE to parse_delete, and
   DISPLAY to parse_display.
   NOTE(review): the return type, the handling of a failed subcommand, and the
   final return are not visible in this listing -- confirm against the full
   file. */
48 cmd_mrsets (struct lexer *lexer, struct dataset *ds)
50 struct dictionary *dict = dataset_dict (ds);
/* One iteration per subcommand; the loop ends when no slash follows. */
52 while (lex_match (lexer, T_SLASH))
/* Each parser reports its outcome through OK. */
56 if (lex_match_id (lexer, "MDGROUP"))
57 ok = parse_group (lexer, dict, MRSET_MD);
58 else if (lex_match_id (lexer, "MCGROUP"))
59 ok = parse_group (lexer, dict, MRSET_MC);
60 else if (lex_match_id (lexer, "DELETE"))
61 ok = parse_delete (lexer, dict);
62 else if (lex_match_id (lexer, "DISPLAY"))
63 ok = parse_display (lexer, dict);
/* No keyword matched: report which subcommand names are accepted. */
67 lex_error_expecting (lexer, "MDGROUP", "MCGROUP",
/* Parses one MDGROUP or MCGROUP subcommand and builds a struct mrset from it.
   The settings NAME, VARIABLES and LABEL are accepted for both kinds; when
   type is MRSET_MD the settings LABELSOURCE, VALUE and CATEGORYLABELS are
   accepted as well.  After parsing, the function validates the combination of
   settings, emits warnings about categories that cannot be told apart in
   output, and hands the set to the dictionary with dict_add_mrset.
   NOTE(review): the error exits (and the label that leads to mrset_destroy at
   the end) are not visible in this listing -- confirm against the full
   file. */
79 parse_group (struct lexer *lexer, struct dictionary *dict,
/* Subcommand name used in the diagnostics below. */
82 const char *subcommand_name = type == MRSET_MD ? "MDGROUP" : "MCGROUP";
/* The set under construction (XZALLOC presumably zero-fills it -- confirm). */
84 struct mrset *mrset = XZALLOC (struct mrset);
/* Default category label source; CATEGORYLABELS below may override it. */
86 mrset->cat_source = MRSET_VARLABELS;
/* Settings recorded during parsing and acted on after the subcommand ends. */
88 bool labelsource_varlabel = false;
89 bool has_value = false;
/* Token offsets remembered so later messages can point at the syntax that
   caused them (see the lex_ofs_msg calls below). */
94 int labelsource_start = 0;
95 int labelsource_end = 0;
/* Consume settings until the next subcommand (slash) or the end of the
   command. */
98 while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
/* NAME=identifier.  The name is checked with mrset_is_valid_name__ against
   the dictionary encoding; a returned error string is reported as-is. */
100 if (lex_match_id (lexer, "NAME"))
102 if (!lex_force_match (lexer, T_EQUALS) || !lex_force_id (lexer))
104 char *error = mrset_is_valid_name__ (lex_tokcstr (lexer),
105 dict_get_encoding (dict));
108 lex_error (lexer, "%s", error);
/* Keep a private copy of the validated name. */
114 mrset->name = xstrdup (lex_tokcstr (lexer));
/* VARIABLES=list.  The list is parsed with PV_SAME_TYPE | PV_NO_SCRATCH
   (presumably: all variables of one type, no scratch variables -- confirm),
   and its token range is saved for later messages. */
117 else if (lex_match_id (lexer, "VARIABLES"))
119 if (!lex_force_match (lexer, T_EQUALS))
123 vars_start = lex_ofs (lexer);
124 if (!parse_variables (lexer, dict, &mrset->vars, &mrset->n_vars,
125 PV_SAME_TYPE | PV_NO_SCRATCH))
127 vars_end = lex_ofs (lexer) - 1;
/* A set with fewer than two variables is rejected. */
129 if (mrset->n_vars < 2)
131 lex_ofs_error (lexer, vars_start, vars_end,
132 _("At least two variables are required."));
/* LABEL=string.  label_start..label_end bracket the whole setting, starting
   at the LABEL keyword that was just matched. */
136 else if (lex_match_id (lexer, "LABEL"))
138 label_start = lex_ofs (lexer) - 1;
139 if (!lex_force_match (lexer, T_EQUALS) || !lex_force_string (lexer))
141 label_end = lex_ofs (lexer);
144 mrset->label = ss_xstrdup (lex_tokss (lexer));
/* LABELSOURCE=VARLABEL (MRSET_MD only).  Only recorded here; it is applied
   after all settings have been parsed. */
147 else if (type == MRSET_MD && lex_match_id (lexer, "LABELSOURCE"))
149 if (!lex_force_match_phrase (lexer, "=VARLABEL"))
152 labelsource_varlabel = true;
/* The three tokens just consumed: LABELSOURCE, the equals sign, VARLABEL. */
153 labelsource_start = lex_ofs (lexer) - 3;
154 labelsource_end = lex_ofs (lexer) - 1;
/* VALUE=integer or string (MRSET_MD only): the counted value. */
156 else if (type == MRSET_MD && lex_match_id (lexer, "VALUE"))
158 if (!lex_force_match (lexer, T_EQUALS))
162 value_ofs = lex_ofs (lexer);
/* Numeric counted value: only integers are accepted. */
163 if (lex_is_number (lexer))
165 if (!lex_is_integer (lexer))
167 lex_error (lexer, _("Numeric VALUE must be an integer."));
/* Release any counted value stored by an earlier VALUE setting. */
170 value_destroy (&mrset->counted, mrset->width);
171 mrset->counted.f = lex_integer (lexer);
/* String counted value.  The token text is recoded between UTF-8 and the
   dictionary encoding (direction follows the parameter order of
   recode_string -- confirm). */
174 else if (lex_is_string (lexer))
179 s = recode_string (dict_get_encoding (dict), "UTF-8",
180 lex_tokcstr (lexer), -1);
183 /* Trim off trailing spaces, but don't trim the string until
184 it's empty because a width of 0 is a numeric type. */
185 while (width > 1 && s[width - 1] == ' ')
/* Replace any previously stored counted value with the trimmed string. */
188 value_destroy (&mrset->counted, mrset->width);
189 value_init (&mrset->counted, width);
190 memcpy (mrset->counted.s, s, width);
191 mrset->width = width;
/* Neither a number nor a string. */
197 lex_error (lexer, _("Syntax error expecting integer or string."));
/* CATEGORYLABELS=VARLABELS or COUNTEDVALUES (MRSET_MD only). */
202 else if (type == MRSET_MD && lex_match_id (lexer, "CATEGORYLABELS"))
204 if (!lex_force_match (lexer, T_EQUALS))
207 if (lex_match_id (lexer, "VARLABELS"))
208 mrset->cat_source = MRSET_VARLABELS;
209 else if (lex_match_id (lexer, "COUNTEDVALUES"))
210 mrset->cat_source = MRSET_COUNTEDVALUES;
213 lex_error_expecting (lexer, "VARLABELS", "COUNTEDVALUES");
/* Unrecognized setting: the list of expected keywords depends on the kind of
   group being parsed. */
219 if (type == MRSET_MD)
220 lex_error_expecting (lexer, "NAME", "VARIABLES", "LABEL",
221 "LABELSOURCE", "VALUE", "CATEGORYLABELS");
223 lex_error_expecting (lexer, "NAME", "VARIABLES", "LABEL");
/* NAME and VARIABLES are mandatory for both kinds of group. */
228 if (mrset->name == NULL)
230 lex_spec_missing (lexer, subcommand_name, "NAME");
233 else if (mrset->n_vars == 0)
235 lex_spec_missing (lexer, subcommand_name, "VARIABLES");
/* Checks and warnings specific to multiple dichotomy groups. */
239 if (type == MRSET_MD)
241 /* Check that VALUE is specified and is valid for the VARIABLES. */
244 lex_spec_missing (lexer, subcommand_name, "VALUE");
/* A positive mrset->width denotes a string counted value, so this compares
   the type of the first variable with the type of the counted value. */
248 if (var_is_alpha (mrset->vars[0]) != (mrset->width > 0))
250 msg (SE, _("VARIABLES and VALUE must have the same type."));
/* Follow-up notes point at the variable list and at the value. */
251 if (var_is_alpha (mrset->vars[0]))
252 lex_ofs_msg (lexer, SN, vars_start, vars_end,
253 _("These are string variables."));
255 lex_ofs_msg (lexer, SN, vars_start, vars_end,
256 _("These are numeric variables."));
257 if (mrset->width > 0)
258 lex_ofs_msg (lexer, SN, value_ofs, value_ofs,
259 _("This is a string value."));
261 lex_ofs_msg (lexer, SN, value_ofs, value_ofs,
262 _("This is a numeric value."));
/* For string variables, find the narrowest member: the counted value must
   not be wider than it. */
265 if (var_is_alpha (mrset->vars[0]))
267 const struct variable *shortest_var = NULL;
268 int min_width = INT_MAX;
270 for (size_t i = 0; i < mrset->n_vars; i++)
272 int width = var_get_width (mrset->vars[i]);
273 if (width < min_width)
275 shortest_var = mrset->vars[i];
279 if (mrset->width > min_width)
281 msg (SE, _("The VALUE string must be no longer than the "
282 "narrowest variable in the group."));
283 lex_ofs_msg (lexer, SN, value_ofs, value_ofs,
284 _("The VALUE string is %d bytes long."),
286 lex_ofs_msg (lexer, SN, vars_start, vars_end,
287 _("Variable %s has a width of %d bytes."),
288 var_get_name (shortest_var), min_width);
293 /* Implement LABELSOURCE=VARLABEL. */
294 if (labelsource_varlabel)
/* LABELSOURCE is ignored, with a warning, unless CATEGORYLABELS is
   COUNTEDVALUES. */
296 if (mrset->cat_source != MRSET_COUNTEDVALUES)
297 lex_ofs_msg (lexer, SW, labelsource_start, labelsource_end,
298 _("MDGROUP subcommand for group %s specifies "
299 "LABELSOURCE=VARLABEL but not "
300 "CATEGORYLABELS=COUNTEDVALUES. "
301 "Ignoring LABELSOURCE."),
/* An explicit LABEL wins over LABELSOURCE; both settings are pointed out. */
303 else if (mrset->label)
305 msg (SW, _("MDGROUP subcommand for group %s specifies both "
306 "LABEL and LABELSOURCE, but only one of these "
307 "subcommands may be used at a time. "
308 "Ignoring LABELSOURCE."),
310 lex_ofs_msg (lexer, SN, label_start, label_end,
311 _("Here is the %s setting."), "LABEL");
312 lex_ofs_msg (lexer, SN, labelsource_start, labelsource_end,
313 _("Here is the %s setting."), "LABELSOURCE");
/* Otherwise take the set label from a member variable label.  The loop stops
   as soon as mrset->label has been set (presumably by the first variable that
   has a label -- the guard is not visible in this listing). */
317 mrset->label_from_var_label = true;
318 for (size_t i = 0; mrset->label == NULL && i < mrset->n_vars; i++)
320 const char *label = var_get_label (mrset->vars[i]);
323 mrset->label = xstrdup (label);
330 /* Warn if categories cannot be distinguished in output. */
/* With variable labels as category labels, two variables sharing a label
   produce indistinguishable categories.  SEEN maps each label to the name of
   the first variable found with it. */
331 if (mrset->cat_source == MRSET_VARLABELS)
333 struct stringi_map seen;
336 stringi_map_init (&seen);
337 for (i = 0; i < mrset->n_vars; i++)
339 const struct variable *var = mrset->vars[i];
340 const char *name = var_get_name (var);
341 const char *label = var_get_label (var);
344 const char *other_name = stringi_map_find (&seen, label);
346 if (other_name == NULL)
347 stringi_map_insert (&seen, label, name);
349 lex_ofs_msg (lexer, SW, vars_start, vars_end,
350 _("Variables %s and %s specified as part of "
351 "multiple dichotomy group %s have the same "
352 "variable label. Categories represented by "
353 "these variables will not be distinguishable "
355 other_name, name, mrset->name);
358 stringi_map_destroy (&seen);
/* Counted-value labels as category labels (presumably the alternative branch
   of the test above).  For each variable the counted value is resized to the
   variable width and looked up among its value labels; a missing label and a
   label already seen on another variable each draw a warning. */
362 struct stringi_map seen = STRINGI_MAP_INITIALIZER (seen);
363 for (size_t i = 0; i < mrset->n_vars; i++)
365 const struct variable *var = mrset->vars[i];
366 const char *name = var_get_name (var);
/* Work on a copy so that mrset->counted keeps its own width. */
369 value_clone (&value, &mrset->counted, mrset->width);
370 value_resize (&value, mrset->width, var_get_width (var));
372 const struct val_labs *val_labs = var_get_value_labels (var);
373 const char *label = val_labs_find (val_labs, &value);
375 lex_ofs_msg (lexer, SW, vars_start, vars_end,
376 _("Variable %s specified as part of multiple "
377 "dichotomy group %s (which has "
378 "CATEGORYLABELS=COUNTEDVALUES) has no value "
379 "label for its counted value. This category "
380 "will not be distinguishable in output."),
384 const char *other_name = stringi_map_find (&seen, label);
386 if (other_name == NULL)
387 stringi_map_insert (&seen, label, name);
389 lex_ofs_msg (lexer, SW, vars_start, vars_end,
390 _("Variables %s and %s specified as part of "
391 "multiple dichotomy group %s (which has "
392 "CATEGORYLABELS=COUNTEDVALUES) have the same "
393 "value label for the group's counted "
394 "value. These categories will not be "
395 "distinguishable in output."),
396 other_name, name, mrset->name);
/* The copy was resized to the variable width, so destroy it at that width. */
399 value_destroy (&value, var_get_width (var));
401 stringi_map_destroy (&seen);
/* Multiple category groups (presumably the branch for type other than
   MRSET_MD): the same value should carry the same label in every member
   variable. */
406 /* Warn if categories cannot be distinguished in output. */
/* Members of struct category, the hash table entry that records one labelled
   value and the variable it was first seen on (the remaining members are not
   visible in this listing). */
409 struct hmap_node hmap_node;
413 const char *var_name;
/* Table of every labelled value seen so far, keyed by the hash of the
   value. */
417 struct hmap categories = HMAP_INITIALIZER (categories);
418 for (size_t i = 0; i < mrset->n_vars; i++)
420 const struct variable *var = mrset->vars[i];
421 const char *name = var_get_name (var);
422 int width = var_get_width (var);
423 const struct val_labs *val_labs = var_get_value_labels (var);
/* Visit every value label of this variable. */
425 const struct val_lab *vl;
426 for (vl = val_labs_first (val_labs); vl != NULL;
427 vl = val_labs_next (val_labs, vl))
429 const union value *value = val_lab_get_value (vl);
430 const char *label = val_lab_get_label (vl);
431 unsigned int hash = value_hash (value, width, 0);
/* Look for an entry with the same width and value. */
434 HMAP_FOR_EACH_WITH_HASH (c, struct category, hmap_node,
437 if (width == c->width
438 && value_equal (value, &c->value, width))
/* Same value with a different label (compared with utf8_strcasecmp): warn,
   but only if this entry has not been warned about yet. */
440 if (!c->warned && utf8_strcasecmp (c->label, label))
/* Format the value for the message using the print format of the variable. */
442 char *s = data_out (value, var_get_encoding (var),
443 var_get_print_format (var),
444 settings_get_fmt_settings ());
446 lex_ofs_msg (lexer, SW, vars_start, vars_end,
447 _("Variables specified on MCGROUP should "
448 "have the same categories, but %s and "
449 "%s (and possibly others) in multiple "
450 "category group %s have different "
451 "value labels for value %s."),
452 c->var_name, name, mrset->name, s);
/* Add a new entry holding its own copy of the value (presumably only when no
   matching entry was found -- the guard is not visible in this listing). */
459 c = xmalloc (sizeof *c);
460 *c = (struct category) {
466 value_clone (&c->value, value, width);
467 hmap_insert (&categories, &c->hmap_node, hash);
/* Tear down the table: release each cloned value and unlink its entry. */
473 struct category *c, *next;
474 HMAP_FOR_EACH_SAFE (c, next, struct category, hmap_node, &categories)
476 value_destroy (&c->value, c->width);
477 hmap_delete (&categories, &c->hmap_node);
480 hmap_destroy (&categories);
/* Success: hand the finished set to the dictionary (which presumably takes
   ownership of it -- confirm). */
483 dict_add_mrset (dict, mrset);
/* Discard the partially built set (presumably reached only on the error
   path, whose label is not visible in this listing). */
487 mrset_destroy (mrset);
/* Parses NAME= followed by either a bracketed list of set names or the
   keyword ALL, and stores the selected names in MRSET_NAMES.  The set is
   initialized here only after NAME= has been matched.  Every listed name must
   identify an existing multiple response set in DICT; ALL selects every set
   in DICT.
   NOTE(review): the return statements are not visible in this listing.  The
   visible code destroys MRSET_NAMES only on the unknown-name failure; if the
   lex_force_id failure returns without doing so after names were already
   inserted, those names would leak -- confirm against the full file. */
492 parse_mrset_names (struct lexer *lexer, struct dictionary *dict,
493 struct stringi_set *mrset_names)
495 if (!lex_force_match_phrase (lexer, "NAME="))
498 stringi_set_init (mrset_names);
/* Bracketed list: read identifiers until the closing bracket. */
499 if (lex_match (lexer, T_LBRACK))
501 while (!lex_match (lexer, T_RBRACK))
503 if (!lex_force_id (lexer))
/* Reject a name that does not identify a set in the dictionary. */
505 if (dict_lookup_mrset (dict, lex_tokcstr (lexer)) == NULL)
507 lex_error (lexer, _("No multiple response set named %s."),
508 lex_tokcstr (lexer));
509 stringi_set_destroy (mrset_names);
512 stringi_set_insert (mrset_names, lex_tokcstr (lexer));
/* ALL: select every set currently in the dictionary. */
516 else if (lex_match (lexer, T_ALL))
518 size_t n_sets = dict_get_n_mrsets (dict);
521 for (i = 0; i < n_sets; i++)
522 stringi_set_insert (mrset_names, dict_get_mrset (dict, i)->name);
/* Neither a bracket nor ALL. */
526 lex_error_expecting (lexer, "`['", "ALL");
/* Parses a DELETE subcommand: resolves the requested set names with
   parse_mrset_names, removes each named set from DICT with dict_delete_mrset,
   and then releases the name set.
   NOTE(review): the return statements are not visible in this listing. */
534 parse_delete (struct lexer *lexer, struct dictionary *dict)
536 struct stringi_set mrset_names;
538 if (!parse_mrset_names (lexer, dict, &mrset_names))
/* Delete every selected set by name. */
541 const struct stringi_set_node *node;
542 STRINGI_SET_FOR_EACH (name, node, &mrset_names)
543 dict_delete_mrset (dict, name);
544 stringi_set_destroy (&mrset_names);
/* Parses a DISPLAY subcommand: resolves the requested set names with
   parse_mrset_names and submits a pivot table with one row per selected set
   and one column per attribute (label, encoding, counted value, member
   variables).
   NOTE(review): the return statements and the guard around the empty case
   are not visible in this listing. */
550 parse_display (struct lexer *lexer, struct dictionary *dict)
552 struct stringi_set mrset_names_set;
553 if (!parse_mrset_names (lexer, dict, &mrset_names_set))
556 size_t n = stringi_set_count (&mrset_names_set);
/* Presumably reached only when nothing was selected (n is zero): if the
   dictionary has no sets at all, say so in a note, then release the name
   set. */
559 if (dict_get_n_mrsets (dict) == 0)
560 lex_next_msg (lexer, SN, -1, -1,
561 _("The active dataset dictionary does not contain any "
562 "multiple response sets."));
563 stringi_set_destroy (&mrset_names_set);
567 struct pivot_table *table = pivot_table_create (
568 N_("Multiple Response Sets"));
/* Column dimension.  The categories are listed in the order of the column
   indexes 0 through 3 used by the pivot_table_put2 calls below. */
570 pivot_dimension_create (
571 table, PIVOT_AXIS_COLUMN, N_("Attributes"),
572 N_("Label"), N_("Encoding"), N_("Counted Value"), N_("Member Variables"));
/* Row dimension: one leaf per set, added in the loop below. */
574 struct pivot_dimension *mrsets = pivot_dimension_create (
575 table, PIVOT_AXIS_ROW, N_("Name"));
576 mrsets->root->show_label = true;
/* Walk the selected names in the order returned by
   stringi_set_get_sorted_array. */
578 char **mrset_names = stringi_set_get_sorted_array (&mrset_names_set);
579 for (size_t i = 0; i < n; i++)
581 const struct mrset *mrset = dict_lookup_mrset (dict, mrset_names[i]);
583 int row = pivot_category_create_leaf (
584 mrsets->root, pivot_value_new_user_text (mrset->name, -1));
/* Column 0: the set label; the cell is left unset when there is none. */
586 if (mrset->label != NULL)
587 pivot_table_put2 (table, 0, row,
588 pivot_value_new_user_text (mrset->label, -1));
/* Column 1: text chosen by the set type (the two alternatives are not
   visible in this listing). */
590 pivot_table_put2 (table, 1, row,
591 pivot_value_new_text (mrset->type == MRSET_MD
/* Column 2: the counted value, filled in only for MRSET_MD sets. */
595 if (mrset->type == MRSET_MD)
596 pivot_table_put2 (table, 2, row,
597 pivot_value_new_value (
598 &mrset->counted, mrset->width,
599 &F_8_0, dict_get_encoding (dict)));
601 /* Variable names. */
/* Column 3: member variable names, one per line; the newline after the last
   name is removed before the string is handed to the table. */
602 struct string var_names = DS_EMPTY_INITIALIZER;
603 for (size_t j = 0; j < mrset->n_vars; j++)
604 ds_put_format (&var_names, "%s\n", var_get_name (mrset->vars[j]));
605 ds_chomp_byte (&var_names, '\n');
606 pivot_table_put2 (table, 3, row,
607 pivot_value_new_user_text_nocopy (
608 ds_steal_cstr (&var_names)));
/* NOTE(review): the release of the mrset_names array is not visible in this
   listing -- confirm that it is freed. */
611 stringi_set_destroy (&mrset_names_set);
613 pivot_table_submit (table);