1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2017 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/case.h"
20 #include "data/casereader.h"
21 #include "data/casewriter.h"
22 #include "data/dataset.h"
23 #include "data/dictionary.h"
24 #include "data/format.h"
25 #include "data/transformations.h"
26 #include "data/variable.h"
27 #include "language/command.h"
28 #include "language/data-io/data-parser.h"
29 #include "language/data-io/data-reader.h"
30 #include "language/data-io/file-handle.h"
31 #include "language/data-io/inpt-pgm.h"
32 #include "language/data-io/placement-parser.h"
33 #include "language/lexer/lexer.h"
34 #include "language/lexer/variable-parser.h"
35 #include "libpspp/i18n.h"
36 #include "libpspp/message.h"
39 #include "gl/xalloc.h"
42 #define _(msgid) gettext (msgid)
44 /* DATA LIST transformation data. */
// State handed to the transformation callbacks data_list_trns_proc and
// data_list_trns_free, which read these members through a
// struct data_list_trns pointer.
// NOTE(review): the struct header and closing brace are not visible in this
// listing; only the member declarations are.
47 struct data_parser *parser; /* Parser. */
48 struct dfm_reader *reader; /* Data file reader. */
49 struct variable *end; /* Variable specified on END subcommand. */
// Forward declarations of the two transformation callbacks defined near the
// end of this file; cmd_matrix hands both to add_transformation.
52 static trns_free_func data_list_trns_free;
53 static trns_proc_func data_list_trns_proc;
// Description of the matrix layout.  cmd_matrix fills these members in and
// preprocess receives them through its aux pointer.
// NOTE(review): the struct header (presumably struct matrix_format) and the
// definitions of enum triangle and enum diagonal are not visible here.

// Which part of the matrix the input supplies: LOWER, UPPER or FULL.
70 enum triangle triangle;
// Whether the input supplies the diagonal: DIAGONAL or NO_DIAGONAL.
71 enum diagonal diagonal;
// The ROWTYPE_ variable, created by cmd_matrix as a string of width 8.
72 const struct variable *rowtype;
// The VARNAME_ variable, created by cmd_matrix as a string of width 8.
73 const struct variable *varname;
// Number of numeric variables created from the VARIABLES list, reduced by
// the number of SPLIT variables.
74 int n_continuous_vars;
78 /* Valid rowtype_ values: */
95 /* Sets the value of OUTCASE which corresponds to MFORMAT's varname variable
96 to the first LEN bytes of the string STR. That variable must be of string type. */
// Copies at most LEN bytes of STR into the field of OUTCASE that belongs to
// the varname variable of MFORMAT.
// NOTE(review): the return type line and the braces of this function are not
// visible in this listing.
99 set_varname_column (struct ccase *outcase, const struct matrix_format *mformat,
100 const char *str, int len)
102 const struct variable *var = mformat->varname;
103 uint8_t *s = value_str_rw (case_data_rw (outcase, var), len);
// strncpy fills the remainder with NUL bytes when STR is shorter than LEN
// and writes no terminator when STR is LEN bytes or longer.
// NOTE(review): other code in this file compares these fields against
// space-padded text; confirm NUL padding is what readers of this field expect.
105 strncpy ((char *) s, str, len);
// Rewrites the cases read from CASEREADER0 so that each case whose ROWTYPE_
// field matches corr or cov (compared case-insensitively over 8 bytes)
// carries a complete matrix row taken from a symmetric scratch matrix.
//
// DICT is the dictionary the cases belong to and AUX is the struct
// matrix_format prepared by cmd_matrix.  CASEREADER0 is destroyed here.  The
// rewritten cases are collected in an autopaging casewriter which is then
// turned into the reader reader1.
//
// NOTE(review): the embedded numbering has gaps (for example 111, 116, 123,
// 126, 129, 134-135, 143-144 and 147-150), so braces and some statements are
// not visible in this listing.  Remarks that depend on them are hedged.
109 static struct casereader *
110 preprocess (struct casereader *casereader0, const struct dictionary *dict, void *aux)
112 struct matrix_format *mformat = aux;
113 const struct caseproto *proto = casereader_get_proto (casereader0);
114 struct casewriter *writer;
115 writer = autopaging_writer_create (proto);
// Scratch matrix of n_continuous_vars by n_continuous_vars doubles, indexed
// as [col + n * row]; presumably zero-filled, as with calloc.
// NOTE(review): xcalloc receives the element size first and the element
// count second here; the product is the same either way.
117 double *temp_matrix =
118 xcalloc (sizeof (*temp_matrix),
119 mformat->n_continuous_vars * mformat->n_continuous_vars);
121 /* Make an initial pass to populate our temporary matrix */
// The first pass reads a clone so that casereader0 can be read again below.
122 struct casereader *pass0 = casereader_clone (casereader0);
// row is the matrix row described by the next corr or cov case.  It starts
// at 1 for a lower triangle without diagonal, presumably because the first
// matrix row then has no entries in the input -- TODO confirm.
124 int row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ? 1 : 0;
125 for (; (c = casereader_read (pass0)) != NULL; case_unref (c))
// c_offset is the matrix column held by the first data variable of this
// case; for an upper triangle it is the current row.
// NOTE(review): the statement guarded by the NO_DIAGONAL test is not
// visible; presumably it advances c_offset by one.
127 int c_offset = (mformat->triangle == UPPER) ? row : 0;
128 if (mformat->triangle == UPPER && mformat->diagonal == NO_DIAGONAL)
130 const union value *v = case_data (c, mformat->rowtype);
131 const char *val = (const char *) value_str (v, 8);
132 if (0 == strncasecmp (val, "corr ", 8) ||
133 0 == strncasecmp (val, "cov ", 8))
136 for (col = c_offset; col < mformat->n_continuous_vars; ++col)
// The data variable for matrix column col sits (col - c_offset) + 1 places
// after VARNAME_ in the dictionary.
// NOTE(review): the call that performs the lookup (original line 139) is
// not visible; only its index argument is.
138 const struct variable *var =
140 1 + col - c_offset + var_get_dict_index (mformat->varname));
142 double e = case_data (c, var)->f;
// NOTE(review): original lines 143-144 are not visible here.
// Store e at (row, col) and at (col, row) so the scratch matrix is symmetric.
145 temp_matrix [col + mformat->n_continuous_vars * row] = e;
146 temp_matrix [row + mformat->n_continuous_vars * col] = e;
// NOTE(review): the statement that advances row is not visible.
151 casereader_destroy (pass0);
153 /* Now make a second pass to fill in the other triangle from our
155 const int idx = var_get_dict_index (mformat->varname);
// prev_case keeps the most recently read case alive beyond the loop; each
// iteration releases the case read before it.
157 struct ccase *prev_case = NULL;
158 for (; (c = casereader_read (casereader0)) != NULL; prev_case = c)
160 case_unref (prev_case);
// The output case starts as a copy of every value of the input case.
161 struct ccase *outcase = case_create (proto);
162 case_copy (outcase, 0, c, 0, caseproto_get_n_widths (proto));
163 const union value *v = case_data (c, mformat->rowtype);
164 const char *val = (const char *) value_str (v, 8);
165 if (0 == strncasecmp (val, "corr ", 8) ||
166 0 == strncasecmp (val, "cov ", 8))
// A matrix row is labelled with the name of the variable found row + 1
// places after VARNAME_ in the dictionary, and keeps its ROWTYPE_ value.
169 const struct variable *var = dict_get_var (dict, idx + 1 + row);
170 set_varname_column (outcase, mformat, var_get_name (var), 8);
171 value_copy (case_data_rw (outcase, mformat->rowtype), v, 8);
// Every matrix column of the output case is overwritten from the scratch
// matrix.
173 for (col = 0; col < mformat->n_continuous_vars; ++col)
175 union value *dest_val =
176 case_data_rw_idx (outcase,
177 1 + col + var_get_dict_index (mformat->varname));
178 dest_val->f = temp_matrix [col + mformat->n_continuous_vars * row];
// NOTE(review): the statement guarded by this test is not visible;
// presumably it supplies the diagonal entry that the input omitted.
179 if (col == row && mformat->diagonal == NO_DIAGONAL)
// NOTE(review): the surrounding branch structure is not visible; presumably
// this whitespace VARNAME_ is written for cases that are not corr or cov rows.
186 set_varname_column (outcase, mformat, " ", 8);
189 casewriter_write (writer, outcase);
192 /* If NODIAGONAL is specified, then a final case must be written */
193 if (mformat->diagonal == NO_DIAGONAL)
196 struct ccase *outcase = case_create (proto);
// The extra case is based on the last case read.
// NOTE(review): prev_case is still NULL when casereader0 yielded no cases;
// whether this copy is guarded (original line 198) is not visible.
199 case_copy (outcase, 0, prev_case, 0, caseproto_get_n_widths (proto));
202 const struct variable *var = dict_get_var (dict, idx + 1 + row);
203 set_varname_column (outcase, mformat, var_get_name (var), 8);
205 for (col = 0; col < mformat->n_continuous_vars; ++col)
207 union value *dest_val =
208 case_data_rw_idx (outcase, 1 + col +
209 var_get_dict_index (mformat->varname));
210 dest_val->f = temp_matrix [col + mformat->n_continuous_vars * row];
// NOTE(review): as above, the guarded statement is not visible.
211 if (col == row && mformat->diagonal == NO_DIAGONAL)
215 casewriter_write (writer, outcase);
// Release the last input case that the loop kept alive.
219 case_unref (prev_case);
// The writer becomes the reader that replaces casereader0.
// NOTE(review): the release of temp_matrix and the return statement are not
// visible in this listing.
222 struct casereader *reader1 = casewriter_make_reader (writer);
223 casereader_destroy (casereader0);
// Parses a matrix-reading command from LEXER for dataset DS.  A VARIABLES
// list is mandatory; it may be followed by FORMAT, FILE and SPLIT
// subcommands, each introduced by a slash.  On success the data either
// becomes the active file (run through preprocess) or, inside an input
// program, a transformation is added.
//
// Returns CMD_DATA_LIST on the success path and CMD_CASCADING_FAILURE on the
// error path.
//
// NOTE(review): the return type line, braces, several declarations, the
// error-path jumps and the bodies of some branches are not visible in this
// listing (the embedded numbering has gaps).  Remarks that depend on them
// are hedged.
228 cmd_matrix (struct lexer *lexer, struct dataset *ds)
230 struct dictionary *dict;
231 struct data_parser *parser;
232 struct dfm_reader *reader;
233 struct file_handle *fh = NULL;
234 char *encoding = NULL;
235 struct matrix_format mformat;
// Defaults: lower triangle including the diagonal.
240 mformat.triangle = LOWER;
241 mformat.diagonal = DIAGONAL;
// Outside an input program a new dictionary is created with the default
// encoding.  NOTE(review): the alternative used inside an input program
// (original line 244) is not visible.
243 dict = (in_input_program ()
245 : dict_create (get_default_encoding ()));
246 parser = data_parser_create (dict);
// Delimited input, no warnings for missing fields, and span switched off
// (the FORMAT keywords LIST and FREE below switch span off and on).
249 data_parser_set_type (parser, DP_DELIMITED);
250 data_parser_set_warn_missing_fields (parser, false);
251 data_parser_set_span (parser, false);
// ROWTYPE_ and VARNAME_ always exist, each a string of width 8.
253 mformat.rowtype = dict_create_var (dict, "ROWTYPE_", 8);
254 mformat.varname = dict_create_var (dict, "VARNAME_", 8);
256 mformat.n_continuous_vars = 0;
258 if (! lex_force_match_id (lexer, "VARIABLES"))
261 lex_match (lexer, T_EQUALS);
263 if (! parse_mixed_vars (lexer, dict, &names, &n_names, 0))
// NOTE(review): the body of this first loop over the names is not visible.
266 for (i = 0; i < n_names; ++i)
// Second loop: register one delimited field per name.
272 for (i = 0; i < n_names; ++i)
// ROWTYPE_ is read as an A8 string into the variable created above.
274 if (0 == strcasecmp (names[i], "ROWTYPE_"))
276 const struct fmt_spec fmt = fmt_for_input (FMT_A, 8, 0);
277 data_parser_add_delimited_field (parser,
279 var_get_case_index (mformat.rowtype),
// NOTE(review): presumably the else branch -- every other name becomes a new
// numeric variable (width 0) with format F10.4.  Its field is placed at the
// case index of VARNAME_ plus the incremented count of such variables.
284 const struct fmt_spec fmt = fmt_for_input (FMT_F, 10, 4);
285 struct variable *v = dict_create_var (dict, names[i], 0);
286 var_set_both_formats (v, &fmt);
287 data_parser_add_delimited_field (parser,
289 var_get_case_index (mformat.varname) +
290 ++mformat.n_continuous_vars,
// NOTE(review): the body of this third loop is not visible; presumably it
// releases the parsed names.
294 for (i = 0; i < n_names; ++i)
// Subcommand loop: runs until the end of the command, and each subcommand
// must start with a slash.
298 while (lex_token (lexer) != T_ENDCMD)
300 if (! lex_force_match (lexer, T_SLASH))
303 if (lex_match_id (lexer, "FORMAT"))
305 lex_match (lexer, T_EQUALS);
// FORMAT accepts any number of keywords up to the next slash or the end of
// the command; later keywords override earlier ones.
307 while (lex_token (lexer) != T_SLASH && (lex_token (lexer) != T_ENDCMD))
309 if (lex_match_id (lexer, "LIST"))
311 data_parser_set_span (parser, false);
313 else if (lex_match_id (lexer, "FREE"))
315 data_parser_set_span (parser, true);
317 else if (lex_match_id (lexer, "UPPER"))
319 mformat.triangle = UPPER;
321 else if (lex_match_id (lexer, "LOWER"))
323 mformat.triangle = LOWER;
325 else if (lex_match_id (lexer, "FULL"))
327 mformat.triangle = FULL;
329 else if (lex_match_id (lexer, "DIAGONAL"))
331 mformat.diagonal = DIAGONAL;
333 else if (lex_match_id (lexer, "NODIAGONAL"))
335 mformat.diagonal = NO_DIAGONAL;
// Any other FORMAT keyword is reported as a syntax error.
339 lex_error (lexer, NULL);
344 else if (lex_match_id (lexer, "FILE"))
346 lex_match (lexer, T_EQUALS);
// The data may come from a file handle or from inline data.
348 fh = fh_parse (lexer, FH_REF_FILE | FH_REF_INLINE, NULL);
352 else if (lex_match_id (lexer, "SPLIT"))
354 lex_match (lexer, T_EQUALS);
355 struct variable **split_vars = NULL;
357 if (! parse_variables (lexer, dict, &split_vars, &n_split_vars, 0))
// Split variables get format F4.0, are passed to dict_reorder_vars, and no
// longer count as matrix columns.
// NOTE(review): this assumes every split variable was counted in
// n_continuous_vars by the VARIABLES loop above -- confirm.
363 for (i = 0; i < n_split_vars; ++i)
365 const struct fmt_spec fmt = fmt_for_input (FMT_F, 4, 0);
366 var_set_both_formats (split_vars[i], &fmt);
368 dict_reorder_vars (dict, split_vars, n_split_vars);
369 mformat.n_continuous_vars -= n_split_vars;
// Any other subcommand is reported as a syntax error.
374 lex_error (lexer, NULL);
// A full matrix necessarily includes its diagonal.
379 if (mformat.diagonal == NO_DIAGONAL && mformat.triangle == FULL)
381 msg (SE, _("FORMAT = FULL and FORMAT = NODIAGONAL are mutually exclusive."));
// NOTE(review): the condition guarding this fallback is not visible;
// presumably it applies when no FILE subcommand supplied a handle.
386 fh = fh_inline_file ();
387 fh_set_default_handle (fh);
389 if (!data_parser_any_fields (parser))
391 msg (SE, _("At least one variable must be specified."));
395 if (lex_end_of_command (lexer) != CMD_SUCCESS)
398 reader = dfm_open_reader (fh, lexer, encoding);
// Inside an input program the parser and reader are handed to a
// transformation, which takes charge of destroying them (see
// data_list_trns_free).
// NOTE(review): no assignment to trns->end is visible here although
// data_list_trns_proc reads it; original line 407 is missing from this
// listing -- confirm that it initialises the member.
402 if (in_input_program ())
404 struct data_list_trns *trns = xmalloc (sizeof *trns);
405 trns->parser = parser;
406 trns->reader = reader;
408 add_transformation (ds, data_list_trns_proc, data_list_trns_free, trns);
// Otherwise the data becomes the active file, with preprocess completing
// the matrix.  NOTE(review): mformat is a local of this function, so
// preprocess must run before this function returns -- confirm.
412 data_parser_make_active_file (parser, ds, reader, dict, preprocess, &mformat);
418 return CMD_DATA_LIST;
// Error path: the parser is destroyed before reporting failure.
// NOTE(review): the statement guarded by the test below is not visible;
// presumably it disposes of the dictionary created above.
421 data_parser_destroy (parser);
422 if (!in_input_program ())
426 return CMD_CASCADING_FAILURE;
430 /* Input procedure. */
432 /* Destroys DATA LIST transformation TRNS.
433 Returns true if successful, false if an I/O error occurred. */
// NOTE(review): the return type line, the braces, the release of TRNS itself
// and the return statement are not visible in this listing, so the return
// contract stated above cannot be checked from here.
435 data_list_trns_free (void *trns_)
437 struct data_list_trns *trns = trns_;
// The transformation is responsible for both the parser and the reader
// that cmd_matrix stored in it.
438 data_parser_destroy (trns->parser);
439 dfm_close_reader (trns->reader);
444 /* Handle DATA LIST transformation TRNS, parsing data into *C. */
// NOTE(review): the return type line, the braces, the declaration of retval
// and the end of this function are not visible in this listing; only the
// visible statements are described.
446 data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED)
448 struct data_list_trns *trns = trns_;
// The result of case_unshare replaces *C before the parser writes into it.
451 *c = case_unshare (*c);
// A successful parse lets the transformation chain continue.
452 if (data_parser_parse (trns->parser, trns->reader, *c))
453 retval = TRNS_CONTINUE;
454 else if (dfm_reader_error (trns->reader) || dfm_eof (trns->reader) > 1)
456 /* An I/O error, or encountering end of file for a second
457 time, should be escalated into a more serious error. */
// NOTE(review): the value assigned in the escalated case (original lines
// 458-460) is not visible.  The assignment below is presumably the final
// else branch, the ordinary end of file.
461 retval = TRNS_END_FILE;
463 /* If there was an END subcommand handle it. */
464 if (trns->end != NULL)
// end points at the numeric value of the END variable in the current case.
466 double *end = &case_data_rw (*c, trns->end)->f;
// With an END variable, end of file is reported as TRNS_CONTINUE instead.
// NOTE(review): the writes through end (original lines 468-469 and later)
// are not visible.
467 if (retval == TRNS_END_FILE)
470 retval = TRNS_CONTINUE;