X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=src%2Flanguage%2Fstats%2Fmatrix.c;h=bcae19971750704affa257ef127ca17693ed9b53;hb=f4c07cacab73e6f4b72e6393f394e990501b5916;hp=28ad99ba12241eb31f81fef526c32dfc47f362b9;hpb=e609c7d767ffd600fd844327b292590259cbb0de;p=pspp diff --git a/src/language/stats/matrix.c b/src/language/stats/matrix.c index 28ad99ba12..bcae199717 100644 --- a/src/language/stats/matrix.c +++ b/src/language/stats/matrix.c @@ -59,6 +59,7 @@ #include "output/pivot-table.h" #include "gl/c-ctype.h" +#include "gl/ftoastr.h" #include "gl/minmax.h" #include "gl/xsize.h" @@ -3405,7 +3406,7 @@ struct matrix_cmd int c1, c2; enum fmt_type format; int w; - int d; + //int d; bool symmetric; bool reread; } @@ -3416,11 +3417,15 @@ struct matrix_cmd struct write_file *wf; struct matrix_expr *expression; int c1, c2; - enum fmt_type format; - int w; - int d; + + /* If this is nonnull, WRITE uses this format. + + If this is NULL, WRITE uses free-field format with as many + digits of precision as needed. */ + struct fmt_spec *format; + bool triangular; - bool hold; /* XXX */ + bool hold; } write; @@ -4676,14 +4681,15 @@ matrix_parse_read (struct matrix_state *s) } lex_get (s->lexer); } - else if (!fmt_from_name (p, &read->format)) + else if (fmt_from_name (p, &read->format)) + lex_get (s->lexer); + else { struct fmt_spec format; if (!parse_format_specifier (s->lexer, &format)) goto error; read->format = format.type; read->w = format.w; - read->d = format.d; } } else @@ -4771,28 +4777,78 @@ error: return NULL; } +static void +parse_error (const struct dfm_reader *reader, enum fmt_type format, + struct substring data, size_t y, size_t x, + int first_column, int last_column, char *error) +{ + int line_number = dfm_get_line_number (reader); + struct msg_location *location = xmalloc (sizeof *location); + *location = (struct msg_location) { + .file_name = xstrdup (dfm_get_file_name (reader)), + .first_line = line_number, + .last_line = line_number + 1, + .first_column = first_column, + .last_column = last_column, + }; + struct msg *m = xmalloc (sizeof *m); + *m = (struct msg) { + .category = MSG_C_DATA, + .severity = MSG_S_WARNING, + .location = location, + .text = xasprintf (_("Error reading \"%.*s\" as format %s " + "for matrix row %zu, column %zu: %s"), + (int) data.length, data.string, fmt_name (format), + y + 1, x + 1, error), + }; + msg_emit (m); + + free (error); +} + static void matrix_read_set_field (struct read_command *read, struct dfm_reader *reader, - gsl_matrix *m, struct substring p, size_t y, size_t x) + gsl_matrix *m, struct substring p, size_t y, size_t x, + const char *line_start) { const char *input_encoding = dfm_reader_get_encoding (reader); - union value v; - char *error = data_in (p, input_encoding, read->format, - settings_get_fmt_settings (), &v, 0, NULL); - /* XXX report error if value is missing */ + char *error; + double f; + if (fmt_is_numeric (read->format)) + { + union value v; + error = data_in (p, input_encoding, read->format, + settings_get_fmt_settings (), &v, 0, NULL); + if (!error && v.f == SYSMIS) + error = xstrdup (_("Matrix data may not contain missing value.")); + f = v.f; + } + else + { + uint8_t s[sizeof (double)]; + union value v = { .s = s }; + error = data_in (p, input_encoding, read->format, + settings_get_fmt_settings (), &v, sizeof s, "UTF-8"); + memcpy (&f, s, sizeof f); + } + if (error) - msg (SW, _("GET parse error (%.*s): %s"), (int) p.length, p.string, error); + { + int c1 = utf8_count_columns (line_start, p.string - line_start) + 1; + int c2 = c1 + ss_utf8_count_columns (p) - 1; + parse_error (reader, read->format, p, y, x, c1, c2, error); + } else { - gsl_matrix_set (m, y, x, v.f); + gsl_matrix_set (m, y, x, f); if (read->symmetric && x != y) - gsl_matrix_set (m, x, y, v.f); + gsl_matrix_set (m, x, y, f); } } static bool matrix_read_line (struct read_command *read, struct dfm_reader *reader, - struct substring *line) + struct substring *line, const char **startp) { if (dfm_eof (reader)) { @@ -4800,8 +4856,10 @@ matrix_read_line (struct read_command *read, struct dfm_reader *reader, return false; } dfm_expand_tabs (reader); - *line = ss_substr (dfm_get_record (reader), - read->c1 - 1, read->c2 - read->c1); + struct substring record = dfm_get_record (reader); + /* XXX need to recode record into UTF-8 */ + *startp = record.string; + *line = ss_utf8_columns (record, read->c1 - 1, read->c2 - read->c1); return true; } @@ -4814,6 +4872,7 @@ matrix_read (struct read_command *read, struct dfm_reader *reader, size_t nx = read->symmetric ? y + 1 : m->size2; struct substring line = ss_empty (); + const char *line_start = line.string; for (size_t x = 0; x < nx; x++) { struct substring p; @@ -4824,7 +4883,7 @@ matrix_read (struct read_command *read, struct dfm_reader *reader, ss_ltrim (&line, ss_cstr (" ,")); if (!ss_is_empty (line)) break; - if (!matrix_read_line (read, reader, &line)) + if (!matrix_read_line (read, reader, &line, &line_start)) return; dfm_forward_record (reader); } @@ -4833,7 +4892,7 @@ matrix_read (struct read_command *read, struct dfm_reader *reader, } else { - if (!matrix_read_line (read, reader, &line)) + if (!matrix_read_line (read, reader, &line, &line_start)) return; size_t fields_per_line = (read->c2 - read->c1) / read->w; int f = x % fields_per_line; @@ -4843,7 +4902,7 @@ matrix_read (struct read_command *read, struct dfm_reader *reader, p = ss_substr (line, read->w * f, read->w); } - matrix_read_set_field (read, reader, m, p, y, x); + matrix_read_set_field (read, reader, m, p, y, x, line_start); } if (read->w) @@ -4852,8 +4911,11 @@ matrix_read (struct read_command *read, struct dfm_reader *reader, { ss_ltrim (&line, ss_cstr (" ,")); if (!ss_is_empty (line)) - msg (SW, _("Trailing garbage on line \"%.*s\""), - (int) line.length, line.string); + { + /* XXX */ + msg (SW, _("Trailing garbage on line \"%.*s\""), + (int) line.length, line.string); + } } } } @@ -4908,7 +4970,9 @@ matrix_cmd_execute_read (struct read_command *read) if (d[0] < 0 || d[0] > SIZE_MAX || d[1] < 0 || d[1] > SIZE_MAX) { - msg (SE, _("SIZE (%g,%g) is outside valid range."), d[0], d[1]); + msg (SE, _("Matrix dimensions %g×%g specified on SIZE " + "are outside valid range."), + d[0], d[1]); free (iv0.indexes); free (iv1.indexes); return; @@ -4941,8 +5005,8 @@ matrix_cmd_execute_read (struct read_command *read) { if (size[0] != submatrix_size[0] || size[1] != submatrix_size[1]) { - msg (SE, _("SIZE (%zu,%zu) differs from submatrix dimensions " - "%zu×%zu."), + msg (SE, _("Matrix dimensions %zu×%zu specified on SIZE " + "differ from submatrix dimensions %zu×%zu."), size[0], size[1], submatrix_size[0], submatrix_size[1]); free (iv0.indexes); @@ -4981,7 +5045,6 @@ matrix_parse_write (struct matrix_state *s) struct matrix_cmd *cmd = xmalloc (sizeof *cmd); *cmd = (struct matrix_cmd) { .type = MCMD_WRITE, - .write = { .format = FMT_F }, }; struct file_handle *fh = NULL; @@ -4994,7 +5057,8 @@ matrix_parse_write (struct matrix_state *s) int by = 0; int repetitions = 0; int record_width = 0; - bool seen_format = false; + enum fmt_type format = FMT_F; + bool has_format = false; while (lex_match (s->lexer, T_SLASH)) { if (lex_match_id (s->lexer, "OUTFILE")) @@ -5067,12 +5131,11 @@ matrix_parse_write (struct matrix_state *s) write->hold = true; else if (lex_match_id (s->lexer, "FORMAT")) { - if (seen_format) + if (has_format || write->format) { lex_sbc_only_once ("FORMAT"); goto error; } - seen_format = true; lex_match (s->lexer, T_EQUALS); @@ -5084,21 +5147,25 @@ matrix_parse_write (struct matrix_state *s) { repetitions = atoi (p); p += strspn (p, "0123456789"); - if (!fmt_from_name (p, &write->format)) + if (!fmt_from_name (p, &format)) { lex_error (s->lexer, _("Unknown format %s."), p); goto error; } + has_format = true; lex_get (s->lexer); } - else if (!fmt_from_name (p, &write->format)) + else if (fmt_from_name (p, &format)) { - struct fmt_spec format; - if (!parse_format_specifier (s->lexer, &format)) + has_format = true; + lex_get (s->lexer); + } + else + { + struct fmt_spec spec; + if (!parse_format_specifier (s->lexer, &spec)) goto error; - write->format = format.type; - write->w = format.w; - write->d = format.d; + write->format = xmemdup (&spec, sizeof spec); } } else @@ -5155,7 +5222,7 @@ matrix_parse_write (struct matrix_state *s) goto error; } int w = (repetitions ? record_width / repetitions - : write->w ? write->w + : write->format ? write->format->w : by); if (by && w != by) { @@ -5169,7 +5236,24 @@ matrix_parse_write (struct matrix_state *s) w, by); goto error; } - write->w = w; + if (w && !write->format) + { + write->format = xmalloc (sizeof *write->format); + *write->format = (struct fmt_spec) { .type = format, .w = w }; + + if (!fmt_check_output (write->format)) + goto error; + }; + + if (write->format && fmt_var_width (write->format) > sizeof (double)) + { + char s[FMT_STRING_LEN_MAX + 1]; + fmt_to_string (write->format, s); + msg (SE, _("Format %s is too wide for %zu-byte matrix eleemnts."), + s, sizeof (double)); + goto error; + } + return cmd; error: @@ -5202,11 +5286,6 @@ matrix_cmd_execute_write (struct write_command *write) } const struct fmt_settings *settings = settings_get_fmt_settings (); - struct fmt_spec format = { - .type = write->format, - .w = write->w ? write->w : 40, - .d = write->d - }; struct u8_line *line = write->wf->held; for (size_t y = 0; y < m->size1; y++) { @@ -5219,11 +5298,24 @@ matrix_cmd_execute_write (struct write_command *write) int x0 = write->c1; for (size_t x = 0; x < nx; x++) { - /* XXX string values */ - union value v = { .f = gsl_matrix_get (m, y, x) }; - char *s = (write->w - ? data_out (&v, NULL, &format, settings) - : data_out_stretchy (&v, NULL, &format, settings, NULL)); + char *s; + double f = gsl_matrix_get (m, y, x); + if (write->format) + { + union value v; + if (fmt_is_numeric (write->format->type)) + v.f = f; + else + v.s = (uint8_t *) &f; + s = data_out (&v, NULL, write->format, settings); + } + else + { + s = xmalloc (DBL_BUFSIZE_BOUND); + if (c_dtoastr (s, DBL_BUFSIZE_BOUND, FTOASTR_UPPER_E, 0, f) + >= DBL_BUFSIZE_BOUND) + abort (); + } size_t len = strlen (s); int width = u8_width (CHAR_CAST (const uint8_t *, s), len, UTF8); if (width + x0 > write->c2) @@ -5236,7 +5328,7 @@ matrix_cmd_execute_write (struct write_command *write) u8_line_put (line, x0, x0 + width, s, len); free (s); - x0 += write->w ? write->w : width + 1; + x0 += write->format ? write->format->w : width + 1; } if (y + 1 >= m->size1 && write->hold) @@ -5247,10 +5339,10 @@ matrix_cmd_execute_write (struct write_command *write) if (!write->hold) { u8_line_destroy (line); + free (line); line = NULL; } write->wf->held = line; - dfm_close_writer (writer); gsl_matrix_free (m); } @@ -5378,11 +5470,11 @@ matrix_parse_get (struct matrix_state *s) { lex_match (s->lexer, T_EQUALS); if (lex_match_id (s->lexer, "OMIT")) - get->user.treatment = MGET_OMIT; + get->system.treatment = MGET_OMIT; else if (lex_is_number (s->lexer)) { - get->user.treatment = MGET_RECODE; - get->user.substitute = lex_number (s->lexer); + get->system.treatment = MGET_RECODE; + get->system.substitute = lex_number (s->lexer); lex_get (s->lexer); } else @@ -5398,6 +5490,10 @@ matrix_parse_get (struct matrix_state *s) goto error; } } + + if (get->user.treatment != MGET_ACCEPT) + get->system.treatment = MGET_ERROR; + return cmd; error: @@ -5462,6 +5558,22 @@ matrix_cmd_execute_get (struct get_command *get) } } + if (get->names) + { + gsl_matrix *names = gsl_matrix_alloc (n_vars, 1); + for (size_t i = 0; i < n_vars; i++) + { + char s[sizeof (double)]; + double f; + buf_copy_str_rpad (s, sizeof s, var_get_name (vars[i]), ' '); + memcpy (&f, s, sizeof f); + gsl_matrix_set (names, i, 0, f); + } + + gsl_matrix_free (get->names->value); + get->names->value = names; + } + size_t n_rows = 0; gsl_matrix *m = gsl_matrix_alloc (4, n_vars); long long int casenum = 1; @@ -6553,6 +6665,7 @@ matrix_cmd_destroy (struct matrix_cmd *cmd) case MCMD_WRITE: matrix_expr_destroy (cmd->write.expression); + free (cmd->write.format); break; case MCMD_GET: