#include "output/pivot-table.h"
#include "gl/c-ctype.h"
+#include "gl/ftoastr.h"
#include "gl/minmax.h"
#include "gl/xsize.h"
int c1, c2;
enum fmt_type format;
int w;
- int d;
+ //int d;
bool symmetric;
bool reread;
}
struct write_file *wf;
struct matrix_expr *expression;
int c1, c2;
- enum fmt_type format;
- int w;
- int d;
+
+ /* If this is nonnull, WRITE uses this format.
+
+ If this is NULL, WRITE uses free-field format with as many
+ digits of precision as needed. */
+ struct fmt_spec *format;
+
bool triangular;
- bool hold; /* XXX */
+ bool hold;
}
write;
struct get_command
{
struct matrix_lvalue *dst;
+ struct dataset *dataset;
struct file_handle *file;
char *encoding;
- struct string_array variables;
+ struct var_syntax *vars;
+ size_t n_vars;
struct matrix_var *names;
/* Treatment of missing values. */
}
lex_get (s->lexer);
}
- else if (!fmt_from_name (p, &read->format))
+ else if (fmt_from_name (p, &read->format))
+ lex_get (s->lexer);
+ else
{
struct fmt_spec format;
if (!parse_format_specifier (s->lexer, &format))
goto error;
read->format = format.type;
read->w = format.w;
- read->d = format.d;
}
}
else
return NULL;
}
+/* Emits a data warning reporting that DATA, the field for matrix row Y,
+   column X (both counted from 0 here and reported counting from 1), could
+   not be read as FORMAT.  The warning's location is built from READER's
+   file name and current line number together with FIRST_COLUMN and
+   LAST_COLUMN.  ERROR supplies the detail text at the end of the message;
+   this function frees it. */
+static void
+parse_error (const struct dfm_reader *reader, enum fmt_type format,
+             struct substring data, size_t y, size_t x,
+             int first_column, int last_column, char *error)
+{
+  /* Format the message first: it is the only consumer of ERROR. */
+  char *text = xasprintf (_("Error reading \"%.*s\" as format %s "
+                            "for matrix row %zu, column %zu: %s"),
+                          (int) data.length, data.string, fmt_name (format),
+                          y + 1, x + 1, error);
+  free (error);
+
+  int lineno = dfm_get_line_number (reader);
+  struct msg_location *where = xmalloc (sizeof *where);
+  *where = (struct msg_location) {
+    .file_name = xstrdup (dfm_get_file_name (reader)),
+    .first_line = lineno,
+    .last_line = lineno + 1,
+    .first_column = first_column,
+    .last_column = last_column,
+  };
+
+  struct msg *warning = xmalloc (sizeof *warning);
+  *warning = (struct msg) {
+    .category = MSG_C_DATA,
+    .severity = MSG_S_WARNING,
+    .location = where,
+    .text = text,
+  };
+  msg_emit (warning);
+}
+
/* Parses field P according to READ's format and, on success, stores the
   result in row Y, column X of M, and also in row X, column Y when READ is
   symmetric and X != Y.

   For a numeric format the field is parsed to a double, and a parse that
   succeeds but yields SYSMIS is turned into an error.  For any other format
   the field is parsed into a buffer of sizeof (double) bytes, and those
   bytes are copied into the double that gets stored.

   On error nothing is stored in M; instead parse_error() is called with a
   column range for P that is computed relative to LINE_START. */
static void
matrix_read_set_field (struct read_command *read, struct dfm_reader *reader,
- gsl_matrix *m, struct substring p, size_t y, size_t x)
+ gsl_matrix *m, struct substring p, size_t y, size_t x,
+ const char *line_start)
{
const char *input_encoding = dfm_reader_get_encoding (reader);
- union value v;
- char *error = data_in (p, input_encoding, read->format,
- settings_get_fmt_settings (), &v, 0, NULL);
- /* XXX report error if value is missing */
+ char *error;
+ double f; /* Value to store; only used when ERROR ends up null. */
+ if (fmt_is_numeric (read->format))
+ {
+ union value v;
+ error = data_in (p, input_encoding, read->format,
+ settings_get_fmt_settings (), &v, 0, NULL);
+ if (!error && v.f == SYSMIS) /* Parsed, but to a missing value. */
+ error = xstrdup (_("Matrix data may not contain missing value."));
+ f = v.f;
+ }
+ else
+ {
+ uint8_t s[sizeof (double)];
+ union value v = { .s = s };
+ error = data_in (p, input_encoding, read->format,
+ settings_get_fmt_settings (), &v, sizeof s, "UTF-8");
+ memcpy (&f, s, sizeof f); /* Carry the parsed bytes inside a double. */
+ }
+
if (error)
- msg (SW, _("GET parse error (%.*s): %s"), (int) p.length, p.string, error);
+ {
+ int c1 = utf8_count_columns (line_start, p.string - line_start) + 1; /* Columns preceding P, plus 1. */
+ int c2 = c1 + ss_utf8_count_columns (p) - 1; /* Last column occupied by P. */
+ parse_error (reader, read->format, p, y, x, c1, c2, error);
+ }
else
{
- gsl_matrix_set (m, y, x, v.f);
+ gsl_matrix_set (m, y, x, f);
if (read->symmetric && x != y)
- gsl_matrix_set (m, x, y, v.f);
+ gsl_matrix_set (m, x, y, f);
}
}
/* Fetches the current record from READER for READ.

   Returns false, without touching *LINE or *STARTP, if READER is at end of
   file.  Otherwise expands tabs in READER, points *STARTP at the beginning
   of the whole record, sets *LINE to the portion of the record that
   ss_utf8_columns() selects for READ's c1 and c2, and returns true.

   This function does not itself call dfm_forward_record(). */
static bool
matrix_read_line (struct read_command *read, struct dfm_reader *reader,
- struct substring *line)
+ struct substring *line, const char **startp)
{
if (dfm_eof (reader))
{
return false;
}
dfm_expand_tabs (reader);
- *line = ss_substr (dfm_get_record (reader),
- read->c1 - 1, read->c2 - read->c1);
+ struct substring record = dfm_get_record (reader);
+ /* XXX need to recode record into UTF-8 */
+ *startp = record.string; /* Start of the full record, not of *LINE. */
+ *line = ss_utf8_columns (record, read->c1 - 1, read->c2 - read->c1);
return true;
}
size_t nx = read->symmetric ? y + 1 : m->size2;
struct substring line = ss_empty ();
+ const char *line_start = line.string;
for (size_t x = 0; x < nx; x++)
{
struct substring p;
ss_ltrim (&line, ss_cstr (" ,"));
if (!ss_is_empty (line))
break;
- if (!matrix_read_line (read, reader, &line))
+ if (!matrix_read_line (read, reader, &line, &line_start))
return;
dfm_forward_record (reader);
}
}
else
{
- if (!matrix_read_line (read, reader, &line))
+ if (!matrix_read_line (read, reader, &line, &line_start))
return;
size_t fields_per_line = (read->c2 - read->c1) / read->w;
int f = x % fields_per_line;
p = ss_substr (line, read->w * f, read->w);
}
- matrix_read_set_field (read, reader, m, p, y, x);
+ matrix_read_set_field (read, reader, m, p, y, x, line_start);
}
if (read->w)
{
ss_ltrim (&line, ss_cstr (" ,"));
if (!ss_is_empty (line))
- msg (SW, _("Trailing garbage on line \"%.*s\""),
- (int) line.length, line.string);
+ {
+ /* XXX */
+ msg (SW, _("Trailing garbage on line \"%.*s\""),
+ (int) line.length, line.string);
+ }
}
}
}
struct matrix_cmd *cmd = xmalloc (sizeof *cmd);
*cmd = (struct matrix_cmd) {
.type = MCMD_WRITE,
- .write = { .format = FMT_F },
};
struct file_handle *fh = NULL;
int by = 0;
int repetitions = 0;
int record_width = 0;
- bool seen_format = false;
+ enum fmt_type format = FMT_F;
+ bool has_format = false;
while (lex_match (s->lexer, T_SLASH))
{
if (lex_match_id (s->lexer, "OUTFILE"))
write->hold = true;
else if (lex_match_id (s->lexer, "FORMAT"))
{
- if (seen_format)
+ if (has_format || write->format)
{
lex_sbc_only_once ("FORMAT");
goto error;
}
- seen_format = true;
lex_match (s->lexer, T_EQUALS);
{
repetitions = atoi (p);
p += strspn (p, "0123456789");
- if (!fmt_from_name (p, &write->format))
+ if (!fmt_from_name (p, &format))
{
lex_error (s->lexer, _("Unknown format %s."), p);
goto error;
}
+ has_format = true;
lex_get (s->lexer);
}
- else if (!fmt_from_name (p, &write->format))
+ else if (fmt_from_name (p, &format))
{
- struct fmt_spec format;
- if (!parse_format_specifier (s->lexer, &format))
+ has_format = true;
+ lex_get (s->lexer);
+ }
+ else
+ {
+ struct fmt_spec spec;
+ if (!parse_format_specifier (s->lexer, &spec))
goto error;
- write->format = format.type;
- write->w = format.w;
- write->d = format.d;
+ write->format = xmemdup (&spec, sizeof spec);
}
}
else
goto error;
}
int w = (repetitions ? record_width / repetitions
- : write->w ? write->w
+ : write->format ? write->format->w
: by);
if (by && w != by)
{
w, by);
goto error;
}
- write->w = w;
+ if (w && !write->format)
+ {
+ write->format = xmalloc (sizeof *write->format);
+ *write->format = (struct fmt_spec) { .type = format, .w = w };
+
+ if (!fmt_check_output (write->format))
+ goto error;
+ };
+
+  /* Each matrix element is written from a single double, so reject any
+     format whose variable width is larger than sizeof (double) bytes. */
+  if (write->format && fmt_var_width (write->format) > sizeof (double))
+    {
+      char s[FMT_STRING_LEN_MAX + 1];
+      fmt_to_string (write->format, s);
+      msg (SE, _("Format %s is too wide for %zu-byte matrix elements."),
+           s, sizeof (double));
+      goto error;
+    }
+
return cmd;
error:
}
const struct fmt_settings *settings = settings_get_fmt_settings ();
- struct fmt_spec format = {
- .type = write->format,
- .w = write->w ? write->w : 40,
- .d = write->d
- };
struct u8_line *line = write->wf->held;
for (size_t y = 0; y < m->size1; y++)
{
int x0 = write->c1;
for (size_t x = 0; x < nx; x++)
{
- /* XXX string values */
- union value v = { .f = gsl_matrix_get (m, y, x) };
- char *s = (write->w
- ? data_out (&v, NULL, &format, settings)
- : data_out_stretchy (&v, NULL, &format, settings, NULL));
+ char *s;
+ double f = gsl_matrix_get (m, y, x);
+ if (write->format)
+ {
+ union value v;
+ if (fmt_is_numeric (write->format->type))
+ v.f = f;
+ else
+ v.s = (uint8_t *) &f;
+ s = data_out (&v, NULL, write->format, settings);
+ }
+ else
+ {
+ s = xmalloc (DBL_BUFSIZE_BOUND);
+ if (c_dtoastr (s, DBL_BUFSIZE_BOUND, FTOASTR_UPPER_E, 0, f)
+ >= DBL_BUFSIZE_BOUND)
+ abort ();
+ }
size_t len = strlen (s);
int width = u8_width (CHAR_CAST (const uint8_t *, s), len, UTF8);
if (width + x0 > write->c2)
u8_line_put (line, x0, x0 + width, s, len);
free (s);
- x0 += write->w ? write->w : width + 1;
+ x0 += write->format ? write->format->w : width + 1;
}
if (y + 1 >= m->size1 && write->hold)
if (!write->hold)
{
u8_line_destroy (line);
+ free (line);
line = NULL;
}
write->wf->held = line;
- dfm_close_writer (writer);
gsl_matrix_free (m);
}
*cmd = (struct matrix_cmd) {
.type = MCMD_GET,
.get = {
+ .dataset = s->dataset,
.user = { .treatment = MGET_ERROR },
.system = { .treatment = MGET_ERROR },
}
{
if (lex_match_id (s->lexer, "FILE"))
{
- if (get->variables.n)
- {
- lex_error (s->lexer, _("FILE must precede VARIABLES"));
- goto error;
- }
lex_match (s->lexer, T_EQUALS);
fh_unref (get->file);
- get->file = fh_parse (s->lexer, FH_REF_FILE, s->session);
- if (!get->file)
- goto error;
+ if (lex_match (s->lexer, T_ASTERISK))
+ get->file = NULL;
+ else
+ {
+ get->file = fh_parse (s->lexer, FH_REF_FILE, s->session);
+ if (!get->file)
+ goto error;
+ }
}
else if (lex_match_id (s->lexer, "ENCODING"))
{
- if (get->variables.n)
- {
- lex_error (s->lexer, _("ENCODING must precede VARIABLES"));
- goto error;
- }
lex_match (s->lexer, T_EQUALS);
if (!lex_force_string (s->lexer))
goto error;
{
lex_match (s->lexer, T_EQUALS);
- struct dictionary *dict = NULL;
- if (!get->file)
+ if (get->n_vars)
{
- dict = dataset_dict (s->dataset);
- if (dict_get_var_cnt (dict) == 0)
- {
- lex_error (s->lexer, _("GET cannot read empty active file."));
- goto error;
- }
- }
- else
- {
- struct casereader *reader = any_reader_open_and_decode (
- get->file, get->encoding, &dict, NULL);
- if (!reader)
- goto error;
- casereader_destroy (reader);
- }
-
- struct variable **vars;
- size_t n_vars;
- bool ok = parse_variables (s->lexer, dict, &vars, &n_vars,
- PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH);
- if (!ok)
- {
- dict_unref (dict);
+ lex_sbc_only_once ("VARIABLES");
goto error;
}
- string_array_clear (&get->variables);
- for (size_t i = 0; i < n_vars; i++)
- string_array_append (&get->variables, var_get_name (vars[i]));
- free (vars);
- dict_unref (dict);
+ if (!var_syntax_parse (s->lexer, &get->vars, &get->n_vars))
+ goto error;
}
else if (lex_match_id (s->lexer, "NAMES"))
{
{
lex_match (s->lexer, T_EQUALS);
if (lex_match_id (s->lexer, "OMIT"))
- get->user.treatment = MGET_OMIT;
+ get->system.treatment = MGET_OMIT;
else if (lex_is_number (s->lexer))
{
- get->user.treatment = MGET_RECODE;
- get->user.substitute = lex_number (s->lexer);
+ get->system.treatment = MGET_RECODE;
+ get->system.substitute = lex_number (s->lexer);
lex_get (s->lexer);
}
else
goto error;
}
}
+
+ if (get->user.treatment != MGET_ACCEPT)
+ get->system.treatment = MGET_ERROR;
+
return cmd;
error:
}
static void
-matrix_cmd_execute_get (struct get_command *get)
+matrix_cmd_execute_get__ (struct get_command *get,
+ const struct dictionary *dict,
+ struct casereader *reader)
{
- assert (get->file); /* XXX */
-
- struct dictionary *dict;
- struct casereader *reader = any_reader_open_and_decode (
- get->file, get->encoding, &dict, NULL);
- if (!reader)
- return;
-
- const struct variable **vars = xnmalloc (
- get->variables.n ? get->variables.n : dict_get_var_cnt (dict),
- sizeof *vars);
+ struct variable **vars;
size_t n_vars = 0;
- if (get->variables.n)
+ if (get->n_vars)
{
- for (size_t i = 0; i < get->variables.n; i++)
- {
- const char *name = get->variables.strings[i];
- const struct variable *var = dict_lookup_var (dict, name);
- if (!var)
- {
- msg (SE, _("GET: Data file does not contain variable %s."),
- name);
- dict_unref (dict);
- free (vars);
- return;
- }
- if (!var_is_numeric (var))
- {
- msg (SE, _("GET: Variable %s is not numeric."), name);
- dict_unref (dict);
- free (vars);
- return;
- }
- vars[n_vars++] = var;
- }
+ if (!var_syntax_evaluate (get->vars, get->n_vars, dict,
+ &vars, &n_vars, PV_NUMERIC))
+ return;
}
else
{
- for (size_t i = 0; i < dict_get_var_cnt (dict); i++)
+ n_vars = dict_get_var_cnt (dict);
+ vars = xnmalloc (n_vars, sizeof *vars);
+ for (size_t i = 0; i < n_vars; i++)
{
- const struct variable *var = dict_get_var (dict, i);
+ struct variable *var = dict_get_var (dict, i);
if (!var_is_numeric (var))
{
msg (SE, _("GET: Variable %s is not numeric."),
var_get_name (var));
- dict_unref (dict);
free (vars);
return;
}
- vars[n_vars++] = var;
+ vars[i] = var;
}
}
+ if (get->names)
+ {
+ gsl_matrix *names = gsl_matrix_alloc (n_vars, 1);
+ for (size_t i = 0; i < n_vars; i++)
+ {
+ char s[sizeof (double)];
+ double f;
+ buf_copy_str_rpad (s, sizeof s, var_get_name (vars[i]), ' ');
+ memcpy (&f, s, sizeof f);
+ gsl_matrix_set (names, i, 0, f);
+ }
+
+ gsl_matrix_free (get->names->value);
+ get->names->value = names;
+ }
+
size_t n_rows = 0;
gsl_matrix *m = gsl_matrix_alloc (4, n_vars);
long long int casenum = 1;
if (keep)
n_rows++;
}
- casereader_destroy (reader);
if (!error)
{
m->size1 = n_rows;
}
else
gsl_matrix_free (m);
- dict_unref (dict);
free (vars);
}
+
+/* Executes GET.  Obtains a case reader and dictionary either from GET's
+   file, when one was specified, or from GET's dataset (the active file)
+   otherwise, then hands both to matrix_cmd_execute_get__() and releases
+   them afterward.
+
+   Returns without doing anything further if the file cannot be opened, or
+   (after reporting an error) if the active file's dictionary has no
+   variables. */
+static void
+matrix_cmd_execute_get (struct get_command *get)
+{
+  struct dictionary *source_dict;
+  struct casereader *cases;
+
+  if (get->file == NULL)
+    {
+      /* Reading from the active file. */
+      if (dict_get_var_cnt (dataset_dict (get->dataset)) == 0)
+        {
+          msg (ME, _("GET cannot read empty active file."));
+          return;
+        }
+
+      cases = proc_open (get->dataset);
+      source_dict = dict_ref (dataset_dict (get->dataset));
+    }
+  else
+    {
+      /* Reading from an external file. */
+      cases = any_reader_open_and_decode (get->file, get->encoding,
+                                          &source_dict, NULL);
+      if (cases == NULL)
+        return;
+    }
+
+  matrix_cmd_execute_get__ (get, source_dict, cases);
+
+  dict_unref (source_dict);
+  casereader_destroy (cases);
+
+  /* A proc_open() on the dataset is paired with a proc_commit(). */
+  if (get->file == NULL)
+    proc_commit (get->dataset);
+}
\f
static const char *
match_rowtype (struct lexer *lexer)
case MCMD_WRITE:
matrix_expr_destroy (cmd->write.expression);
+ free (cmd->write.format);
break;
case MCMD_GET:
matrix_lvalue_destroy (cmd->get.dst);
fh_unref (cmd->get.file);
free (cmd->get.encoding);
- string_array_destroy (&cmd->get.variables);
+ var_syntax_destroy (cmd->get.vars, cmd->get.n_vars);
break;
case MCMD_MSAVE:
return CMD_FAILURE;
struct matrix_state state = {
+ .dataset = ds,
.session = dataset_session (ds),
.lexer = lexer,
.vars = HMAP_INITIALIZER (state.vars),