#include "data/casereader.h"
#include "data/casewriter.h"
+#include "data/data-in.h"
#include "data/data-out.h"
#include "data/dataset.h"
#include "data/dictionary.h"
/* CTPO_CAT_NUMBER. */
double number;
- /* CTPO_CAT_STRING. */
- char *string;
+ /* CTPO_CAT_STRING, in dictionary encoding. */
+ struct substring string;
/* CTPO_CAT_RANGE. */
double range[2];
union
{
double number; /* CCT_NUMBER. */
- char *string; /* CCT_STRING. In dictionary encoding. */
+ struct substring string; /* CCT_STRING, in dictionary encoding. */
double range[2]; /* CCT_RANGE. */
struct
break;
case CCT_STRING:
- free (cat->string);
+ ss_dealloc (&cat->string);
break;
case CCT_SUBTOTAL:
return a->number == b->number;
case CCT_STRING:
- return strcmp (a->string, b->string);
+ return ss_equals (a->string, b->string);
case CCT_RANGE:
return a->range[0] == b->range[0] && a->range[1] == b->range[1];
}
static bool
-ctables_table_parse_explicit_category (struct lexer *lexer, struct ctables *ct,
+ctables_table_parse_explicit_category (struct lexer *lexer,
+ struct dictionary *dict,
+ struct ctables *ct,
struct ctables_category *cat)
{
if (lex_match_id (lexer, "OTHERNM"))
}
else if (lex_is_string (lexer))
{
- *cat = (struct ctables_category) {
- .type = CCT_STRING,
- .string = ss_xstrdup (lex_tokss (lexer)),
- };
+ struct substring s = recode_substring_pool (
+ dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
+ ss_rtrim (&s, ss_cstr (" "));
+
+ *cat = (struct ctables_category) { .type = CCT_STRING, .string = s };
lex_get (lexer);
}
else if (lex_match (lexer, T_AND))
break;
case CTPO_CAT_STRING:
- if (cat->type == CCT_STRING && !strcmp (cat->string, e->string))
+ if (cat->type == CCT_STRING && ss_equals (cat->string, e->string))
best = cat;
break;
if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH))
return false;
+ const struct fmt_spec *common_format = var_get_print_format (vars[0]);
+ for (size_t i = 1; i < n_vars; i++)
+ {
+ const struct fmt_spec *f = var_get_print_format (vars[i]);
+ if (f->type != common_format->type)
+ {
+ common_format = NULL;
+ break;
+ }
+ }
+ bool parse_strings
+ = (common_format
+ && (fmt_get_category (common_format->type)
+ & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)));
+
struct ctables_categories *c = xmalloc (sizeof *c);
*c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true };
for (size_t i = 0; i < n_vars; i++)
ctables_categories_unref (*cp);
*cp = c;
}
- free (vars);
size_t allocated_cats = 0;
if (lex_match (lexer, T_LBRACK))
int start_ofs = lex_ofs (lexer);
struct ctables_category *cat = &c->cats[c->n_cats];
- if (!ctables_table_parse_explicit_category (lexer, ct, cat))
+ if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat))
return false;
cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1);
c->n_cats++;
for (size_t i = 0; i < c->n_cats; i++)
{
struct ctables_category *cat = &c->cats[i];
- if (cat->type == CCT_POSTCOMPUTE
- && !ctables_recursive_check_postcompute (cat->pc->expr, cat,
- c, cats_location))
- return false;
+ switch (cat->type)
+ {
+ case CCT_POSTCOMPUTE:
+ if (!ctables_recursive_check_postcompute (cat->pc->expr, cat,
+ c, cats_location))
+ return false;
+ break;
+
+ case CCT_NUMBER:
+ case CCT_RANGE:
+ for (size_t j = 0; j < n_vars; j++)
+ if (var_is_alpha (vars[j]))
+ {
+ msg_at (SE, cat->location,
+ _("This category specification may be applied "
+ "only to numeric variables, but this "
+ "subcommand tries to apply it to string "
+ "variable %s."),
+ var_get_name (vars[j]));
+ return false;
+ }
+ break;
+
+ case CCT_STRING:
+ if (parse_strings)
+ {
+ union value v;
+ char *error = data_in (cat->string, dict_get_encoding (dict),
+ common_format->type,
+ settings_get_fmt_settings (),
+ &v, 0, NULL);
+ if (error)
+ {
+ msg_at (SE, cat->location,
+ _("Failed to parse category specification as "
+ "format %s: %s."),
+ fmt_name (common_format->type), error);
+ free (error);
+ return false;
+ }
+
+ ss_dealloc (&cat->string);
+
+ cat->type = CCT_NUMBER;
+ cat->number = v.f;
+ }
+ else
+ {
+ for (size_t j = 0; j < n_vars; j++)
+ if (var_is_numeric (vars[j]))
+ {
+ msg_at (SE, cat->location,
+ _("This category specification may be applied "
+ "only to string variables, but this "
+ "subcommand tries to apply it to numeric "
+ "variable %s."),
+ var_get_name (vars[j]));
+ return false;
+ }
+ }
+ break;
+
+ case CCT_MISSING:
+ case CCT_OTHERNM:
+ case CCT_SUBTOTAL:
+ case CCT_TOTAL:
+ case CCT_VALUE:
+ case CCT_LABEL:
+ case CCT_FUNCTION:
+ case CCT_EXCLUDED_MISSING:
+ break;
+ }
}
}
break;
case CCT_STRING:
- NOT_REACHED ();
+ {
+ /* Explicit string categories are stored with trailing spaces
+ removed, so trim the case's fixed-width value the same way
+ before comparing. */
+ struct substring s = ss_buffer (CHAR_CAST (char *, v->s),
+ var_get_width (var));
+ ss_rtrim (&s, ss_cstr (" "));
+ if (ss_equals (cat->string, s))
+ return cat;
+ }
+ break;
case CCT_RANGE:
if ((cat->range[0] == -DBL_MAX || v->f >= cat->range[0])
if (var_missing)
is_missing = true;
cats[a][i] = ctables_categories_match (
s->table->categories[var_get_dict_index (var)], value, var);
if (!cats[a][i])
break;
case CCT_STRING:
- abort (); /* XXX */
+ {
+ int width = var_get_width (var);
+ union value value;
+ value_init (&value, width);
+ value_copy_buf_rpad (&value, width,
+ CHAR_CAST (uint8_t *, c->string.string),
+ c->string.length, ' ');
+ ctables_add_occurrence (var, &value, occurrences);
+ value_destroy (&value, width);
+ }
+ break;
case CCT_RANGE:
assert (var_is_numeric (var));
\f
/* Postcomputes. */
-typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *);
+typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *,
+ struct dictionary *);
static void
ctables_pcexpr_destroy (struct ctables_pcexpr *e)
switch (e->op)
{
case CTPO_CAT_STRING:
- free (e->string);
+ ss_dealloc (&e->string);
break;
case CTPO_ADD:
}
static struct ctables_pcexpr *
-parse_binary_operators__ (struct lexer *lexer,
+parse_binary_operators__ (struct lexer *lexer, struct dictionary *dict,
const struct operator ops[], size_t n_ops,
parse_recursively_func *parse_next_level,
const char *chain_warning,
return lhs;
}
- struct ctables_pcexpr *rhs = parse_next_level (lexer);
+ struct ctables_pcexpr *rhs = parse_next_level (lexer, dict);
if (!rhs)
{
ctables_pcexpr_destroy (lhs);
}
static struct ctables_pcexpr *
-parse_binary_operators (struct lexer *lexer,
+parse_binary_operators (struct lexer *lexer, struct dictionary *dict,
const struct operator ops[], size_t n_ops,
parse_recursively_func *parse_next_level,
const char *chain_warning)
{
- struct ctables_pcexpr *lhs = parse_next_level (lexer);
+ struct ctables_pcexpr *lhs = parse_next_level (lexer, dict);
if (!lhs)
return NULL;
- return parse_binary_operators__ (lexer, ops, n_ops, parse_next_level,
+ return parse_binary_operators__ (lexer, dict, ops, n_ops, parse_next_level,
chain_warning, lhs);
}
-static struct ctables_pcexpr *parse_add (struct lexer *);
+static struct ctables_pcexpr *parse_add (struct lexer *, struct dictionary *);
static struct ctables_pcexpr
ctpo_cat_range (double low, double high)
}
static struct ctables_pcexpr *
-parse_primary (struct lexer *lexer)
+parse_primary (struct lexer *lexer, struct dictionary *dict)
{
int start_ofs = lex_ofs (lexer);
struct ctables_pcexpr e;
}
else if (lex_is_string (lexer))
{
- e = (struct ctables_pcexpr) {
- .op = CTPO_CAT_STRING,
- .string = ss_xstrdup (lex_tokss (lexer)),
- };
+ struct substring s = recode_substring_pool (
+ dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL);
+ ss_rtrim (&s, ss_cstr (" "));
+
+ e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s };
lex_get (lexer);
}
else
if (!lex_force_match (lexer, T_RBRACK))
{
if (e.op == CTPO_CAT_STRING)
- free (e.string);
+ ss_dealloc (&e.string);
return NULL;
}
}
else if (lex_match (lexer, T_LPAREN))
{
- struct ctables_pcexpr *ep = parse_add (lexer);
+ struct ctables_pcexpr *ep = parse_add (lexer, dict);
if (!ep)
return NULL;
if (!lex_force_match (lexer, T_RPAREN))
}
static struct ctables_pcexpr *
-parse_exp (struct lexer *lexer)
+parse_exp (struct lexer *lexer, struct dictionary *dict)
{
static const struct operator op = { T_EXP, CTPO_POW };
"To disable this warning, insert parentheses.");
if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP)
- return parse_binary_operators (lexer, &op, 1,
+ return parse_binary_operators (lexer, dict, &op, 1,
parse_primary, chain_warning);
/* Special case for situations like "-5**6", which must be parsed as
lex_get (lexer);
struct ctables_pcexpr *node = parse_binary_operators__ (
- lexer, &op, 1, parse_primary, chain_warning, lhs);
+ lexer, dict, &op, 1, parse_primary, chain_warning, lhs);
if (!node)
return NULL;
/* Parses the unary minus level. */
static struct ctables_pcexpr *
-parse_neg (struct lexer *lexer)
+parse_neg (struct lexer *lexer, struct dictionary *dict)
{
int start_ofs = lex_ofs (lexer);
if (!lex_match (lexer, T_DASH))
- return parse_exp (lexer);
+ return parse_exp (lexer, dict);
- struct ctables_pcexpr *inner = parse_neg (lexer);
+ struct ctables_pcexpr *inner = parse_neg (lexer, dict);
if (!inner)
return NULL;
/* Parses the multiplication and division level. */
static struct ctables_pcexpr *
-parse_mul (struct lexer *lexer)
+parse_mul (struct lexer *lexer, struct dictionary *dict)
{
static const struct operator ops[] =
{
{ T_SLASH, CTPO_DIV },
};
- return parse_binary_operators (lexer, ops, sizeof ops / sizeof *ops,
+ return parse_binary_operators (lexer, dict, ops, sizeof ops / sizeof *ops,
parse_neg, NULL);
}
/* Parses the addition and subtraction level. */
static struct ctables_pcexpr *
-parse_add (struct lexer *lexer)
+parse_add (struct lexer *lexer, struct dictionary *dict)
{
static const struct operator ops[] =
{
{ T_NEG_NUM, CTPO_ADD },
};
- return parse_binary_operators (lexer, ops, sizeof ops / sizeof *ops,
+ return parse_binary_operators (lexer, dict, ops, sizeof ops / sizeof *ops,
parse_mul, NULL);
}
}
static bool
-ctables_parse_pcompute (struct lexer *lexer, struct ctables *ct)
+ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict,
+ struct ctables *ct)
{
int pcompute_start = lex_ofs (lexer) - 1;
}
int expr_start = lex_ofs (lexer);
- struct ctables_pcexpr *expr = parse_add (lexer);
+ struct ctables_pcexpr *expr = parse_add (lexer, dict);
int expr_end = lex_ofs (lexer) - 1;
if (!expr || !lex_force_match (lexer, T_RPAREN))
{
}
else if (lex_match_id (lexer, "PCOMPUTE"))
{
- if (!ctables_parse_pcompute (lexer, ct))
+ if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct))
goto error;
}
else if (lex_match_id (lexer, "PPROPERTIES"))