#include "libpspp/assertion.h"
#include "libpspp/hash-functions.h"
#include "libpspp/hmap.h"
+#include "libpspp/i18n.h"
#include "libpspp/message.h"
#include "libpspp/string-array.h"
#include "math/mode.h"
/* Indexed by variable dictionary index. */
enum ctables_vlabel *vlabels;
+ struct hmap postcomputes; /* Contains "struct ctables_postcompute"s. */
+
bool mrsets_count_duplicates; /* MRSETS. */
bool smissing_listwise; /* SMISSING. */
struct variable *e_weight; /* WEIGHT. */
struct ctables_postcompute
{
struct hmap_node hmap_node; /* In struct ctables's 'pcompute' hmap. */
- const char *name; /* Name, without leading &. */
+ char *name; /* Name, without leading & (malloc'd copy). */
- struct ctables_postcompute_expr *expr;
+ struct msg_location *location; /* Location of definition. */
+ struct ctables_pcexpr *expr; /* Parsed EXPR(...) expression tree. */
char *label;
- /* XXX FORMAT */
+ struct ctables_summary_spec_set *specs; /* From PPROPERTIES FORMAT; null if never set. */
bool hide_source_cats;
};
-struct ctables_postcompute_expr
+/* A node in the expression tree parsed from PCOMPUTE's EXPR(...).
+   Terminal nodes are constants and category references; nonterminal nodes
+   are arithmetic operators whose operands are in 'subs'. */
+struct ctables_pcexpr
{
+ /* Precedence table:
+
+ ()
+ **
+ -
+ * /
+ - +
+ */
enum ctables_postcompute_op
{
/* Terminals. */
- CTPO_CAT_NUMBER,
- CTPO_CAT_STRING,
- CTPO_CAT_RANGE,
- CTPO_CAT_MISSING,
- /* XXX OTHERNM */
- /* XXX SUBTOTAL and HSUBTOTAL */
+ CTPO_CONSTANT, /* 5 */
+ CTPO_CAT_NUMBER, /* [5] */
+ CTPO_CAT_STRING, /* ["STRING"] */
+ CTPO_CAT_RANGE, /* [LO THRU 5] */
+ CTPO_CAT_MISSING, /* MISSING */
+ CTPO_CAT_OTHERNM, /* OTHERNM */
+ CTPO_CAT_SUBTOTAL, /* SUBTOTAL */
+ CTPO_CAT_TOTAL, /* TOTAL */
/* Nonterminals. */
CTPO_ADD,
+ CTPO_SUB,
CTPO_MUL,
CTPO_DIV,
CTPO_POW,
+ CTPO_NEG,
}
op;
union
{
- /* CTPO_CAT_NUMBER, CTPO_NUMBER. */
+ /* CTPO_CONSTANT, CTPO_CAT_NUMBER. */
double number;
- /* CTPO_CAT_RANGE.
+ /* CTPO_CAT_STRING. */
+ char *string;
- XXX what about string ranges? */
+ /* CTPO_CAT_RANGE. */
double range[2];
- /* CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW. */
- struct ctables_postcompute_expr *subs[2];
+ /* CTPO_CAT_SUBTOTAL (0 if no index was specified). */
+ size_t subtotal_index;
+
+ /* Two elements: CTPO_ADD, CTPO_SUB, CTPO_MUL, CTPO_DIV, CTPO_POW.
+ One element: CTPO_NEG. */
+ struct ctables_pcexpr *subs[2];
};
+
+ /* Source location.  'ofs' holds the lexer offsets of the first and last
+ token of the expression.  'location' is null until
+ ctables_pcexpr_location() computes it from 'ofs' on demand. */
+ int ofs[2];
+ struct msg_location *location;
};
+/* Forward declarations for postcompute expression helpers that are defined
+   later in this file, in the "Postcomputes" section. */
+static void ctables_pcexpr_destroy (struct ctables_pcexpr *);
+static struct ctables_pcexpr *ctables_pcexpr_allocate_binary (
+ enum ctables_postcompute_op, struct ctables_pcexpr *sub0,
+ struct ctables_pcexpr *sub1);
+
struct ctables_summary_spec_set
{
struct ctables_summary_spec *specs;
lex_get (lexer);
if (lex_match_id (lexer, "THRU"))
{
- cat->type = CCT_RANGE;
- cat->range[0] = number;
if (lex_match_id (lexer, "HI"))
*cat = cct_range (number, DBL_MAX);
else
}
return proc_commit (ds);
}
+\f
+/* Postcomputes. */
+
+/* Type of a function that parses one precedence level of a postcompute
+   expression from the lexer.  Callers treat a null return as a parse
+   failure. */
+typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *);
+
+/* Frees expression E along with what it owns: the string in a
+   CTPO_CAT_STRING node, the operand subtrees of an operator node
+   (recursively), and the cached source location.  Does nothing if E is
+   null. */
+static void
+ctables_pcexpr_destroy (struct ctables_pcexpr *e)
+{
+  if (!e)
+    return;
+
+  switch (e->op)
+    {
+    /* Terminals that own no separately allocated data. */
+    case CTPO_CONSTANT:
+    case CTPO_CAT_NUMBER:
+    case CTPO_CAT_RANGE:
+    case CTPO_CAT_MISSING:
+    case CTPO_CAT_OTHERNM:
+    case CTPO_CAT_SUBTOTAL:
+    case CTPO_CAT_TOTAL:
+      break;
+
+    case CTPO_CAT_STRING:
+      free (e->string);
+      break;
+
+    /* Operators.  Both slots are released; an unused slot is null, which
+       the recursive call ignores. */
+    case CTPO_ADD:
+    case CTPO_SUB:
+    case CTPO_MUL:
+    case CTPO_DIV:
+    case CTPO_POW:
+    case CTPO_NEG:
+      ctables_pcexpr_destroy (e->subs[0]);
+      ctables_pcexpr_destroy (e->subs[1]);
+      break;
+    }
+
+  msg_location_destroy (e->location);
+  free (e);
+}
+
+/* Returns a new expression node of type OP with operands SUB0 and SUB1,
+   which the new node takes over.  Its token range runs from the first
+   token of SUB0 through the last token of SUB1. */
+static struct ctables_pcexpr *
+ctables_pcexpr_allocate_binary (enum ctables_postcompute_op op,
+                                struct ctables_pcexpr *sub0,
+                                struct ctables_pcexpr *sub1)
+{
+  struct ctables_pcexpr node = {
+    .op = op,
+    .subs = { sub0, sub1 },
+    .ofs = { sub0->ofs[0], sub1->ofs[1] },
+  };
+  return xmemdup (&node, sizeof node);
+}
+
+/* Returns the source location of expression E_.  The location is computed
+   from the expression's token offsets the first time it is requested and
+   then cached in the expression itself, which is why constness is cast
+   away. */
+static struct msg_location *
+ctables_pcexpr_location (struct lexer *lexer, const struct ctables_pcexpr *e_)
+{
+  if (e_->location)
+    return e_->location;
+
+  struct ctables_pcexpr *e = CONST_CAST (struct ctables_pcexpr *, e_);
+  e->location = lex_ofs_location (lexer, e->ofs[0], e->ofs[1]);
+  return e->location;
+}
+
+/* How to parse an operator: pairs a lexer token with the type of
+   expression node that it produces. */
+struct operator
+ {
+ enum token_type token; /* Token to look for in the input. */
+ enum ctables_postcompute_op op; /* Node type to build on a match. */
+ };
+
+/* Looks in the N_OPS elements of OPS for one whose token is the lexer's
+   current token.  Returns the first such element, or a null pointer if
+   there is none.
+
+   A matched token is consumed, with one exception: T_NEG_NUM is left in
+   place, so whatever is parsed next still sees the negative number. */
+static const struct operator *
+match_operator (struct lexer *lexer, const struct operator ops[], size_t n_ops)
+{
+  for (size_t i = 0; i < n_ops; i++)
+    {
+      const struct operator *candidate = &ops[i];
+      if (lex_token (lexer) != candidate->token)
+        continue;
+
+      if (candidate->token != T_NEG_NUM)
+        lex_get (lexer);
+      return candidate;
+    }
+
+  return NULL;
+}
+
+/* Continues parsing a left-associative chain of binary operators whose
+   first operand LHS has already been parsed.  As long as the current token
+   matches one of the N_OPS operators in OPS, parses the next operand with
+   PARSE_NEXT_LEVEL and folds it into the expression built so far.
+
+   If CHAIN_WARNING is nonnull and more than one operator was parsed, issues
+   it as a warning located at the resulting expression.
+
+   Returns the resulting expression.  If an operand fails to parse, frees
+   everything built so far (including LHS) and returns a null pointer. */
+static struct ctables_pcexpr *
+parse_binary_operators__ (struct lexer *lexer,
+                          const struct operator ops[], size_t n_ops,
+                          parse_recursively_func *parse_next_level,
+                          const char *chain_warning,
+                          struct ctables_pcexpr *lhs)
+{
+  int n_operators = 0;
+  const struct operator *op;
+  while ((op = match_operator (lexer, ops, n_ops)) != NULL)
+    {
+      struct ctables_pcexpr *rhs = parse_next_level (lexer);
+      if (!rhs)
+        {
+          ctables_pcexpr_destroy (lhs);
+          return NULL;
+        }
+
+      lhs = ctables_pcexpr_allocate_binary (op->op, lhs, rhs);
+      n_operators++;
+    }
+
+  if (n_operators > 1 && chain_warning)
+    msg_at (SW, ctables_pcexpr_location (lexer, lhs), "%s", chain_warning);
+
+  return lhs;
+}
+
+/* Parses a left-associative chain of the N_OPS binary operators in OPS,
+   using PARSE_NEXT_LEVEL to parse each operand.  CHAIN_WARNING has the same
+   meaning as for parse_binary_operators__().
+
+   Returns the parsed expression, or a null pointer if any operand fails to
+   parse. */
+static struct ctables_pcexpr *
+parse_binary_operators (struct lexer *lexer,
+                        const struct operator ops[], size_t n_ops,
+                        parse_recursively_func *parse_next_level,
+                        const char *chain_warning)
+{
+  struct ctables_pcexpr *first_operand = parse_next_level (lexer);
+  return (first_operand
+          ? parse_binary_operators__ (lexer, ops, n_ops, parse_next_level,
+                                      chain_warning, first_operand)
+          : NULL);
+}
+
+/* Declared early because parse_primary() calls it for parenthesized
+   subexpressions. */
+static struct ctables_pcexpr *parse_add (struct lexer *);
+
+/* Returns a CTPO_CAT_RANGE expression, by value, for the range from LOW
+   to HIGH.  All other members are zeroed. */
+static struct ctables_pcexpr
+ctpo_cat_range (double low, double high)
+{
+  struct ctables_pcexpr range_expr = { .op = CTPO_CAT_RANGE };
+  range_expr.range[0] = low;
+  range_expr.range[1] = high;
+  return range_expr;
+}
+
+/* Parses a primary expression in a postcompute:
+
+     - A numeric constant.
+     - MISSING, OTHERNM, or TOTAL.
+     - SUBTOTAL, optionally followed by a bracketed index of at least 1.
+     - A bracketed category: [5], ["STRING"], [LO THRU 5], [5 THRU 10],
+       or [5 THRU HI].
+     - A parenthesized subexpression.
+
+   Returns a newly allocated expression, which the caller must eventually
+   free with ctables_pcexpr_destroy(), or a null pointer after reporting a
+   syntax error. */
+static struct ctables_pcexpr *
+parse_primary (struct lexer *lexer)
+{
+  int start_ofs = lex_ofs (lexer);
+  struct ctables_pcexpr e;
+  if (lex_is_number (lexer))
+    {
+      e = (struct ctables_pcexpr) { .op = CTPO_CONSTANT,
+                                    .number = lex_number (lexer) };
+      lex_get (lexer);
+    }
+  else if (lex_match_id (lexer, "MISSING"))
+    e = (struct ctables_pcexpr) { .op = CTPO_CAT_MISSING };
+  else if (lex_match_id (lexer, "OTHERNM"))
+    e = (struct ctables_pcexpr) { .op = CTPO_CAT_OTHERNM };
+  else if (lex_match_id (lexer, "TOTAL"))
+    e = (struct ctables_pcexpr) { .op = CTPO_CAT_TOTAL };
+  else if (lex_match_id (lexer, "SUBTOTAL"))
+    {
+      /* An index of 0 records that no explicit index was given. */
+      size_t subtotal_index = 0;
+      if (lex_match (lexer, T_LBRACK))
+        {
+          if (!lex_force_int_range (lexer, "SUBTOTAL", 1, LONG_MAX))
+            return NULL;
+          subtotal_index = lex_integer (lexer);
+          lex_get (lexer);
+          if (!lex_force_match (lexer, T_RBRACK))
+            return NULL;
+        }
+      e = (struct ctables_pcexpr) { .op = CTPO_CAT_SUBTOTAL,
+                                    .subtotal_index = subtotal_index };
+    }
+  else if (lex_match (lexer, T_LBRACK))
+    {
+      if (lex_match_id (lexer, "LO"))
+        {
+          /* Both checks must succeed before the upper bound is read.  The
+             second one used to lack its negation, which rejected every
+             valid [LO THRU n]. */
+          if (!lex_force_match_id (lexer, "THRU") || !lex_force_num (lexer))
+            return NULL;
+          e = ctpo_cat_range (-DBL_MAX, lex_number (lexer));
+          lex_get (lexer);
+        }
+      else if (lex_is_number (lexer))
+        {
+          double number = lex_number (lexer);
+          lex_get (lexer);
+          if (lex_match_id (lexer, "THRU"))
+            {
+              if (lex_match_id (lexer, "HI"))
+                e = ctpo_cat_range (number, DBL_MAX);
+              else
+                {
+                  if (!lex_force_num (lexer))
+                    return NULL;
+                  e = ctpo_cat_range (number, lex_number (lexer));
+                  lex_get (lexer);
+                }
+            }
+          else
+            e = (struct ctables_pcexpr) { .op = CTPO_CAT_NUMBER,
+                                          .number = number };
+        }
+      else if (lex_is_string (lexer))
+        {
+          e = (struct ctables_pcexpr) {
+            .op = CTPO_CAT_STRING,
+            .string = ss_xstrdup (lex_tokss (lexer)),
+          };
+          lex_get (lexer);
+        }
+      else
+        {
+          lex_error (lexer, NULL);
+          return NULL;
+        }
+
+      if (!lex_force_match (lexer, T_RBRACK))
+        {
+          /* 'e' is still a local here, so only the string copy needs to
+             be released. */
+          if (e.op == CTPO_CAT_STRING)
+            free (e.string);
+          return NULL;
+        }
+    }
+  else if (lex_match (lexer, T_LPAREN))
+    {
+      /* The subexpression is returned as is, so its token range does not
+         include the parentheses. */
+      struct ctables_pcexpr *ep = parse_add (lexer);
+      if (!ep)
+        return NULL;
+      if (!lex_force_match (lexer, T_RPAREN))
+        {
+          ctables_pcexpr_destroy (ep);
+          return NULL;
+        }
+      return ep;
+    }
+  else
+    {
+      lex_error (lexer, NULL);
+      return NULL;
+    }
+
+  /* Record the token range, ending at the last token consumed. */
+  e.ofs[0] = start_ofs;
+  e.ofs[1] = lex_ofs (lexer) - 1;
+  return xmemdup (&e, sizeof e);
+}
+
+/* Returns a new CTPO_NEG expression that negates SUB, which the new node
+   takes over.  Its token range runs from START_OFS through the token just
+   before the lexer's current position. */
+static struct ctables_pcexpr *
+ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub,
+                             struct lexer *lexer, int start_ofs)
+{
+  struct ctables_pcexpr neg = {
+    .op = CTPO_NEG,
+    .subs = { sub, NULL },
+    .ofs = { start_ofs, lex_ofs (lexer) - 1 },
+  };
+  return xmemdup (&neg, sizeof neg);
+}
+
+/* Parses the exponentiation level.  Chains of `**' are left-associative
+   and draw a warning. */
+static struct ctables_pcexpr *
+parse_exp (struct lexer *lexer)
+{
+  static const struct operator pow_op = { .token = T_EXP, .op = CTPO_POW };
+
+  const char *chain_warning =
+    _("The exponentiation operator (`**') is left-associative: "
+      "`a**b**c' equals `(a**b)**c', not `a**(b**c)'. "
+      "To disable this warning, insert parentheses.");
+
+  bool negative_base = (lex_token (lexer) == T_NEG_NUM
+                        && lex_next_token (lexer, 1) == T_EXP);
+  if (!negative_base)
+    return parse_binary_operators (lexer, &pow_op, 1,
+                                   parse_primary, chain_warning);
+
+  /* Special case for situations like "-5**6", which must be parsed as
+     -(5**6): use the magnitude of the number as the base, then negate the
+     whole power. */
+
+  int start_ofs = lex_ofs (lexer);
+  struct ctables_pcexpr base = {
+    .op = CTPO_CONSTANT,
+    .number = -lex_tokval (lexer),
+    /* The base is a single token. */
+    .ofs = { start_ofs, start_ofs },
+  };
+  lex_get (lexer);
+
+  struct ctables_pcexpr *power = parse_binary_operators__ (
+    lexer, &pow_op, 1, parse_primary, chain_warning,
+    xmemdup (&base, sizeof base));
+  return power ? ctables_pcexpr_allocate_neg (power, lexer, start_ofs) : NULL;
+}
+
+/* Parses the unary minus level: any number of leading `-' tokens followed
+   by an exponentiation-level expression.  Each `-' yields one CTPO_NEG
+   node.  Returns a null pointer if the operand fails to parse. */
+static struct ctables_pcexpr *
+parse_neg (struct lexer *lexer)
+{
+  int start_ofs = lex_ofs (lexer);
+  if (lex_match (lexer, T_DASH))
+    {
+      struct ctables_pcexpr *operand = parse_neg (lexer);
+      return (operand
+              ? ctables_pcexpr_allocate_neg (operand, lexer, start_ofs)
+              : NULL);
+    }
+
+  return parse_exp (lexer);
+}
+
+/* Parses the multiplication and division level: a left-associative chain
+   of `*' and `/' whose operands are unary-minus-level expressions. */
+static struct ctables_pcexpr *
+parse_mul (struct lexer *lexer)
+{
+  static const struct operator mul_ops[] =
+    {
+      { .token = T_ASTERISK, .op = CTPO_MUL },
+      { .token = T_SLASH, .op = CTPO_DIV },
+    };
+  const size_t n_mul_ops = sizeof mul_ops / sizeof mul_ops[0];
+
+  return parse_binary_operators (lexer, mul_ops, n_mul_ops, parse_neg, NULL);
+}
+
+/* Parses the addition and subtraction level: a left-associative chain of
+   `+' and `-' whose operands are multiplication-level expressions.
+
+   A negative-number token in operator position is treated as addition.
+   match_operator() does not consume it, so it is then parsed as the
+   right-hand operand. */
+static struct ctables_pcexpr *
+parse_add (struct lexer *lexer)
+{
+  static const struct operator add_ops[] =
+    {
+      { .token = T_PLUS, .op = CTPO_ADD },
+      { .token = T_DASH, .op = CTPO_SUB },
+      { .token = T_NEG_NUM, .op = CTPO_ADD },
+    };
+  const size_t n_add_ops = sizeof add_ops / sizeof add_ops[0];
+
+  return parse_binary_operators (lexer, add_ops, n_add_ops, parse_mul, NULL);
+}
+
+/* Searches CT's postcomputes for one named NAME (without the leading &),
+   comparing names case-insensitively.  Returns the postcompute if found,
+   otherwise a null pointer. */
+static struct ctables_postcompute *
+ctables_find_postcompute (struct ctables *ct, const char *name)
+{
+  size_t hash = utf8_hash_case_string (name, 0);
+
+  struct ctables_postcompute *candidate;
+  HMAP_FOR_EACH_WITH_HASH (candidate, struct ctables_postcompute, hmap_node,
+                           hash, &ct->postcomputes)
+    {
+      if (utf8_strcasecmp (candidate->name, name) == 0)
+        return candidate;
+    }
+
+  return NULL;
+}
+
+/* Parses the body of a PCOMPUTE subcommand, "&NAME = EXPR(expression)",
+   and records the definition in CT's postcomputes.
+
+   If &NAME is already defined, issues a warning that points at both
+   definitions, then replaces the old expression and location.  The other
+   properties of the existing postcompute are left alone.
+
+   Returns true if successful, false after reporting a syntax error, in
+   which case CT is unchanged. */
+static bool
+ctables_parse_pcompute (struct lexer *lexer, struct ctables *ct)
+{
+  /* Start one token back so that the recorded location takes in the
+     keyword that the caller matched just before calling. */
+  int start_ofs = lex_ofs (lexer) - 1;
+
+  if (!lex_force_match (lexer, T_AND) || !lex_force_id (lexer))
+    return false;
+
+  char *name = ss_xstrdup (lex_tokss (lexer));
+
+  lex_get (lexer);
+  if (!lex_force_match (lexer, T_EQUALS)
+      || !lex_force_match_id (lexer, "EXPR")
+      || !lex_force_match (lexer, T_LPAREN))
+    {
+      free (name);
+      return false;
+    }
+
+  struct ctables_pcexpr *expr = parse_add (lexer);
+  if (!expr || !lex_force_match (lexer, T_RPAREN))
+    {
+      /* The expression may have parsed correctly even though the closing
+         parenthesis is missing, so release it as well as the name.
+         Destroying a null expression is harmless. */
+      ctables_pcexpr_destroy (expr);
+      free (name);
+      return false;
+    }
+
+  struct msg_location *location = lex_ofs_location (lexer, start_ofs,
+                                                    lex_ofs (lexer) - 1);
+
+  struct ctables_postcompute *pc = ctables_find_postcompute (ct, name);
+  if (pc)
+    {
+      msg_at (SW, location, _("New definition of &%s will override the "
+                              "previous definition."),
+              pc->name);
+      msg_at (SN, pc->location, _("This is the previous definition."));
+
+      /* The existing entry keeps its own copy of the name. */
+      ctables_pcexpr_destroy (pc->expr);
+      msg_location_destroy (pc->location);
+      free (name);
+    }
+  else
+    {
+      /* The new entry takes over 'name'. */
+      pc = xmalloc (sizeof *pc);
+      *pc = (struct ctables_postcompute) { .name = name };
+      hmap_insert (&ct->postcomputes, &pc->hmap_node,
+                   utf8_hash_case_string (pc->name, 0));
+    }
+  pc->expr = expr;
+  pc->location = location;
+  return true;
+}
+
+/* Parses the argument to PPROPERTIES FORMAT: a sequence of summary
+   functions, each followed by an output format (and preceded by a
+   percentile, for PTILE).  Parsing stops at the end of the command, at a
+   slash, or at a LABEL or HIDESOURCECATS keyword.
+
+   SSS is initialized here and receives one summary spec per function
+   parsed.  Returns true if successful.  On failure, uninitializes SSS and
+   returns false. */
+static bool
+ctables_parse_pproperties_format (struct lexer *lexer,
+                                  struct ctables_summary_spec_set *sss)
+{
+  *sss = (struct ctables_summary_spec_set) { .n = 0 };
+
+  for (;;)
+    {
+      /* Check for the end of the FORMAT specification. */
+      enum token_type token = lex_token (lexer);
+      if (token == T_ENDCMD || token == T_SLASH)
+        break;
+      if (token == T_ID
+          && (lex_id_match (ss_cstr ("LABEL"), lex_tokss (lexer))
+              || lex_id_match (ss_cstr ("HIDESOURCECATS"),
+                               lex_tokss (lexer))))
+        break;
+
+      /* Summary function. */
+      enum ctables_summary_function function;
+      if (!parse_ctables_summary_function (lexer, &function))
+        goto fail;
+
+      /* Percentile, which only PTILE takes. */
+      double percentile = 0;
+      if (function == CTSF_PTILE)
+        {
+          if (!lex_force_num_range_closed (lexer, "PTILE", 0, 100))
+            goto fail;
+          percentile = lex_number (lexer);
+          lex_get (lexer);
+        }
+
+      /* Output format, which must be usable for numeric output. */
+      struct fmt_spec format;
+      if (!parse_format_specifier (lexer, &format)
+          || !fmt_check_output (&format)
+          || !fmt_check_type_compat (&format, VAL_NUMERIC))
+        goto fail;
+
+      /* Append the new spec, growing the array when it is full. */
+      if (sss->n >= sss->allocated)
+        sss->specs = x2nrealloc (sss->specs, &sss->allocated,
+                                 sizeof *sss->specs);
+      struct ctables_summary_spec *spec = &sss->specs[sss->n++];
+      *spec = (struct ctables_summary_spec) {
+        .function = function,
+        .percentile = percentile,
+        .format = format,
+      };
+    }
+  return true;
+
+fail:
+  ctables_summary_spec_set_uninit (sss);
+  return false;
+}
+
+/* Parses the body of a PPROPERTIES subcommand: a list of previously
+   defined postcomputes (&NAME ...) followed by any of LABEL, FORMAT, and
+   HIDESOURCECATS settings.  Each setting is applied to every postcompute
+   listed.
+
+   Returns true if successful, false after reporting an error.  Settings
+   parsed before the error remain applied. */
+static bool
+ctables_parse_pproperties (struct lexer *lexer, struct ctables *ct)
+{
+  struct ctables_postcompute **pcs = NULL;
+  size_t n_pcs = 0;
+  size_t allocated_pcs = 0;
+
+  /* Collect the postcomputes to modify. */
+  while (lex_match (lexer, T_AND))
+    {
+      if (!lex_force_id (lexer))
+        goto error;
+      struct ctables_postcompute *pc
+        = ctables_find_postcompute (ct, lex_tokcstr (lexer));
+      if (!pc)
+        {
+          msg (SE, _("Unknown computed category &%s."), lex_tokcstr (lexer));
+          goto error;
+        }
+
+      /* lex_force_id() only checks the token, so consume the name here.
+         Otherwise the settings loop below would see the name and reject
+         it as an unknown keyword. */
+      lex_get (lexer);
+
+      if (n_pcs >= allocated_pcs)
+        pcs = x2nrealloc (pcs, &allocated_pcs, sizeof *pcs);
+      pcs[n_pcs++] = pc;
+    }
+
+  /* Parse the settings and apply each one to all of the postcomputes. */
+  while (lex_token (lexer) != T_SLASH && lex_token (lexer) != T_ENDCMD)
+    {
+      if (lex_match_id (lexer, "LABEL"))
+        {
+          lex_match (lexer, T_EQUALS);
+          if (!lex_force_string (lexer))
+            goto error;
+
+          /* Each postcompute gets its own copy of the label. */
+          for (size_t i = 0; i < n_pcs; i++)
+            {
+              free (pcs[i]->label);
+              pcs[i]->label = ss_xstrdup (lex_tokss (lexer));
+            }
+
+          lex_get (lexer);
+        }
+      else if (lex_match_id (lexer, "FORMAT"))
+        {
+          lex_match (lexer, T_EQUALS);
+
+          struct ctables_summary_spec_set sss;
+          if (!ctables_parse_pproperties_format (lexer, &sss))
+            goto error;
+
+          /* Each postcompute gets its own clone of the parsed specs.  An
+             existing set is emptied and reused; otherwise a new one is
+             allocated. */
+          for (size_t i = 0; i < n_pcs; i++)
+            {
+              if (pcs[i]->specs)
+                ctables_summary_spec_set_uninit (pcs[i]->specs);
+              else
+                pcs[i]->specs = xmalloc (sizeof *pcs[i]->specs);
+              ctables_summary_spec_set_clone (pcs[i]->specs, &sss);
+            }
+          ctables_summary_spec_set_uninit (&sss);
+        }
+      else if (lex_match_id (lexer, "HIDESOURCECATS"))
+        {
+          lex_match (lexer, T_EQUALS);
+          bool hide_source_cats;
+          if (!parse_bool (lexer, &hide_source_cats))
+            goto error;
+          for (size_t i = 0; i < n_pcs; i++)
+            pcs[i]->hide_source_cats = hide_source_cats;
+        }
+      else
+        {
+          lex_error_expecting (lexer, "LABEL", "FORMAT", "HIDESOURCECATS");
+          goto error;
+        }
+    }
+  free (pcs);
+  return true;
+
+error:
+  free (pcs);
+  return false;
+}
int
cmd_ctables (struct lexer *lexer, struct dataset *ds)
goto error;
}
}
- /* XXX PCOMPUTE */
+ else if (lex_match_id (lexer, "PCOMPUTE"))
+ {
+ if (!ctables_parse_pcompute (lexer, ct))
+ goto error;
+ }
+ else if (lex_match_id (lexer, "PPROPERTIES"))
+ {
+ if (!ctables_parse_pproperties (lexer, ct))
+ goto error;
+ }
else if (lex_match_id (lexer, "WEIGHT"))
{
if (!lex_force_match_id (lexer, "VARIABLE"))