From: Ben Pfaff Date: Sun, 3 Jul 2022 05:44:25 +0000 (-0700) Subject: strings and dates work X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=dc64fab87d789cd1fc3556d8434bf9ca7728029c strings and dates work --- diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index 68239b85b9..4b6cb04900 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -21,6 +21,7 @@ #include "data/casereader.h" #include "data/casewriter.h" +#include "data/data-in.h" #include "data/data-out.h" #include "data/dataset.h" #include "data/dictionary.h" @@ -313,8 +314,8 @@ struct ctables_pcexpr /* CTPO_CAT_NUMBER. */ double number; - /* CTPO_CAT_STRING. */ - char *string; + /* CTPO_CAT_STRING, in dictionary encoding. */ + struct substring string; /* CTPO_CAT_RANGE. */ double range[2]; @@ -490,7 +491,7 @@ struct ctables_category union { double number; /* CCT_NUMBER. */ - char *string; /* CCT_STRING. In dictionary encoding. */ + struct substring string; /* CCT_STRING, in dictionary encoding. */ double range[2]; /* CCT_RANGE. */ struct @@ -535,7 +536,7 @@ ctables_category_uninit (struct ctables_category *cat) break; case CCT_STRING: - free (cat->string); + ss_dealloc (&cat->string); break; case CCT_SUBTOTAL: @@ -566,7 +567,7 @@ ctables_category_equal (const struct ctables_category *a, return a->number == b->number; case CCT_STRING: - return strcmp (a->string, b->string); + return ss_equals (a->string, b->string); case CCT_RANGE: return a->range[0] == b->range[0] && a->range[1] == b->range[1]; @@ -1438,7 +1439,9 @@ ctables_table_parse_subtotal (struct lexer *lexer, bool hide_subcategories, } static bool -ctables_table_parse_explicit_category (struct lexer *lexer, struct ctables *ct, +ctables_table_parse_explicit_category (struct lexer *lexer, + struct dictionary *dict, + struct ctables *ct, struct ctables_category *cat) { if (lex_match_id (lexer, "OTHERNM")) @@ -1480,10 +1483,11 @@ ctables_table_parse_explicit_category (struct lexer *lexer, struct ctables *ct, } else if (lex_is_string (lexer)) { - *cat = (struct ctables_category) { - .type = CCT_STRING, - .string = ss_xstrdup (lex_tokss (lexer)), - }; + struct substring s = recode_substring_pool ( + dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL); + ss_rtrim (&s, ss_cstr (" ")); + + *cat = (struct ctables_category) { .type = CCT_STRING, .string = s }; lex_get (lexer); } else if (lex_match (lexer, T_AND)) @@ -1530,7 +1534,7 @@ ctables_find_category_for_postcompute (const struct ctables_categories *cats, break; case CTPO_CAT_STRING: - if (cat->type == CCT_STRING && !strcmp (cat->string, e->string)) + if (cat->type == CCT_STRING && ss_equals (cat->string, e->string)) best = cat; break; @@ -1674,6 +1678,21 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, if (!parse_variables (lexer, dict, &vars, &n_vars, PV_NO_SCRATCH)) return false; + const struct fmt_spec *common_format = var_get_print_format (vars[0]); + for (size_t i = 1; i < n_vars; i++) + { + const struct fmt_spec *f = var_get_print_format (vars[i]); + if (f->type != common_format->type) + { + common_format = NULL; + break; + } + } + bool parse_strings + = (common_format + && (fmt_get_category (common_format->type) + & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT))); + struct ctables_categories *c = xmalloc (sizeof *c); *c = (struct ctables_categories) { .n_refs = n_vars, .show_empty = true }; for (size_t i = 0; i < n_vars; i++) @@ -1683,7 +1702,6 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, ctables_categories_unref (*cp); *cp = c; } - free (vars); size_t allocated_cats = 0; if (lex_match (lexer, T_LBRACK)) @@ -1696,7 +1714,7 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, int start_ofs = lex_ofs (lexer); struct ctables_category *cat = &c->cats[c->n_cats]; - if (!ctables_table_parse_explicit_category (lexer, ct, cat)) + if (!ctables_table_parse_explicit_category (lexer, dict, ct, cat)) return false; cat->location = lex_ofs_location (lexer, start_ofs, lex_ofs (lexer) - 1); c->n_cats++; @@ -1710,10 +1728,78 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict, for (size_t i = 0; i < c->n_cats; i++) { struct ctables_category *cat = &c->cats[i]; - if (cat->type == CCT_POSTCOMPUTE - && !ctables_recursive_check_postcompute (cat->pc->expr, cat, - c, cats_location)) - return false; + switch (cat->type) + { + case CCT_POSTCOMPUTE: + if (!ctables_recursive_check_postcompute (cat->pc->expr, cat, + c, cats_location)) + return false; + break; + + case CCT_NUMBER: + case CCT_RANGE: + for (size_t j = 0; j < n_vars; j++) + if (var_is_alpha (vars[j])) + { + msg_at (SE, cat->location, + _("This category specification may be applied " + "only to numeric variables, but this " + "subcommand tries to apply it to string " + "variable %s."), + var_get_name (vars[j])); + return false; + } + break; + + case CCT_STRING: + if (parse_strings) + { + union value v; + char *error = data_in (cat->string, dict_get_encoding (dict), + common_format->type, + settings_get_fmt_settings (), + &v, 0, NULL); + if (error) + { + msg_at (SE, cat->location, + _("Failed to parse category specification as " + "format %s: %s."), + fmt_name (common_format->type), error); + free (error); + return false; + } + + ss_dealloc (&cat->string); + + cat->type = CCT_NUMBER; + cat->number = v.f; + } + else + { + for (size_t j = 0; j < n_vars; j++) + if (var_is_numeric (vars[j])) + { + msg_at (SE, cat->location, + _("This category specification may be applied " + "only to string variables, but this " + "subcommand tries to apply it to numeric " + "variable %s."), + var_get_name (vars[j])); + return false; + } + } + break; + + case CCT_MISSING: + case CCT_OTHERNM: + case CCT_SUBTOTAL: + case CCT_TOTAL: + case CCT_VALUE: + case CCT_LABEL: + case CCT_FUNCTION: + case CCT_EXCLUDED_MISSING: + break; + } } } @@ -2729,7 +2815,18 @@ ctables_categories_match (const struct ctables_categories *c, break; case CCT_STRING: - NOT_REACHED (); + { + struct substring s = ss_buffer (CHAR_CAST (char *, v->s), + var_get_width (var)); + ss_rtrim (&s, ss_cstr (" ")); + printf ("%d '%.*s' ?=? '%.*s'\n", + var_get_width (var), + (int) cat->string.length, cat->string.string, + (int) s.length, s.string); + if (ss_equals (cat->string, s)) + return cat; + } + break; case CCT_RANGE: if ((cat->range[0] == -DBL_MAX || v->f >= cat->range[0]) @@ -3084,6 +3181,7 @@ ctables_cell_insert (struct ctables_section *s, if (var_missing) is_missing = true; + printf ("ctables_cell_insert %s: ", var_get_name (var)); cats[a][i] = ctables_categories_match ( s->table->categories[var_get_dict_index (var)], value, var); if (!cats[a][i]) @@ -4148,7 +4246,17 @@ ctables_add_category_occurrences (const struct variable *var, break; case CCT_STRING: - abort (); /* XXX */ + { + int width = var_get_width (var); + union value value; + value_init (&value, width); + value_copy_buf_rpad (&value, width, + CHAR_CAST (uint8_t *, c->string.string), + c->string.length, ' '); + ctables_add_occurrence (var, &value, occurrences); + value_destroy (&value, width); + } + break; case CCT_RANGE: assert (var_is_numeric (var)); @@ -4318,7 +4426,8 @@ ctables_execute (struct dataset *ds, struct ctables *ct) /* Postcomputes. */ -typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *); +typedef struct ctables_pcexpr *parse_recursively_func (struct lexer *, + struct dictionary *); static void ctables_pcexpr_destroy (struct ctables_pcexpr *e) @@ -4328,7 +4437,7 @@ ctables_pcexpr_destroy (struct ctables_pcexpr *e) switch (e->op) { case CTPO_CAT_STRING: - free (e->string); + ss_dealloc (&e->string); break; case CTPO_ADD: @@ -4393,7 +4502,7 @@ match_operator (struct lexer *lexer, const struct operator ops[], size_t n_ops) } static struct ctables_pcexpr * -parse_binary_operators__ (struct lexer *lexer, +parse_binary_operators__ (struct lexer *lexer, struct dictionary *dict, const struct operator ops[], size_t n_ops, parse_recursively_func *parse_next_level, const char *chain_warning, @@ -4410,7 +4519,7 @@ parse_binary_operators__ (struct lexer *lexer, return lhs; } - struct ctables_pcexpr *rhs = parse_next_level (lexer); + struct ctables_pcexpr *rhs = parse_next_level (lexer, dict); if (!rhs) { ctables_pcexpr_destroy (lhs); @@ -4422,20 +4531,20 @@ parse_binary_operators__ (struct lexer *lexer, } static struct ctables_pcexpr * -parse_binary_operators (struct lexer *lexer, +parse_binary_operators (struct lexer *lexer, struct dictionary *dict, const struct operator ops[], size_t n_ops, parse_recursively_func *parse_next_level, const char *chain_warning) { - struct ctables_pcexpr *lhs = parse_next_level (lexer); + struct ctables_pcexpr *lhs = parse_next_level (lexer, dict); if (!lhs) return NULL; - return parse_binary_operators__ (lexer, ops, n_ops, parse_next_level, + return parse_binary_operators__ (lexer, dict, ops, n_ops, parse_next_level, chain_warning, lhs); } -static struct ctables_pcexpr *parse_add (struct lexer *); +static struct ctables_pcexpr *parse_add (struct lexer *, struct dictionary *); static struct ctables_pcexpr ctpo_cat_range (double low, double high) @@ -4447,7 +4556,7 @@ ctpo_cat_range (double low, double high) } static struct ctables_pcexpr * -parse_primary (struct lexer *lexer) +parse_primary (struct lexer *lexer, struct dictionary *dict) { int start_ofs = lex_ofs (lexer); struct ctables_pcexpr e; @@ -4509,10 +4618,11 @@ parse_primary (struct lexer *lexer) } else if (lex_is_string (lexer)) { - e = (struct ctables_pcexpr) { - .op = CTPO_CAT_STRING, - .string = ss_xstrdup (lex_tokss (lexer)), - }; + struct substring s = recode_substring_pool ( + dict_get_encoding (dict), "UTF-8", lex_tokss (lexer), NULL); + ss_rtrim (&s, ss_cstr (" ")); + + e = (struct ctables_pcexpr) { .op = CTPO_CAT_STRING, .string = s }; lex_get (lexer); } else @@ -4524,13 +4634,13 @@ parse_primary (struct lexer *lexer) if (!lex_force_match (lexer, T_RBRACK)) { if (e.op == CTPO_CAT_STRING) - free (e.string); + ss_dealloc (&e.string); return NULL; } } else if (lex_match (lexer, T_LPAREN)) { - struct ctables_pcexpr *ep = parse_add (lexer); + struct ctables_pcexpr *ep = parse_add (lexer, dict); if (!ep) return NULL; if (!lex_force_match (lexer, T_RPAREN)) @@ -4564,7 +4674,7 @@ ctables_pcexpr_allocate_neg (struct ctables_pcexpr *sub, } static struct ctables_pcexpr * -parse_exp (struct lexer *lexer) +parse_exp (struct lexer *lexer, struct dictionary *dict) { static const struct operator op = { T_EXP, CTPO_POW }; @@ -4574,7 +4684,7 @@ parse_exp (struct lexer *lexer) "To disable this warning, insert parentheses."); if (lex_token (lexer) != T_NEG_NUM || lex_next_token (lexer, 1) != T_EXP) - return parse_binary_operators (lexer, &op, 1, + return parse_binary_operators (lexer, dict, &op, 1, parse_primary, chain_warning); /* Special case for situations like "-5**6", which must be parsed as @@ -4590,7 +4700,7 @@ parse_exp (struct lexer *lexer) lex_get (lexer); struct ctables_pcexpr *node = parse_binary_operators__ ( - lexer, &op, 1, parse_primary, chain_warning, lhs); + lexer, dict, &op, 1, parse_primary, chain_warning, lhs); if (!node) return NULL; @@ -4599,13 +4709,13 @@ parse_exp (struct lexer *lexer) /* Parses the unary minus level. */ static struct ctables_pcexpr * -parse_neg (struct lexer *lexer) +parse_neg (struct lexer *lexer, struct dictionary *dict) { int start_ofs = lex_ofs (lexer); if (!lex_match (lexer, T_DASH)) - return parse_exp (lexer); + return parse_exp (lexer, dict); - struct ctables_pcexpr *inner = parse_neg (lexer); + struct ctables_pcexpr *inner = parse_neg (lexer, dict); if (!inner) return NULL; @@ -4614,7 +4724,7 @@ parse_neg (struct lexer *lexer) /* Parses the multiplication and division level. */ static struct ctables_pcexpr * -parse_mul (struct lexer *lexer) +parse_mul (struct lexer *lexer, struct dictionary *dict) { static const struct operator ops[] = { @@ -4622,13 +4732,13 @@ parse_mul (struct lexer *lexer) { T_SLASH, CTPO_DIV }, }; - return parse_binary_operators (lexer, ops, sizeof ops / sizeof *ops, + return parse_binary_operators (lexer, dict, ops, sizeof ops / sizeof *ops, parse_neg, NULL); } /* Parses the addition and subtraction level. */ static struct ctables_pcexpr * -parse_add (struct lexer *lexer) +parse_add (struct lexer *lexer, struct dictionary *dict) { static const struct operator ops[] = { @@ -4637,7 +4747,7 @@ parse_add (struct lexer *lexer) { T_NEG_NUM, CTPO_ADD }, }; - return parse_binary_operators (lexer, ops, sizeof ops / sizeof *ops, + return parse_binary_operators (lexer, dict, ops, sizeof ops / sizeof *ops, parse_mul, NULL); } @@ -4653,7 +4763,8 @@ ctables_find_postcompute (struct ctables *ct, const char *name) } static bool -ctables_parse_pcompute (struct lexer *lexer, struct ctables *ct) +ctables_parse_pcompute (struct lexer *lexer, struct dictionary *dict, + struct ctables *ct) { int pcompute_start = lex_ofs (lexer) - 1; @@ -4672,7 +4783,7 @@ ctables_parse_pcompute (struct lexer *lexer, struct ctables *ct) } int expr_start = lex_ofs (lexer); - struct ctables_pcexpr *expr = parse_add (lexer); + struct ctables_pcexpr *expr = parse_add (lexer, dict); int expr_end = lex_ofs (lexer) - 1; if (!expr || !lex_force_match (lexer, T_RPAREN)) { @@ -5041,7 +5152,7 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) } else if (lex_match_id (lexer, "PCOMPUTE")) { - if (!ctables_parse_pcompute (lexer, ct)) + if (!ctables_parse_pcompute (lexer, dataset_dict (ds), ct)) goto error; } else if (lex_match_id (lexer, "PPROPERTIES")) diff --git a/tests/language/stats/ctables.at b/tests/language/stats/ctables.at index 1384710f8e..d9dfa86c29 100644 --- a/tests/language/stats/ctables.at +++ b/tests/language/stats/ctables.at @@ -2,7 +2,6 @@ AT_BANNER([CTABLES]) dnl Features not yet implemented: dnl -dnl - Date/time variables and values dnl - SPLIT FILE with SEPARATE splits dnl - Definition of columns/rows when labels are rotated from one axis to another. dnl - Preprocessing to distinguish categorical from scale. @@ -44,6 +43,7 @@ dnl * MINCOLWIDTH, MAXCOLWIDTH, UNITS. dnl * EMPTY. dnl * MISSING. dnl - HIDESMALLCOUNTS. +dnl - Date/time variables and values dnl - Special formats for summary functions: NEGPAREN, NEQUAL, PAREN, PCTPAREN. dnl dnl Not for v1: