From 832d8443d561c668552cd23640d5bf99ae037a55 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Fri, 3 Jun 2022 21:14:42 -0700 Subject: [PATCH] CTABLES weird special formats --- src/language/lexer/format-parser.c | 22 +++--- src/language/lexer/format-parser.h | 6 +- src/language/stats/ctables.c | 108 ++++++++++++++++++++++++++--- tests/language/stats/ctables.at | 17 ++--- 4 files changed, 123 insertions(+), 30 deletions(-) diff --git a/src/language/lexer/format-parser.c b/src/language/lexer/format-parser.c index 85579a2ab2..aa30852d11 100644 --- a/src/language/lexer/format-parser.c +++ b/src/language/lexer/format-parser.c @@ -32,7 +32,16 @@ #include "gettext.h" #define _(msgid) gettext (msgid) -static bool +/* Parses a token taking the form of a format specifier and + returns true only if successful. Emits an error message on + failure. Stores a null-terminated string representing the + format type in TYPE, and the width and number of decimal + places in *WIDTH and *DECIMALS. + + TYPE is not checked as to whether it is really the name of a + format. Both width and decimals are considered optional. If + missing, *WIDTH or *DECIMALS or both will be set to 0. */ +bool parse_abstract_format_specifier__ (struct lexer *lexer, char type[FMT_TYPE_LEN_MAX + 1], uint16_t *width, uint8_t *decimals) @@ -79,15 +88,8 @@ error: return false; } -/* Parses a token taking the form of a format specifier and - returns true only if successful. Emits an error message on - failure. Stores a null-terminated string representing the - format type in TYPE, and the width and number of decimal - places in *WIDTH and *DECIMALS. - - TYPE is not checked as to whether it is really the name of a - format. Both width and decimals are considered optional. If - missing, *WIDTH or *DECIMALS or both will be set to 0. */ +/* Like parse_abstract_format_specifier__(), but additionally advances past + the token if successful. 
*/ bool parse_abstract_format_specifier (struct lexer *lexer, char type[FMT_TYPE_LEN_MAX + 1], diff --git a/src/language/lexer/format-parser.h b/src/language/lexer/format-parser.h index 7601bc46b2..6316e38762 100644 --- a/src/language/lexer/format-parser.h +++ b/src/language/lexer/format-parser.h @@ -21,14 +21,16 @@ #include "data/format.h" +struct fmt_spec; struct lexer; +bool parse_abstract_format_specifier__ (struct lexer *, + char type[FMT_TYPE_LEN_MAX + 1], + uint16_t *width, uint8_t *decimals); bool parse_abstract_format_specifier (struct lexer *, char type[FMT_TYPE_LEN_MAX + 1], uint16_t *width, uint8_t *decimals); -enum fmt_type ; -struct fmt_spec; bool parse_format_specifier (struct lexer *, struct fmt_spec *); bool parse_format_specifier_name (struct lexer *, enum fmt_type *type); diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c index ce49e59102..418496df92 100644 --- a/src/language/stats/ctables.c +++ b/src/language/stats/ctables.c @@ -21,6 +21,7 @@ #include "data/casereader.h" #include "data/casewriter.h" +#include "data/data-out.h" #include "data/dataset.h" #include "data/dictionary.h" #include "data/mrset.h" @@ -224,6 +225,14 @@ struct ctables const struct dictionary *dict; struct pivot_table_look *look; + /* CTABLES has a number of extra formats that we implement via custom + currency specifications on an alternate fmt_settings. */ +#define CTEF_NEGPAREN FMT_CCA +#define CTEF_NEQUAL FMT_CCB +#define CTEF_PAREN FMT_CCC +#define CTEF_PCTPAREN FMT_CCD + struct fmt_settings ctables_formats; + /* If this is NULL, zeros are displayed using the normal print format. Otherwise, this string is displayed. */ char *zero; @@ -686,7 +695,10 @@ struct ctables_summary_spec enum ctables_summary_function function; double percentile; /* CTSF_PTILE only. */ char *label; - struct fmt_spec format; /* XXX extra CTABLES formats */ + + struct fmt_spec format; + bool is_ctables_format; /* Is 'format' one of CTEF_*? 
*/ + size_t axis_idx; }; @@ -946,7 +958,8 @@ static bool add_summary_spec (struct ctables_axis *axis, enum ctables_summary_function function, double percentile, const char *label, const struct fmt_spec *format, - const struct msg_location *loc, enum ctables_summary_variant sv) + bool is_ctables_format, const struct msg_location *loc, + enum ctables_summary_variant sv) { if (axis->op == CTAO_VAR) { @@ -993,6 +1006,7 @@ add_summary_spec (struct ctables_axis *axis, .label = xstrdup (label), .format = (format ? *format : ctables_summary_default_format (function, &axis->var)), + .is_ctables_format = is_ctables_format, }; return true; } @@ -1000,7 +1014,7 @@ add_summary_spec (struct ctables_axis *axis, { for (size_t i = 0; i < 2; i++) if (!add_summary_spec (axis->subs[i], function, percentile, label, - format, loc, sv)) + format, is_ctables_format, loc, sv)) return false; return true; } @@ -1080,6 +1094,48 @@ has_digit (const char *s) return s[strcspn (s, "0123456789")] != '\0'; } +static bool +parse_ctables_format_specifier (struct lexer *lexer, struct fmt_spec *format, + bool *is_ctables_format) +{ + char type[FMT_TYPE_LEN_MAX + 1]; + if (!parse_abstract_format_specifier__ (lexer, type, &format->w, &format->d)) + return false; + + if (!strcasecmp (type, "NEGPAREN")) + format->type = CTEF_NEGPAREN; + else if (!strcasecmp (type, "NEQUAL")) + format->type = CTEF_NEQUAL; + else if (!strcasecmp (type, "PAREN")) + format->type = CTEF_PAREN; + else if (!strcasecmp (type, "PCTPAREN")) + format->type = CTEF_PCTPAREN; + else + { + *is_ctables_format = false; + return (parse_format_specifier (lexer, format) + && fmt_check_output (format) + && fmt_check_type_compat (format, VAL_NUMERIC)); + } + + if (format->w < 2) + { + msg (SE, _("Output format %s requires width 2 or greater."), type); + return false; + } + else if (format->d > format->w - 1) + { + msg (SE, _("Output format %s requires width greater than decimals."), + type); + return false; + } + else + { + *is_ctables_format = 
true; + return true; + } +} + static struct ctables_axis * ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) { @@ -1120,12 +1176,12 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) /* Parse format. */ struct fmt_spec format; const struct fmt_spec *formatp; + bool is_ctables_format = false; if (lex_token (ctx->lexer) == T_ID && has_digit (lex_tokcstr (ctx->lexer))) { - if (!parse_format_specifier (ctx->lexer, &format) - || !fmt_check_output (&format) - || !fmt_check_type_compat (&format, VAL_NUMERIC)) + if (!parse_ctables_format_specifier (ctx->lexer, &format, + &is_ctables_format)) { free (label); goto error; @@ -1137,7 +1193,8 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx) struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs, lex_ofs (ctx->lexer) - 1); - add_summary_spec (sub, function, percentile, label, formatp, loc, sv); + add_summary_spec (sub, function, percentile, label, formatp, + is_ctables_format, loc, sv); free (label); msg_location_destroy (loc); @@ -3633,6 +3690,14 @@ ctables_table_output (struct ctables *ct, struct ctables_table *t) value = pivot_value_new_user_text (ct->zero, SIZE_MAX); else if (d == SYSMIS && ct->missing) value = pivot_value_new_user_text (ct->missing, SIZE_MAX); + else if (specs->specs[j].is_ctables_format) + { + char *s = data_out_stretchy (&(union value) { .f = d }, + "UTF-8", + &specs->specs[j].format, + &ct->ctables_formats, NULL); + value = pivot_value_new_user_text_nocopy (s); + } else { value = pivot_value_new_number (d); @@ -4674,15 +4739,38 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds) for (size_t i = 0; i < n_vars; i++) vlabels[i] = (enum ctables_vlabel) tvars; + struct pivot_table_look *look = pivot_table_look_unshare ( + pivot_table_look_ref (pivot_table_look_get_default ())); + look->omit_empty = false; + struct ctables *ct = xmalloc (sizeof *ct); *ct = (struct ctables) { .dict = dataset_dict (ds), - .look = pivot_table_look_unshare 
(pivot_table_look_ref ( - pivot_table_look_get_default ())), + .look = look, + .ctables_formats = FMT_SETTINGS_INIT, .vlabels = vlabels, .postcomputes = HMAP_INITIALIZER (ct->postcomputes), }; - ct->look->omit_empty = false; + + struct ctf + { + enum fmt_type type; + const char *dot_string; + const char *comma_string; + }; + static const struct ctf ctfs[4] = { + { CTEF_NEGPAREN, "(,,,)", "(...)" }, + { CTEF_NEQUAL, "-,N=,,", "-.N=.." }, + { CTEF_PAREN, "-,(,),", "-.(.)." }, + { CTEF_PCTPAREN, "-,(,%),", "-.(.%)." }, + }; + bool is_dot = settings_get_fmt_settings ()->decimal == '.'; + for (size_t i = 0; i < 4; i++) + { + const char *s = is_dot ? ctfs[i].dot_string : ctfs[i].comma_string; + fmt_settings_set_cc (&ct->ctables_formats, ctfs[i].type, + fmt_number_style_from_string (s)); + } if (!lex_force_match (lexer, T_SLASH)) goto error; diff --git a/tests/language/stats/ctables.at b/tests/language/stats/ctables.at index 35d7c7ffbe..461a62220b 100644 --- a/tests/language/stats/ctables.at +++ b/tests/language/stats/ctables.at @@ -13,18 +13,11 @@ dnl * U-prefix for unweighted summaries. dnl * .LCL and .UCL suffixes. dnl * .SE suffixes. dnl * Separate summary functions for totals and subtotals. -dnl - Special formats for summary functions: NEGPAREN, NEQUAL, PAREN, PCTPAREN. dnl - CATEGORIES: -dnl * THRU -dnl * OTHERNM dnl * String values dnl * Date values dnl * Data-dependent sorting. dnl - TITLES: )DATE, )TIME, )TABLE. -dnl - FORMAT: -dnl * MINCOLWIDTH, MAXCOLWIDTH, UNITS. -dnl * EMPTY. -dnl * MISSING. dnl - SMISSING (see documentation). dnl - PCOMPUTE: dnl * multi-dimensional @@ -33,7 +26,6 @@ dnl * strings dnl - PPROPERTIES: dnl * )LABEL[N]. dnl * summary statistics and formats? -dnl - HIDESMALLCOUNTS. dnl - Are string ranges a thing? dnl dnl Features not yet tested: @@ -44,6 +36,15 @@ dnl - test CLABELS ROWLABELS=LAYER. dnl - Test VLABELS. dnl - Test WEIGHT and adjustment weights. dnl - Test PCOMPUTE and PPROPERTIES. 
+dnl - CATEGORIES: +dnl * THRU +dnl * OTHERNM +dnl - FORMAT: +dnl * MINCOLWIDTH, MAXCOLWIDTH, UNITS. +dnl * EMPTY. +dnl * MISSING. +dnl - HIDESMALLCOUNTS. +dnl - Special formats for summary functions: NEGPAREN, NEQUAL, PAREN, PCTPAREN. dnl dnl Not for v1: dnl - Multiple response sets -- 2.30.2