From: Ben Pfaff Date: Sat, 18 Jun 2022 23:29:17 +0000 (-0700) Subject: Implement SET LEADZERO. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=bef05451ef0f1a79d5427d5d4701b2744824c0b4 Implement SET LEADZERO. --- diff --git a/NEWS b/NEWS index e0c7eeca62..253169fb23 100644 --- a/NEWS +++ b/NEWS @@ -5,6 +5,11 @@ See the end for copying conditions. Please send PSPP bug reports to bug-gnu-pspp@gnu.org. +Changes from 1.6.0 to 1.6.1: + + * The SET command now supports LEADZERO for controlling output of a + leading zero in F, COMMA, and DOT format. + Changes from 1.4.1 to 1.6.0: * In the Kruskal-Wallis test, a misleading result could occur diff --git a/doc/dev/spv-file-format.texi b/doc/dev/spv-file-format.texi index d5c93c4348..fb2c1eb164 100644 --- a/doc/dev/spv-file-format.texi +++ b/doc/dev/spv-file-format.texi @@ -1446,7 +1446,7 @@ X0 => byte*14 Y1 Y2 Y1 => string[command] string[command-local] string[language] string[charset] string[locale] - bool bool bool bool + bool[x10] bool[include-leading-zero] bool[x12] bool[x13] Y0 Y2 => CustomCurrency byte[missing] bool[x17] @end example @@ -1458,10 +1458,15 @@ Tests.'' @code{command-local} is the procedure's name, translated into the output language; it is often empty and, when it is not, sometimes the same as @code{command}. +@code{include-leading-zero} is the @code{LEADZERO} setting for the +table, where false is @code{OFF} (the default) and true is @code{ON}. +@xref{SET LEADZERO,,, pspp, PSPP}. + @code{missing} is the character used to indicate that a cell contains a missing value. It is always observed as @samp{.}. -A writer may safely use false for @code{x17}. +A writer may safely use false for @code{x10} and @code{x17} and true +for @code{x12} and @code{x13}. @subsubheading X1 diff --git a/doc/language.texi b/doc/language.texi index 71dd6a5fb7..8b8b7fa118 100644 --- a/doc/language.texi +++ b/doc/language.texi @@ -796,8 +796,10 @@ would not fit at all without it. 
Scientific notation with @samp{$} or @item Except in scientific notation, a decimal point is included only when it is followed by a digit. If the integer part of the number being -output is 0, and a decimal point is included, then the zero before the -decimal point is dropped. +output is 0, and a decimal point is included, then @pspp{} ordinarily +drops the zero before the decimal point. However, in @code{F}, +@code{COMMA}, or @code{DOT} formats, @pspp{} keeps the zero if +@code{SET LEADZERO} is set to @code{ON} (@pxref{SET LEADZERO}). In scientific notation, the number always includes a decimal point, even if it is not followed by a digit. diff --git a/doc/utilities.texi b/doc/utilities.texi index c20410384e..dcbfde1481 100644 --- a/doc/utilities.texi +++ b/doc/utilities.texi @@ -506,6 +506,7 @@ SET /CC@{A,B,C,D,E@}=@{'@var{npre},@var{pre},@var{suf},@var{nsuf}','@var{npre}.@var{pre}.@var{suf}.@var{nsuf}'@} /DECIMAL=@{DOT,COMMA@} /FORMAT=@var{fmt_spec} + /LEADZERO=@{ON,OFF@} /MDISPLAY=@{TEXT,TABLES@} /SMALL=@var{number} /WIB=@{NATIVE,MSBFIRST,LSBFIRST,VAX@} @@ -747,6 +748,15 @@ The default @subcmd{DOT} setting causes the decimal point character to be Allows the default numeric input/output format to be specified. The default is F8.2. @xref{Input and Output Formats}. +@item LEADZERO +@anchor{SET LEADZERO} + +Controls whether numbers with magnitude less than one are displayed +with a zero before the decimal point. For example, with @code{SET +LEADZERO=OFF}, which is the default, one-half is shown as .5, and +with @code{SET LEADZERO=ON}, it is shown as 0.5. This setting affects +only the @code{F}, @code{COMMA}, and @code{DOT} formats. + @item MDISPLAY @anchor{SET MDISPLAY} diff --git a/src/data/data-out.c b/src/data/data-out.c index 37a25a2672..cda2b75fb0 100644 --- a/src/data/data-out.c +++ b/src/data/data-out.c @@ -58,7 +58,8 @@ struct rounder bool negative; /* Is the number negative? 
*/ }; -static void rounder_init (struct rounder *, double number, int max_decimals); +static void rounder_init (struct rounder *, const struct fmt_number_style *, + double number, int max_decimals); static int rounder_width (const struct rounder *, int decimals, int *integer_digits, bool *negative); static void rounder_format (const struct rounder *, int decimals, @@ -72,10 +73,10 @@ typedef void data_out_converter_func (const union value *, #include "format.def" static bool output_decimal (const struct rounder *, const struct fmt_spec *, - const struct fmt_settings *, bool require_affixes, - char *); + const struct fmt_number_style *, + bool require_affixes, char *); static bool output_scientific (double, const struct fmt_spec *, - const struct fmt_settings *, + const struct fmt_number_style *, bool require_affixes, char *); static double power10 (int) PURE_FUNCTION; @@ -254,18 +255,21 @@ output_number (const union value *input, const struct fmt_spec *format, output_infinite (number, format, output); else { + const struct fmt_number_style *style = + fmt_settings_get_style (settings, format->type); + if (format->type != FMT_E && fabs (number) < 1.5 * power10 (format->w)) { struct rounder r; - rounder_init (&r, number, format->d); + rounder_init (&r, style, number, format->d); - if (output_decimal (&r, format, settings, true, output) - || output_scientific (number, format, settings, true, output) - || output_decimal (&r, format, settings, false, output)) + if (output_decimal (&r, format, style, true, output) + || output_scientific (number, format, style, true, output) + || output_decimal (&r, format, style, false, output)) return; } - if (!output_scientific (number, format, settings, false, output)) + if (!output_scientific (number, format, style, false, output)) output_overflow (format, output); } } @@ -651,19 +655,16 @@ allocate_space (int request, int max_width, int *width) } /* Tries to compose the number represented by R, in the style of - FORMAT, into OUTPUT. 
Returns true if successful, false on - failure, which occurs if FORMAT's width is too narrow. If + FORMAT and STYLE, into OUTPUT. Returns true if successful, false on + failure, which occurs if FORMAT's width is too narrow. If REQUIRE_AFFIXES is true, then the prefix and suffix specified by FORMAT's style must be included; otherwise, they may be omitted to make the number fit. */ static bool output_decimal (const struct rounder *r, const struct fmt_spec *format, - const struct fmt_settings *settings, bool require_affixes, + const struct fmt_number_style *style, bool require_affixes, char *output) { - const struct fmt_number_style *style = - fmt_settings_get_style (settings, format->type); - int decimals; for (decimals = format->d; decimals >= 0; decimals--) @@ -760,15 +761,13 @@ output_decimal (const struct rounder *r, const struct fmt_spec *format, return false; } -/* Formats NUMBER into OUTPUT in scientific notation according to - the style of the format specified in FORMAT. */ +/* Formats NUMBER into OUTPUT in scientific notation according to FORMAT and + STYLE. */ static bool output_scientific (double number, const struct fmt_spec *format, - const struct fmt_settings *settings, + const struct fmt_number_style *style, bool require_affixes, char *output) { - const struct fmt_number_style *style = - fmt_settings_get_style (settings, format->type); int width; int fraction_width; bool add_affixes; @@ -853,10 +852,11 @@ should_round_up (const struct rounder *r, int decimals) return digit >= '5'; } -/* Initializes R for formatting the magnitude of NUMBER to no +/* Initializes R for formatting the magnitude of NUMBER with STYLE to no more than MAX_DECIMAL decimal places. 
*/ static void -rounder_init (struct rounder *r, double number, int max_decimals) +rounder_init (struct rounder *r, const struct fmt_number_style *style, + double number, int max_decimals) { assert (fabs (number) < 1e41); assert (max_decimals >= 0 && max_decimals <= 16); @@ -905,7 +905,7 @@ rounder_init (struct rounder *r, double number, int max_decimals) } } - if (r->string[0] == '0') + if (r->string[0] == '0' && !style->include_leading_zero) memmove (r->string, &r->string[1], strlen (r->string)); r->leading_zeros = strspn (r->string, "0."); diff --git a/src/data/format.c b/src/data/format.c index 79c6240a91..17afda6162 100644 --- a/src/data/format.c +++ b/src/data/format.c @@ -101,26 +101,38 @@ fmt_settings_get_style (const struct fmt_settings *settings, #define OPPOSITE(C) ((C) == ',' ? '.' : ',') #define AFFIX(S) { .s = (char *) (S), .width = sizeof (S) - 1 } -#define NS(PREFIX, SUFFIX, DECIMAL, GROUPING) { \ +#define NS(PREFIX, SUFFIX, DECIMAL, GROUPING, INCLUDE_LEADING_ZERO) { \ .neg_prefix = AFFIX ("-"), \ .prefix = AFFIX (PREFIX), \ .suffix = AFFIX (SUFFIX), \ .neg_suffix = AFFIX (""), \ .decimal = DECIMAL, \ .grouping = GROUPING, \ + .include_leading_zero = INCLUDE_LEADING_ZERO \ } -#define ANS(DECIMAL, GROUPING) { \ - [FMT_F] = NS( "", "", DECIMAL, 0), \ - [FMT_E] = NS( "", "", DECIMAL, 0), \ - [FMT_COMMA] = NS( "", "", DECIMAL, GROUPING), \ - [FMT_DOT] = NS( "", "", GROUPING, DECIMAL), \ - [FMT_DOLLAR] = NS("$", "", DECIMAL, GROUPING), \ - [FMT_PCT] = NS( "", "%", DECIMAL, 0), \ +#define ANS(DECIMAL, GROUPING, INCLUDE_LEADING_ZERO) { \ + [FMT_F] = NS( "", "", DECIMAL, 0, INCLUDE_LEADING_ZERO), \ + [FMT_E] = NS( "", "", DECIMAL, 0, INCLUDE_LEADING_ZERO), \ + [FMT_COMMA] = NS( "", "", DECIMAL, GROUPING, INCLUDE_LEADING_ZERO), \ + [FMT_DOT] = NS( "", "", GROUPING, DECIMAL, INCLUDE_LEADING_ZERO), \ + [FMT_DOLLAR] = NS("$", "", DECIMAL, GROUPING, false), \ + [FMT_PCT] = NS( "", "%", DECIMAL, 0, false), \ } +#define ANS2(DECIMAL, GROUPING) { \ + ANS(DECIMAL, 
GROUPING, false), \ + ANS(DECIMAL, GROUPING, true), \ + } + + /* First index: 0 for ',' decimal point, 1 for '.' decimal point. + Second index: 0 for no leading zero, 1 for leading zero. + Third index: TYPE. + */ + static const struct fmt_number_style styles[2][2][6] = { + ANS2 (',', '.'), + ANS2 ('.', ','), + }; - static const struct fmt_number_style period_styles[6] = ANS ('.', ','); - static const struct fmt_number_style comma_styles[6] = ANS (',', '.'); - static const struct fmt_number_style default_style = NS ("", "", '.', 0); + static const struct fmt_number_style default_style = NS ("", "", '.', 0, false); switch (type) { @@ -130,9 +142,11 @@ fmt_settings_get_style (const struct fmt_settings *settings, case FMT_DOLLAR: case FMT_PCT: case FMT_E: - return (settings->decimal == '.' - ? &period_styles[type] - : &comma_styles[type]); + { + int decimal_idx = settings->decimal == '.'; + int leadzero_idx = settings->include_leading_zero; + return &styles[decimal_idx][leadzero_idx][type]; + } case FMT_CCA: case FMT_CCB: @@ -1298,6 +1312,7 @@ fmt_number_style_from_string (const char *s) .neg_suffix = neg_suffix, .decimal = grouping == '.' ? ',' : '.', .grouping = grouping, + .include_leading_zero = false, .extra_bytes = extra_bytes, }; return style; diff --git a/src/data/format.h b/src/data/format.h index d6779d4b3f..8c54ba3a17 100644 --- a/src/data/format.h +++ b/src/data/format.h @@ -164,6 +164,7 @@ struct fmt_number_style struct fmt_affix neg_suffix; /* Negative suffix. */ char decimal; /* Decimal point: '.' or ','. */ char grouping; /* Grouping character: ',', '.', or 0. */ + bool include_leading_zero; /* Format as ".5" or "0.5"? */ /* A fmt_affix may require more bytes than its display width; for example, U+00A5 (¥) is 2 bytes in UTF-8 but occupies only one display column. @@ -192,6 +193,11 @@ struct fmt_settings { int epoch; /* 0 for default epoch. */ char decimal; /* '.' or ','. */ + + /* Format F, E, COMMA, and DOT with leading zero (e.g. 
"0.5" instead of + ".5")? */ + bool include_leading_zero; + struct fmt_number_style *ccs[FMT_N_CCS]; /* CCA through CCE. */ }; #define FMT_SETTINGS_INIT { .decimal = '.' } diff --git a/src/data/settings.c b/src/data/settings.c index 4bc36494fd..7d7934af92 100644 --- a/src/data/settings.c +++ b/src/data/settings.c @@ -615,6 +615,12 @@ settings_set_decimal_char (char decimal) the_settings.styles.decimal = decimal; } +void +settings_set_include_leading_zero (bool include_leading_zero) +{ + the_settings.styles.include_leading_zero = include_leading_zero; +} + const struct fmt_settings * settings_get_fmt_settings (void) { diff --git a/src/data/settings.h b/src/data/settings.h index 9f6b94a888..34f4b8519f 100644 --- a/src/data/settings.h +++ b/src/data/settings.h @@ -155,6 +155,7 @@ enum fmt_type; bool settings_set_cc (const char *cc_string, enum fmt_type type); void settings_set_decimal_char (char decimal); +void settings_set_include_leading_zero (bool include_leading_zero); const struct fmt_settings *settings_get_fmt_settings (void); diff --git a/src/language/utilities/set.c b/src/language/utilities/set.c index 08675179ab..e7c17765d6 100644 --- a/src/language/utilities/set.c +++ b/src/language/utilities/set.c @@ -617,6 +617,22 @@ show_JOURNAL (const struct dataset *ds UNUSED) : xstrdup (enabled)); } +static bool +parse_LEADZERO (struct lexer *lexer) +{ + int leadzero = force_parse_bool (lexer); + if (leadzero != -1) + settings_set_include_leading_zero (leadzero); + return leadzero != -1; +} + +static char * +show_LEADZERO (const struct dataset *ds UNUSED) +{ + bool leadzero = settings_get_fmt_settings ()->include_leading_zero; + return xstrdup (leadzero ? 
"ON" : "OFF"); +} + static bool parse_LENGTH (struct lexer *lexer) { @@ -1218,6 +1234,7 @@ static const struct setting settings[] = { { "HEADER", parse_HEADER, NULL }, { "INCLUDE", parse_INCLUDE, show_INCLUDE }, { "JOURNAL", parse_JOURNAL, show_JOURNAL }, + { "LEADZERO", parse_LEADZERO, show_LEADZERO }, { "LENGTH", parse_LENGTH, show_LENGTH }, { "LOCALE", parse_LOCALE, show_LOCALE }, { "MDISPLAY", parse_MDISPLAY, show_MDISPLAY }, diff --git a/src/output/spv/light-binary.grammar b/src/output/spv/light-binary.grammar index 05ba2a2d90..816968fdd0 100644 --- a/src/output/spv/light-binary.grammar +++ b/src/output/spv/light-binary.grammar @@ -121,7 +121,7 @@ X0 => byte*14 Y1 Y2 Y1 => string[command] string[command-local] string[language] string[charset] string[locale] - bool[x10] bool[x11] bool[x12] bool[x13] + bool[x10] bool[include-leading-zero] bool[x12] bool[x13] Y0 Y2 => CustomCurrency byte[missing] bool[x17] diff --git a/src/output/spv/spv-light-decoder.c b/src/output/spv/spv-light-decoder.c index f8c4f618b2..7bc3b1fb27 100644 --- a/src/output/spv/spv-light-decoder.c +++ b/src/output/spv/spv-light-decoder.c @@ -939,6 +939,8 @@ decode_spvlb_table (const struct spvlb_table *in, struct pivot_table **outp) /* XXX warn if parsing fails */ } } + if (y1) + out->settings.include_leading_zero = y1->include_leading_zero; out->small = in->formats->x3 ? in->formats->x3->small : 0; /* Command information. 
*/ diff --git a/src/output/spv/spv-writer.c b/src/output/spv/spv-writer.c index bb37a05e24..3d56429d66 100644 --- a/src/output/spv/spv-writer.c +++ b/src/output/spv/spv-writer.c @@ -799,7 +799,10 @@ put_y1 (struct buf *buf, const struct pivot_table *table) put_string (buf, table->language); put_string (buf, "UTF-8"); /* XXX */ put_string (buf, table->locale); - put_bytes (buf, "\0\0\1\1", 4); + put_bool (buf, false); /* x10 */ + put_bool (buf, table->settings.include_leading_zero); + put_bool (buf, true); /* x12 */ + put_bool (buf, true); /* x13 */ put_y0 (buf, table); } diff --git a/tests/data/data-out.at b/tests/data/data-out.at index 9ed4a980d0..a96bf32c78 100644 --- a/tests/data/data-out.at +++ b/tests/data/data-out.at @@ -247,6 +247,49 @@ sys.exit(1 if errors else 0) AT_CHECK([$PYTHON3 num-out-compare.py $PSPP_NUM_OUT_COMPARE_FLAGS expout.inexact output.inexact]) AT_CLEANUP +AT_SETUP([leading zeros in numeric output]) +AT_KEYWORDS([data-out LEADZERO]) +AT_DATA([data-out.sps], [dnl +DATA LIST LIST NOTABLE/x. +BEGIN DATA. +0.5 +0.99 +0.01 +0 +-0 +-0.5 +-0.99 +-0.01 +END DATA. + +PRINT/x (F5.2) x (F5.1). +EXECUTE. + +SET LEADZERO=ON. +PRINT/x (F5.2) x (F5.1). +EXECUTE. +]) +AT_CHECK([pspp -O format=csv data-out.sps], [0], [dnl +.50 .5 +.99 1.0 +.01 .0 +.00 .0 +.00 .0 +-.50 -.5 +-.99 -1.0 +-.01 .0 + +0.50 0.5 +0.99 1.0 +0.01 0.0 +0.00 0.0 +0.00 0.0 +-0.50 -0.5 +-0.99 -1.0 +-0.01 0.0 +]) +AT_CLEANUP + AT_SETUP([non-ASCII custom currency formats]) AT_KEYWORDS([data-out]) AT_DATA([data-out.sps], [dnl