From: Ben Pfaff <blp@cs.stanford.edu>
Date: Sat, 18 Jun 2022 23:29:17 +0000 (-0700)
Subject: Implement SET LEADZERO.
X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=bef05451ef0f1a79d5427d5d4701b2744824c0b4

Implement SET LEADZERO.
---

diff --git a/NEWS b/NEWS
index e0c7eeca62..253169fb23 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,11 @@ See the end for copying conditions.
 
 Please send PSPP bug reports to bug-gnu-pspp@gnu.org.
 
+Changes from 1.6.0 to 1.6.1:
+
+ * The SET command now supports LEADZERO for controlling output of a
+   leading zero in F, COMMA, and DOT formats.
+
 Changes from 1.4.1 to 1.6.0:
 
  * In the Kruskal-Wallis test, a misleading result could occur
diff --git a/doc/dev/spv-file-format.texi b/doc/dev/spv-file-format.texi
index d5c93c4348..fb2c1eb164 100644
--- a/doc/dev/spv-file-format.texi
+++ b/doc/dev/spv-file-format.texi
@@ -1446,7 +1446,7 @@ X0 => byte*14 Y1 Y2
 Y1 =>
     string[command] string[command-local]
     string[language] string[charset] string[locale]
-    bool bool bool bool
+    bool[x10] bool[include-leading-zero] bool[x12] bool[x13]
     Y0
 Y2 => CustomCurrency byte[missing] bool[x17]
 @end example
@@ -1458,10 +1458,15 @@ Tests.''  @code{command-local} is the procedure's name, translated
 into the output language; it is often empty and, when it is not,
 sometimes the same as @code{command}.
 
+@code{include-leading-zero} is the @code{LEADZERO} setting for the
+table, where false is @code{OFF} (the default) and true is @code{ON}.
+@xref{SET LEADZERO,,, pspp, PSPP}.
+
 @code{missing} is the character used to indicate that a cell contains
 a missing value.  It is always observed as @samp{.}.
 
-A writer may safely use false for @code{x17}.
+A writer may safely use false for @code{x10} and @code{x17} and true
+for @code{x12} and @code{x13}.
 
 @subsubheading X1
 
diff --git a/doc/language.texi b/doc/language.texi
index 71dd6a5fb7..8b8b7fa118 100644
--- a/doc/language.texi
+++ b/doc/language.texi
@@ -796,8 +796,10 @@ would not fit at all without it.  Scientific notation with @samp{$} or
 @item
 Except in scientific notation, a decimal point is included only when
 it is followed by a digit.  If the integer part of the number being
-output is 0, and a decimal point is included, then the zero before the
-decimal point is dropped.
+output is 0, and a decimal point is included, then @pspp{} ordinarily
+drops the zero before the decimal point.  However, in @code{F},
+@code{COMMA}, or @code{DOT} formats, @pspp{} keeps the zero if
+@code{SET LEADZERO} is set to @code{ON} (@pxref{SET LEADZERO}).
 
 In scientific notation, the number always includes a decimal point,
 even if it is not followed by a digit.
diff --git a/doc/utilities.texi b/doc/utilities.texi
index c20410384e..dcbfde1481 100644
--- a/doc/utilities.texi
+++ b/doc/utilities.texi
@@ -506,6 +506,7 @@ SET
         /CC@{A,B,C,D,E@}=@{'@var{npre},@var{pre},@var{suf},@var{nsuf}','@var{npre}.@var{pre}.@var{suf}.@var{nsuf}'@}
         /DECIMAL=@{DOT,COMMA@}
         /FORMAT=@var{fmt_spec}
+        /LEADZERO=@{ON,OFF@}
         /MDISPLAY=@{TEXT,TABLES@}
         /SMALL=@var{number}
         /WIB=@{NATIVE,MSBFIRST,LSBFIRST,VAX@}
@@ -747,6 +748,15 @@ The default @subcmd{DOT} setting causes the decimal point character to be
 Allows the default numeric input/output format to be specified.  The
 default is F8.2.  @xref{Input and Output Formats}.
 
+@item LEADZERO
+@anchor{SET LEADZERO}
+
+Controls whether numbers with magnitude less than one are displayed
+with a zero before the decimal point.  For example, with @code{SET
+LEADZERO=OFF}, which is the default, one-half is shown as .5, and
+with @code{SET LEADZERO=ON}, it is shown as 0.5.  This setting affects
+only the @code{F}, @code{COMMA}, and @code{DOT} formats.
+
 @item MDISPLAY
 @anchor{SET MDISPLAY}
 
diff --git a/src/data/data-out.c b/src/data/data-out.c
index 37a25a2672..cda2b75fb0 100644
--- a/src/data/data-out.c
+++ b/src/data/data-out.c
@@ -58,7 +58,8 @@ struct rounder
     bool negative;      /* Is the number negative? */
   };
 
-static void rounder_init (struct rounder *, double number, int max_decimals);
+static void rounder_init (struct rounder *, const struct fmt_number_style *,
+                          double number, int max_decimals);
 static int rounder_width (const struct rounder *, int decimals,
                           int *integer_digits, bool *negative);
 static void rounder_format (const struct rounder *, int decimals,
@@ -72,10 +73,10 @@ typedef void data_out_converter_func (const union value *,
 #include "format.def"
 
 static bool output_decimal (const struct rounder *, const struct fmt_spec *,
-                            const struct fmt_settings *, bool require_affixes,
-                            char *);
+                            const struct fmt_number_style *,
+                            bool require_affixes, char *);
 static bool output_scientific (double, const struct fmt_spec *,
-                               const struct fmt_settings *,
+                               const struct fmt_number_style *,
                                bool require_affixes, char *);
 
 static double power10 (int) PURE_FUNCTION;
@@ -254,18 +255,21 @@ output_number (const union value *input, const struct fmt_spec *format,
     output_infinite (number, format, output);
   else
     {
+      const struct fmt_number_style *style =
+        fmt_settings_get_style (settings, format->type);
+
       if (format->type != FMT_E && fabs (number) < 1.5 * power10 (format->w))
         {
           struct rounder r;
-          rounder_init (&r, number, format->d);
+          rounder_init (&r, style, number, format->d);
 
-          if (output_decimal (&r, format, settings, true, output)
-              || output_scientific (number, format, settings, true, output)
-              || output_decimal (&r, format, settings, false, output))
+          if (output_decimal (&r, format, style, true, output)
+              || output_scientific (number, format, style, true, output)
+              || output_decimal (&r, format, style, false, output))
             return;
         }
 
-      if (!output_scientific (number, format, settings, false, output))
+      if (!output_scientific (number, format, style, false, output))
         output_overflow (format, output);
     }
 }
@@ -651,19 +655,16 @@ allocate_space (int request, int max_width, int *width)
 }
 
 /* Tries to compose the number represented by R, in the style of
-   FORMAT, into OUTPUT.  Returns true if successful, false on
-   failure, which occurs if FORMAT's width is too narrow.  If
+   FORMAT and STYLE, into OUTPUT.  Returns true if successful, false on
+   failure, which occurs if FORMAT's width is too narrow.  If
    REQUIRE_AFFIXES is true, then the prefix and suffix specified
    by FORMAT's style must be included; otherwise, they may be
    omitted to make the number fit. */
 static bool
 output_decimal (const struct rounder *r, const struct fmt_spec *format,
-                const struct fmt_settings *settings, bool require_affixes,
+                const struct fmt_number_style *style, bool require_affixes,
                 char *output)
 {
-  const struct fmt_number_style *style =
-    fmt_settings_get_style (settings, format->type);
-
   int decimals;
 
   for (decimals = format->d; decimals >= 0; decimals--)
@@ -760,15 +761,13 @@ output_decimal (const struct rounder *r, const struct fmt_spec *format,
   return false;
 }
 
-/* Formats NUMBER into OUTPUT in scientific notation according to
-   the style of the format specified in FORMAT. */
+/* Formats NUMBER into OUTPUT in scientific notation according to FORMAT and
+   STYLE. */
 static bool
 output_scientific (double number, const struct fmt_spec *format,
-                   const struct fmt_settings *settings,
+                   const struct fmt_number_style *style,
                    bool require_affixes, char *output)
 {
-  const struct fmt_number_style *style =
-    fmt_settings_get_style (settings, format->type);
   int width;
   int fraction_width;
   bool add_affixes;
@@ -853,10 +852,11 @@ should_round_up (const struct rounder *r, int decimals)
   return digit >= '5';
 }
 
-/* Initializes R for formatting the magnitude of NUMBER to no
+/* Initializes R for formatting the magnitude of NUMBER with STYLE to no
    more than MAX_DECIMAL decimal places. */
 static void
-rounder_init (struct rounder *r, double number, int max_decimals)
+rounder_init (struct rounder *r, const struct fmt_number_style *style,
+              double number, int max_decimals)
 {
   assert (fabs (number) < 1e41);
   assert (max_decimals >= 0 && max_decimals <= 16);
@@ -905,7 +905,7 @@ rounder_init (struct rounder *r, double number, int max_decimals)
         }
     }
 
-  if (r->string[0] == '0')
+  if (r->string[0] == '0' && !style->include_leading_zero)
     memmove (r->string, &r->string[1], strlen (r->string));
 
   r->leading_zeros = strspn (r->string, "0.");
diff --git a/src/data/format.c b/src/data/format.c
index 79c6240a91..17afda6162 100644
--- a/src/data/format.c
+++ b/src/data/format.c
@@ -101,26 +101,38 @@ fmt_settings_get_style (const struct fmt_settings *settings,
 
 #define OPPOSITE(C) ((C) == ',' ? '.' : ',')
 #define AFFIX(S) { .s = (char *) (S), .width = sizeof (S) - 1 }
-#define NS(PREFIX, SUFFIX, DECIMAL, GROUPING) { \
+#define NS(PREFIX, SUFFIX, DECIMAL, GROUPING, INCLUDE_LEADING_ZERO) {        \
     .neg_prefix = AFFIX ("-"),                  \
     .prefix = AFFIX (PREFIX),                   \
     .suffix = AFFIX (SUFFIX),                   \
     .neg_suffix = AFFIX (""),                   \
     .decimal = DECIMAL,                         \
     .grouping = GROUPING,                       \
+    .include_leading_zero = INCLUDE_LEADING_ZERO \
   }
-#define ANS(DECIMAL, GROUPING) {                        \
-    [FMT_F]      = NS( "",  "", DECIMAL, 0),            \
-    [FMT_E]      = NS( "",  "", DECIMAL, 0),            \
-    [FMT_COMMA]  = NS( "",  "", DECIMAL, GROUPING),     \
-    [FMT_DOT]    = NS( "",  "", GROUPING, DECIMAL),     \
-    [FMT_DOLLAR] = NS("$",  "", DECIMAL, GROUPING),     \
-    [FMT_PCT]    = NS( "", "%", DECIMAL, 0),            \
+#define ANS(DECIMAL, GROUPING, INCLUDE_LEADING_ZERO) {                  \
+    [FMT_F]      = NS( "",  "", DECIMAL, 0, INCLUDE_LEADING_ZERO),      \
+    [FMT_E]      = NS( "",  "", DECIMAL, 0, INCLUDE_LEADING_ZERO),      \
+    [FMT_COMMA]  = NS( "",  "", DECIMAL, GROUPING, INCLUDE_LEADING_ZERO), \
+    [FMT_DOT]    = NS( "",  "", GROUPING, DECIMAL, INCLUDE_LEADING_ZERO), \
+    [FMT_DOLLAR] = NS("$",  "", DECIMAL, GROUPING, false),              \
+    [FMT_PCT]    = NS( "", "%", DECIMAL, 0, false),                     \
   }
+#define ANS2(DECIMAL, GROUPING) {               \
+    ANS(DECIMAL, GROUPING, false),              \
+    ANS(DECIMAL, GROUPING, true),               \
+  }
+
+  /* First index: 0 for ',' decimal point, 1 for '.' decimal point.
+     Second index: 0 for no leading zero, 1 for leading zero.
+     Third index: TYPE.
+  */
+  static const struct fmt_number_style styles[2][2][6] = {
+    ANS2 (',', '.'),
+    ANS2 ('.', ','),
+  };
 
-  static const struct fmt_number_style period_styles[6] = ANS ('.', ',');
-  static const struct fmt_number_style comma_styles[6] = ANS (',', '.');
-  static const struct fmt_number_style default_style = NS ("", "", '.', 0);
+  static const struct fmt_number_style default_style = NS ("", "", '.', 0, false);
 
   switch (type)
     {
@@ -130,9 +142,11 @@ fmt_settings_get_style (const struct fmt_settings *settings,
     case FMT_DOLLAR:
     case FMT_PCT:
     case FMT_E:
-      return (settings->decimal == '.'
-              ? &period_styles[type]
-              : &comma_styles[type]);
+      {
+        int decimal_idx = settings->decimal == '.';
+        int leadzero_idx = settings->include_leading_zero;
+        return &styles[decimal_idx][leadzero_idx][type];
+      }
 
     case FMT_CCA:
     case FMT_CCB:
@@ -1298,6 +1312,7 @@ fmt_number_style_from_string (const char *s)
     .neg_suffix = neg_suffix,
     .decimal = grouping == '.' ? ',' : '.',
     .grouping = grouping,
+    .include_leading_zero = false,
     .extra_bytes = extra_bytes,
   };
   return style;
diff --git a/src/data/format.h b/src/data/format.h
index d6779d4b3f..8c54ba3a17 100644
--- a/src/data/format.h
+++ b/src/data/format.h
@@ -164,6 +164,7 @@ struct fmt_number_style
     struct fmt_affix neg_suffix; /* Negative suffix. */
     char decimal;                /* Decimal point: '.' or ','. */
     char grouping;               /* Grouping character: ',', '.', or 0. */
+    bool include_leading_zero;   /* Format as ".5" or "0.5"? */
 
     /* A fmt_affix may require more bytes than its display width; for example,
        U+00A5 (Â¥) is 2 bytes in UTF-8 but occupies only one display column.
@@ -192,6 +193,11 @@ struct fmt_settings
   {
     int epoch;                               /* 0 for default epoch. */
     char decimal;                            /* '.' or ','. */
+
+    /* Format F, E, COMMA, and DOT with leading zero (e.g. "0.5" instead of
+       ".5")? */
+    bool include_leading_zero;
+
     struct fmt_number_style *ccs[FMT_N_CCS]; /* CCA through CCE. */
   };
 #define FMT_SETTINGS_INIT { .decimal = '.' }
diff --git a/src/data/settings.c b/src/data/settings.c
index 4bc36494fd..7d7934af92 100644
--- a/src/data/settings.c
+++ b/src/data/settings.c
@@ -615,6 +615,12 @@ settings_set_decimal_char (char decimal)
   the_settings.styles.decimal = decimal;
 }
 
+void
+settings_set_include_leading_zero (bool include_leading_zero)
+{
+  the_settings.styles.include_leading_zero = include_leading_zero;
+}
+
 const struct fmt_settings *
 settings_get_fmt_settings (void)
 {
diff --git a/src/data/settings.h b/src/data/settings.h
index 9f6b94a888..34f4b8519f 100644
--- a/src/data/settings.h
+++ b/src/data/settings.h
@@ -155,6 +155,7 @@ enum fmt_type;
 bool settings_set_cc (const char *cc_string, enum fmt_type type);
 
 void settings_set_decimal_char (char decimal);
+void settings_set_include_leading_zero (bool include_leading_zero);
 
 const struct fmt_settings *settings_get_fmt_settings (void);
 
diff --git a/src/language/utilities/set.c b/src/language/utilities/set.c
index 08675179ab..e7c17765d6 100644
--- a/src/language/utilities/set.c
+++ b/src/language/utilities/set.c
@@ -617,6 +617,22 @@ show_JOURNAL (const struct dataset *ds UNUSED)
           : xstrdup (enabled));
 }
 
+static bool
+parse_LEADZERO (struct lexer *lexer)
+{
+  int leadzero = force_parse_bool (lexer);
+  if (leadzero != -1)
+    settings_set_include_leading_zero (leadzero);
+  return leadzero != -1;
+}
+
+static char *
+show_LEADZERO (const struct dataset *ds UNUSED)
+{
+  bool leadzero = settings_get_fmt_settings ()->include_leading_zero;
+  return xstrdup (leadzero ? "ON" : "OFF");
+}
+
 static bool
 parse_LENGTH (struct lexer *lexer)
 {
@@ -1218,6 +1234,7 @@ static const struct setting settings[] = {
   { "HEADER", parse_HEADER, NULL },
   { "INCLUDE", parse_INCLUDE, show_INCLUDE },
   { "JOURNAL", parse_JOURNAL, show_JOURNAL },
+  { "LEADZERO", parse_LEADZERO, show_LEADZERO },
   { "LENGTH", parse_LENGTH, show_LENGTH },
   { "LOCALE", parse_LOCALE, show_LOCALE },
   { "MDISPLAY", parse_MDISPLAY, show_MDISPLAY },
diff --git a/src/output/spv/light-binary.grammar b/src/output/spv/light-binary.grammar
index 05ba2a2d90..816968fdd0 100644
--- a/src/output/spv/light-binary.grammar
+++ b/src/output/spv/light-binary.grammar
@@ -121,7 +121,7 @@ X0 => byte*14 Y1 Y2
 Y1 =>
    string[command] string[command-local]
    string[language] string[charset] string[locale]
-   bool[x10] bool[x11] bool[x12] bool[x13]
+   bool[x10] bool[include-leading-zero] bool[x12] bool[x13]
    Y0
 Y2 => CustomCurrency byte[missing] bool[x17]
 
diff --git a/src/output/spv/spv-light-decoder.c b/src/output/spv/spv-light-decoder.c
index f8c4f618b2..7bc3b1fb27 100644
--- a/src/output/spv/spv-light-decoder.c
+++ b/src/output/spv/spv-light-decoder.c
@@ -939,6 +939,8 @@ decode_spvlb_table (const struct spvlb_table *in, struct pivot_table **outp)
           /* XXX warn if parsing fails */
         }
     }
+  if (y1)
+    out->settings.include_leading_zero = y1->include_leading_zero;
   out->small = in->formats->x3 ? in->formats->x3->small : 0;
 
   /* Command information. */
diff --git a/src/output/spv/spv-writer.c b/src/output/spv/spv-writer.c
index bb37a05e24..3d56429d66 100644
--- a/src/output/spv/spv-writer.c
+++ b/src/output/spv/spv-writer.c
@@ -799,7 +799,10 @@ put_y1 (struct buf *buf, const struct pivot_table *table)
   put_string (buf, table->language);
   put_string (buf, "UTF-8");    /* XXX */
   put_string (buf, table->locale);
-  put_bytes (buf, "\0\0\1\1", 4);
+  put_bool (buf, false);        /* x10 */
+  put_bool (buf, table->settings.include_leading_zero);
+  put_bool (buf, true);         /* x12 */
+  put_bool (buf, true);         /* x13 */
   put_y0 (buf, table);
 }
 
diff --git a/tests/data/data-out.at b/tests/data/data-out.at
index 9ed4a980d0..a96bf32c78 100644
--- a/tests/data/data-out.at
+++ b/tests/data/data-out.at
@@ -247,6 +247,49 @@ sys.exit(1 if errors else 0)
 AT_CHECK([$PYTHON3 num-out-compare.py $PSPP_NUM_OUT_COMPARE_FLAGS expout.inexact output.inexact])
 AT_CLEANUP
 
+AT_SETUP([leading zeros in numeric output])
+AT_KEYWORDS([data-out LEADZERO])
+AT_DATA([data-out.sps], [dnl
+DATA LIST LIST NOTABLE/x.
+BEGIN DATA.
+0.5
+0.99
+0.01
+0
+-0
+-0.5
+-0.99
+-0.01
+END DATA.
+
+PRINT/x (F5.2) x (F5.1).
+EXECUTE.
+
+SET LEADZERO=ON.
+PRINT/x (F5.2) x (F5.1).
+EXECUTE.
+])
+AT_CHECK([pspp -O format=csv data-out.sps], [0], [dnl
+.50   .5
+.99  1.0
+.01   .0
+.00   .0
+.00   .0
+-.50  -.5
+-.99 -1.0
+-.01   .0
+
+0.50  0.5
+0.99  1.0
+0.01  0.0
+0.00  0.0
+0.00  0.0
+-0.50 -0.5
+-0.99 -1.0
+-0.01  0.0
+])
+AT_CLEANUP
+
 AT_SETUP([non-ASCII custom currency formats])
 AT_KEYWORDS([data-out])
 AT_DATA([data-out.sps], [dnl