Fixed bug #14822.

[pspp] / src / pfm-write.c
diff --git a/src/pfm-write.c b/src/pfm-write.c

index bdd93c99254ad2cfd210899bdf12ae70568758c2..cd088b7b85048be501fbd2ccbec3c45b316ddcee 100644 (file)
--- a/src/pfm-write.c
+++ b/src/pfm-write.c
@@ -14,32 +14,39 @@
  
     You should have received a copy of the GNU General Public License
     along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-   02111-1307, USA. */
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA. */
  
  #include <config.h>
  #include "pfm-write.h"
  #include "error.h"
  #include <ctype.h>
  #include <errno.h>
+#include <fcntl.h>
  #include <float.h>
  #include <math.h>
  #include <stdio.h>
  #include <stdlib.h>
+#include <sys/stat.h>
  #include <time.h>
+#include <unistd.h>
  #include "alloc.h"
  #include "case.h"
  #include "dictionary.h"
  #include "error.h"
  #include "file-handle.h"
-#include "gmp.h"
  #include "hash.h"
  #include "magic.h"
+#include "misc.h"
+#include "stat-macros.h"
  #include "str.h"
  #include "value-labels.h"
  #include "var.h"
  #include "version.h"
  
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
  #include "debug-print.h"
  
  /* Portable file writer. */
@@ -52,6 +59,8 @@ struct pfm_writer
  
      size_t var_cnt;             /* Number of variables. */
      struct pfm_var *vars;       /* Variables. */
+
+    int digits;                 /* Digits of precision. */
    };
  
  /* A variable to write to the portable file. */
@@ -64,38 +73,61 @@ struct pfm_var
  static int buf_write (struct pfm_writer *, const void *, size_t);
  static int write_header (struct pfm_writer *);
  static int write_version_data (struct pfm_writer *);
-static int write_variables (struct pfm_writer *, const struct dictionary *);
+static int write_variables (struct pfm_writer *, struct dictionary *);
  static int write_value_labels (struct pfm_writer *, const struct dictionary *);
  
-/* Writes the dictionary DICT to portable file HANDLE.  Returns
-   nonzero only if successful. */
+static void format_trig_double (long double, int base_10_precision, char[]);
+static char *format_trig_int (int, bool force_sign, char[]);
+
+/* Builds and returns the option set used for portable files by default. */
+struct pfm_write_options
+pfm_writer_default_options (void)
+{
+  struct pfm_write_options defaults;
+  defaults.digits = DBL_DIG;
+  defaults.type = PFM_COMM;
+  defaults.create_writeable = true;
+  return defaults;
+}
+
+/* Writes dictionary DICT to portable file FH according to OPTS.
+   Returns the new writer if successful, otherwise a null pointer.
+   DICT will not be modified, except to assign short names. */
  struct pfm_writer *
-pfm_open_writer (struct file_handle *fh, const struct dictionary *dict)
+pfm_open_writer (struct file_handle *fh, struct dictionary *dict,
+                 struct pfm_write_options opts)
  {
    struct pfm_writer *w = NULL;
+  mode_t mode;
+  int fd;
    size_t i;
  
+  /* Create file. */
+  mode = S_IRUSR | S_IRGRP | S_IROTH;
+  if (opts.create_writeable)
+    mode |= S_IWUSR | S_IWGRP | S_IWOTH;
+  fd = open (handle_get_filename (fh), O_WRONLY | O_CREAT | O_TRUNC, mode);
+  if (fd < 0) 
+    goto open_error;
+
+  /* Open file handle. */
    if (!fh_open (fh, "portable file", "we"))
      goto error;
-  
-  /* Open the physical disk file. */
+
+  /* Initialize data structures. */
    w = xmalloc (sizeof *w);
    w->fh = fh;
-  w->file = fopen (handle_get_filename (fh), "wb");
+  w->file = fdopen (fd, "w");
+  if (w->file == NULL) 
+    {
+      close (fd);
+      goto open_error;
+    }
+  
    w->lc = 0;
    w->var_cnt = 0;
    w->vars = NULL;
    
-  /* Check that file create succeeded. */
-  if (w->file == NULL)
-    {
-      msg (ME, _("An error occurred while opening \"%s\" for writing "
-          "as a portable file: %s."),
-           handle_get_filename (fh), strerror (errno));
-      err_cond_fail ();
-      goto error;
-    }
-  
    w->var_cnt = dict_get_var_cnt (dict);
    w->vars = xmalloc (sizeof *w->vars * w->var_cnt);
    for (i = 0; i < w->var_cnt; i++) 
@@ -106,6 +138,14 @@ pfm_open_writer (struct file_handle *fh, const struct dictionary *dict)
        pv->fv = dv->fv;
      }
  
+  w->digits = opts.digits;
+  if (w->digits < 1) 
+    {
+      msg (ME, _("Invalid decimal digits count %d.  Treating as %d."),
+           w->digits, DBL_DIG);
+      w->digits = DBL_DIG;
+    }
+
    /* Write file header. */
    if (!write_header (w)
        || !write_version_data (w)
@@ -116,9 +156,16 @@ pfm_open_writer (struct file_handle *fh, const struct dictionary *dict)
  
    return w;
  
-error:
+ error:
    pfm_close_writer (w);
    return NULL;
+
+ open_error:
+  msg (ME, _("An error occurred while opening \"%s\" for writing "
+             "as a portable file: %s."),
+       handle_get_filename (fh), strerror (errno));
+  err_cond_fail ();
+  goto error;
  }
  \f  
  /* Write NBYTES starting at BUF to the portable file represented by
@@ -161,107 +208,18 @@ buf_write (struct pfm_writer *w, const void *buf_, size_t nbytes)
  static int
  write_float (struct pfm_writer *w, double d)
  {
-  int neg = 0;
-  char *mantissa;
-  int mantissa_len;
-  mp_exp_t exponent;
-  char *buf, *cp;
-  int success;
-
-  if (d < 0.)
-    {
-      d = -d;
-      neg = 1;
-    }
-  
-  if (d == fabs (SYSMIS) || d == HUGE_VAL)
-    return buf_write (w, "*.", 2);
-  
-  /* Use GNU libgmp2 to convert D into base-30. */
-  {
-    mpf_t f;
-    
-    mpf_init_set_d (f, d);
-    mantissa = mpf_get_str (NULL, &exponent, 30, 0, f);
-    mpf_clear (f);
-
-    for (cp = mantissa; *cp; cp++)
-      *cp = toupper (*cp);
-  }
-  
-  /* Choose standard or scientific notation. */
-  mantissa_len = (int) strlen (mantissa);
-  cp = buf = local_alloc (mantissa_len + 32);
-  if (neg)
-    *cp++ = '-';
-  if (mantissa_len == 0)
-    *cp++ = '0';
-  else if (exponent < -4 || exponent > (mp_exp_t) mantissa_len)
-    {
-      /* Scientific notation. */
-      *cp++ = mantissa[0];
-      *cp++ = '.';
-      cp = stpcpy (cp, &mantissa[1]);
-      cp = spprintf (cp, "%+ld", (long) (exponent - 1));
-    }
-  else if (exponent <= 0)
-    {
-      /* Standard notation, D <= 1. */
-      *cp++ = '.';
-      memset (cp, '0', -exponent);
-      cp += -exponent;
-      cp = stpcpy (cp, mantissa);
-    }
-  else 
-    {
-      /* Standard notation, D > 1. */
-      memcpy (cp, mantissa, exponent);
-      cp += exponent;
-      *cp++ = '.';
-      cp = stpcpy (cp, &mantissa[exponent]);
-    }
-  *cp++ = '/';
-  
-  success = buf_write (w, buf, cp - buf);
-  local_free (buf);
-  free (mantissa);
-  return success;
+  char buffer[64];
+  format_trig_double (d, floor (d) == d ? DBL_DIG : w->digits, buffer);
+  return buf_write (w, buffer, strlen (buffer)) && buf_write (w, "/", 1);
  }
  
  /* Write N to the portable file as an integer field, and return success. */
  static int
  write_int (struct pfm_writer *w, int n)
  {
-  char buf[64];
-  char *bp = &buf[64];
-  int neg = 0;
-
-  *--bp = '/';
-  
-  if (n < 0)
-    {
-      n = -n;
-      neg = 1;
-    }
-  
-  do
-    {
-      int r = n % 30;
-
-      /* PORTME: character codes. */
-      if (r < 10)
-       *--bp = r + '0';
-      else
-       *--bp = r - 10 + 'A';
-
-      n /= 30;
-    }
-  while (n > 0);
-
-  if (neg)
-    *--bp = '-';
-
-  return buf_write (w, bp, &buf[64] - bp);
+  char buffer[64];
+  format_trig_int (n, false, buffer);
+  return buf_write (w, buffer, strlen (buffer)) && buf_write (w, "/", 1);
  }
  
  /* Write S to the portable file as a string field. */
@@ -368,9 +326,11 @@ write_value (struct pfm_writer *w, union value *v, struct variable *vv)
  
  /* Write variable records, and return success. */
  static int
-write_variables (struct pfm_writer *w, const struct dictionary *dict)
+write_variables (struct pfm_writer *w, struct dictionary *dict)
  {
    int i;
+
+  dict_assign_short_names (dict);
    
    if (!buf_write (w, "4", 1) || !write_int (w, dict_get_var_cnt (dict))
        || !write_int (w, 161))
@@ -378,25 +338,43 @@ write_variables (struct pfm_writer *w, const struct dictionary *dict)
  
    for (i = 0; i < dict_get_var_cnt (dict); i++)
      {
-      static const char *miss_types[MISSING_COUNT] =
-       {
-         "", "8", "88", "888", "B ", "9", "A", "B 8", "98", "A8",
-       };
-
-      const char *m;
-      int j;
-
        struct variable *v = dict_get_var (dict, i);
+      struct missing_values mv;
        
        if (!buf_write (w, "7", 1) || !write_int (w, v->width)
-         || !write_string (w, v->name)
+         || !write_string (w, v->short_name)
           || !write_format (w, &v->print) || !write_format (w, &v->write))
         return 0;
  
-      for (m = miss_types[v->miss_type], j = 0; j < (int) strlen (m); j++)
-       if ((m[j] != ' ' && !buf_write (w, &m[j], 1))
-           || !write_value (w, &v->missing[j], v))
-         return 0;
+      /* Write missing values.  Ranges are written first: "9" for
+         LOWEST THRU y, "A" for x THRU HIGHEST, "B" for x THRU y. */
+      mv_copy (&mv, &v->miss);
+      while (mv_has_range (&mv))
+        {
+          double x, y;
+          mv_pop_range (&mv, &x, &y);
+          if (x == LOWEST)
+            {
+              if (!buf_write (w, "9", 1) || !write_float (w, y))
+                return 0;
+            }
+          else if (y == HIGHEST)
+            {
+              /* The open end is implied; store the lower bound. */
+              if (!buf_write (w, "A", 1) || !write_float (w, x))
+                return 0;
+            }
+          else if (!buf_write (w, "B", 1) || !write_float (w, x)
+                   || !write_float (w, y))
+            return 0;
+        }
+      while (mv_has_value (&mv)) 
+        {
+          union value value;
+          mv_pop_value (&mv, &value);
+          if (!buf_write (w, "8", 1) || !write_value (w, &value, v))
+            return 0; 
+        }
  
        if (v->label && (!buf_write (w, "C", 1) || !write_string (w, v->label)))
         return 0;
@@ -422,7 +400,7 @@ write_value_labels (struct pfm_writer *w, const struct dictionary *dict)
  
        if (!buf_write (w, "D", 1)
           || !write_int (w, 1)
-         || !write_string (w, v->name)
+         || !write_string (w, v->short_name)
           || !write_int (w, val_labs_count (v->val_labs)))
         return 0;
  
@@ -494,3 +472,389 @@ pfm_close_writer (struct pfm_writer *w)
    free (w->vars);
    free (w);
  }
+\f
+/* Base-30 conversion.
+
+   Portable files represent numbers in base-30 format, so we need
+   to be able to convert real and integer number to that base.
+   Older versions of PSPP used libgmp to do so, but this added a
+   big library dependency to do just one thing.  Now we do it
+   ourselves internally.
+
+   Important fact: base 30 is called "trigesimal". */
+
+/* Conversion base. */
+#define BASE 30                         /* As an integer. */
+#define LDBASE ((long double) BASE)     /* As a long double. */
+
+/* This is floor(log30(2**31)), the number of trigesimal digits
+   that a `long int' is guaranteed to be able to hold. */
+#define CHUNK_SIZE 6                    
+
+/* pow_tab[i] = pow (30, pow (2, i)) */
+static long double pow_tab[16];
+
+/* Fills in pow_tab[] by repeated squaring.  Calls after the first
+   one return immediately. */
+static void
+init_pow_tab (void)
+{
+  static bool ready = false;
+  long double square = 30.0L;
+  size_t slot = 0;
+
+  if (ready)
+    return;
+  ready = true;
+
+  /* Store 30, 30**2, 30**4, ... for as long as the next entry
+     stays below DBL_MAX. */
+  while (square < DBL_MAX)
+    {
+      assert (slot < sizeof pow_tab / sizeof *pow_tab);
+      pow_tab[slot++] = square;
+      square *= square;
+    }
+}
+
+/* Computes 30**EXPONENT, for 0 <= EXPONENT <= log30(DBL_MAX). */
+static long double
+pow30_nonnegative (int exponent)
+{
+  long double product = 1.L;
+  int bit;
+
+  assert (exponent >= 0);
+  assert (exponent < 1L << (sizeof pow_tab / sizeof *pow_tab));
+
+  /* Multiply together the table entries selected by EXPONENT's
+     set bits, least significant bit first. */
+  for (bit = 0; exponent > 0; bit++, exponent /= 2)
+    if (exponent % 2 != 0)
+      product *= pow_tab[bit];
+  return product;
+}
+
+/* Computes 30**EXPONENT, for log30(DBL_MIN) <= EXPONENT <=
+   log30(DBL_MAX).  Negative powers are taken as reciprocals. */
+static long double
+pow30 (int exponent)
+{
+  long double magnitude;
+
+  magnitude = pow30_nonnegative (exponent < 0 ? -exponent : exponent);
+  return exponent < 0 ? 1.L / magnitude : magnitude;
+}
+
+/* Maps TRIG, a digit value in 0...29, to its character: 0-9, then A-T. */
+static int
+trig_to_char (int trig)
+{
+  assert (0 <= trig && trig < 30);
+  return *("0123456789ABCDEFGHIJKLMNOPQRST" + trig);
+}
+
+/* Writes the TRIG_CNT trigs in TRIGS[] to STRING as characters,
+   placing the trigesimal point after the first TRIG_PLACES of
+   them and padding with zeros at either end where TRIG_PLACES
+   lies outside the digits.  Null-terminates STRING and returns
+   a pointer to the terminator. */
+static char *
+format_trig_digits (char *string,
+                    const char trigs[], int trig_cnt, int trig_places)
+{
+  int i;
+  if (trig_places < 0)
+    {
+      *string++ = '.';
+      for (i = trig_places; i < 0; i++)
+        *string++ = '0';
+    }
+  for (i = 0; i < trig_cnt; i++)
+    {
+      if (i == trig_places)
+        *string++ = '.';
+      *string++ = trig_to_char (trigs[i]);
+    }
+  for (; i < trig_places; i++)
+    *string++ = '0';
+  *string = '\0';
+  return string;
+}
+
+/* Helper for format_trig_int(): writes nonnegative VALUE at CP in
+   base 30, most significant trig first, without a terminator.
+   Returns a pointer just past the last trig written. */
+static char *
+recurse_format_trig_int (char *cp, int value)
+{
+  /* Everything above the lowest trig goes first, so that the
+     most significant trig ends up leftmost. */
+  if (value >= BASE)
+    cp = recurse_format_trig_int (cp, value / BASE);
+  *cp = trig_to_char (value % BASE);
+  return cp + 1;
+}
+
+/* Formats VALUE, which may be any int, as a trigesimal integer
+   in null-terminated STRING[].  A sign is written if VALUE is
+   negative or FORCE_SIGN is true.  Returns the terminator. */
+static char *
+format_trig_int (int value, bool force_sign, char string[])
+{
+  /* Split off the last trig so that each part can be negated
+     safely; -VALUE itself would overflow for INT_MIN. */
+  int last_trig = value < 0 ? -(value % BASE) : value % BASE;
+  int upper_trigs = value < 0 ? -(value / BASE) : value / BASE;
+
+  if (value < 0)
+    *string++ = '-';
+  else if (force_sign)
+    *string++ = '+';
+  if (upper_trigs > 0)
+    string = recurse_format_trig_int (string, upper_trigs);
+  *string++ = trig_to_char (last_trig);
+  *string = '\0';
+  return string;
+}
+
+/* Decides how to round away the TRIG_CNT trigesimals in TRIGS[],
+   which lie just to the right of the last trig being kept,
+   TRIGS[-1].  Returns true (round up) if TRIGS[] amounts to
+   more than half a unit in that last place, false (round down)
+   if less.  An exact half is rounded to even: true if
+   TRIGS[-1] is odd, false if it is even. */
+static bool
+should_round_up (const char trigs[], int trig_cnt)
+{
+  const int half = BASE / 2;
+  int idx;
+
+  assert (trig_cnt > 0);
+
+  /* The leading trig settles the matter unless it is exactly
+     the half digit. */
+  if (trigs[0] != half)
+    {
+      return trigs[0] > half;
+    }
+
+  /* The leading trig is the half digit, so any nonzero trig to
+     its right makes the total slightly greater than half. */
+  for (idx = 1; idx < trig_cnt; idx++)
+    {
+      if (trigs[idx] > 0)
+        {
+          return true;
+        }
+    }
+
+  /* Exactly half: round to even, that is, round up only if
+     the trig being kept is odd. */
+  return trigs[-1] % 2 != 0;
+}
+
+/* Adds one to the number held in the TRIG_CNT trigs of TRIGS[],
+   starting at the rightmost trig and carrying leftward.
+   Returns true if the carry was absorbed, false if it ran off
+   the leftmost trig, in which case every trig is now zero. */
+static bool
+try_round_up (char *trigs, int trig_cnt)
+{
+  int pos;
+
+  for (pos = trig_cnt - 1; pos >= 0; pos--)
+    {
+      if (trigs[pos] == BASE - 1)
+        {
+          /* Wraps to zero; the carry moves one place left. */
+          trigs[pos] = 0;
+          continue;
+        }
+      /* This trig can absorb the increment. */
+      trigs[pos]++;
+      return true;
+    }
+  /* Carried out of the leftmost trig. */
+  return false;
+}
+
+/* Converts VALUE to a null-terminated trigesimal string in
+   OUTPUT[], rounded to the equivalent of at least
+   BASE_10_PRECISION decimal digits, in conventional or
+   scientific notation.  Zero is written as "0"; missing,
+   infinite, and extreme values are written as "*.". */
+static void
+format_trig_double (long double value, int base_10_precision, char output[])
+{
+  /* Original VALUE was negative? */
+  bool negative;
+
+  /* Number of significant trigesimals. */
+  int base_30_precision;
+
+  /* Base-2 significand and exponent for original VALUE. */
+  double base_2_sig;
+  int base_2_exp;
+
+  /* VALUE as a set of trigesimals. */
+  char buffer[DBL_DIG + 16];
+  char *trigs;
+  int trig_cnt;
+
+  /* Number of trigesimal places for trigs.
+     trigs[0] has coefficient 30**(trig_places - 1),
+     trigs[1] has coefficient 30**(trig_places - 2),
+     and so on.
+     In other words, the trigesimal point is just before trigs[trig_places].
+   */
+  int trig_places;
+
+  /* Number of trigesimal places left to write into BUFFER. */
+  int trigs_to_output;
+
+  init_pow_tab ();
+
+  /* Handle special cases. */
+  if (value == SYSMIS)
+    goto missing_value;
+  if (value == 0.)
+    goto zero;
+
+  /* Make VALUE positive. */
+  if (value < 0)
+    {
+      value = -value;
+      negative = true;
+    }
+  else
+    negative = false;
+
+  /* Adjust VALUE to roughly 30**3, by shifting the trigesimal
+     point left or right as necessary.  We approximate the
+     base-30 exponent by obtaining the base-2 exponent, then
+     multiplying by log30(2).  This approximation is sufficient
+     to ensure that the adjusted VALUE is always in the range
+     0...30**6, an invariant of the loop below. */
+  errno = 0;
+  base_2_sig = frexp (value, &base_2_exp);
+  if (errno != 0 || !finite (base_2_sig))
+    goto missing_value;
+  if (base_2_exp == 0 && base_2_sig == 0.)
+    goto zero;
+  if (base_2_exp <= INT_MIN / 20379L || base_2_exp >= INT_MAX / 20379L)
+    goto missing_value;
+  trig_places = (base_2_exp * 20379L / 100000L) + CHUNK_SIZE / 2;
+  value *= pow30 (CHUNK_SIZE - trig_places);
+
+  /* Dump all the trigs to buffer[], CHUNK_SIZE at a time. */
+  trigs = buffer;
+  trig_cnt = 0;
+  for (trigs_to_output = DIV_RND_UP (DBL_DIG * 2, 3) + 1 + (CHUNK_SIZE / 2);
+       trigs_to_output > 0;
+       trigs_to_output -= CHUNK_SIZE)
+    {
+      long chunk;
+      int trigs_left;
+
+      /* The current chunk is just the integer part of VALUE,
+         truncated to the nearest integer.  The chunk fits in a
+         long. */
+      chunk = value;
+      assert (pow30 (CHUNK_SIZE) <= LONG_MAX);
+      assert (chunk >= 0 && chunk < pow30 (CHUNK_SIZE));
+
+      value -= chunk;
+
+      /* Append the chunk, in base 30, to trigs[]. */
+      for (trigs_left = CHUNK_SIZE; chunk > 0 && trigs_left > 0; )
+        {
+          trigs[trig_cnt + --trigs_left] = chunk % 30;
+          chunk /= 30;
+        }
+      while (trigs_left > 0)
+        trigs[trig_cnt + --trigs_left] = 0;
+      trig_cnt += CHUNK_SIZE;
+
+      /* Proceed to the next chunk. */
+      if (value == 0.)
+        break;
+      value *= pow (LDBASE, CHUNK_SIZE);
+    }
+
+  /* Strip leading zeros. */
+  while (trig_cnt > 1 && *trigs == 0)
+    {
+      trigs++;
+      trig_cnt--;
+      trig_places--;
+    }
+
+  /* Round to requested precision, conservatively estimating the
+     required base-30 precision as 2/3 of the base-10 precision
+     (log30(10) = .68). */
+  assert (base_10_precision > 0);
+  if (base_10_precision > LDBL_DIG)
+    base_10_precision = LDBL_DIG;
+  base_30_precision = DIV_RND_UP (base_10_precision * 2, 3);
+  if (trig_cnt > base_30_precision)
+    {
+      if (should_round_up (trigs + base_30_precision,
+                           trig_cnt - base_30_precision))
+        {
+          /* Try to round up. */
+          if (try_round_up (trigs, base_30_precision))
+            {
+              /* Rounding up worked. */
+              trig_cnt = base_30_precision;
+            }
+          else
+            {
+              /* Couldn't round up because we ran out of trigs to
+                 carry into.  Do the carry here instead. */
+              *trigs = 1;
+              trig_cnt = 1;
+              trig_places++;
+            }
+        }
+      else
+        {
+          /* Round down. */
+          trig_cnt = base_30_precision;
+        }
+    }
+  else
+    {
+      /* No rounding required: fewer digits available than
+         requested. */
+    }
+
+  /* Strip trailing zeros. */
+  while (trig_cnt > 1 && trigs[trig_cnt - 1] == 0)
+    trig_cnt--;
+
+  /* Write output. */
+  if (negative)
+    *output++ = '-';
+  if (trig_places >= -1 && trig_places < trig_cnt + 3)
+    {
+      /* Use conventional notation. */
+      format_trig_digits (output, trigs, trig_cnt, trig_places);
+    }
+  else
+    {
+      /* Use scientific notation. */
+      char *op;
+      op = format_trig_digits (output, trigs, trig_cnt, trig_cnt);
+      op = format_trig_int (trig_places - trig_cnt, true, op);
+    }
+  return;
+
+ zero:
+  strcpy (output, "0");
+  return;
+
+ missing_value:
+  strcpy (output, "*.");
+  return;
+}