You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
- 02111-1307, USA. */
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
#include <config.h>
#include "pfm-write.h"
#include "error.h"
#include <ctype.h>
#include <errno.h>
+#include <fcntl.h>
#include <float.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
+#include <sys/stat.h>
#include <time.h>
+#include <unistd.h>
#include "alloc.h"
#include "case.h"
#include "dictionary.h"
#include "error.h"
#include "file-handle.h"
-#include "gmp.h"
#include "hash.h"
#include "magic.h"
+#include "misc.h"
+#include "stat-macros.h"
#include "str.h"
#include "value-labels.h"
#include "var.h"
#include "version.h"
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
#include "debug-print.h"
/* Portable file writer. */
size_t var_cnt; /* Number of variables. */
struct pfm_var *vars; /* Variables. */
+
+ int digits; /* Digits of precision. */
};
/* A variable to write to the portable file. */
static int buf_write (struct pfm_writer *, const void *, size_t);
static int write_header (struct pfm_writer *);
static int write_version_data (struct pfm_writer *);
-static int write_variables (struct pfm_writer *, const struct dictionary *);
+static int write_variables (struct pfm_writer *, struct dictionary *);
static int write_value_labels (struct pfm_writer *, const struct dictionary *);
-/* Writes the dictionary DICT to portable file HANDLE. Returns
- nonzero only if successful. */
+static void format_trig_double (long double, int base_10_precision, char[]);
+static char *format_trig_int (int, bool force_sign, char[]);
+
+/* Builds and returns the option set to use when the caller has
+   no particular preferences for writing a portable file. */
+struct pfm_write_options
+pfm_writer_default_options (void)
+{
+  struct pfm_write_options defaults;
+
+  /* DBL_DIG decimal digits of precision for numeric output. */
+  defaults.digits = DBL_DIG;
+  defaults.type = PFM_COMM;
+  /* Create the output file with its write permission bits set. */
+  defaults.create_writeable = true;
+
+  return defaults;
+}
+
+/* Opens portable file FH for writing and writes the dictionary
+ DICT to it according to the given OPTS. Returns the new
+ writer if successful, a null pointer otherwise. DICT will
+ not be modified, except to assign short names. */
struct pfm_writer *
-pfm_open_writer (struct file_handle *fh, const struct dictionary *dict)
+pfm_open_writer (struct file_handle *fh, struct dictionary *dict,
+ struct pfm_write_options opts)
{
struct pfm_writer *w = NULL;
+ mode_t mode;
+ int fd;
size_t i;
+ /* Create file. */
+ mode = S_IRUSR | S_IRGRP | S_IROTH;
+ if (opts.create_writeable)
+ mode |= S_IWUSR | S_IWGRP | S_IWOTH;
+ fd = open (handle_get_filename (fh), O_WRONLY | O_CREAT | O_TRUNC, mode);
+ if (fd < 0)
+ goto open_error;
+
+ /* Open file handle. */
if (!fh_open (fh, "portable file", "we"))
goto error;
-
- /* Open the physical disk file. */
+
+ /* Initialize data structures. */
w = xmalloc (sizeof *w);
w->fh = fh;
- w->file = fopen (handle_get_filename (fh), "wb");
+ w->file = fdopen (fd, "w");
+ if (w->file == NULL)
+ {
+ close (fd);
+ goto open_error;
+ }
+
w->lc = 0;
w->var_cnt = 0;
w->vars = NULL;
- /* Check that file create succeeded. */
- if (w->file == NULL)
- {
- msg (ME, _("An error occurred while opening \"%s\" for writing "
- "as a portable file: %s."),
- handle_get_filename (fh), strerror (errno));
- err_cond_fail ();
- goto error;
- }
-
w->var_cnt = dict_get_var_cnt (dict);
w->vars = xmalloc (sizeof *w->vars * w->var_cnt);
for (i = 0; i < w->var_cnt; i++)
pv->fv = dv->fv;
}
+ w->digits = opts.digits;
+ if (w->digits < 1)
+ {
+ msg (ME, _("Invalid decimal digits count %d. Treating as %d."),
+ w->digits, DBL_DIG);
+ w->digits = DBL_DIG;
+ }
+
/* Write file header. */
if (!write_header (w)
|| !write_version_data (w)
return w;
-error:
+ error:
pfm_close_writer (w);
return NULL;
+
+ open_error:
+ msg (ME, _("An error occurred while opening \"%s\" for writing "
+ "as a portable file: %s."),
+ handle_get_filename (fh), strerror (errno));
+ err_cond_fail ();
+ goto error;
}
\f
/* Write NBYTES starting at BUF to the portable file represented by
static int
write_float (struct pfm_writer *w, double d)
{
- int neg = 0;
- char *mantissa;
- int mantissa_len;
- mp_exp_t exponent;
- char *buf, *cp;
- int success;
-
- if (d < 0.)
- {
- d = -d;
- neg = 1;
- }
-
- if (d == fabs (SYSMIS) || d == HUGE_VAL)
- return buf_write (w, "*.", 2);
-
- /* Use GNU libgmp2 to convert D into base-30. */
- {
- mpf_t f;
-
- mpf_init_set_d (f, d);
- mantissa = mpf_get_str (NULL, &exponent, 30, 0, f);
- mpf_clear (f);
-
- for (cp = mantissa; *cp; cp++)
- *cp = toupper (*cp);
- }
-
- /* Choose standard or scientific notation. */
- mantissa_len = (int) strlen (mantissa);
- cp = buf = local_alloc (mantissa_len + 32);
- if (neg)
- *cp++ = '-';
- if (mantissa_len == 0)
- *cp++ = '0';
- else if (exponent < -4 || exponent > (mp_exp_t) mantissa_len)
- {
- /* Scientific notation. */
- *cp++ = mantissa[0];
- *cp++ = '.';
- cp = stpcpy (cp, &mantissa[1]);
- cp = spprintf (cp, "%+ld", (long) (exponent - 1));
- }
- else if (exponent <= 0)
- {
- /* Standard notation, D <= 1. */
- *cp++ = '.';
- memset (cp, '0', -exponent);
- cp += -exponent;
- cp = stpcpy (cp, mantissa);
- }
- else
- {
- /* Standard notation, D > 1. */
- memcpy (cp, mantissa, exponent);
- cp += exponent;
- *cp++ = '.';
- cp = stpcpy (cp, &mantissa[exponent]);
- }
- *cp++ = '/';
-
- success = buf_write (w, buf, cp - buf);
- local_free (buf);
- free (mantissa);
- return success;
+ char buffer[64];
+ /* Whole numbers are always formatted with DBL_DIG digits of
+ precision; only values with a fractional part are limited
+ to the writer's configured digit count. The '/' field
+ terminator is written separately after the digits. */
+ format_trig_double (d, floor (d) == d ? DBL_DIG : w->digits, buffer);
+ return buf_write (w, buffer, strlen (buffer)) && buf_write (w, "/", 1);
}
/* Write N to the portable file as an integer field, and return success. */
static int
write_int (struct pfm_writer *w, int n)
{
- char buf[64];
- char *bp = &buf[64];
- int neg = 0;
-
- *--bp = '/';
-
- if (n < 0)
- {
- n = -n;
- neg = 1;
- }
-
- do
- {
- int r = n % 30;
-
- /* PORTME: character codes. */
- if (r < 10)
- *--bp = r + '0';
- else
- *--bp = r - 10 + 'A';
-
- n /= 30;
- }
- while (n > 0);
-
- if (neg)
- *--bp = '-';
-
- return buf_write (w, bp, &buf[64] - bp);
+ char buffer[64];
+ /* Format N in base 30 without a forced sign, then write the
+ digits followed by the '/' field terminator. */
+ format_trig_int (n, false, buffer);
+ return buf_write (w, buffer, strlen (buffer)) && buf_write (w, "/", 1);
}
/* Write S to the portable file as a string field. */
/* Write variable records, and return success. */
static int
-write_variables (struct pfm_writer *w, const struct dictionary *dict)
+write_variables (struct pfm_writer *w, struct dictionary *dict)
{
int i;
+
+ dict_assign_short_names (dict);
if (!buf_write (w, "4", 1) || !write_int (w, dict_get_var_cnt (dict))
|| !write_int (w, 161))
for (i = 0; i < dict_get_var_cnt (dict); i++)
{
- static const char *miss_types[MISSING_COUNT] =
- {
- "", "8", "88", "888", "B ", "9", "A", "B 8", "98", "A8",
- };
-
- const char *m;
- int j;
-
struct variable *v = dict_get_var (dict, i);
+ struct missing_values mv;
if (!buf_write (w, "7", 1) || !write_int (w, v->width)
- || !write_string (w, v->name)
+ || !write_string (w, v->short_name)
|| !write_format (w, &v->print) || !write_format (w, &v->write))
return 0;
- for (m = miss_types[v->miss_type], j = 0; j < (int) strlen (m); j++)
- if ((m[j] != ' ' && !buf_write (w, &m[j], 1))
- || !write_value (w, &v->missing[j], v))
- return 0;
+ /* Write missing values. */
+ mv_copy (&mv, &v->miss);
+ while (mv_has_range (&mv))
+ {
+ double x, y;
+ mv_pop_range (&mv, &x, &y);
+ if (x == LOWEST)
+ {
+ if (!buf_write (w, "9", 1) || !write_float (w, y))
+ return 0;
+ }
+ else if (y == HIGHEST)
+ {
+ if (!buf_write (w, "A", 1) || !write_float (w, y))
+ return 0;
+ }
+ else {
+ if (!buf_write (w, "B", 1) || !write_float (w, x)
+ || !write_float (w, y))
+ return 0;
+ }
+ }
+ while (mv_has_value (&mv))
+ {
+ union value value;
+ mv_pop_value (&mv, &value);
+ if (!buf_write (w, "8", 1) || !write_value (w, &value, v))
+ return 0;
+ }
if (v->label && (!buf_write (w, "C", 1) || !write_string (w, v->label)))
return 0;
if (!buf_write (w, "D", 1)
|| !write_int (w, 1)
- || !write_string (w, v->name)
+ || !write_string (w, v->short_name)
|| !write_int (w, val_labs_count (v->val_labs)))
return 0;
free (w->vars);
free (w);
}
+\f
+/* Base-30 conversion.
+
+ Portable files represent numbers in base-30 format, so we need
+ to be able to convert real and integer numbers to that base.
+ Older versions of PSPP used libgmp to do so, but this added a
+ big library dependency to do just one thing. Now we do it
+ ourselves internally.
+
+ Important fact: base 30 is called "trigesimal". */
+
+/* Conversion base. */
+#define BASE 30 /* As an integer. */
+#define LDBASE ((long double) BASE) /* As a long double. */
+
+/* This is floor(log30(2**31)), the number of trigesimal digits
+ that are guaranteed to fit in a `long int': 30**6 is less
+ than 2**31, but 30**7 is not. */
+#define CHUNK_SIZE 6
+
+/* pow_tab[i] = pow (30, pow (2, i)).
+ init_pow_tab() fills in only the entries whose value is less
+ than DBL_MAX; the remaining entries keep their static zero
+ initialization. */
+static long double pow_tab[16];
+
+/* Fills in pow_tab[] with successive squarings of 30.  Does
+   nothing on the second and later calls. */
+static void
+init_pow_tab (void)
+{
+  static bool did_init = false;
+
+  if (!did_init)
+    {
+      long double power = 30.0L;
+      size_t idx = 0;
+
+      did_init = true;
+
+      /* Keep squaring until the value reaches or exceeds
+         DBL_MAX. */
+      while (power < DBL_MAX)
+        {
+          assert (idx < sizeof pow_tab / sizeof *pow_tab);
+          pow_tab[idx++] = power;
+          power *= power;
+        }
+    }
+}
+
+/* Returns 30**EXPONENT, for 0 <= EXPONENT <= log30(DBL_MAX).
+
+   The result is assembled by binary exponentiation: bit I of
+   EXPONENT selects the factor pow_tab[I] = 30**(2**I). */
+static long double
+pow30_nonnegative (int exponent)
+{
+  long double power;
+  int i;
+
+  assert (exponent >= 0);
+  assert (exponent < 1L << (sizeof pow_tab / sizeof *pow_tab));
+
+  /* Make sure pow_tab[] is populated.  Without this, a caller
+     that had not already run init_pow_tab() would multiply by
+     the table's zero-initialized entries and silently get 0.
+     The call is cheap: it returns immediately once the table
+     has been set up. */
+  init_pow_tab ();
+
+  power = 1.L;
+  for (i = 0; exponent > 0; exponent >>= 1, i++)
+    if (exponent & 1)
+      power *= pow_tab[i];
+
+  return power;
+}
+
+/* Computes 30**EXPONENT for EXPONENT of either sign, within
+   log30(DBL_MIN) <= EXPONENT <= log30(DBL_MAX).  A negative
+   exponent is handled as the reciprocal of the corresponding
+   positive power. */
+static long double
+pow30 (int exponent)
+{
+  if (exponent < 0)
+    return 1.L / pow30_nonnegative (-exponent);
+
+  return pow30_nonnegative (exponent);
+}
+
+/* Maps TRIG, a trigesimal digit value in the range 0...29, to
+   the character that represents it: '0'...'9' for 0...9 and
+   'A'...'T' for 10...29. */
+static int
+trig_to_char (int trig)
+{
+  static const char trig_chars[] = "0123456789ABCDEFGHIJKLMNOPQRST";
+
+  assert (trig >= 0 && trig < 30);
+  return trig_chars[trig];
+}
+
+/* Writes the TRIG_CNT trigs in TRIGS[] to STRING as a
+   null-terminated trigesimal number whose trigesimal point
+   falls after the first TRIG_PLACES trigs.
+
+   If TRIG_PLACES is negative, the output is a point, then
+   -TRIG_PLACES zeros, then the trigs.  If TRIG_PLACES is at
+   least TRIG_CNT, no point is written and the trigs are padded
+   on the right with zeros up to TRIG_PLACES characters.
+
+   Returns a pointer to the null terminator. */
+static char *
+format_trig_digits (char *string,
+                    const char trigs[], int trig_cnt, int trig_places)
+{
+  char *out = string;
+  int i;
+
+  if (trig_places < 0)
+    {
+      /* Pure fraction that needs zeros between the point and
+         the first trig. */
+      *out++ = '.';
+      for (i = trig_places; i < 0; i++)
+        *out++ = '0';
+      for (i = 0; i < trig_cnt; i++)
+        *out++ = trig_to_char (trigs[i]);
+    }
+  else
+    {
+      /* The point, if any, goes just before trig number
+         TRIG_PLACES. */
+      for (i = 0; i < trig_cnt; i++)
+        {
+          if (i == trig_places)
+            *out++ = '.';
+          *out++ = trig_to_char (trigs[i]);
+        }
+
+      /* Pad with zeros out to the units position. */
+      for (i = trig_cnt > 0 ? trig_cnt : 0; i < trig_places; i++)
+        *out++ = '0';
+    }
+
+  *out = '\0';
+  return out;
+}
+
+/* Recursive worker for format_trig_int(): writes nonnegative
+   VALUE at CP as trigesimal digits, most significant first,
+   without a null terminator.  Returns a pointer just past the
+   last digit written. */
+static char *
+recurse_format_trig_int (char *cp, int value)
+{
+  /* Emit every higher-order digit before this one. */
+  if (value >= BASE)
+    cp = recurse_format_trig_int (cp, value / BASE);
+
+  *cp++ = trig_to_char (value % BASE);
+  return cp;
+}
+
+/* Formats VALUE as a trigesimal integer in null-terminated
+   STRING[].  Any `int' value is accepted, including INT_MIN.
+   If FORCE_SIGN is true, a sign is always inserted; otherwise, a
+   sign is only inserted if VALUE is negative.  Returns a pointer
+   to the null terminator that follows the formatted integer. */
+static char *
+format_trig_int (int value, bool force_sign, char string[])
+{
+  if (value < 0)
+    {
+      /* Negating VALUE itself would overflow for INT_MIN, so
+         split off the last trigesimal first.  Integer division
+         truncates toward zero (guaranteed from C99 on), so the
+         quotient and remainder are both nonpositive and small
+         enough to negate safely. */
+      int last_trig = -(value % BASE);
+      int leading = -(value / BASE);
+
+      *string++ = '-';
+      if (leading > 0)
+        string = recurse_format_trig_int (string, leading);
+      *string++ = trig_to_char (last_trig);
+    }
+  else
+    {
+      if (force_sign)
+        *string++ = '+';
+      string = recurse_format_trig_int (string, value);
+    }
+
+  *string = '\0';
+  return string;
+}
+
+/* Decides the rounding direction for the TRIG_CNT trigesimals
+   in TRIGS[], which are the trigs about to be discarded.
+   Returns true to round up when they represent more than half
+   a unit in the last kept place, false to round down when they
+   represent less.  On an exact tie the trig just before them,
+   TRIGS[-1], decides: true if it is odd, false if it is even
+   ("round to even").  TRIGS[-1] must therefore be readable. */
+static bool
+should_round_up (const char trigs[], int trig_cnt)
+{
+  const int half = BASE / 2;
+  int idx;
+
+  assert (trig_cnt > 0);
+
+  /* The leading trig settles it unless it is exactly half. */
+  if (trigs[0] != half)
+    return trigs[0] > half;
+
+  /* Any nonzero trig further right tips it above half. */
+  for (idx = 1; idx < trig_cnt; idx++)
+    if (trigs[idx] > 0)
+      return true;
+
+  /* Exactly half: round to even. */
+  return trigs[-1] % 2;
+}
+
+/* Adds one to the number formed by the TRIG_CNT trigs in
+   TRIGS[], starting at the rightmost trig and carrying leftward
+   through trigs that are already at BASE - 1 (which are reset
+   to zero).  Returns true if some trig absorbed the increment,
+   false if the carry ran off the left end, in which case every
+   trig has been set to zero. */
+static bool
+try_round_up (char *trigs, int trig_cnt)
+{
+  int remaining;
+
+  for (remaining = trig_cnt; remaining > 0; remaining--)
+    {
+      char *cur = &trigs[remaining - 1];
+
+      if (*cur == BASE - 1)
+        {
+          /* Wraps around: zero it and carry into the next trig
+             to the left. */
+          *cur = 0;
+          continue;
+        }
+
+      /* Room to increment here, so the carry stops. */
+      ++*cur;
+      return true;
+    }
+
+  /* Carried out of the leftmost position. */
+  return false;
+}
+
+/* Converts VALUE to trigesimal format in null-terminated string
+ OUTPUT[] with the equivalent of about BASE_10_PRECISION decimal
+ digits of precision. BASE_10_PRECISION must be positive; it
+ is capped at LDBL_DIG. The output format may use conventional
+ or scientific notation, with a leading '-' for negative
+ values. Zero is written as "0". Missing, infinite, and
+ extreme values are represented with "*.".
+ The caller must supply an OUTPUT[] large enough for the
+ result; no size is checked here. */
+static void
+format_trig_double (long double value, int base_10_precision, char output[])
+{
+ /* Original VALUE was negative? */
+ bool negative;
+
+ /* Number of significant trigesimals. */
+ int base_30_precision;
+
+ /* Base-2 significand and exponent for original VALUE. */
+ double base_2_sig;
+ int base_2_exp;
+
+ /* VALUE as a set of trigesimals. */
+ char buffer[DBL_DIG + 16];
+ char *trigs;
+ int trig_cnt;
+
+ /* Number of trigesimal places for trigs.
+ trigs[0] has coefficient 30**(trig_places - 1),
+ trigs[1] has coefficient 30**(trig_places - 2),
+ and so on.
+ In other words, the trigesimal point is just before
+ trigs[trig_places].
+ */
+ int trig_places;
+
+ /* Number of trigesimal places left to write into BUFFER. */
+ int trigs_to_output;
+
+ init_pow_tab ();
+
+ /* Handle special cases. */
+ if (value == SYSMIS)
+ goto missing_value;
+ if (value == 0.)
+ goto zero;
+
+ /* Make VALUE positive. */
+ if (value < 0)
+ {
+ value = -value;
+ negative = true;
+ }
+ else
+ negative = false;
+
+ /* Adjust VALUE to roughly 30**3, by shifting the trigesimal
+ point left or right as necessary. We approximate the
+ base-30 exponent by obtaining the base-2 exponent, then
+ multiplying by log30(2). This approximation is sufficient
+ to ensure that the adjusted VALUE is always in the range
+ 0...30**6, an invariant of the loop below. */
+ errno = 0;
+ base_2_sig = frexp (value, &base_2_exp);
+ if (errno != 0 || !finite (base_2_sig))
+ goto missing_value;
+ if (base_2_exp == 0 && base_2_sig == 0.)
+ goto zero;
+ /* 20379 / 100000 approximates log30(2). Reject exponents so
+ large that base_2_exp * 20379 would fall outside the range
+ of an int. */
+ if (base_2_exp <= INT_MIN / 20379L || base_2_exp >= INT_MAX / 20379L)
+ goto missing_value;
+ trig_places = (base_2_exp * 20379L / 100000L) + CHUNK_SIZE / 2;
+ value *= pow30 (CHUNK_SIZE - trig_places);
+
+ /* Dump all the trigs to buffer[], CHUNK_SIZE at a time. */
+ trigs = buffer;
+ trig_cnt = 0;
+ for (trigs_to_output = DIV_RND_UP (DBL_DIG * 2, 3) + 1 + (CHUNK_SIZE / 2);
+ trigs_to_output > 0;
+ trigs_to_output -= CHUNK_SIZE)
+ {
+ long chunk;
+ int trigs_left;
+
+ /* The current chunk is just the integer part of VALUE,
+ truncated to the nearest integer. The chunk fits in a
+ long. */
+ chunk = value;
+ assert (pow30 (CHUNK_SIZE) <= LONG_MAX);
+ assert (chunk >= 0 && chunk < pow30 (CHUNK_SIZE));
+
+ value -= chunk;
+
+ /* Append the chunk, in base 30, to trigs[]. */
+ for (trigs_left = CHUNK_SIZE; chunk > 0 && trigs_left > 0; )
+ {
+ trigs[trig_cnt + --trigs_left] = chunk % 30;
+ chunk /= 30;
+ }
+ while (trigs_left > 0)
+ trigs[trig_cnt + --trigs_left] = 0;
+ trig_cnt += CHUNK_SIZE;
+
+ /* Proceed to the next chunk. */
+ if (value == 0.)
+ break;
+ /* Shift the next CHUNK_SIZE trigesimals of the remaining
+ fraction into the integer part. */
+ value *= pow (LDBASE, CHUNK_SIZE);
+ }
+
+ /* Strip leading zeros. */
+ while (trig_cnt > 1 && *trigs == 0)
+ {
+ trigs++;
+ trig_cnt--;
+ trig_places--;
+ }
+
+ /* Round to requested precision, conservatively estimating the
+ required base-30 precision as 2/3 of the base-10 precision
+ (log30(10) = .68). */
+ assert (base_10_precision > 0);
+ if (base_10_precision > LDBL_DIG)
+ base_10_precision = LDBL_DIG;
+ base_30_precision = DIV_RND_UP (base_10_precision * 2, 3);
+ if (trig_cnt > base_30_precision)
+ {
+ if (should_round_up (trigs + base_30_precision,
+ trig_cnt - base_30_precision))
+ {
+ /* Try to round up. */
+ if (try_round_up (trigs, base_30_precision))
+ {
+ /* Rounding up worked. */
+ trig_cnt = base_30_precision;
+ }
+ else
+ {
+ /* Couldn't round up because we ran out of trigs to
+ carry into. Do the carry here instead. */
+ *trigs = 1;
+ trig_cnt = 1;
+ trig_places++;
+ }
+ }
+ else
+ {
+ /* Round down. */
+ trig_cnt = base_30_precision;
+ }
+ }
+ else
+ {
+ /* No rounding required: fewer digits available than
+ requested. */
+ }
+
+ /* Strip trailing zeros. */
+ while (trig_cnt > 1 && trigs[trig_cnt - 1] == 0)
+ trig_cnt--;
+
+ /* Write output. Conventional notation is chosen when it needs
+ at most one zero between the point and the first trig and
+ at most two zeros after the last trig; otherwise the trigs
+ are written as an integer followed by a signed trigesimal
+ exponent. */
+ if (negative)
+ *output++ = '-';
+ if (trig_places >= -1 && trig_places < trig_cnt + 3)
+ {
+ /* Use conventional notation. */
+ format_trig_digits (output, trigs, trig_cnt, trig_places);
+ }
+ else
+ {
+ /* Use scientific notation. */
+ char *op;
+ op = format_trig_digits (output, trigs, trig_cnt, trig_cnt);
+ op = format_trig_int (trig_places - trig_cnt, true, op);
+ }
+ return;
+
+ zero:
+ strcpy (output, "0");
+ return;
+
+ missing_value:
+ strcpy (output, "*.");
+ return;
+}