X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fpfm-write.c;h=be9812c5b365ca26724f16eb5910abd5138c4b2a;hb=3417680e253b1bfc4519347ef06536378026be2a;hp=8117b5c51ff01167c658f5e6bf40a2e9ccad90fe;hpb=4fdeb2145d081ff1b84e3f6c99f9d1c048c0d64a;p=pspp diff --git a/src/pfm-write.c b/src/pfm-write.c index 8117b5c51f..be9812c5b3 100644 --- a/src/pfm-write.c +++ b/src/pfm-write.c @@ -22,24 +22,31 @@ #include "error.h" #include #include +#include #include #include #include #include +#include #include +#include #include "alloc.h" #include "case.h" #include "dictionary.h" #include "error.h" #include "file-handle.h" -#include "gmp.h" #include "hash.h" #include "magic.h" +#include "misc.h" +#include "stat-macros.h" #include "str.h" #include "value-labels.h" #include "var.h" #include "version.h" +#include "gettext.h" +#define _(msgid) gettext (msgid) + #include "debug-print.h" /* Portable file writer. */ @@ -52,6 +59,8 @@ struct pfm_writer size_t var_cnt; /* Number of variables. */ struct pfm_var *vars; /* Variables. */ + + int digits; /* Digits of precision. */ }; /* A variable to write to the portable file. */ @@ -64,38 +73,61 @@ struct pfm_var static int buf_write (struct pfm_writer *, const void *, size_t); static int write_header (struct pfm_writer *); static int write_version_data (struct pfm_writer *); -static int write_variables (struct pfm_writer *, const struct dictionary *); +static int write_variables (struct pfm_writer *, struct dictionary *); static int write_value_labels (struct pfm_writer *, const struct dictionary *); -/* Writes the dictionary DICT to portable file HANDLE. Returns - nonzero only if successful. */ +static void format_trig_double (long double, int base_10_precision, char[]); +static char *format_trig_int (int, bool force_sign, char[]); + +/* Returns default options for writing a portable file. */ +struct pfm_write_options +pfm_writer_default_options (void) +{ + struct pfm_write_options opts; + opts.create_writeable = true; + opts.type = PFM_COMM; + opts.digits = DBL_DIG; + return opts; +} + +/* Writes the dictionary DICT to portable file HANDLE according + to the given OPTS. Returns nonzero only if successful. DICT + will not be modified, except to assign short names. */ struct pfm_writer * -pfm_open_writer (struct file_handle *fh, const struct dictionary *dict) +pfm_open_writer (struct file_handle *fh, struct dictionary *dict, + struct pfm_write_options opts) { struct pfm_writer *w = NULL; + mode_t mode; + int fd; size_t i; + /* Create file. */ + mode = S_IRUSR | S_IRGRP | S_IROTH; + if (opts.create_writeable) + mode |= S_IWUSR | S_IWGRP | S_IWOTH; + fd = open (handle_get_filename (fh), O_WRONLY | O_CREAT | O_TRUNC, mode); + if (fd < 0) + goto open_error; + + /* Open file handle. */ if (!fh_open (fh, "portable file", "we")) goto error; - - /* Open the physical disk file. */ + + /* Initialize data structures. */ w = xmalloc (sizeof *w); w->fh = fh; - w->file = fopen (handle_get_filename (fh), "wb"); + w->file = fdopen (fd, "w"); + if (w->file == NULL) + { + close (fd); + goto open_error; + } + w->lc = 0; w->var_cnt = 0; w->vars = NULL; - /* Check that file create succeeded. */ - if (w->file == NULL) - { - msg (ME, _("An error occurred while opening \"%s\" for writing " - "as a portable file: %s."), - handle_get_filename (fh), strerror (errno)); - err_cond_fail (); - goto error; - } - w->var_cnt = dict_get_var_cnt (dict); w->vars = xmalloc (sizeof *w->vars * w->var_cnt); for (i = 0; i < w->var_cnt; i++) @@ -106,6 +138,14 @@ pfm_open_writer (struct file_handle *fh, const struct dictionary *dict) pv->fv = dv->fv; } + w->digits = opts.digits; + if (w->digits < 1) + { + msg (ME, _("Invalid decimal digits count %d. Treating as %d."), + w->digits, DBL_DIG); + w->digits = DBL_DIG; + } + /* Write file header. */ if (!write_header (w) || !write_version_data (w) @@ -116,9 +156,16 @@ pfm_open_writer (struct file_handle *fh, const struct dictionary *dict) return w; -error: + error: pfm_close_writer (w); return NULL; + + open_error: + msg (ME, _("An error occurred while opening \"%s\" for writing " + "as a portable file: %s."), + handle_get_filename (fh), strerror (errno)); + err_cond_fail (); + goto error; } /* Write NBYTES starting at BUF to the portable file represented by @@ -161,107 +208,18 @@ buf_write (struct pfm_writer *w, const void *buf_, size_t nbytes) static int write_float (struct pfm_writer *w, double d) { - int neg = 0; - char *mantissa; - int mantissa_len; - mp_exp_t exponent; - char *buf, *cp; - int success; - - if (d < 0.) - { - d = -d; - neg = 1; - } - - if (d == fabs (SYSMIS) || d == HUGE_VAL) - return buf_write (w, "*.", 2); - - /* Use GNU libgmp2 to convert D into base-30. */ - { - mpf_t f; - - mpf_init_set_d (f, d); - mantissa = mpf_get_str (NULL, &exponent, 30, 0, f); - mpf_clear (f); - - for (cp = mantissa; *cp; cp++) - *cp = toupper (*cp); - } - - /* Choose standard or scientific notation. */ - mantissa_len = (int) strlen (mantissa); - cp = buf = local_alloc (mantissa_len + 32); - if (neg) - *cp++ = '-'; - if (mantissa_len == 0) - *cp++ = '0'; - else if (exponent < -4 || exponent > (mp_exp_t) mantissa_len) - { - /* Scientific notation. */ - *cp++ = mantissa[0]; - *cp++ = '.'; - cp = stpcpy (cp, &mantissa[1]); - cp = spprintf (cp, "%+ld", (long) (exponent - 1)); - } - else if (exponent <= 0) - { - /* Standard notation, D <= 1. */ - *cp++ = '.'; - memset (cp, '0', -exponent); - cp += -exponent; - cp = stpcpy (cp, mantissa); - } - else - { - /* Standard notation, D > 1. */ - memcpy (cp, mantissa, exponent); - cp += exponent; - *cp++ = '.'; - cp = stpcpy (cp, &mantissa[exponent]); - } - *cp++ = '/'; - - success = buf_write (w, buf, cp - buf); - local_free (buf); - free (mantissa); - return success; + char buffer[64]; + format_trig_double (d, floor (d) == d ? DBL_DIG : w->digits, buffer); + return buf_write (w, buffer, strlen (buffer)) && buf_write (w, "/", 1); } /* Write N to the portable file as an integer field, and return success. */ static int write_int (struct pfm_writer *w, int n) { - char buf[64]; - char *bp = &buf[64]; - int neg = 0; - - *--bp = '/'; - - if (n < 0) - { - n = -n; - neg = 1; - } - - do - { - int r = n % 30; - - /* PORTME: character codes. */ - if (r < 10) - *--bp = r + '0'; - else - *--bp = r - 10 + 'A'; - - n /= 30; - } - while (n > 0); - - if (neg) - *--bp = '-'; - - return buf_write (w, bp, &buf[64] - bp); + char buffer[64]; + format_trig_int (n, false, buffer); + return buf_write (w, buffer, strlen (buffer)) && buf_write (w, "/", 1); } /* Write S to the portable file as a string field. */ @@ -288,7 +246,7 @@ write_header (struct pfm_writer *w) { /* PORTME: Translation table from SPSS character code to this computer's native character code (which is probably ASCII). */ - static const unsigned char spss2ascii[256] = + static const char spss2ascii[256] = { "0000000000000000000000000000000000000000000000000000000000000000" "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ." @@ -368,9 +326,11 @@ write_value (struct pfm_writer *w, union value *v, struct variable *vv) /* Write variable records, and return success. */ static int -write_variables (struct pfm_writer *w, const struct dictionary *dict) +write_variables (struct pfm_writer *w, struct dictionary *dict) { int i; + + dict_assign_short_names (dict); if (!buf_write (w, "4", 1) || !write_int (w, dict_get_var_cnt (dict)) || !write_int (w, 161)) @@ -378,25 +338,43 @@ write_variables (struct pfm_writer *w, const struct dictionary *dict) for (i = 0; i < dict_get_var_cnt (dict); i++) { - static const char *miss_types[MISSING_COUNT] = - { - "", "8", "88", "888", "B ", "9", "A", "B 8", "98", "A8", - }; - - const char *m; - int j; - struct variable *v = dict_get_var (dict, i); + struct missing_values mv; if (!buf_write (w, "7", 1) || !write_int (w, v->width) - || !write_string (w, v->name) + || !write_string (w, v->short_name) || !write_format (w, &v->print) || !write_format (w, &v->write)) return 0; - for (m = miss_types[v->miss_type], j = 0; j < (int) strlen (m); j++) - if ((m[j] != ' ' && !buf_write (w, &m[j], 1)) - || !write_value (w, &v->missing[j], v)) - return 0; + /* Write missing values. */ + mv_copy (&mv, &v->miss); + while (mv_has_range (&mv)) + { + double x, y; + mv_pop_range (&mv, &x, &y); + if (x == LOWEST) + { + if (!buf_write (w, "9", 1) || !write_float (w, y)) + return 0; + } + else if (y == HIGHEST) + { + if (!buf_write (w, "A", 1) || !write_float (w, y)) + return 0; + } + else { + if (!buf_write (w, "B", 1) || !write_float (w, x) + || !write_float (w, y)) + return 0; + } + } + while (mv_has_value (&mv)) + { + union value value; + mv_pop_value (&mv, &value); + if (!buf_write (w, "8", 1) || !write_value (w, &value, v)) + return 0; + } if (v->label && (!buf_write (w, "C", 1) || !write_string (w, v->label))) return 0; @@ -422,7 +400,7 @@ write_value_labels (struct pfm_writer *w, const struct dictionary *dict) if (!buf_write (w, "D", 1) || !write_int (w, 1) - || !write_string (w, v->name) + || !write_string (w, v->short_name) || !write_int (w, val_labs_count (v->val_labs))) return 0; @@ -494,3 +472,389 @@ pfm_close_writer (struct pfm_writer *w) free (w->vars); free (w); } + +/* Base-30 conversion. + + Portable files represent numbers in base-30 format, so we need + to be able to convert real and integer number to that base. + Older versions of PSPP used libgmp to do so, but this added a + big library dependency to do just one thing. Now we do it + ourselves internally. + + Important fact: base 30 is called "trigesimal". */ + +/* Conversion base. */ +#define BASE 30 /* As an integer. */ +#define LDBASE ((long double) BASE) /* As a long double. */ + +/* This is floor(log30(2**31)), the minimum number of trigesimal + digits that a `long int' can hold. */ +#define CHUNK_SIZE 6 + +/* pow_tab[i] = pow (30, pow (2, i)) */ +static long double pow_tab[16]; + +/* Initializes pow_tab[]. */ +static void +init_pow_tab (void) +{ + static bool did_init = false; + long double power; + size_t i; + + /* Only initialize once. */ + if (did_init) + return; + did_init = true; + + /* Set each element of pow_tab[] until we run out of numerical + range. */ + i = 0; + for (power = 30.0L; power < DBL_MAX; power *= power) + { + assert (i < sizeof pow_tab / sizeof *pow_tab); + pow_tab[i++] = power; + } +} + +/* Returns 30**EXPONENT, for 0 <= EXPONENT <= log30(DBL_MAX). */ +static long double +pow30_nonnegative (int exponent) +{ + long double power; + int i; + + assert (exponent >= 0); + assert (exponent < 1L << (sizeof pow_tab / sizeof *pow_tab)); + + power = 1.L; + for (i = 0; exponent > 0; exponent >>= 1, i++) + if (exponent & 1) + power *= pow_tab[i]; + + return power; +} + +/* Returns 30**EXPONENT, for log30(DBL_MIN) <= EXPONENT <= + log30(DBL_MAX). */ +static long double +pow30 (int exponent) +{ + if (exponent >= 0) + return pow30_nonnegative (exponent); + else + return 1.L / pow30_nonnegative (-exponent); +} + +/* Returns the character corresponding to TRIG. */ +static int +trig_to_char (int trig) +{ + assert (trig >= 0 && trig < 30); + return "0123456789ABCDEFGHIJKLMNOPQRST"[trig]; +} + +/* Formats the TRIG_CNT trigs in TRIGS[], writing them as + null-terminated STRING. The trigesimal point is inserted + after TRIG_PLACES characters have been printed, if necessary + adding extra zeros at either end for correctness. Returns the + character after the formatted number. */ +static char * +format_trig_digits (char *string, + const char trigs[], int trig_cnt, int trig_places) +{ + if (trig_places < 0) + { + *string++ = '.'; + while (trig_places++ < 0) + *string++ = '0'; + trig_places = -1; + } + while (trig_cnt-- > 0) + { + if (trig_places-- == 0) + *string++ = '.'; + *string++ = trig_to_char (*trigs++); + } + while (trig_places-- > 0) + *string++ = '0'; + *string = '\0'; + return string; +} + +/* Helper function for format_trig_int() that formats VALUE as a + trigesimal integer at CP. VALUE must be nonnegative. + Returns the character following the formatted integer. */ +static char * +recurse_format_trig_int (char *cp, int value) +{ + int trig = value % BASE; + value /= BASE; + if (value > 0) + cp = recurse_format_trig_int (cp, value); + *cp++ = trig_to_char (trig); + return cp; +} + +/* Formats VALUE as a trigesimal integer in null-terminated + STRING[]. VALUE must be in the range -DBL_MAX...DBL_MAX. If + FORCE_SIGN is true, a sign is always inserted; otherwise, a + sign is only inserted if VALUE is negative. */ +static char * +format_trig_int (int value, bool force_sign, char string[]) +{ + /* Insert sign. */ + if (value < 0) + { + *string++ = '-'; + value = -value; + } + else if (force_sign) + *string++ = '+'; + + /* Format integer. */ + string = recurse_format_trig_int (string, value); + *string = '\0'; + return string; +} + +/* Determines whether the TRIG_CNT trigesimals in TRIGS[] warrant + rounding up or down. Returns true if TRIGS[] represents a + value greater than half, false if less than half. If TRIGS[] + is exactly half, examines TRIGS[-1] and returns true if odd, + false if even ("round to even"). */ +static bool +should_round_up (const char trigs[], int trig_cnt) +{ + assert (trig_cnt > 0); + + if (*trigs < BASE / 2) + { + /* Less than half: round down. */ + return false; + } + else if (*trigs > BASE / 2) + { + /* Greater than half: round up. */ + return true; + } + else + { + /* Approximately half: look more closely. */ + int i; + for (i = 1; i < trig_cnt; i++) + if (trigs[i] > 0) + { + /* Slightly greater than half: round up. */ + return true; + } + + /* Exactly half: round to even. */ + return trigs[-1] % 2; + } +} + +/* Rounds up the rightmost trig in the TRIG_CNT trigs in TRIGS[], + carrying to the left as necessary. Returns true if + successful, false on failure (due to a carry out of the + leftmost position). */ +static bool +try_round_up (char *trigs, int trig_cnt) +{ + while (trig_cnt > 0) + { + char *round_trig = trigs + --trig_cnt; + if (*round_trig != BASE - 1) + { + /* Round this trig up to the next value. */ + ++*round_trig; + return true; + } + + /* Carry over to the next trig to the left. */ + *round_trig = 0; + } + + /* Ran out of trigs to carry. */ + return false; +} + +/* Converts VALUE to trigesimal format in string OUTPUT[] with the + equivalent of at least BASE_10_PRECISION decimal digits of + precision. The output format may use conventional or + scientific notation. Missing, infinite, and extreme values + are represented with "*.". */ +static void +format_trig_double (long double value, int base_10_precision, char output[]) +{ + /* Original VALUE was negative? */ + bool negative; + + /* Number of significant trigesimals. */ + int base_30_precision; + + /* Base-2 significand and exponent for original VALUE. */ + double base_2_sig; + int base_2_exp; + + /* VALUE as a set of trigesimals. */ + char buffer[DBL_DIG + 16]; + char *trigs; + int trig_cnt; + + /* Number of trigesimal places for trigs. + trigs[0] has coefficient 30**(trig_places - 1), + trigs[1] has coefficient 30**(trig_places - 2), + and so on. + In other words, the trigesimal point is just before trigs[0]. + */ + int trig_places; + + /* Number of trigesimal places left to write into BUFFER. */ + int trigs_to_output; + + init_pow_tab (); + + /* Handle special cases. */ + if (value == SYSMIS) + goto missing_value; + if (value == 0.) + goto zero; + + /* Make VALUE positive. */ + if (value < 0) + { + value = -value; + negative = true; + } + else + negative = false; + + /* Adjust VALUE to roughly 30**3, by shifting the trigesimal + point left or right as necessary. We approximate the + base-30 exponent by obtaining the base-2 exponent, then + multiplying by log30(2). This approximation is sufficient + to ensure that the adjusted VALUE is always in the range + 0...30**6, an invariant of the loop below. */ + errno = 0; + base_2_sig = frexp (value, &base_2_exp); + if (errno != 0 || !finite (base_2_sig)) + goto missing_value; + if (base_2_exp == 0 && base_2_sig == 0.) + goto zero; + if (base_2_exp <= INT_MIN / 20379L || base_2_exp >= INT_MAX / 20379L) + goto missing_value; + trig_places = (base_2_exp * 20379L / 100000L) + CHUNK_SIZE / 2; + value *= pow30 (CHUNK_SIZE - trig_places); + + /* Dump all the trigs to buffer[], CHUNK_SIZE at a time. */ + trigs = buffer; + trig_cnt = 0; + for (trigs_to_output = DIV_RND_UP (DBL_DIG * 2, 3) + 1 + (CHUNK_SIZE / 2); + trigs_to_output > 0; + trigs_to_output -= CHUNK_SIZE) + { + long chunk; + int trigs_left; + + /* The current chunk is just the integer part of VALUE, + truncated to the nearest integer. The chunk fits in a + long. */ + chunk = value; + assert (pow30 (CHUNK_SIZE) <= LONG_MAX); + assert (chunk >= 0 && chunk < pow30 (CHUNK_SIZE)); + + value -= chunk; + + /* Append the chunk, in base 30, to trigs[]. */ + for (trigs_left = CHUNK_SIZE; chunk > 0 && trigs_left > 0; ) + { + trigs[trig_cnt + --trigs_left] = chunk % 30; + chunk /= 30; + } + while (trigs_left > 0) + trigs[trig_cnt + --trigs_left] = 0; + trig_cnt += CHUNK_SIZE; + + /* Proceed to the next chunk. */ + if (value == 0.) + break; + value *= pow (LDBASE, CHUNK_SIZE); + } + + /* Strip leading zeros. */ + while (trig_cnt > 1 && *trigs == 0) + { + trigs++; + trig_cnt--; + trig_places--; + } + + /* Round to requested precision, conservatively estimating the + required base-30 precision as 2/3 of the base-10 precision + (log30(10) = .68). */ + assert (base_10_precision > 0); + if (base_10_precision > LDBL_DIG) + base_10_precision = LDBL_DIG; + base_30_precision = DIV_RND_UP (base_10_precision * 2, 3); + if (trig_cnt > base_30_precision) + { + if (should_round_up (trigs + base_30_precision, + trig_cnt - base_30_precision)) + { + /* Try to round up. */ + if (try_round_up (trigs, base_30_precision)) + { + /* Rounding up worked. */ + trig_cnt = base_30_precision; + } + else + { + /* Couldn't round up because we ran out of trigs to + carry into. Do the carry here instead. */ + *trigs = 1; + trig_cnt = 1; + trig_places++; + } + } + else + { + /* Round down. */ + trig_cnt = base_30_precision; + } + } + else + { + /* No rounding required: fewer digits available than + requested. */ + } + + /* Strip trailing zeros. */ + while (trig_cnt > 1 && trigs[trig_cnt - 1] == 0) + trig_cnt--; + + /* Write output. */ + if (negative) + *output++ = '-'; + if (trig_places >= -1 && trig_places < trig_cnt + 3) + { + /* Use conventional notation. */ + format_trig_digits (output, trigs, trig_cnt, trig_places); + } + else + { + /* Use scientific notation. */ + char *op; + op = format_trig_digits (output, trigs, trig_cnt, trig_cnt); + op = format_trig_int (trig_places - trig_cnt, true, op); + } + return; + + zero: + strcpy (output, "0"); + return; + + missing_value: + strcpy (output, "*."); + return; +}