1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include "por-file-writer.h"
35 #include "dictionary.h"
37 #include "file-handle-def.h"
41 #include "stat-macros.h"
43 #include "value-labels.h"
48 #define _(msgid) gettext (msgid)
50 #include "debug-print.h"
52 /* Portable file writer. */
55 struct file_handle *fh; /* File handle. */
56 FILE *file; /* File stream. */
58 int lc; /* Number of characters on this line so far. */
60 size_t var_cnt; /* Number of variables. */
61 struct pfm_var *vars; /* Variables. */
63 int digits; /* Digits of precision. */
66 /* A variable to write to the portable file. */
69 int width; /* 0=numeric, otherwise string var width. */
70 int fv; /* Starting case index. */
73 static void buf_write (struct pfm_writer *, const void *, size_t);
74 static void write_header (struct pfm_writer *);
75 static void write_version_data (struct pfm_writer *);
76 static void write_variables (struct pfm_writer *, struct dictionary *);
77 static void write_value_labels (struct pfm_writer *,
78 const struct dictionary *);
80 static void format_trig_double (long double, int base_10_precision, char[]);
81 static char *format_trig_int (int, bool force_sign, char[]);
83 /* Returns default options for writing a portable file. */
84 struct pfm_write_options
85 pfm_writer_default_options (void)
87 struct pfm_write_options opts;
/* Default: ask for write permission bits on the created file;
   pfm_open_writer() consults this flag when it builds the mode
   passed to open(). */
88 opts.create_writeable = true;
/* Default precision: DBL_DIG decimal digits. */
90 opts.digits = DBL_DIG;
94 /* Writes the dictionary DICT to portable file FH according
95 to the given OPTS. Returns nonzero only if successful. DICT
96 will not be modified, except to assign short names. */
98 pfm_open_writer (struct file_handle *fh, struct dictionary *dict,
99 struct pfm_write_options opts)
101 struct pfm_writer *w = NULL;
/* Permission bits requested for a newly created file: read for
   user, group, and other, plus write for all three when OPTS
   asks for a writeable file.  (open() applies the process umask
   to these bits.) */
107 mode = S_IRUSR | S_IRGRP | S_IROTH;
108 if (opts.create_writeable)
109 mode |= S_IWUSR | S_IWGRP | S_IWOTH;
/* Create or truncate the file for writing.  The descriptor is
   wrapped in a stdio stream by fdopen() further down. */
110 fd = open (fh_get_filename (fh), O_WRONLY | O_CREAT | O_TRUNC, mode);
114 /* Open file handle. */
/* NOTE(review): FD is already open when fh_open() is tried and W
   is still NULL; confirm that the failure path (not visible
   here) closes FD. */
115 if (!fh_open (fh, FH_REF_FILE, "portable file", "we"))
118 /* Initialize data structures. */
119 w = xmalloc (sizeof *w);
/* NOTE(review): no check of the fdopen() result is visible here;
   w->file is later handed to ferror() and fwrite(). */
121 w->file = fdopen (fd, "w");
/* Record each dictionary variable's width in the writer's own
   per-variable table. */
132 w->var_cnt = dict_get_var_cnt (dict);
133 w->vars = xnmalloc (w->var_cnt, sizeof *w->vars);
134 for (i = 0; i < w->var_cnt; i++)
136 const struct variable *dv = dict_get_var (dict, i);
137 struct pfm_var *pv = &w->vars[i];
138 pv->width = dv->width;
/* Precision requested by the caller; the message below is issued
   when the requested count is rejected (the validity test itself
   is not visible here). */
142 w->digits = opts.digits;
145 msg (ME, _("Invalid decimal digits count %d. Treating as %d."),
150 /* Write file header. */
152 write_version_data (w);
153 write_variables (w, dict);
154 write_value_labels (w, dict);
/* The single byte "F" follows the dictionary records. */
155 buf_write (w, "F", 1);
/* Any stream error accumulated by the writes above is detected
   here in one place. */
156 if (pfm_write_error (w))
161 pfm_close_writer (w);
/* Reports the failure together with the strerror() text for the
   current errno. */
165 msg (ME, _("An error occurred while opening \"%s\" for writing "
166 "as a portable file: %s."),
167 fh_get_filename (fh), strerror (errno));
171 /* Write NBYTES starting at BUF to the portable file represented by
172 W. Break lines properly every 80 characters. */
174 buf_write (struct pfm_writer *w, const void *buf_, size_t nbytes)
/* Byte-typed alias of BUF_, used for the fwrite() calls below. */
176 const char *buf = buf_;
/* Tests the stream's error indicator before writing anything;
   the statement guarded by this test is not visible here. */
178 if (ferror (w->file))
181 assert (buf != NULL);
/* While the pending bytes would reach or pass column 80, write
   just enough to complete the current line (80 - lc bytes) and
   then a CR LF line terminator.  The bookkeeping that advances
   BUF and updates NBYTES and w->lc is not visible here. */
182 while (nbytes + w->lc >= 80)
184 size_t n = 80 - w->lc;
/* NOTE(review): fwrite() return values are not examined; write
   failures are detected through ferror() on the stream. */
187 fwrite (buf, n, 1, w->file);
188 fwrite ("\r\n", 2, 1, w->file);
/* Whatever remains is written on the current line. */
194 fwrite (buf, nbytes, 1, w->file);
199 /* Write D to the portable file as a floating-point field. */
201 write_float (struct pfm_writer *w, double d)
/* Integral values (floor (d) == d) are formatted with DBL_DIG
   digits of precision; all other values use the writer's
   configured precision, w->digits. */
204 format_trig_double (d, floor (d) == d ? DBL_DIG : w->digits, buffer);
205 buf_write (w, buffer, strlen (buffer));
/* The field is terminated with a slash. */
206 buf_write (w, "/", 1);
209 /* Write N to the portable file as an integer field. */
211 write_int (struct pfm_writer *w, int n)
/* force_sign is false, so format_trig_int() emits a sign only
   when N is negative. */
214 format_trig_int (n, false, buffer);
215 buf_write (w, buffer, strlen (buffer));
/* The field is terminated with a slash. */
216 buf_write (w, "/", 1);
219 /* Write S to the portable file as a string field. */
221 write_string (struct pfm_writer *w, const char *s)
/* S must be a null-terminated string: its length is taken with
   strlen(). */
223 size_t n = strlen (s);
/* The length goes out first as an integer field; the cast
   narrows size_t to int.
   NOTE(review): the statement that writes the string's bytes is
   not visible here. */
224 write_int (w, (int) n);
228 /* Write file header. */
230 write_header (struct pfm_writer *w)
/* 256-entry table, written verbatim to the file below.  The four
   string literals supply exactly 64 characters each. */
232 static const char spss2ascii[256] =
234 "0000000000000000000000000000000000000000000000000000000000000000"
235 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ."
236 "<(+|&[]!$*);^-/|,%_>?`:$@'=\"000000~-0000123456789000-()0{}\\00000"
237 "0000000000000000000000000000000000000000000000000000000000000000"
/* Five copies of a 40-byte banner open the file.
   NOTE(review): each call asks buf_write() for 40 bytes, but the
   literal as it appears here is shorter than 40 characters,
   which would read past the end of the literal.  Confirm that
   the literal is padded with spaces to exactly 40 characters. */
241 for (i = 0; i < 5; i++)
242 buf_write (w, "ASCII SPSS PORT FILE ", 40);
/* The translation table (256 bytes), then the 8-byte tag. */
244 buf_write (w, spss2ascii, 256);
245 buf_write (w, "SPSSPORT", 8);
248 /* Writes version, date, and identification records. */
250 write_version_data (struct pfm_writer *w)
/* When the current time cannot be obtained, fields of a local
   struct tm are zeroed as a fallback; otherwise the time is
   broken down with localtime(). */
258 if ((time_t) -1 == time (&t))
260 tm.tm_sec = tm.tm_min = tm.tm_hour = tm.tm_mon = tm.tm_year = 0;
/* NOTE(review): localtime() may return a null pointer; no check
   of TMP before the dereferences below is visible here. */
265 tmp = localtime (&t);
/* Date as YYYYMMDD and time as HHMMSS.
   NOTE(review): sprintf() is unbounded; the sizes of date_str and
   time_str are not visible here -- confirm they hold 8 and 6
   characters plus the terminator. */
267 sprintf (date_str, "%04d%02d%02d",
268 tmp->tm_year + 1900, tmp->tm_mon + 1, tmp->tm_mday);
269 sprintf (time_str, "%02d%02d%02d", tmp->tm_hour, tmp->tm_min, tmp->tm_sec);
/* Tag "A", followed by the date and time strings. */
270 buf_write (w, "A", 1);
271 write_string (w, date_str);
272 write_string (w, time_str);
274 /* Product identification. */
275 buf_write (w, "1", 1);
276 write_string (w, version);
278 /* Subproduct identification. */
279 buf_write (w, "3", 1);
280 write_string (w, host_system);
283 /* Write format F to the portable file represented by W. */
285 write_format (struct pfm_writer *w, struct fmt_spec *f)
/* The format type is translated through the formats[] table to
   its `spss' code before being written.
   NOTE(review): only this write is visible here; any further
   fields of F are written by statements not shown. */
287 write_int (w, formats[f->type].spss);
292 /* Write value V for variable VV to the portable file represented by W. */
294 write_value (struct pfm_writer *w, union value *v, struct variable *vv)
/* Numeric variables are written as a floating-point field taken
   from v->f. */
296 if (vv->type == NUMERIC)
297 write_float (w, v->f);
/* Otherwise the variable's width is written as an integer field,
   followed by exactly that many raw bytes from v->s; no null
   terminator is relied upon. */
300 write_int (w, vv->width);
301 buf_write (w, v->s, vv->width);
305 /* Write variable records. */
307 write_variables (struct pfm_writer *w, struct dictionary *dict)
/* Assigns short names in DICT; those names are what the variable
   records below write out. */
311 dict_assign_short_names (dict);
/* Tag "4", followed by the number of variables. */
313 buf_write (w, "4", 1);
314 write_int (w, dict_get_var_cnt (dict));
317 for (i = 0; i < dict_get_var_cnt (dict); i++)
319 struct variable *v = dict_get_var (dict, i);
320 struct missing_values mv;
/* Tag "7" opens a variable record: width, short name, then the
   print and write formats. */
322 buf_write (w, "7", 1);
323 write_int (w, v->width);
324 write_string (w, v->short_name);
325 write_format (w, &v->print);
326 write_format (w, &v->write);
328 /* Write missing values. */
/* Works on a copy, MV, so that popping ranges and values below
   does not consume the variable's own v->miss. */
329 mv_copy (&mv, &v->miss);
/* Each popped range [x, y] is written under tag "9", "A", or "B".
   "A" is used when y == HIGHEST; the conditions selecting "9"
   and "B" are not visible here. */
330 while (mv_has_range (&mv))
333 mv_pop_range (&mv, &x, &y);
336 buf_write (w, "9", 1);
339 else if (y == HIGHEST)
341 buf_write (w, "A", 1);
346 buf_write (w, "B", 1);
/* Each discrete missing value is written under tag "8". */
351 while (mv_has_value (&mv))
354 mv_pop_value (&mv, &value);
355 buf_write (w, "8", 1);
356 write_value (w, &value, v);
/* Tag "C", followed by the variable label.
   NOTE(review): write_string() calls strlen() on its argument;
   confirm that a guard for a null v->label (not visible here)
   precedes these two statements. */
361 buf_write (w, "C", 1);
362 write_string (w, v->label);
367 /* Write value labels to disk. FIXME: Inefficient. */
369 write_value_labels (struct pfm_writer *w, const struct dictionary *dict)
373 for (i = 0; i < dict_get_var_cnt (dict); i++)
375 struct val_labs_iterator *j;
376 struct variable *v = dict_get_var (dict, i);
/* Tests for a variable without value labels; the statement
   guarded by this test is not visible here. */
379 if (!val_labs_count (v->val_labs))
/* Tag "D" opens a value-label record. */
382 buf_write (w, "D", 1);
/* The variable's short name, then the number of labels that
   follow. */
384 write_string (w, v->short_name);
385 write_int (w, val_labs_count (v->val_labs));
/* One (value, label) pair per label, in the order produced by
   val_labs_first_sorted() and val_labs_next(). */
387 for (vl = val_labs_first_sorted (v->val_labs, &j); vl != NULL;
388 vl = val_labs_next (v->val_labs, &j))
390 write_value (w, &vl->value, v);
391 write_string (w, vl->label);
396 /* Writes case C to the portable file represented by W. */
398 pfm_write_case (struct pfm_writer *w, const struct ccase *c)
/* Tests the stream's error indicator before writing the case;
   the statement guarded by this test is not visible here. */
402 if (ferror (w->file))
/* One field per variable recorded when the writer was opened. */
405 for (i = 0; i < w->var_cnt; i++)
407 struct pfm_var *v = &w->vars[i];
/* A numeric value is written as a floating-point field.  A
   string value is written as its width followed by that many
   raw bytes.  Both are read from case index v->fv.  The test
   choosing between the two forms is not visible here;
   presumably it is v->width == 0 for numeric. */
410 write_float (w, case_num (c, v->fv));
413 write_int (w, v->width);
414 buf_write (w, case_str (c, v->fv), v->width);
/* Reports success only if the stream's error indicator is still
   clear after all fields have been written. */
418 return !pfm_write_error (w);
422 pfm_write_error (const struct pfm_writer *w)
/* Nonzero if the stdio error indicator is set on W's output
   stream, zero otherwise. */
424 return ferror (w->file);
427 /* Closes a portable file after we're done with it.
428 Returns true if successful, false if an I/O error occurred. */
430 pfm_close_writer (struct pfm_writer *w)
/* Pad the final line with 'Z' characters: 80 - lc of them, or a
   full 80 if lc has somehow reached 80. */
441 memset (buf, 'Z', sizeof buf);
442 buf_write (w, buf, w->lc >= 80 ? 80 : 80 - w->lc);
/* Capture the stream's error state before closing it;
   fclose() is checked separately just below. */
444 ok = !pfm_write_error (w);
445 if (fclose (w->file) == EOF)
/* Error report naming the file; the condition guarding this
   message is not visible here. */
449 msg (ME, _("An I/O error occurred writing portable file \"%s\"."),
450 fh_get_filename (w->fh));
/* Release the file handle with the same type string and mode
   that pfm_open_writer() passed to fh_open(). */
453 fh_close (w->fh, "portable file", "we");
461 /* Base-30 conversion.
463 Portable files represent numbers in base-30 format, so we need
464 to be able to convert real and integer numbers to that base.
465 Older versions of PSPP used libgmp to do so, but this added a
466 big library dependency to do just one thing. Now we do it
467 ourselves internally.
469 Important fact: base 30 is called "trigesimal". */
471 /* Conversion base. */
472 #define BASE 30 /* As an integer. */
473 #define LDBASE ((long double) BASE) /* As a long double. */
475 /* This is floor(log30(2**31)), the minimum number of trigesimal
476 digits that a `long int' can hold. */
479 /* pow_tab[i] = pow (30, pow (2, i)) */
480 static long double pow_tab[16];
482 /* Initializes pow_tab[]. */
486 static bool did_init = false;
490 /* Only initialize once. */
495 /* Set each element of pow_tab[] until we run out of numerical
498 for (power = 30.0L; power < DBL_MAX; power *= power)
500 assert (i < sizeof pow_tab / sizeof *pow_tab);
501 pow_tab[i++] = power;
505 /* Returns 30**EXPONENT, for 0 <= EXPONENT <= log30(DBL_MAX). */
507 pow30_nonnegative (int exponent)
512 assert (exponent >= 0);
/* EXPONENT must have no bits set at or above the number of
   pow_tab[] entries, since each bit position I indexes
   pow_tab[I]. */
513 assert (exponent < 1L << (sizeof pow_tab / sizeof *pow_tab));
/* Walks the bits of EXPONENT from least to most significant,
   with I tracking the bit position.  The loop body is not
   visible here; presumably it multiplies pow_tab[I] into the
   result for each set bit. */
516 for (i = 0; exponent > 0; exponent >>= 1, i++)
523 /* Returns 30**EXPONENT, for log30(DBL_MIN) <= EXPONENT <=
529 return pow30_nonnegative (exponent);
531 return 1.L / pow30_nonnegative (-exponent);
534 /* Returns the character corresponding to TRIG. */
536 trig_to_char (int trig)
538 assert (trig >= 0 && trig < 30);
/* Digits '0'-'9' stand for values 0-9 and letters 'A'-'T' for
   values 10-29. */
539 return "0123456789ABCDEFGHIJKLMNOPQRST"[trig];
542 /* Formats the TRIG_CNT trigs in TRIGS[], writing them as
543 null-terminated STRING. The trigesimal point is inserted
544 after TRIG_PLACES characters have been printed, if necessary
545 adding extra zeros at either end for correctness. Returns the
546 character after the formatted number. */
548 format_trig_digits (char *string,
549 const char trigs[], int trig_cnt, int trig_places)
/* Runs once for each place by which TRIG_PLACES is negative.
   The loop body is not visible here; per the function's header
   comment, this is where extra leading zeros are added. */
554 while (trig_places++ < 0)
/* Emits every trig as its character.  TRIG_PLACES counts down
   alongside; the statement executed when it reaches zero is not
   visible here (presumably it inserts the trigesimal point). */
558 while (trig_cnt-- > 0)
560 if (trig_places-- == 0)
562 *string++ = trig_to_char (*trigs++);
/* Runs once for each place still remaining after all trigs have
   been emitted; the loop body is not visible here (presumably
   trailing zeros). */
564 while (trig_places-- > 0)
570 /* Helper function for format_trig_int() that formats VALUE as a
571 trigesimal integer at CP. VALUE must be nonnegative.
572 Returns the character following the formatted integer. */
574 recurse_format_trig_int (char *cp, int value)
/* Least-significant trig of VALUE. */
576 int trig = value % BASE;
/* The recursive call runs before this level's trig is stored, so
   trigs are written most-significant first.  The statements
   that reduce VALUE and stop the recursion are not visible
   here. */
579 cp = recurse_format_trig_int (cp, value);
580 *cp++ = trig_to_char (trig);
584 /* Formats VALUE as a trigesimal integer in null-terminated
585 STRING[]. VALUE must be in the range -DBL_MAX...DBL_MAX. If
586 FORCE_SIGN is true, a sign is always inserted; otherwise, a
587 sign is only inserted if VALUE is negative. */
589 format_trig_int (int value, bool force_sign, char string[])
/* Sign handling precedes this point but is not visible here. */
600 /* Format integer. */
/* recurse_format_trig_int() returns the position just past the
   last digit it wrote, so STRING now points there. */
601 string = recurse_format_trig_int (string, value);
606 /* Determines whether the TRIG_CNT trigesimals in TRIGS[] warrant
607 rounding up or down. Returns true if TRIGS[] represents a
608 value greater than half, false if less than half. If TRIGS[]
609 is exactly half, examines TRIGS[-1] and returns true if odd,
610 false if even ("round to even"). */
612 should_round_up (const char trigs[], int trig_cnt)
614 assert (trig_cnt > 0);
/* BASE / 2 is 15.  The first discarded trig decides the outcome
   unless it is exactly 15. */
616 if (*trigs < BASE / 2)
618 /* Less than half: round down. */
621 else if (*trigs > BASE / 2)
623 /* Greater than half: round up. */
628 /* Approximately half: look more closely. */
/* Scans the remaining trigs; the test applied to each one is not
   visible here. */
630 for (i = 1; i < trig_cnt; i++)
633 /* Slightly greater than half: round up. */
637 /* Exactly half: round to even. */
/* trigs[-1] is the trig immediately before the span passed in, so
   TRIGS must point into the interior of a larger array.
   format_trig_double() passes trigs + base_30_precision. */
638 return trigs[-1] % 2;
642 /* Rounds up the rightmost trig in the TRIG_CNT trigs in TRIGS[],
643 carrying to the left as necessary. Returns true if
644 successful, false on failure (due to a carry out of the
645 leftmost position). */
647 try_round_up (char *trigs, int trig_cnt)
/* Points at the rightmost trig not yet processed; TRIG_CNT is
   decremented as part of the same expression. */
651 char *round_trig = trigs + --trig_cnt;
/* BASE - 1 (29) is the largest trig value, so any other value
   can be incremented without a carry. */
652 if (*round_trig != BASE - 1)
654 /* Round this trig up to the next value. */
659 /* Carry over to the next trig to the left. */
663 /* Ran out of trigs to carry. */
667 /* Converts VALUE to trigesimal format in string OUTPUT[] with the
668 equivalent of at least BASE_10_PRECISION decimal digits of
669 precision. The output format may use conventional or
670 scientific notation. Missing, infinite, and extreme values
671 are represented with "*.". */
673 format_trig_double (long double value, int base_10_precision, char output[])
675 /* Original VALUE was negative? */
678 /* Number of significant trigesimals. */
679 int base_30_precision;
681 /* Base-2 significand and exponent for original VALUE. */
685 /* VALUE as a set of trigesimals. */
686 char buffer[DBL_DIG + 16];
690 /* Number of trigesimal places for trigs.
691 trigs[0] has coefficient 30**(trig_places - 1),
692 trigs[1] has coefficient 30**(trig_places - 2),
694 In other words, the trigesimal point is just before trigs[0].
698 /* Number of trigesimal places left to write into BUFFER. */
703 /* Handle special cases. */
709 /* Make VALUE positive. */
718 /* Adjust VALUE to roughly 30**3, by shifting the trigesimal
719 point left or right as necessary. We approximate the
720 base-30 exponent by obtaining the base-2 exponent, then
721 multiplying by log30(2). This approximation is sufficient
722 to ensure that the adjusted VALUE is always in the range
723 0...30**6, an invariant of the loop below. */
725 base_2_sig = frexp (value, &base_2_exp);
/* NOTE(review): errno is tested here, but no `errno = 0' reset
   before frexp() is visible here.  A stale errno left by an
   earlier call would divert a valid value down the failure
   path; confirm the reset exists. */
726 if (errno != 0 || !finite (base_2_sig))
/* frexp() yields a zero significand and zero exponent for a zero
   VALUE. */
728 if (base_2_exp == 0 && base_2_sig == 0.)
/* Range check on base_2_exp ahead of the multiplication by
   20379L on the following line. */
730 if (base_2_exp <= INT_MIN / 20379L || base_2_exp >= INT_MAX / 20379L)
/* 20379 / 100000 approximates log30(2), which is 0.20379... */
732 trig_places = (base_2_exp * 20379L / 100000L) + CHUNK_SIZE / 2;
733 value *= pow30 (CHUNK_SIZE - trig_places);
735 /* Dump all the trigs to buffer[], CHUNK_SIZE at a time. */
738 for (trigs_to_output = DIV_RND_UP (DBL_DIG * 2, 3) + 1 + (CHUNK_SIZE / 2);
740 trigs_to_output -= CHUNK_SIZE)
745 /* The current chunk is just the integer part of VALUE,
746 truncated to the nearest integer. The chunk fits in a
749 assert (pow30 (CHUNK_SIZE) <= LONG_MAX);
750 assert (chunk >= 0 && chunk < pow30 (CHUNK_SIZE));
754 /* Append the chunk, in base 30, to trigs[]. */
/* Trigs are stored right to left within this chunk's slot, so
   the least-significant trig is written first. */
755 for (trigs_left = CHUNK_SIZE; chunk > 0 && trigs_left > 0; )
757 trigs[trig_cnt + --trigs_left] = chunk % 30;
/* Zero-fill any positions of the slot that the chunk did not
   reach. */
760 while (trigs_left > 0)
761 trigs[trig_cnt + --trigs_left] = 0;
762 trig_cnt += CHUNK_SIZE;
764 /* Proceed to the next chunk. */
/* NOTE(review): this uses the double-precision pow(), whereas the
   other scaling in this function goes through pow30() in long
   double; confirm the mix is intentional. */
767 value *= pow (LDBASE, CHUNK_SIZE);
770 /* Strip leading zeros. */
771 while (trig_cnt > 1 && *trigs == 0)
778 /* Round to requested precision, conservatively estimating the
779 required base-30 precision as 2/3 of the base-10 precision
780 (log30(10) = .68). */
781 assert (base_10_precision > 0);
782 if (base_10_precision > LDBL_DIG)
783 base_10_precision = LDBL_DIG;
784 base_30_precision = DIV_RND_UP (base_10_precision * 2, 3);
785 if (trig_cnt > base_30_precision)
/* The trigs beyond the requested precision are the ones being
   discarded; should_round_up() inspects them and also the last
   kept trig, which sits just before the pointer passed. */
787 if (should_round_up (trigs + base_30_precision,
788 trig_cnt - base_30_precision))
790 /* Try to round up. */
791 if (try_round_up (trigs, base_30_precision))
793 /* Rounding up worked. */
794 trig_cnt = base_30_precision;
798 /* Couldn't round up because we ran out of trigs to
799 carry into. Do the carry here instead. */
808 trig_cnt = base_30_precision;
813 /* No rounding required: fewer digits available than
817 /* Strip trailing zeros. */
818 while (trig_cnt > 1 && trigs[trig_cnt - 1] == 0)
824 if (trig_places >= -1 && trig_places < trig_cnt + 3)
826 /* Use conventional notation. */
827 format_trig_digits (output, trigs, trig_cnt, trig_places);
831 /* Use scientific notation. */
/* All trigs are written as an integer (TRIG_PLACES == TRIG_CNT),
   followed by an explicitly signed exponent that restores the
   true position of the trigesimal point. */
833 op = format_trig_digits (output, trigs, trig_cnt, trig_cnt);
834 op = format_trig_int (trig_places - trig_cnt, true, op);
/* Fixed result strings for the special cases: "0", and "*." which
   the function's header comment assigns to missing, infinite,
   and extreme values. */
839 strcpy (output, "0");
843 strcpy (output, "*.");