1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include "pfm-write.h"
32 #include "dictionary.h"
34 #include "file-handle.h"
39 #include "value-labels.h"
43 #include "debug-print.h"
45 /* Portable file writer. */
48 struct file_handle *fh; /* File handle. */
49 FILE *file; /* File stream. */
51 int lc; /* Number of characters on this line so far. */
53 size_t var_cnt; /* Number of variables. */
54 struct pfm_var *vars; /* Variables. */
57 /* A variable to write to the portable file. */
60 int width; /* 0=numeric, otherwise string var width. */
61 int fv; /* Starting case index. */
64 static int buf_write (struct pfm_writer *, const void *, size_t);
65 static int write_header (struct pfm_writer *);
66 static int write_version_data (struct pfm_writer *);
67 static int write_variables (struct pfm_writer *, struct dictionary *);
68 static int write_value_labels (struct pfm_writer *, const struct dictionary *);
70 static void format_trig_double (long double, int base_10_precision, char[]);
71 static char *format_trig_int (int, bool force_sign, char[]);
73 /* Writes the dictionary DICT to the portable file FH. Returns
74 nonzero only if successful. DICT will not be modified, except
75 to assign short names. */
77 pfm_open_writer (struct file_handle *fh, struct dictionary *dict)
79 struct pfm_writer *w = NULL;
82 if (!fh_open (fh, "portable file", "we"))
85 /* Open the physical disk file. */
86 w = xmalloc (sizeof *w);
88 w->file = fopen (handle_get_filename (fh), "wb");
93 /* Check that file create succeeded. */
96 msg (ME, _("An error occurred while opening \"%s\" for writing "
97 "as a portable file: %s."),
98 handle_get_filename (fh), strerror (errno));
103 w->var_cnt = dict_get_var_cnt (dict);
104 w->vars = xmalloc (sizeof *w->vars * w->var_cnt);
105 for (i = 0; i < w->var_cnt; i++)
107 const struct variable *dv = dict_get_var (dict, i);
108 struct pfm_var *pv = &w->vars[i];
109 pv->width = dv->width;
113 /* Write file header. */
114 if (!write_header (w)
115 || !write_version_data (w)
116 || !write_variables (w, dict)
117 || !write_value_labels (w, dict)
118 || !buf_write (w, "F", 1))
124 pfm_close_writer (w);
128 /* Write NBYTES starting at BUF to the portable file represented by
129 W. Break lines properly every 80 characters. */
131 buf_write (struct pfm_writer *w, const void *buf_, size_t nbytes)
133 const char *buf = buf_;
135 assert (buf != NULL);
136 while (nbytes + w->lc >= 80)
138 size_t n = 80 - w->lc;
140 if (n && fwrite (buf, n, 1, w->file) != 1)
143 if (fwrite ("\r\n", 2, 1, w->file) != 1)
151 if (nbytes && 1 != fwrite (buf, nbytes, 1, w->file))
158 msg (ME, _("%s: Writing portable file: %s."),
159 handle_get_filename (w->fh), strerror (errno));
163 /* Write D to the portable file as a floating-point field, and return
166 write_float (struct pfm_writer *w, double d)
169 format_trig_double (d, DBL_DIG, buffer);
170 return buf_write (w, buffer, strlen (buffer)) && buf_write (w, "/", 1);
173 /* Write N to the portable file as an integer field, and return success. */
175 write_int (struct pfm_writer *w, int n)
178 format_trig_int (n, false, buffer);
179 return buf_write (w, buffer, strlen (buffer)) && buf_write (w, "/", 1);
182 /* Write S to the portable file as a string field. */
184 write_string (struct pfm_writer *w, const char *s)
186 size_t n = strlen (s);
187 return write_int (w, (int) n) && buf_write (w, s, n);
190 /* Write file header. */
192 write_header (struct pfm_writer *w)
198 for (i = 0; i < 5; i++)
199 if (!buf_write (w, "ASCII SPSS PORT FILE ", 40))
204 /* PORTME: Translation table from SPSS character code to this
205 computer's native character code (which is probably ASCII). */
206 static const unsigned char spss2ascii[256] =
208 "0000000000000000000000000000000000000000000000000000000000000000"
209 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ."
210 "<(+|&[]!$*);^-/|,%_>?`:$@'=\"000000~-0000123456789000-()0{}\\00000"
211 "0000000000000000000000000000000000000000000000000000000000000000"
214 if (!buf_write (w, spss2ascii, 256))
218 if (!buf_write (w, "SPSSPORT", 8))
224 /* Writes version, date, and identification records. */
226 write_version_data (struct pfm_writer *w)
228 if (!buf_write (w, "A", 1))
238 if ((time_t) -1 == time (&t))
240 tm.tm_sec = tm.tm_min = tm.tm_hour = tm.tm_mon = tm.tm_year = 0;
245 tmp = localtime (&t);
247 sprintf (date_str, "%04d%02d%02d",
248 tmp->tm_year + 1900, tmp->tm_mon + 1, tmp->tm_mday);
249 sprintf (time_str, "%02d%02d%02d", tmp->tm_hour, tmp->tm_min, tmp->tm_sec);
250 if (!write_string (w, date_str) || !write_string (w, time_str))
254 /* Product identification. */
255 if (!buf_write (w, "1", 1) || !write_string (w, version))
258 /* Subproduct identification. */
259 if (!buf_write (w, "3", 1) || !write_string (w, host_system))
265 /* Write format F to the portable file W, and return success. */
267 write_format (struct pfm_writer *w, struct fmt_spec *f)
269 return (write_int (w, formats[f->type].spss)
270 && write_int (w, f->w)
271 && write_int (w, f->d));
274 /* Write value V for variable VV to the portable file W, and return success. */
276 write_value (struct pfm_writer *w, union value *v, struct variable *vv)
278 if (vv->type == NUMERIC)
279 return write_float (w, v->f);
281 return write_int (w, vv->width) && buf_write (w, v->s, vv->width);
284 /* Write variable records, and return success. */
286 write_variables (struct pfm_writer *w, struct dictionary *dict)
290 dict_assign_short_names (dict);
292 if (!buf_write (w, "4", 1) || !write_int (w, dict_get_var_cnt (dict))
293 || !write_int (w, 161))
296 for (i = 0; i < dict_get_var_cnt (dict); i++)
298 static const char *miss_types[MISSING_COUNT] =
300 "", "8", "88", "888", "B ", "9", "A", "B 8", "98", "A8",
306 struct variable *v = dict_get_var (dict, i);
308 if (!buf_write (w, "7", 1) || !write_int (w, v->width)
309 || !write_string (w, v->short_name)
310 || !write_format (w, &v->print) || !write_format (w, &v->write))
313 for (m = miss_types[v->miss_type], j = 0; j < (int) strlen (m); j++)
314 if ((m[j] != ' ' && !buf_write (w, &m[j], 1))
315 || !write_value (w, &v->missing[j], v))
318 if (v->label && (!buf_write (w, "C", 1) || !write_string (w, v->label)))
325 /* Write value labels to disk. FIXME: Inefficient. */
327 write_value_labels (struct pfm_writer *w, const struct dictionary *dict)
331 for (i = 0; i < dict_get_var_cnt (dict); i++)
333 struct val_labs_iterator *j;
334 struct variable *v = dict_get_var (dict, i);
337 if (!val_labs_count (v->val_labs))
340 if (!buf_write (w, "D", 1)
342 || !write_string (w, v->short_name)
343 || !write_int (w, val_labs_count (v->val_labs)))
346 for (vl = val_labs_first_sorted (v->val_labs, &j); vl != NULL;
347 vl = val_labs_next (v->val_labs, &j))
348 if (!write_value (w, &vl->value, v)
349 || !write_string (w, vl->label))
359 /* Writes case C to the portable file represented by W. Returns
362 pfm_write_case (struct pfm_writer *w, struct ccase *c)
366 for (i = 0; i < w->var_cnt; i++)
368 struct pfm_var *v = &w->vars[i];
372 if (!write_float (w, case_num (c, v->fv)))
377 if (!write_int (w, v->width)
378 || !buf_write (w, case_str (c, v->fv), v->width))
386 /* Closes a portable file after we're done with it. */
388 pfm_close_writer (struct pfm_writer *w)
393 fh_close (w->fh, "portable file", "we");
403 memset (buf, 'Z', n);
404 buf_write (w, buf, n);
406 if (fclose (w->file) == EOF)
407 msg (ME, _("%s: Closing portable file: %s."),
408 handle_get_filename (w->fh), strerror (errno));
415 /* Base-30 conversion. */
417 /* Conversion base. */
418 #define BASE 30 /* As an integer. */
419 #define LDBASE ((long double) BASE) /* As a long double. */
421 /* This is floor(log30(2**31)), the number of trigesimal
422 digits that a `long int' is guaranteed to be able to hold. */
425 /* Yields the square of X. */
426 #define Q(X) ((X) * (X))
428 /* Returns 30**EXPONENT, for 0 <= EXPONENT <= log30(DBL_MAX). */
430 pow30_nonnegative (int exponent)
432 /* pow_tab[i] = pow (30, pow (2, i)) */
433 static const long double pow_tab[] =
439 Q (Q (Q (Q (LDBASE)))),
440 Q (Q (Q (Q (Q (LDBASE))))),
441 Q (Q (Q (Q (Q (Q (LDBASE)))))),
442 Q (Q (Q (Q (Q (Q (Q (LDBASE))))))),
443 Q (Q (Q (Q (Q (Q (Q (Q (LDBASE)))))))),
444 Q (Q (Q (Q (Q (Q (Q (Q (Q (LDBASE))))))))),
445 Q (Q (Q (Q (Q (Q (Q (Q (Q (Q (LDBASE)))))))))),
446 Q (Q (Q (Q (Q (Q (Q (Q (Q (Q (Q (LDBASE))))))))))),
452 assert (exponent >= 0);
453 assert (exponent < 1L << (sizeof pow_tab / sizeof *pow_tab));
456 for (i = 0; exponent > 0; exponent >>= 1, i++)
463 /* Returns 30**EXPONENT, for log30(DBL_MIN) <= EXPONENT <=
469 return pow30_nonnegative (exponent);
471 return 1.L / pow30_nonnegative (-exponent);
474 /* Returns the character corresponding to TRIG. */
476 trig_to_char (int trig)
478 assert (trig >= 0 && trig < 30);
479 return "0123456789ABCDEFGHIJKLMNOPQRST"[trig];
482 /* Formats the TRIG_CNT trigs in TRIGS[], writing them as
483 null-terminated STRING. The trigesimal point is inserted
484 after TRIG_PLACES characters have been printed, if necessary
485 adding extra zeros at either end for correctness. Returns a
486 pointer to the character after the formatted number. */
488 format_trig_digits (char *string,
489 const char trigs[], int trig_cnt, int trig_places)
494 while (trig_places++ < 0)
498 while (trig_cnt-- > 0)
500 if (trig_places-- == 0)
502 *string++ = trig_to_char (*trigs++);
504 while (trig_places-- > 0)
510 /* Helper function for format_trig_int() that formats VALUE as a
511 trigesimal integer at CP. VALUE must be nonnegative.
512 Returns a pointer to the character following the formatted integer. */
514 recurse_format_trig_int (char *cp, int value)
516 int trig = value % BASE;
519 cp = recurse_format_trig_int (cp, value);
520 *cp++ = trig_to_char (trig);
524 /* Formats VALUE as a trigesimal integer in null-terminated
525 STRING[]. VALUE is an `int' (TODO confirm INT_MIN is safe). If
526 FORCE_SIGN is true, a sign is always inserted; otherwise, a
527 sign is only inserted if VALUE is negative. */
529 format_trig_int (int value, bool force_sign, char string[])
540 /* Format integer. */
541 string = recurse_format_trig_int (string, value);
546 /* Determines whether the TRIG_CNT trigesimals in TRIGS[] warrant
547 rounding up or down. Returns true if TRIGS[] represents a
548 value greater than half, false if less than half. If TRIGS[]
549 is exactly half, examines TRIGS[-1] and returns true if odd,
550 false if even ("round to even"). */
552 should_round_up (const char trigs[], int trig_cnt)
554 assert (trig_cnt > 0);
556 if (*trigs < BASE / 2)
558 /* Less than half: round down. */
561 else if (*trigs > BASE / 2)
563 /* Greater than half: round up. */
568 /* Approximately half: look more closely. */
570 for (i = 1; i < trig_cnt; i++)
573 /* Slightly greater than half: round up. */
577 /* Exactly half: round to even. */
578 return trigs[-1] % 2;
582 /* Rounds up the rightmost trig in the TRIG_CNT trigs in TRIGS[],
583 carrying to the left as necessary. Returns true if
584 successful, false on failure (due to a carry out of the
585 leftmost position). */
587 try_round_up (char *trigs, int trig_cnt)
591 char *round_trig = trigs + --trig_cnt;
592 if (*round_trig != BASE - 1)
594 /* Round this trig up to the next value. */
599 /* Carry over to the next trig to the left. */
603 /* Ran out of trigs to carry. */
607 /* Converts VALUE to trigesimal format in string OUTPUT[] with the
608 equivalent of at least BASE_10_PRECISION decimal digits of
609 precision. The output format may use conventional or
610 scientific notation. Missing, infinite, and extreme values
611 are represented with "*.". */
613 format_trig_double (long double value, int base_10_precision, char output[])
615 /* Original VALUE was negative? */
618 /* Number of significant trigesimals. */
619 int base_30_precision;
621 /* Base-2 significand and exponent for original VALUE. */
625 /* VALUE as a set of trigesimals. */
626 char buffer[DBL_DIG + 16];
630 /* Number of trigesimal places for trigs.
631 trigs[0] has coefficient 30**(trig_places - 1),
632 trigs[1] has coefficient 30**(trig_places - 2),
634 In other words, the trigesimal point is just before trigs[0].
638 /* Number of trigesimal places left to write into BUFFER. */
641 /* Handle special cases. */
647 /* Make VALUE positive. */
656 /* Adjust VALUE to roughly 30**3, by shifting the trigesimal
657 point left or right as necessary. We approximate the
658 base-30 exponent by obtaining the base-2 exponent, then
659 multiplying by log30(2). This approximation is sufficient
660 to ensure that the adjusted VALUE is always in the range
661 0...30**6, an invariant of the loop below. */
663 base_2_sig = frexp (value, &base_2_exp);
664 if (errno != 0 || !finite (base_2_sig))
666 if (base_2_exp == 0 && base_2_sig == 0.)
668 if (base_2_exp <= INT_MIN / 20379L || base_2_exp >= INT_MAX / 20379L)
670 trig_places = (base_2_exp * 20379L / 100000L) + CHUNK_SIZE / 2;
671 value *= pow30 (CHUNK_SIZE - trig_places);
673 /* Dump all the trigs to buffer[], CHUNK_SIZE at a time. */
676 for (trigs_to_output = DIV_RND_UP (DBL_DIG * 2, 3) + 1 + (CHUNK_SIZE / 2);
678 trigs_to_output -= CHUNK_SIZE)
683 /* The current chunk is just the integer part of VALUE,
684 truncated to the nearest integer. The chunk fits in a
687 assert (pow30 (CHUNK_SIZE) <= LONG_MAX);
688 assert (chunk >= 0 && chunk < pow30 (CHUNK_SIZE));
692 /* Append the chunk, in base 30, to trigs[]. */
693 for (trigs_left = CHUNK_SIZE; chunk > 0 && trigs_left > 0; )
695 trigs[trig_cnt + --trigs_left] = chunk % 30;
698 while (trigs_left > 0)
699 trigs[trig_cnt + --trigs_left] = 0;
700 trig_cnt += CHUNK_SIZE;
702 /* Proceed to the next chunk. */
705 value *= pow (LDBASE, CHUNK_SIZE);
708 /* Strip leading zeros. */
709 while (trig_cnt > 1 && *trigs == 0)
716 /* Round to requested precision, conservatively estimating the
717 required base-30 precision as 2/3 of the base-10 precision
718 (log30(10) = .68). */
719 assert (base_10_precision > 0);
720 base_30_precision = DIV_RND_UP (base_10_precision * 2, 3);
721 if (trig_cnt > base_30_precision)
723 if (should_round_up (trigs + base_30_precision,
724 trig_cnt - base_30_precision))
726 /* Try to round up. */
727 if (try_round_up (trigs, base_30_precision))
729 /* Rounding up worked. */
730 trig_cnt = base_30_precision;
734 /* Couldn't round up because we ran out of trigs to
735 carry into. Do the carry here instead. */
744 trig_cnt = base_30_precision;
749 /* No rounding required: fewer digits available than
753 /* Strip trailing zeros. */
754 while (trig_cnt > 1 && trigs[trig_cnt - 1] == 0)
760 if (trig_places >= -1 && trig_places < trig_cnt + 3)
762 /* Use conventional notation. */
763 format_trig_digits (output, trigs, trig_cnt, trig_places);
767 /* Use scientific notation. */
769 op = format_trig_digits (output, trigs, trig_cnt, trig_cnt);
770 op = format_trig_int (trig_places - trig_cnt, true, op);
775 strcpy (output, "0");
779 strcpy (output, "*.");