1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
18 #include "por-file-writer.h"
31 #include <data/case.h>
32 #include <data/casewriter-provider.h>
33 #include <data/casewriter.h>
34 #include <data/dictionary.h>
35 #include <data/file-handle-def.h>
36 #include <data/format.h>
37 #include <data/missing-values.h>
38 #include <data/short-names.h>
39 #include <data/value-labels.h>
40 #include <data/variable.h>
42 #include <libpspp/alloc.h>
43 #include <libpspp/hash.h>
44 #include <libpspp/magic.h>
45 #include <libpspp/message.h>
46 #include <libpspp/misc.h>
47 #include <libpspp/str.h>
48 #include <libpspp/version.h>
51 #define _(msgid) gettext (msgid)
/* Portable file writer.

   State for one portable file being written.  LC tracks the column
   within the current output line so that buf_write() can break
   lines at 80 characters. */
struct pfm_writer
  {
    struct file_handle *fh;     /* File handle. */
    FILE *file;                 /* File stream. */

    int lc;                     /* Number of characters on this line so far. */

    size_t var_cnt;             /* Number of variables. */
    struct pfm_var *vars;       /* Variables. */

    int digits;                 /* Digits of precision. */
  };
/* A variable to write to the portable file. */
struct pfm_var
  {
    int width;                  /* 0=numeric, otherwise string var width. */
    int fv;                     /* Starting case index. */
  };
74 static struct casewriter_class por_file_casewriter_class;
76 static bool close_writer (struct pfm_writer *);
77 static void buf_write (struct pfm_writer *, const void *, size_t);
78 static void write_header (struct pfm_writer *);
79 static void write_version_data (struct pfm_writer *);
80 static void write_variables (struct pfm_writer *, struct dictionary *);
81 static void write_value_labels (struct pfm_writer *,
82 const struct dictionary *);
84 static void format_trig_double (long double, int base_10_precision, char[]);
85 static char *format_trig_int (int, bool force_sign, char[]);
87 /* Returns default options for writing a portable file: the file is
   created writeable and the digits of precision default to DBL_DIG.

   NOTE(review): this excerpt skips original lines inside the body
   (one between the two assignments below, and everything after the
   last one), so any further defaults and the return statement are
   not visible here -- confirm against the complete file. */
88 struct pfm_write_options
89 pfm_writer_default_options (void)
91 struct pfm_write_options opts;
92 opts.create_writeable = true;
94 opts.digits = DBL_DIG;
98 /* Creates the file named by FH and writes the dictionary DICT to it
   as a portable file according to the given OPTS: version data,
   variable records and value labels, followed by an "F" tag.  On
   success returns the casewriter made by casewriter_create().  DICT
   will not be modified, except to assign short names.

   NOTE(review): this excerpt omits many original lines (local
   declarations, every error branch, the statement under the "Write
   file header" comment, and the failure return), so the error
   handling cannot be checked from here.  In particular, confirm that
   the descriptor from open() is closed when fh_open() or fdopen()
   fails, and that W's fields are initialized before any cleanup path
   reads them. */
102 pfm_open_writer (struct file_handle *fh, struct dictionary *dict,
103 struct pfm_write_options opts)
105 struct pfm_writer *w = NULL;
/* Permission bits requested for the new file: readable by everyone,
   and also writeable by everyone when OPTS asks for a writeable
   file.  (open() masks these with the process umask.) */
111 mode = S_IRUSR | S_IRGRP | S_IROTH;
112 if (opts.create_writeable)
113 mode |= S_IWUSR | S_IWGRP | S_IWOTH;
114 fd = open (fh_get_file_name (fh), O_WRONLY | O_CREAT | O_TRUNC, mode);
118 /* Open file handle. */
119 if (!fh_open (fh, FH_REF_FILE, "portable file", "we"))
122 /* Initialize data structures. */
123 w = xmalloc (sizeof *w);
/* Wrap the descriptor in a stdio stream; all later output goes
   through W->file. */
125 w->file = fdopen (fd, "w");
/* Record each variable's width and case index so that cases can be
   written later without consulting DICT again. */
136 w->var_cnt = dict_get_var_cnt (dict);
137 w->vars = xnmalloc (w->var_cnt, sizeof *w->vars);
138 for (i = 0; i < w->var_cnt; i++)
140 const struct variable *dv = dict_get_var (dict, i);
141 struct pfm_var *pv = &w->vars[i];
142 pv->width = var_get_width (dv);
143 pv->fv = var_get_case_index (dv);
146 w->digits = opts.digits;
/* NOTE(review): the condition guarding this message and the
   message's arguments are on lines missing from this excerpt. */
149 msg (ME, _("Invalid decimal digits count %d. Treating as %d."),
154 /* Write file header. */
156 write_version_data (w);
157 write_variables (w, dict);
158 write_value_labels (w, dict);
159 buf_write (w, "F", 1);
/* buf_write() stops writing once the stream is in error, so a
   single check here covers all of the output above. */
160 if (ferror (w->file))
162 return casewriter_create (&por_file_casewriter_class, w);
169 msg (ME, _("An error occurred while opening \"%s\" for writing "
170 "as a portable file: %s."),
171 fh_get_file_name (fh), strerror (errno));
175 /* Write NBYTES starting at BUF to the portable file represented by
176 H. Break lines properly every 80 characters. */
178 buf_write (struct pfm_writer *w, const void *buf_, size_t nbytes)
180 const char *buf = buf_;
182 if (ferror (w->file))
185 assert (buf != NULL);
186 while (nbytes + w->lc >= 80)
188 size_t n = 80 - w->lc;
191 fwrite (buf, n, 1, w->file);
192 fwrite ("\r\n", 2, 1, w->file);
198 fwrite (buf, nbytes, 1, w->file);
203 /* Write D to the portable file as a floating-point field. */
205 write_float (struct pfm_writer *w, double d)
208 format_trig_double (d, floor (d) == d ? DBL_DIG : w->digits, buffer);
209 buf_write (w, buffer, strlen (buffer));
210 buf_write (w, "/", 1);
/* Writes N to the portable file W as an integer field: its
   trigesimal representation (with a leading "-" if negative)
   followed by "/". */
static void
write_int (struct pfm_writer *w, int n)
{
  /* Ample for a sign plus the trigesimal digits of any int. */
  char buffer[64];

  format_trig_int (n, false, buffer);
  buf_write (w, buffer, strlen (buffer));
  buf_write (w, "/", 1);
}
/* Writes the null-terminated string S to the portable file W as a
   string field: its length as an integer field, then its bytes
   (without the terminating null). */
static void
write_string (struct pfm_writer *w, const char *s)
{
  size_t n = strlen (s);
  write_int (w, (int) n);
  buf_write (w, s, n);
}
/* Writes the file header to W: five 40-byte splash strings, the
   256-byte spss2ascii table, and the "SPSSPORT" tag. */
static void
write_header (struct pfm_writer *w)
{
  static const char spss2ascii[256] =
    {
      "0000000000000000000000000000000000000000000000000000000000000000"
      "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ."
      "<(+|&[]!$*);^-/|,%_>?`:$@'=\"000000~-0000123456789000-()0{}\\00000"
      "0000000000000000000000000000000000000000000000000000000000000000"
    };

  /* Each splash string is this text padded with spaces to 40 bytes.
     The padding is generated here instead of being spelled out in
     the literal, so that the 40-byte write can never read past the
     end of a literal whose trailing spaces were lost. */
  static const char splash_text[] = "ASCII SPSS PORT FILE";
  char padding[40 - (sizeof splash_text - 1)];
  int i;

  memset (padding, ' ', sizeof padding);
  for (i = 0; i < 5; i++)
    {
      buf_write (w, splash_text, sizeof splash_text - 1);
      buf_write (w, padding, sizeof padding);
    }

  buf_write (w, spss2ascii, 256);
  buf_write (w, "SPSSPORT", 8);
}
252 /* Writes version, date, and identification records to W: an "A"
   record holding the date (YYYYMMDD) and time (HHMMSS) as two string
   fields, a "1" record holding `version', and a "3" record holding
   `host_system'.

   NOTE(review): the declarations of T, TM, TMP, DATE_STR and TIME_STR
   and part of the time() failure branch are on lines missing from
   this excerpt.  Confirm that the two buffers are large enough for
   the sprintf() calls below and that a null return from localtime()
   is handled. */
254 write_version_data (struct pfm_writer *w)
/* time() reports failure by returning (time_t) -1; in that case the
   fields of TM are zeroed, presumably to serve as a substitute
   timestamp. */
262 if ((time_t) -1 == time (&t))
264 tm.tm_sec = tm.tm_min = tm.tm_hour = tm.tm_mon = tm.tm_year = 0;
269 tmp = localtime (&t);
/* struct tm counts years from 1900 and months from 0. */
271 sprintf (date_str, "%04d%02d%02d",
272 tmp->tm_year + 1900, tmp->tm_mon + 1, tmp->tm_mday);
273 sprintf (time_str, "%02d%02d%02d", tmp->tm_hour, tmp->tm_min, tmp->tm_sec);
274 buf_write (w, "A", 1);
275 write_string (w, date_str);
276 write_string (w, time_str);
278 /* Product identification. */
279 buf_write (w, "1", 1);
280 write_string (w, version);
282 /* Subproduct identification. */
283 buf_write (w, "3", 1);
284 write_string (w, host_system);
287 /* Writes format F to the portable file W.  The format type,
   converted with fmt_to_io(), is written as an integer field.

   NOTE(review): original lines following the statement below are
   missing from this excerpt, so any further fields written for the
   format are not visible here. */
289 write_format (struct pfm_writer *w, const struct fmt_spec *f)
291 write_int (w, fmt_to_io (f->type));
296 /* Write value V for variable VV to file H. */
298 write_value (struct pfm_writer *w, union value *v, struct variable *vv)
300 if (var_is_numeric (vv))
301 write_float (w, v->f);
304 write_int (w, var_get_width (vv));
305 buf_write (w, v->s, var_get_width (vv));
309 /* Writes the variable records for DICT to W, after assigning short
   names to DICT's variables: a "4" record with the variable count,
   then for each variable a "7" record with its width, short name,
   print format and write format, followed by records for its
   missing values (tags "9", "A" and "B" for ranges, "8" for
   individual values) and, if it has a label, a "C" record.

   NOTE(review): local declarations, the first range test, and the
   values written after the "9", "A" and "B" tags are on lines
   missing from this excerpt; verify which bound each tag writes. */
311 write_variables (struct pfm_writer *w, struct dictionary *dict)
315 short_names_assign (dict);
317 buf_write (w, "4", 1);
318 write_int (w, dict_get_var_cnt (dict));
321 for (i = 0; i < dict_get_var_cnt (dict); i++)
323 struct variable *v = dict_get_var (dict, i);
324 struct missing_values mv;
326 buf_write (w, "7", 1);
327 write_int (w, var_get_width (v));
328 write_string (w, var_get_short_name (v, 0));
329 write_format (w, var_get_print_format (v));
330 write_format (w, var_get_write_format (v));
332 /* Write missing values. */
/* Work on a copy, because the mv_pop_*() calls below remove
   entries as they are written. */
333 mv_copy (&mv, var_get_missing_values (v));
334 while (mv_has_range (&mv))
337 mv_pop_range (&mv, &x, &y);
340 buf_write (w, "9", 1);
343 else if (y == HIGHEST)
345 buf_write (w, "A", 1);
350 buf_write (w, "B", 1);
355 while (mv_has_value (&mv))
358 mv_pop_value (&mv, &value);
359 buf_write (w, "8", 1);
360 write_value (w, &value, v);
363 /* Write variable label. */
364 if (var_get_label (v) != NULL)
366 buf_write (w, "C", 1);
367 write_string (w, var_get_label (v));
372 /* Writes value labels to W.  For each variable in DICT that has
   value labels, writes a "D" record: the variable's short name, the
   number of labels, and then each value and its label, visited in
   sorted order.  FIXME: Inefficient.

   NOTE(review): local declarations, the statement under the null
   test, and a line between the "D" tag and the short name are
   missing from this excerpt. */
374 write_value_labels (struct pfm_writer *w, const struct dictionary *dict)
378 for (i = 0; i < dict_get_var_cnt (dict); i++)
380 struct val_labs_iterator *j;
381 struct variable *v = dict_get_var (dict, i);
382 const struct val_labs *val_labs = var_get_value_labels (v);
385 if (val_labs == NULL)
388 buf_write (w, "D", 1);
390 write_string (w, var_get_short_name (v, 0));
391 write_int (w, val_labs_count (val_labs));
393 for (vl = val_labs_first_sorted (val_labs, &j); vl != NULL;
394 vl = val_labs_next (val_labs, &j))
396 write_value (w, &vl->value, v);
397 write_string (w, vl->label);
402 /* Writes case C to the portable file represented by W_.  Unless the
   stream is already in an error state, each variable recorded in the
   writer is written in turn: a numeric value as a floating-point
   field, a string value as its width followed by its bytes.

   NOTE(review): the rest of the parameter list, the test choosing
   between the numeric and string branches, and the branch that
   leads to casewriter_force_error() are on lines missing from this
   excerpt; the summary above is inferred from the visible calls. */
404 por_file_casewriter_write (struct casewriter *writer, void *w_,
407 struct pfm_writer *w = w_;
410 if (!ferror (w->file))
412 for (i = 0; i < w->var_cnt; i++)
414 struct pfm_var *v = &w->vars[i];
417 write_float (w, case_num_idx (c, v->fv));
420 write_int (w, v->width);
421 buf_write (w, case_str_idx (c, v->fv), v->width);
426 casewriter_force_error (writer);
/* Closes the portable file writer W_ on behalf of casewriter WRITER.
   If closing reports an I/O error, the error is forced onto WRITER
   so that the casewriter's client sees it. */
static void
por_file_casewriter_destroy (struct casewriter *writer, void *w_)
{
  struct pfm_writer *w = w_;
  if (!close_writer (w))
    casewriter_force_error (writer);
}
439 /* Closes a portable file after we're done with it.  The output is
   padded with 'Z' characters before the stream is closed, and the
   file handle is released with fh_close().
   Returns true if successful, false if an I/O error occurred.

   NOTE(review): local declarations, any guards on W or W->file, the
   statements that free W's storage, and the return are on lines
   missing from this excerpt. */
442 close_writer (struct pfm_writer *w)
453 memset (buf, 'Z', sizeof buf);
/* Write enough 'Z's to fill out the current 80-character line (a
   whole line of them if W->lc is somehow already 80 or more). */
454 buf_write (w, buf, w->lc >= 80 ? 80 : 80 - w->lc);
/* Sample the error flag before fclose(): the stream may not be
   inspected afterward, and fclose() can itself fail while flushing. */
456 ok = !ferror (w->file);
457 if (fclose (w->file) == EOF)
461 msg (ME, _("An I/O error occurred writing portable file \"%s\"."),
462 fh_get_file_name (w->fh));
465 fh_close (w->fh, "portable file", "we");
/* Base-30 conversion.

   Portable files represent numbers in base-30 format, so we need
   to be able to convert real and integer numbers to that base.
   Older versions of PSPP used libgmp to do so, but this added a
   big library dependency to do just one thing.  Now we do it
   ourselves internally.

   Important fact: base 30 is called "trigesimal". */
/* Conversion base. */
#define BASE 30                         /* As an integer. */
#define LDBASE ((long double) BASE)     /* As a long double. */

/* This is floor(log30(2**31)): the number of trigesimal digits
   that are guaranteed to fit in a `long int'.  (30**6 is
   729,000,000, which is below 2**31; 30**7 is not.) */
#define CHUNK_SIZE 6
/* pow_tab[i] = pow (30, pow (2, i)) */
static long double pow_tab[16];

/* Initializes pow_tab[].  Safe to call repeatedly: only the first
   call does any work. */
static void
init_pow_tab (void)
{
  static bool did_init = false;
  long double power;
  size_t i;

  /* Only initialize once. */
  if (did_init)
    return;
  did_init = true;

  /* Set each element of pow_tab[] until we run out of numerical
     range.  Each step squares the previous entry, so entry I holds
     30 raised to the power 2**I. */
  i = 0;
  for (power = 30.0L; power < DBL_MAX; power *= power)
    {
      assert (i < sizeof pow_tab / sizeof *pow_tab);
      pow_tab[i++] = power;
    }
}

/* Returns 30**EXPONENT, for 0 <= EXPONENT <= log30(DBL_MAX).
   The result is the product of the pow_tab[] entries selected by
   the 1-bits of EXPONENT. */
static long double
pow30_nonnegative (int exponent)
{
  long double power;
  int i;

  assert (exponent >= 0);
  assert (exponent < 1L << (sizeof pow_tab / sizeof *pow_tab));

  init_pow_tab ();

  power = 1.L;
  for (i = 0; exponent > 0; exponent >>= 1, i++)
    if (exponent & 1)
      power *= pow_tab[i];

  return power;
}
/* Returns 30**EXPONENT, for log30(DBL_MIN) <= EXPONENT <=
   log30(DBL_MAX).  A negative EXPONENT yields the reciprocal of the
   corresponding positive power. */
static long double
pow30 (int exponent)
{
  if (exponent >= 0)
    return pow30_nonnegative (exponent);
  else
    return 1.L / pow30_nonnegative (-exponent);
}
/* Returns the character corresponding to TRIG, a trigesimal digit
   value in the range 0...29: '0' through '9' for 0...9, then 'A'
   through 'T' for 10...29. */
static int
trig_to_char (int trig)
{
  assert (trig >= 0 && trig < 30);
  return "0123456789ABCDEFGHIJKLMNOPQRST"[trig];
}
/* Formats the TRIG_CNT trigs in TRIGS[], writing them as
   null-terminated STRING.  The trigesimal point is inserted
   after TRIG_PLACES characters have been printed, if necessary
   adding extra zeros at either end for correctness.  Returns a
   pointer to the terminating null, that is, to the character
   after the formatted number. */
static char *
format_trig_digits (char *string,
                    const char trigs[], int trig_cnt, int trig_places)
{
  if (trig_places < 0)
    {
      /* The point precedes every digit: emit it, then the leading
         zeros between it and the first digit. */
      *string++ = '.';
      while (trig_places++ < 0)
        *string++ = '0';

      /* The point has been written; keep the test in the loop
         below from ever matching. */
      trig_places = -1;
    }
  while (trig_cnt-- > 0)
    {
      if (trig_places-- == 0)
        *string++ = '.';
      *string++ = trig_to_char (*trigs++);
    }

  /* Trailing zeros for digits that lie left of the point but were
     not supplied in TRIGS[]. */
  while (trig_places-- > 0)
    *string++ = '0';
  *string = '\0';
  return string;
}
582 /* Helper function for format_trig_int() that formats VALUE as a
583 trigesimal integer at CP. VALUE must be nonnegative.
584 Returns the character following the formatted integer. */
586 recurse_format_trig_int (char *cp, int value)
588 int trig = value % BASE;
591 cp = recurse_format_trig_int (cp, value);
592 *cp++ = trig_to_char (trig);
/* Formats VALUE as a trigesimal integer in null-terminated
   STRING[].  VALUE must be greater than INT_MIN, because a negative
   VALUE is negated before its digits are formatted.  If FORCE_SIGN
   is true, a sign is always inserted; otherwise, a sign is only
   inserted if VALUE is negative.  Returns a pointer to the
   terminating null. */
static char *
format_trig_int (int value, bool force_sign, char string[])
{
  /* Insert sign. */
  if (value < 0)
    {
      *string++ = '-';
      value = -value;
    }
  else if (force_sign)
    *string++ = '+';

  /* Format integer. */
  string = recurse_format_trig_int (string, value);
  *string = '\0';
  return string;
}
618 /* Determines whether the TRIG_CNT trigesimals in TRIGS[] warrant
619 rounding up or down. Returns true if TRIGS[] represents a
620 value greater than half, false if less than half. If TRIGS[]
621 is exactly half, examines TRIGS[-1] and returns true if odd,
622 false if even ("round to even"). */
624 should_round_up (const char trigs[], int trig_cnt)
626 assert (trig_cnt > 0);
628 if (*trigs < BASE / 2)
630 /* Less than half: round down. */
633 else if (*trigs > BASE / 2)
635 /* Greater than half: round up. */
640 /* Approximately half: look more closely. */
642 for (i = 1; i < trig_cnt; i++)
645 /* Slightly greater than half: round up. */
649 /* Exactly half: round to even. */
650 return trigs[-1] % 2;
654 /* Rounds up the rightmost trig in the TRIG_CNT trigs in TRIGS[],
655 carrying to the left as necessary. Returns true if
656 successful, false on failure (due to a carry out of the
657 leftmost position). */
659 try_round_up (char *trigs, int trig_cnt)
663 char *round_trig = trigs + --trig_cnt;
664 if (*round_trig != BASE - 1)
666 /* Round this trig up to the next value. */
671 /* Carry over to the next trig to the left. */
675 /* Ran out of trigs to carry. */
679 /* Converts VALUE to trigesimal format in string OUTPUT[] with the
   equivalent of at least BASE_10_PRECISION decimal digits of
   precision.  The output format may use conventional or
   scientific notation.  Zero is written as "0".  Missing, infinite,
   and extreme values are represented with "*.".

   NOTE(review): this excerpt omits many original lines of the
   function (most declarations, the special-case tests, every jump
   and return).  Three comments below have also lost their closing
   lines: the ones ending "...is just before trigs[0].", "...The
   chunk fits in a" and "...fewer digits available than".  As the
   text stands, each of them runs on to the next comment terminator
   and hides the lines in between, including two assert() calls.
   Those lines are left untouched here; restore the terminators from
   the complete file. */
685 format_trig_double (long double value, int base_10_precision, char output[])
687 /* Original VALUE was negative? */
690 /* Number of significant trigesimals. */
691 int base_30_precision;
693 /* Base-2 significand and exponent for original VALUE. */
697 /* VALUE as a set of trigesimals. */
698 char buffer[DBL_DIG + 16];
702 /* Number of trigesimal places for trigs.
703 trigs[0] has coefficient 30**(trig_places - 1),
704 trigs[1] has coefficient 30**(trig_places - 2),
706 In other words, the trigesimal point is just before trigs[0].
710 /* Number of trigesimal places left to write into BUFFER. */
715 /* Handle special cases. */
721 /* Make VALUE positive. */
730 /* Adjust VALUE to roughly 30**3, by shifting the trigesimal
731 point left or right as necessary. We approximate the
732 base-30 exponent by obtaining the base-2 exponent, then
733 multiplying by log30(2). This approximation is sufficient
734 to ensure that the adjusted VALUE is always in the range
735 0...30**6, an invariant of the loop below. */
737 base_2_sig = frexp (value, &base_2_exp);
/* NOTE(review): the errno test is only meaningful if errno was
   cleared before the frexp() call; that statement is not visible in
   this excerpt.  finite() is a nonstandard function; C99 provides
   isfinite(). */
738 if (errno != 0 || !finite (base_2_sig))
740 if (base_2_exp == 0 && base_2_sig == 0.)
/* 20379 / 100000 approximates log30(2).  The range test keeps the
   multiplication by 20379L below from overflowing. */
742 if (base_2_exp <= INT_MIN / 20379L || base_2_exp >= INT_MAX / 20379L)
744 trig_places = (base_2_exp * 20379L / 100000L) + CHUNK_SIZE / 2;
745 value *= pow30 (CHUNK_SIZE - trig_places);
747 /* Dump all the trigs to buffer[], CHUNK_SIZE at a time. */
750 for (trigs_to_output = DIV_RND_UP (DBL_DIG * 2, 3) + 1 + (CHUNK_SIZE / 2);
752 trigs_to_output -= CHUNK_SIZE)
757 /* The current chunk is just the integer part of VALUE,
758 truncated to the nearest integer. The chunk fits in a
761 assert (pow30 (CHUNK_SIZE) <= LONG_MAX);
762 assert (chunk >= 0 && chunk < pow30 (CHUNK_SIZE));
766 /* Append the chunk, in base 30, to trigs[]. */
/* Digits are peeled off least significant first, so they are stored
   from the end of this chunk's slot backward; the loop after this
   one zero-fills whatever is left at the front of the slot. */
767 for (trigs_left = CHUNK_SIZE; chunk > 0 && trigs_left > 0; )
769 trigs[trig_cnt + --trigs_left] = chunk % 30;
772 while (trigs_left > 0)
773 trigs[trig_cnt + --trigs_left] = 0;
774 trig_cnt += CHUNK_SIZE;
776 /* Proceed to the next chunk. */
/* Shift the next CHUNK_SIZE trigesimal places into the integer
   part.  NOTE(review): this calls the double-precision pow() on a
   long double base, unlike the pow30() used everywhere else. */
779 value *= pow (LDBASE, CHUNK_SIZE);
782 /* Strip leading zeros. */
783 while (trig_cnt > 1 && *trigs == 0)
790 /* Round to requested precision, conservatively estimating the
791 required base-30 precision as 2/3 of the base-10 precision
792 (log30(10) = .68). */
793 assert (base_10_precision > 0);
794 if (base_10_precision > LDBL_DIG)
795 base_10_precision = LDBL_DIG;
796 base_30_precision = DIV_RND_UP (base_10_precision * 2, 3);
797 if (trig_cnt > base_30_precision)
/* The digits past the requested precision decide the rounding
   direction; should_round_up() may also look at the last digit
   being kept to break an exact tie. */
799 if (should_round_up (trigs + base_30_precision,
800 trig_cnt - base_30_precision))
802 /* Try to round up. */
803 if (try_round_up (trigs, base_30_precision))
805 /* Rounding up worked. */
806 trig_cnt = base_30_precision;
810 /* Couldn't round up because we ran out of trigs to
811 carry into. Do the carry here instead. */
820 trig_cnt = base_30_precision;
825 /* No rounding required: fewer digits available than
829 /* Strip trailing zeros. */
830 while (trig_cnt > 1 && trigs[trig_cnt - 1] == 0)
/* Conventional notation is used only while it needs at most one
   leading zero after the point or at most two trailing zeros before
   it; anything further out is written in scientific notation. */
836 if (trig_places >= -1 && trig_places < trig_cnt + 3)
838 /* Use conventional notation. */
839 format_trig_digits (output, trigs, trig_cnt, trig_places);
843 /* Use scientific notation. */
/* All the digits are written with the point after the last one,
   followed by an explicitly signed exponent. */
845 op = format_trig_digits (output, trigs, trig_cnt, trig_cnt);
846 op = format_trig_int (trig_places - trig_cnt, true, op);
851 strcpy (output, "0");
855 strcpy (output, "*.");
859 static struct casewriter_class por_file_casewriter_class =
861 por_file_casewriter_write,
862 por_file_casewriter_destroy,