1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "data/por-file-writer.h"
30 #include "data/case.h"
31 #include "data/casewriter-provider.h"
32 #include "data/casewriter.h"
33 #include "data/dictionary.h"
34 #include "data/file-handle-def.h"
35 #include "data/format.h"
36 #include "data/make-file.h"
37 #include "data/missing-values.h"
38 #include "data/short-names.h"
39 #include "data/value-labels.h"
40 #include "data/variable.h"
41 #include "libpspp/message.h"
42 #include "libpspp/misc.h"
43 #include "libpspp/str.h"
44 #include "libpspp/version.h"
46 #include "gl/minmax.h"
47 #include "gl/xalloc.h"
50 #define _(msgid) gettext (msgid)
51 #define N_(msgid) (msgid)
53 /* Maximum width of a variable in a portable file. */
54 #define MAX_POR_WIDTH 255
56 /* Portable file writer. */
59 struct file_handle *fh; /* File handle. */
60 struct fh_lock *lock; /* Lock on file handle. */
61 FILE *file; /* File stream. */
62 struct replace_file *rf; /* Ticket for replacing output file. */
64 int lc; /* Number of characters on this line so far. */
66 size_t n_vars; /* Number of variables. */
67 struct pfm_var *vars; /* Variables. */
69 int digits; /* Digits of precision. */
72 /* A variable to write to the portable file. */
75 int width; /* 0=numeric, otherwise string var width. */
76 int case_index; /* Index in case. */
79 static const struct casewriter_class por_file_casewriter_class;
81 static bool close_writer (struct pfm_writer *);
82 static void buf_write (struct pfm_writer *, const void *, size_t);
83 static void write_header (struct pfm_writer *);
84 static void write_version_data (struct pfm_writer *);
85 static void write_variables (struct pfm_writer *, struct dictionary *);
86 static void write_value_labels (struct pfm_writer *,
87 const struct dictionary *);
88 static void write_documents (struct pfm_writer *,
89 const struct dictionary *);
91 static void format_trig_double (long double, int base_10_precision, char[]);
92 static char *format_trig_int (int, bool force_sign, char[]);
94 /* Returns default options for writing a portable file. */
95 struct pfm_write_options
96 pfm_writer_default_options (void)
98 struct pfm_write_options opts;
99 opts.create_writeable = true;
100 opts.type = PFM_COMM;
101 opts.digits = DBL_DIG;
105 /* Writes the dictionary DICT to portable file FH according
106 to the given OPTS. Returns a casewriter only if successful. DICT
107 will not be modified, except to assign short names. */
109 pfm_open_writer (struct file_handle *fh, struct dictionary *dict,
110 struct pfm_write_options opts)
112 struct pfm_writer *w = NULL;
116 /* Initialize data structures. */
117 w = xmalloc (sizeof *w);
126 w->n_vars = dict_get_n_vars (dict);
127 w->vars = xnmalloc (w->n_vars, sizeof *w->vars);
128 for (i = 0; i < w->n_vars; i++)
130 const struct variable *dv = dict_get_var (dict, i);
131 struct pfm_var *pv = &w->vars[i];
132 pv->width = MIN (var_get_width (dv), MAX_POR_WIDTH);
133 pv->case_index = var_get_case_index (dv);
136 w->digits = opts.digits;
139 msg (ME, _("Invalid decimal digits count %d. Treating as %d."),
145 /* TRANSLATORS: this fragment will be interpolated into
146 messages in fh_lock() that identify types of files. */
147 w->lock = fh_lock (fh, FH_REF_FILE, N_("portable file"), FH_ACC_WRITE, true);
153 if (opts.create_writeable)
155 w->rf = replace_file_start (fh, "w", mode,
159 msg (ME, _("Error opening `%s' for writing as a portable file: %s."),
160 fh_get_file_name (fh), strerror (errno));
164 /* Write file header. */
166 write_version_data (w);
167 write_variables (w, dict);
168 write_value_labels (w, dict);
169 if (dict_get_document_n_lines (dict) > 0)
170 write_documents (w, dict);
171 buf_write (w, "F", 1);
172 if (ferror (w->file))
174 return casewriter_create (dict_get_proto (dict),
175 &por_file_casewriter_class, w);
182 /* Write NBYTES starting at BUF to the portable file represented by
183 W. Break lines properly (with CR-LF) every 80 characters. */
185 buf_write (struct pfm_writer *w, const void *buf_, size_t nbytes)
187 const char *buf = buf_;
189 if (ferror (w->file))
192 assert (buf != NULL);
193 while (nbytes + w->lc >= 80)
195 size_t n = 80 - w->lc;
198 fwrite (buf, n, 1, w->file);
199 fwrite ("\r\n", 2, 1, w->file);
205 fwrite (buf, nbytes, 1, w->file);
210 /* Write D to the portable file as a floating-point field. */
212 write_float (struct pfm_writer *w, double d)
215 format_trig_double (d, floor (d) == d ? DBL_DIG : w->digits, buffer);
216 buf_write (w, buffer, strlen (buffer));
218 buf_write (w, "/", 1);
221 /* Write N to the portable file as an integer field. */
223 write_int (struct pfm_writer *w, int n)
226 format_trig_int (n, false, buffer);
227 buf_write (w, buffer, strlen (buffer));
228 buf_write (w, "/", 1);
231 /* Write S to the portable file as a string field. */
233 write_string (struct pfm_writer *w, const char *s)
235 size_t n = strlen (s);
236 write_int (w, (int) n);
240 /* Write file header. */
242 write_header (struct pfm_writer *w)
244 static const char spss2ascii[256] =
246 "0000000000000000000000000000000000000000000000000000000000000000"
247 "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ."
248 "<(+|&[]!$*);^-/|,%_>?`:$@'=\"000000~-0000123456789000-()0{}\\00000"
249 "0000000000000000000000000000000000000000000000000000000000000000"
253 for (i = 0; i < 5; i++)
254 buf_write (w, "ASCII SPSS PORT FILE ", 40);
256 buf_write (w, spss2ascii, 256);
257 buf_write (w, "SPSSPORT", 8);
260 /* Writes version, date, and identification records. */
262 write_version_data (struct pfm_writer *w)
268 if ((time_t) -1 == time (&t))
270 tm.tm_sec = tm.tm_min = tm.tm_hour = tm.tm_mon = tm.tm_year = 0;
275 tmp = localtime (&t);
277 char *date_str = xasprintf ("%04d%02d%02d", tmp->tm_year + 1900,
278 tmp->tm_mon + 1, tmp->tm_mday);
279 char *time_str = xasprintf ("%02d%02d%02d",
280 tmp->tm_hour, tmp->tm_min, tmp->tm_sec);
281 buf_write (w, "A", 1);
282 write_string (w, date_str);
283 write_string (w, time_str);
287 /* Product identification. */
288 buf_write (w, "1", 1);
289 write_string (w, version);
291 /* Subproduct identification. */
292 buf_write (w, "3", 1);
293 write_string (w, host_system);
296 /* Write format F to file W. The format is first resized to fit
297 a value of the given WIDTH, which is handy in case F
298 represents a string longer than 255 bytes and thus WIDTH is
299 truncated to 255 bytes. */
301 write_format (struct pfm_writer *w, struct fmt_spec f, int width)
303 fmt_resize (&f, width);
304 write_int (w, fmt_to_io (f.type));
309 /* Write value V with width WIDTH to file W. */
311 write_value (struct pfm_writer *w, const union value *v, int width)
314 write_float (w, v->f);
317 width = MIN (width, MAX_POR_WIDTH);
318 write_int (w, width);
319 buf_write (w, v->s, width);
323 /* Write variable records. */
325 write_variables (struct pfm_writer *w, struct dictionary *dict)
329 short_names_assign (dict);
331 if (dict_get_weight (dict) != NULL)
333 buf_write (w, "6", 1);
334 write_string (w, var_get_short_name (dict_get_weight (dict), 0));
337 buf_write (w, "4", 1);
338 write_int (w, dict_get_n_vars (dict));
340 buf_write (w, "5", 1);
341 write_int (w, ceil (w->digits * (log (10) / log (30))));
343 for (i = 0; i < dict_get_n_vars (dict); i++)
345 struct variable *v = dict_get_var (dict, i);
346 struct missing_values mv;
347 int width = MIN (var_get_width (v), MAX_POR_WIDTH);
350 buf_write (w, "7", 1);
351 write_int (w, width);
352 write_string (w, var_get_short_name (v, 0));
353 write_format (w, *var_get_print_format (v), width);
354 write_format (w, *var_get_write_format (v), width);
356 /* Write missing values. */
357 mv_copy (&mv, var_get_missing_values (v));
358 if (var_get_width (v) > 8)
360 if (mv_has_range (&mv))
363 mv_get_range (&mv, &x, &y);
366 buf_write (w, "9", 1);
369 else if (y == HIGHEST)
371 buf_write (w, "A", 1);
376 buf_write (w, "B", 1);
381 for (j = 0; j < mv_n_values (&mv); j++)
383 buf_write (w, "8", 1);
384 write_value (w, mv_get_value (&mv, j), mv_get_width (&mv));
388 /* Write variable label. */
389 if (var_get_label (v) != NULL)
391 buf_write (w, "C", 1);
392 write_string (w, var_get_label (v));
397 /* Write value labels to disk. FIXME: Inefficient. */
399 write_value_labels (struct pfm_writer *w, const struct dictionary *dict)
403 for (i = 0; i < dict_get_n_vars (dict); i++)
405 struct variable *v = dict_get_var (dict, i);
406 const struct val_labs *val_labs = var_get_value_labels (v);
407 size_t n_labels = val_labs_count (val_labs);
408 const struct val_lab **labels;
414 buf_write (w, "D", 1);
416 write_string (w, var_get_short_name (v, 0));
417 write_int (w, val_labs_count (val_labs));
419 n_labels = val_labs_count (val_labs);
420 labels = val_labs_sorted (val_labs);
421 for (j = 0; j < n_labels; j++)
423 const struct val_lab *vl = labels[j];
424 write_value (w, val_lab_get_value (vl), var_get_width (v));
425 write_string (w, val_lab_get_escaped_label (vl));
431 /* Write documents in DICT to portable file W. */
433 write_documents (struct pfm_writer *w, const struct dictionary *dict)
435 size_t n_lines = dict_get_document_n_lines (dict);
436 struct string line = DS_EMPTY_INITIALIZER;
439 buf_write (w, "E", 1);
440 write_int (w, n_lines);
441 for (i = 0; i < n_lines; i++)
442 write_string (w, dict_get_document_line (dict, i));
446 /* Writes case C to the portable file represented by WRITER. */
448 por_file_casewriter_write (struct casewriter *writer, void *w_,
451 struct pfm_writer *w = w_;
454 if (!ferror (w->file))
456 for (i = 0; i < w->n_vars; i++)
458 struct pfm_var *v = &w->vars[i];
461 write_float (w, case_num_idx (c, v->case_index));
464 write_int (w, v->width);
465 buf_write (w, case_str_idx (c, v->case_index), v->width);
470 casewriter_force_error (writer);
476 por_file_casewriter_destroy (struct casewriter *writer, void *w_)
478 struct pfm_writer *w = w_;
479 if (!close_writer (w))
480 casewriter_force_error (writer);
483 /* Closes a portable file after we're done with it.
484 Returns true if successful, false if an I/O error occurred. */
486 close_writer (struct pfm_writer *w)
497 memset (buf, 'Z', sizeof buf);
498 buf_write (w, buf, w->lc >= 80 ? 80 : 80 - w->lc);
500 ok = !ferror (w->file);
501 if (fclose (w->file) == EOF)
505 msg (ME, _("An I/O error occurred writing portable file `%s'."),
506 fh_get_file_name (w->fh));
508 if (ok ? !replace_file_commit (w->rf) : !replace_file_abort (w->rf))
521 /* Base-30 conversion.
523 Portable files represent numbers in base-30 format, so we need
524 to be able to convert real and integer numbers to that base.
525 Older versions of PSPP used libgmp to do so, but this added a
526 big library dependency to do just one thing. Now we do it
527 ourselves internally.
529 Important fact: base 30 is called "trigesimal". */
531 /* Conversion base. */
532 #define BASE 30 /* As an integer. */
533 #define LDBASE ((long double) BASE) /* As a long double. */
535 /* This is floor(log30(2**31)), the minimum number of trigesimal
536 digits that a `long int' can hold. */
539 /* pow_tab[i] = pow (30, pow (2, i)) */
540 static long double pow_tab[16];
542 /* Initializes pow_tab[]. */
546 static bool did_init = false;
550 /* Only initialize once. */
555 /* Set each element of pow_tab[] until we run out of numerical
558 for (power = 30.0L; power < DBL_MAX; power *= power)
560 assert (i < sizeof pow_tab / sizeof *pow_tab);
561 pow_tab[i++] = power;
565 /* Returns 30**EXPONENT, for 0 <= EXPONENT <= log30(DBL_MAX). */
567 pow30_nonnegative (int exponent)
572 assert (exponent >= 0);
573 assert (exponent < 1L << (sizeof pow_tab / sizeof *pow_tab));
576 for (i = 0; exponent > 0; exponent >>= 1, i++)
583 /* Returns 30**EXPONENT, for log30(DBL_MIN) <= EXPONENT <=
589 return pow30_nonnegative (exponent);
591 return 1.L / pow30_nonnegative (-exponent);
594 /* Returns the character corresponding to TRIG. */
596 trig_to_char (int trig)
598 assert (trig >= 0 && trig < 30);
599 return "0123456789ABCDEFGHIJKLMNOPQRST"[trig];
602 /* Formats the N_TRIGS trigs in TRIGS[], writing them as
603 null-terminated STRING. The trigesimal point is inserted
604 after TRIG_PLACES characters have been printed, if necessary
605 adding extra zeros at either end for correctness. Returns the
606 character after the formatted number. */
608 format_trig_digits (char *string,
609 const char trigs[], int n_trigs, int trig_places)
614 while (trig_places++ < 0)
618 while (n_trigs-- > 0)
620 if (trig_places-- == 0)
622 *string++ = trig_to_char (*trigs++);
624 while (trig_places-- > 0)
630 /* Helper function for format_trig_int() that formats VALUE as a
631 trigesimal integer at CP. VALUE must be nonnegative.
632 Returns the character following the formatted integer. */
634 recurse_format_trig_int (char *cp, int value)
636 int trig = value % BASE;
639 cp = recurse_format_trig_int (cp, value);
640 *cp++ = trig_to_char (trig);
644 /* Formats VALUE as a trigesimal integer in null-terminated
645 STRING[]. VALUE must be in the range -DBL_MAX...DBL_MAX. If
646 FORCE_SIGN is true, a sign is always inserted; otherwise, a
647 sign is only inserted if VALUE is negative. */
649 format_trig_int (int value, bool force_sign, char string[])
660 /* Format integer. */
661 string = recurse_format_trig_int (string, value);
666 /* Determines whether the N_TRIGS trigesimals in TRIGS[] warrant
667 rounding up or down. Returns true if TRIGS[] represents a
668 value greater than half, false if less than half. If TRIGS[]
669 is exactly half, examines TRIGS[-1] and returns true if odd,
670 false if even ("round to even"). */
672 should_round_up (const char trigs[], int n_trigs)
674 assert (n_trigs > 0);
676 if (*trigs < BASE / 2)
678 /* Less than half: round down. */
681 else if (*trigs > BASE / 2)
683 /* Greater than half: round up. */
688 /* Approximately half: look more closely. */
690 for (i = 1; i < n_trigs; i++)
693 /* Slightly greater than half: round up. */
697 /* Exactly half: round to even. */
698 return trigs[-1] % 2;
702 /* Rounds up the rightmost trig in the N_TRIGS trigs in TRIGS[],
703 carrying to the left as necessary. Returns true if
704 successful, false on failure (due to a carry out of the
705 leftmost position). */
707 try_round_up (char *trigs, int n_trigs)
711 char *round_trig = trigs + --n_trigs;
712 if (*round_trig != BASE - 1)
714 /* Round this trig up to the next value. */
719 /* Carry over to the next trig to the left. */
723 /* Ran out of trigs to carry. */
727 /* Converts VALUE to trigesimal format in string OUTPUT[] with the
728 equivalent of at least BASE_10_PRECISION decimal digits of
729 precision. The output format may use conventional or
730 scientific notation. Missing, infinite, and extreme values
731 are represented with "*.". */
733 format_trig_double (long double value, int base_10_precision, char output[])
735 /* Original VALUE was negative? */
738 /* Number of significant trigesimals. */
739 int base_30_precision;
741 /* Base-2 significand and exponent for original VALUE. */
745 /* VALUE as a set of trigesimals. */
746 char buffer[DBL_DIG + 16];
750 /* Number of trigesimal places for trigs.
751 trigs[0] has coefficient 30**(trig_places - 1),
752 trigs[1] has coefficient 30**(trig_places - 2),
754 In other words, the trigesimal point is just before trigs[0].
758 /* Number of trigesimal places left to write into BUFFER. */
763 /* Handle special cases. */
769 /* Make VALUE positive. */
778 /* Adjust VALUE to roughly 30**3, by shifting the trigesimal
779 point left or right as necessary. We approximate the
780 base-30 exponent by obtaining the base-2 exponent, then
781 multiplying by log30(2). This approximation is sufficient
782 to ensure that the adjusted VALUE is always in the range
783 0...30**6, an invariant of the loop below. */
785 base_2_sig = frexp (value, &base_2_exp);
786 if (errno != 0 || !isfinite (base_2_sig))
788 if (base_2_exp == 0 && base_2_sig == 0.)
790 if (base_2_exp <= INT_MIN / 20379L || base_2_exp >= INT_MAX / 20379L)
792 trig_places = (base_2_exp * 20379L / 100000L) + CHUNK_SIZE / 2;
793 value *= pow30 (CHUNK_SIZE - trig_places);
795 /* Dump all the trigs to buffer[], CHUNK_SIZE at a time. */
798 for (trigs_to_output = DIV_RND_UP (DBL_DIG * 2, 3) + 1 + (CHUNK_SIZE / 2);
800 trigs_to_output -= CHUNK_SIZE)
805 /* The current chunk is just the integer part of VALUE,
806 truncated to the nearest integer. The chunk fits in a
809 assert (pow30 (CHUNK_SIZE) <= LONG_MAX);
810 assert (chunk >= 0 && chunk < pow30 (CHUNK_SIZE));
814 /* Append the chunk, in base 30, to trigs[]. */
815 for (trigs_left = CHUNK_SIZE; chunk > 0 && trigs_left > 0;)
817 trigs[n_trigs + --trigs_left] = chunk % 30;
820 while (trigs_left > 0)
821 trigs[n_trigs + --trigs_left] = 0;
822 n_trigs += CHUNK_SIZE;
824 /* Proceed to the next chunk. */
827 value *= pow (LDBASE, CHUNK_SIZE);
830 /* Strip leading zeros. */
831 while (n_trigs > 1 && *trigs == 0)
838 /* Round to requested precision, conservatively estimating the
839 required base-30 precision as 2/3 of the base-10 precision
840 (log30(10) = .68). */
841 assert (base_10_precision > 0);
842 if (base_10_precision > LDBL_DIG)
843 base_10_precision = LDBL_DIG;
844 base_30_precision = DIV_RND_UP (base_10_precision * 2, 3);
845 if (n_trigs > base_30_precision)
847 if (should_round_up (trigs + base_30_precision,
848 n_trigs - base_30_precision))
850 /* Try to round up. */
851 if (try_round_up (trigs, base_30_precision))
853 /* Rounding up worked. */
854 n_trigs = base_30_precision;
858 /* Couldn't round up because we ran out of trigs to
859 carry into. Do the carry here instead. */
868 n_trigs = base_30_precision;
873 /* No rounding required: fewer digits available than
877 /* Strip trailing zeros. */
878 while (n_trigs > 1 && trigs[n_trigs - 1] == 0)
884 if (trig_places >= -1 && trig_places < n_trigs + 3)
886 /* Use conventional notation. */
887 format_trig_digits (output, trigs, n_trigs, trig_places);
891 /* Use scientific notation. */
893 op = format_trig_digits (output, trigs, n_trigs, n_trigs);
894 op = format_trig_int (trig_places - n_trigs, true, op);
899 strcpy (output, "0");
903 strcpy (output, "*.");
907 static const struct casewriter_class por_file_casewriter_class =
909 por_file_casewriter_write,
910 por_file_casewriter_destroy,