1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA. */
21 #include "sys-file-writer.h"
22 #include "sfm-private.h"
23 #include <libpspp/message.h>
31 #include <libpspp/alloc.h>
33 #include "dictionary.h"
34 #include <libpspp/message.h>
35 #include "file-handle-def.h"
36 #include <libpspp/hash.h>
37 #include <libpspp/magic.h>
38 #include <libpspp/misc.h>
40 #include "stat-macros.h"
41 #include <libpspp/str.h>
42 #include "value-labels.h"
44 #include <libpspp/version.h>
47 #define _(msgid) gettext (msgid)
49 /* Compression bias used by PSPP. Values between (1 -
50 COMPRESSION_BIAS) and (251 - COMPRESSION_BIAS) inclusive can be
compressed. */
52 #define COMPRESSION_BIAS 100
54 /* System file writer. */
/* State for one system file being written.  The fields are filled in
   when the writer is opened and are read by the case-writing and
   closing routines in this file. */
57 struct file_handle *fh; /* File handle. */
58 FILE *file; /* File stream. */
/* needs_translation is set from dict_compacting_would_change() and
   compress from the caller's options when the writer is opened. */
60 int needs_translation; /* 0=use fast path, 1=translation needed. */
61 int compress; /* 1=compressed, 0=not compressed. */
62 int case_cnt; /* Number of cases written so far. */
/* flt64_cnt is accumulated in write_header() as the sum of
   var_flt64_cnt() over every variable in the dictionary. */
63 size_t flt64_cnt; /* Number of flt64 elements in case. */
65 /* Compression buffering. */
/* buf, end and ptr start out NULL.  When the buffer is allocated it
   holds 128 flt64 elements.  x and y delimit the flt64-sized slot that
   currently collects one-byte compression instructions. */
66 flt64 *buf; /* Buffered data. */
67 flt64 *end; /* Buffer end. */
68 flt64 *ptr; /* Current location in buffer. */
69 unsigned char *x; /* Location in current instruction octet. */
70 unsigned char *y; /* End of instruction octet. */
/* vars holds var_cnt entries, one per dictionary variable.
   var_cnt_vls is initialized equal to var_cnt and is used to size the
   variable display parameter record. */
73 struct sfm_var *vars; /* Variables. */
74 size_t var_cnt; /* Number of variables. */
75 size_t var_cnt_vls; /* Number of variables including
76 very long string components. */
79 /* A variable in a system file. */
/* One entry per dictionary variable, built when the writer is opened.
   For strings wider than MAX_LONG_STRING the stored width is not the
   dictionary width: it is enlarged by one byte for every full
   MAX_LONG_STRING-byte segment. */
82 int width; /* 0=numeric, otherwise string width. */
/* fv is the index handed to case_num() and case_data() when a case
   is written. */
83 int fv; /* Index into case. */
/* flt64_cnt is the value of var_flt64_cnt() for the variable. */
84 size_t flt64_cnt; /* Number of flt64 elements. */
/* Forward declarations of the file-local helpers that emit the
   individual records of a system file; all are defined further down
   in this file. */
87 static char *append_string_max (char *, const char *, const char *);
88 static void write_header (struct sfm_writer *, const struct dictionary *);
89 static void buf_write (struct sfm_writer *, const void *, size_t);
90 static void write_variable (struct sfm_writer *, const struct variable *);
91 static void write_value_labels (struct sfm_writer *,
92 struct variable *, int idx);
93 static void write_rec_7_34 (struct sfm_writer *);
95 static void write_longvar_table (struct sfm_writer *w,
96 const struct dictionary *dict);
98 static void write_vls_length_table (struct sfm_writer *w,
99 const struct dictionary *dict);
102 static void write_variable_display_parameters (struct sfm_writer *w,
103 const struct dictionary *dict);
105 static void write_documents (struct sfm_writer *, const struct dictionary *);
/* Returns the number of flt64-sized elements that variable V occupies
   in a written case: width_to_bytes() of V's width divided by
   MAX_SHORT_STRING. */
108 var_flt64_cnt (const struct variable *v)
/* The division below only counts flt64 elements if one flt64 is
   exactly MAX_SHORT_STRING bytes, hence the assertion. */
110 assert(sizeof(flt64) == MAX_SHORT_STRING);
111 return width_to_bytes(v->width) / MAX_SHORT_STRING ;
/* Returns the nominal number of flt64 elements for variable V: 1 for
   a numeric variable, otherwise V's width rounded up to a whole number
   of flt64-sized units.  Unlike var_flt64_cnt(), the width is used
   directly rather than passed through width_to_bytes(). */
115 var_flt64_cnt_nom (const struct variable *v)
117 return v->type == NUMERIC ? 1 : DIV_RND_UP (v->width, sizeof (flt64));
121 /* Returns default options for writing a system file. */
122 struct sfm_write_options
123 sfm_writer_default_options (void)
125 struct sfm_write_options opts;
/* By default the created file gets write permission, and compression
   follows whatever get_scompression() reports.
   NOTE(review): the remaining option fields and the return statement
   are not visible in this excerpt. */
126 opts.create_writeable = true;
127 opts.compress = get_scompression ();
133 /* Return a short variable name to be used as the continuation of the
134 variable with the short name SN.
136 FIXME: Need to resolve clashes somehow.
   The name consists of at most the first SHORT_NAME_LEN - 3 bytes of
   SN followed by IDX printed as at least three zero-padded decimal
   digits.  It is formatted into a function-static buffer, so each
   call overwrites the result of the previous one. */
140 cont_var_name(const char *sn, int idx)
142 static char s[SHORT_NAME_LEN + 1];
/* abb has one byte more than strncpy() may write and is
   zero-initialized, so it is always null-terminated. */
144 char abb[SHORT_NAME_LEN + 1 - 3]= {0};
146 strncpy(abb, sn, SHORT_NAME_LEN - 3);
/* snprintf() limits the result to SHORT_NAME_LEN bytes plus the
   terminator, so an IDX with more than three digits is cut short. */
148 snprintf(s, SHORT_NAME_LEN + 1, "%s%03d", abb, idx);
154 /* Opens the system file designated by file handle FH for writing
155 cases from dictionary D according to the given OPTS. If
156 OPTS.compress is nonzero, the system file will be compressed.
158 No reference to D is retained, so it may be modified or
159 destroyed at will after this function returns. D is not
160 modified by this function, except to assign short names. */
162 sfm_open_writer (struct file_handle *fh, struct dictionary *d,
163 struct sfm_write_options opts)
165 struct sfm_writer *w = NULL;
/* Only versions 2 and 3 are recognized; any other value is reported
   to the user. */
172 if (opts.version != 2 && opts.version != 3)
174 msg (ME, _("Unknown system file version %d. Treating as version %d."),
/* The file is always created readable by user, group and others;
   the matching write bits are added only when the caller asks for a
   writeable file.  O_TRUNC discards any existing contents. */
180 mode = S_IRUSR | S_IRGRP | S_IROTH;
181 if (opts.create_writeable)
182 mode |= S_IWUSR | S_IWGRP | S_IWOTH;
183 fd = open (fh_get_file_name (fh), O_WRONLY | O_CREAT | O_TRUNC, mode);
187 /* Open file handle. */
188 if (!fh_open (fh, FH_REF_FILE, "system file", "we"))
191 /* Create and initialize writer. */
192 w = xmalloc (sizeof *w);
/* Wrap the descriptor returned by open() in a stdio stream. */
194 w->file = fdopen (fd, "w");
196 w->needs_translation = dict_compacting_would_change (d);
197 w->compress = opts.compress;
/* No compression buffer exists yet. */
201 w->buf = w->end = w->ptr = NULL;
/* Record the per-variable layout information needed later to write
   cases without consulting the dictionary again. */
204 w->var_cnt = dict_get_var_cnt (d);
205 w->var_cnt_vls = w->var_cnt;
206 w->vars = xnmalloc (w->var_cnt, sizeof *w->vars);
207 for (i = 0; i < w->var_cnt; i++)
209 const struct variable *dv = dict_get_var (d, i);
210 struct sfm_var *sv = &w->vars[i];
211 sv->width = dv->width;
212 /* spss compatibility nonsense */
/* For very long strings the stored width gains one extra byte per
   full MAX_LONG_STRING-byte segment. */
213 if ( dv->width > MAX_LONG_STRING )
214 sv->width = (dv->width / MAX_LONG_STRING) * (MAX_LONG_STRING + 1)
215 + (dv->width % MAX_LONG_STRING) ;
217 sv->flt64_cnt = var_flt64_cnt (dv);
220 /* Check that file create succeeded. */
227 /* Write the file header. */
230 /* Write basic variable info. */
/* Short names must exist before any variable record is written. */
231 dict_assign_short_names (d);
232 for (i = 0; i < dict_get_var_cnt (d); i++)
235 const struct variable *v = dict_get_var(d, i);
236 int wcount = v->width;
/* Each variable is written from a private copy so that the
   dictionary entry itself is left untouched.  The copy is adjusted
   as shown below: missing values cleared, a generated continuation
   short name, and no label.
   NOTE(review): the loop and the conditions selecting which segments
   get these adjustments are not visible in this excerpt. */
239 struct variable var_cont = *v;
240 if ( v->type == ALPHA)
244 mv_init(&var_cont.miss, 0);
245 strcpy(var_cont.short_name,
246 cont_var_name(v->short_name, count));
247 var_cont.label = NULL;
/* A segment is MAX_LONG_STRING wide while more than that remains;
   otherwise it takes whatever width is left. */
251 if ( wcount > MAX_LONG_STRING )
253 var_cont.width = MAX_LONG_STRING;
254 wcount -= EFFECTIVE_LONG_STRING_LENGTH;
258 var_cont.width = wcount;
259 wcount -= var_cont.width;
/* Keep the output formats in step with the segment width. */
262 var_cont.write.w = var_cont.width;
263 var_cont.print.w = var_cont.width;
266 write_variable (w, &var_cont);
270 /* Write out value labels. */
/* idx tracks the position, counted in flt64 elements, of the current
   variable among the variable records. */
271 for (idx = i = 0; i < dict_get_var_cnt (d); i++)
273 struct variable *v = dict_get_var (d, i);
275 write_value_labels (w, v, idx);
276 idx += var_flt64_cnt (v);
279 if (dict_get_documents (d) != NULL)
280 write_documents (w, d);
284 write_variable_display_parameters (w, d);
/* The long variable name table is written only for version 3 and
   later. */
286 if (opts.version >= 3)
287 write_longvar_table (w, d);
289 write_vls_length_table(w, d);
291 /* Write end-of-headers record. */
300 rec_999.rec_type = 999;
303 buf_write (w, &rec_999, sizeof rec_999);
/* Set up the compression buffer: 128 flt64 elements, the first of
   which is reserved as the slot for instruction bytes (x at its first
   byte, y one past its last).
   NOTE(review): presumably done only for compressed files; the guard
   is not visible in this excerpt. */
308 w->buf = xnmalloc (128, sizeof *w->buf);
310 w->end = &w->buf[128];
311 w->x = (unsigned char *) w->ptr++;
312 w->y = (unsigned char *) w->ptr;
315 if (sfm_write_error (w))
/* Failure handling: the writer is closed and the error is reported
   together with the text for errno. */
321 sfm_close_writer (w);
325 msg (ME, _("Error opening \"%s\" for writing as a system file: %s."),
326 fh_get_file_name (fh), strerror (errno));
330 /* Returns value of X truncated to two least-significant digits. */
341 /* Write the sysfile_header header to system file W. */
/* The header is assembled in a local struct sysfile_header from
   dictionary D and writer W and then written in one piece.  As a side
   effect W->flt64_cnt is accumulated here. */
343 write_header (struct sfm_writer *w, const struct dictionary *d)
345 struct sysfile_header hdr;
351 memcpy (hdr.rec_type, "$FL2", 4);
/* Product name: a fixed prefix, then the program version and host
   system, each clipped so that nothing is written at or beyond
   hdr.prod_name[60]; the remainder is filled with spaces. */
353 p = stpcpy (hdr.prod_name, "@(#) SPSS DATA FILE ");
354 p = append_string_max (p, version, &hdr.prod_name[60]);
355 p = append_string_max (p, " - ", &hdr.prod_name[60]);
356 p = append_string_max (p, host_system, &hdr.prod_name[60]);
357 memset (p, ' ', &hdr.prod_name[60] - p);
/* The nominal case size is the total number of flt64 elements over
   all variables. */
362 for (i = 0; i < dict_get_var_cnt (d); i++)
364 w->flt64_cnt += var_flt64_cnt (dict_get_var (d, i));
366 hdr.nominal_case_size = w->flt64_cnt;
368 hdr.compress = w->compress;
/* The weight index is recomputed in flt64 elements, starting from 1.
   NOTE(review): presumably the sum covers the variables that precede
   the weight variable; the loop header and its exit are not visible
   in this excerpt. */
370 if (dict_get_weight (d) != NULL)
372 struct variable *weight_var;
373 int recalc_weight_idx = 1;
376 weight_var = dict_get_weight (d);
379 struct variable *v = dict_get_var (d, i);
382 recalc_weight_idx += var_flt64_cnt (v);
384 hdr.weight_idx = recalc_weight_idx;
390 hdr.bias = COMPRESSION_BIAS;
/* If the current time is unavailable a fixed date and time are
   written instead. */
392 if (time (&t) == (time_t) -1)
394 memcpy (hdr.creation_date, "01 Jan 70", 9);
395 memcpy (hdr.creation_time, "00:00:00", 8);
399 static const char *month_name[12] =
401 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
402 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
/* Every component is passed through rerange() before formatting.
   Hour, minute and second are passed as value + 1 and 1 is subtracted
   again when they are printed. */
404 struct tm *tmp = localtime (&t);
405 int day = rerange (tmp->tm_mday);
406 int mon = rerange (tmp->tm_mon + 1);
407 int year = rerange (tmp->tm_year);
408 int hour = rerange (tmp->tm_hour + 1);
409 int min = rerange (tmp->tm_min + 1);
410 int sec = rerange (tmp->tm_sec + 1);
/* Only as many bytes as each header field holds are copied out of
   the formatted text. */
413 sprintf (buf, "%02d %s %02d", day, month_name[mon - 1], year);
414 memcpy (hdr.creation_date, buf, sizeof hdr.creation_date);
415 sprintf (buf, "%02d:%02d:%02d", hour - 1, min - 1, sec - 1);
416 memcpy (hdr.creation_time, buf, sizeof hdr.creation_time);
/* The file label is padded on the right to the full field size. */
420 const char *label = dict_get_label (d);
424 buf_copy_str_rpad (hdr.file_label, sizeof hdr.file_label, label);
427 memset (hdr.padding, 0, sizeof hdr.padding);
429 buf_write (w, &hdr, sizeof hdr);
432 /* Translates format spec from internal form in SRC to system file
   form, storing the packed result in *DEST.  The packed value is the
   SPSS code of SRC's format type shifted left 16 bits, combined with
   the width shifted left 8 bits and the number of decimals. */
435 write_format_spec (const struct fmt_spec *src, int32_t *dest)
/* SRC is expected to pass check_output_specifier(). */
437 assert(check_output_specifier(src, true));
438 *dest = (formats[src->type].spss << 16) | (src->w << 8) | src->d;
441 /* Write the variable record(s) for variable V
442 to system file W. */
444 write_variable (struct sfm_writer *w, const struct variable *v)
446 struct sysfile_variable sv;
448 /* Missing values. */
449 struct missing_values mv;
450 flt64 m[3]; /* Missing value values. */
451 int nm; /* Number of missing values, possibly negative. */
/* The record's type field holds the width, capped at
   MAX_LONG_STRING. */
454 sv.type = min(v->width, MAX_LONG_STRING);
455 sv.has_var_label = (v->label != NULL);
/* Work on a copy of the missing values because the mv_pop_*()
   calls below consume them. */
457 mv_copy (&mv, &v->miss);
/* A range contributes two entries; the LOWEST and HIGHEST endpoints
   are replaced by second_lowest_flt64 and FLT64_MAX. */
459 if (mv_has_range (&mv))
462 mv_pop_range (&mv, &x, &y);
463 m[nm++] = x == LOWEST ? second_lowest_flt64 : x;
464 m[nm++] = y == HIGHEST ? FLT64_MAX : y;
/* Each discrete value contributes one entry; string values are
   space-padded to the size of one flt64. */
466 while (mv_has_value (&mv))
469 mv_pop_value (&mv, &value);
470 if (v->type == NUMERIC)
473 buf_copy_rpad ((char *) &m[nm], sizeof m[nm], value.s, v->width);
/* NOTE(review): presumably nm is negated here to mark the presence of
   a range (the magnitude is recovered with abs() below); the
   statement itself is not visible in this excerpt. */
476 if (mv_has_range (&v->miss))
479 sv.n_missing_values = nm;
480 write_format_spec (&v->print, &sv.print);
481 write_format_spec (&v->write, &sv.write);
482 buf_copy_str_rpad (sv.name, sizeof sv.name, v->short_name);
483 buf_write (w, &sv, sizeof sv);
/* Variable label: at most 255 bytes are kept, and the text is
   space-padded up to a multiple of sizeof l.label_len. */
496 l.label_len = min (strlen (v->label), 255);
497 ext_len = ROUND_UP (l.label_len, sizeof l.label_len);
498 memcpy (l.label, v->label, l.label_len);
499 memset (&l.label[l.label_len], ' ', ext_len - l.label_len);
501 buf_write (w, &l, offsetof (struct label, label) + ext_len);
/* The missing values follow, abs (nm) of them. */
505 buf_write (w, m, sizeof *m * abs (nm));
/* A string wider than one flt64 is followed by extra records, one
   per additional flt64 of the (capped) width, with label flag,
   missing value count, formats and name all cleared.
   NOTE(review): any change to sv.type for these records is not
   visible in this excerpt. */
507 if (v->type == ALPHA && v->width > (int) sizeof (flt64))
513 sv.has_var_label = 0;
514 sv.n_missing_values = 0;
515 memset (&sv.print, 0, sizeof sv.print);
516 memset (&sv.write, 0, sizeof sv.write);
517 memset (&sv.name, 0, sizeof sv.name);
519 pad_count = DIV_RND_UP (min(v->width, MAX_LONG_STRING),
520 (int) sizeof (flt64)) - 1;
521 for (i = 0; i < pad_count; i++)
522 buf_write (w, &sv, sizeof sv);
526 /* Writes the value labels for variable V having system file
527 variable index IDX to system file W. */
/* Two records are produced: a value label record built in a heap
   buffer, followed by a variable index record naming the variable the
   labels belong to.  Nothing is written if V has no value labels. */
529 write_value_labels (struct sfm_writer *w, struct variable *v, int idx)
531 struct value_label_rec
545 struct val_labs_iterator *i;
546 struct value_label_rec *vlr;
547 struct var_idx_rec vir;
552 if (!val_labs_count (v->val_labs))
555 /* Pass 1: Count bytes. */
/* One flt64 is needed per label for its value, plus the label text
   and a length byte rounded up to whole flt64 units.
   NOTE(review): the "- 1" presumably accounts for one flt64 already
   contained in struct value_label_rec; confirm against its
   definition, which is not visible in this excerpt. */
556 vlr_size = (sizeof (struct value_label_rec)
557 + sizeof (flt64) * (val_labs_count (v->val_labs) - 1));
558 for (vl = val_labs_first (v->val_labs, &i); vl != NULL;
559 vl = val_labs_next (v->val_labs, &i))
560 vlr_size += ROUND_UP (strlen (vl->label) + 1, sizeof (flt64));
562 /* Pass 2: Copy bytes. */
563 vlr = xmalloc (vlr_size);
565 vlr->n_labels = val_labs_count (v->val_labs);
/* Labels are emitted in the order given by val_labs_first_sorted(). */
567 for (vl = val_labs_first_sorted (v->val_labs, &i); vl != NULL;
568 vl = val_labs_next (v->val_labs, &i))
570 size_t len = strlen (vl->label);
/* Layout per label: the value, then a one-byte length, the label
   text, and space padding up to the next flt64 boundary.
   NOTE(review): the length is stored in a single unsigned char, so a
   label longer than 255 bytes would not be represented correctly;
   confirm that label length is bounded elsewhere. */
572 *loc++ = vl->value.f;
573 *(unsigned char *) loc = len;
574 memcpy (&((char *) loc)[1], vl->label, len);
575 memset (&((char *) loc)[1 + len], ' ',
576 REM_RND_UP (len + 1, sizeof (flt64)));
577 loc += DIV_RND_UP (len + 1, sizeof (flt64));
580 buf_write (w, vlr, vlr_size);
/* The variable index written to the file is IDX plus one. */
585 vir.vars[0] = idx + 1;
586 buf_write (w, &vir, sizeof vir);
589 /* Writes record type 6, document record. */
/* The record consists of a small header followed by the document
   text of dictionary D, taken as a sequence of 80-byte lines. */
591 write_documents (struct sfm_writer *w, const struct dictionary *d)
595 int32_t rec_type P; /* Always 6. */
596 int32_t n_lines P; /* Number of lines of documents. */
600 const char *documents;
603 documents = dict_get_documents (d);
/* Only whole 80-byte lines are counted and written; a trailing
   partial line, if any, is left out. */
604 n_lines = strlen (documents) / 80;
607 rec_6.n_lines = n_lines;
608 buf_write (w, &rec_6, sizeof rec_6);
609 buf_write (w, documents, 80 * n_lines);
612 /* Write the alignment, width and scale values */
/* Emits record type 7, subtype 11 to W.  The header announces
   W->var_cnt_vls * 3 elements of 4 bytes each; the body holds, for
   every variable in DICT, the three values measure, display width and
   alignment. */
614 write_variable_display_parameters (struct sfm_writer *w,
615 const struct dictionary *dict)
627 vdp_hdr.rec_type = 7;
628 vdp_hdr.subtype = 11;
629 vdp_hdr.elem_size = 4;
630 vdp_hdr.n_elem = w->var_cnt_vls * 3;
632 buf_write (w, &vdp_hdr, sizeof vdp_hdr);
634 for ( i = 0 ; i < w->var_cnt ; ++i )
645 v = dict_get_var(dict, i);
647 params.measure = v->measure;
648 params.width = v->display_width;
649 params.align = v->alignment;
/* The address of PARAMS must be passed here; the operand had been
   garbled into a non-C character sequence. */
651 buf_write (w, &params, sizeof(params));
/* A variable wider than MAX_LONG_STRING gets further entries for its
   remaining segments; only the width differs from the first entry.
   NOTE(review): the loop that repeats the statements below is not
   visible in this excerpt. */
653 if ( v->width > MAX_LONG_STRING )
655 int wcount = v->width - EFFECTIVE_LONG_STRING_LENGTH ;
659 params.width = wcount > MAX_LONG_STRING ? 32 : wcount;
661 buf_write (w, &params, sizeof(params));
663 wcount -= EFFECTIVE_LONG_STRING_LENGTH ;
669 /* Writes the table of lengths for Very Long String Variables */
/* Emits record type 7, subtype 14 to W, with one-byte elements.  The
   body is text assembled in a dynamic string; the record is written
   only if that text is non-empty. */
671 write_vls_length_table (struct sfm_writer *w,
672 const struct dictionary *dict)
684 struct string vls_length_map;
686 ds_init (&vls_length_map, 12 * dict_get_var_cnt (dict));
688 vls_hdr.rec_type = 7;
689 vls_hdr.subtype = 14;
690 vls_hdr.elem_size = 1;
693 for (i = 0; i < dict_get_var_cnt (dict); ++i)
695 const struct variable *v = dict_get_var (dict, i);
/* NOTE(review): variables no wider than MAX_LONG_STRING are
   presumably skipped; the statement controlled by this test is not
   visible in this excerpt. */
697 if ( v->width <= MAX_LONG_STRING )
/* Entry format: short name, '=', the width as at least five
   zero-padded digits, then a null byte and a tab. */
700 ds_printf (&vls_length_map, "%s=%05d", v->short_name, v->width);
701 ds_putc (&vls_length_map, '\0');
702 ds_putc (&vls_length_map, '\t');
705 vls_hdr.n_elem = ds_length (&vls_length_map);
707 if ( vls_hdr.n_elem > 0 )
709 buf_write (w, &vls_hdr, sizeof vls_hdr);
710 buf_write (w, ds_data (&vls_length_map), ds_length (&vls_length_map));
713 ds_destroy (&vls_length_map);
716 /* Writes the long variable name table */
/* The table is text assembled in a dynamic string and written to W
   after a header whose element size is one byte and whose element
   count is the length of the text. */
718 write_longvar_table (struct sfm_writer *w, const struct dictionary *dict)
729 struct string long_name_map;
732 ds_init (&long_name_map, 10 * dict_get_var_cnt (dict));
733 for (i = 0; i < dict_get_var_cnt (dict); i++)
735 struct variable *v = dict_get_var (dict, i);
/* Entries have the form "short name=long name" and are separated by
   tabs.
   NOTE(review): the condition that decides when the tab is emitted
   is not visible in this excerpt. */
738 ds_putc (&long_name_map, '\t');
739 ds_printf (&long_name_map, "%s=%s", v->short_name, v->name);
744 lv_hdr.elem_size = 1;
745 lv_hdr.n_elem = ds_length (&long_name_map);
747 buf_write (w, &lv_hdr, sizeof lv_hdr);
748 buf_write (w, ds_data (&long_name_map), ds_length (&long_name_map));
750 ds_destroy (&long_name_map);
753 /* Writes record type 7, subtypes 3 and 4. */
/* Both subrecords are assembled in one local structure and written to
   W together.  Subtype 3 carries int32_t elements, starting with the
   three components of the program version; subtype 4 carries three
   flt64 elements: -FLT64_MAX, FLT64_MAX and second_lowest_flt64. */
755 write_rec_7_34 (struct sfm_writer *w)
759 int32_t rec_type_3 P;
761 int32_t data_type_3 P;
764 int32_t rec_type_4 P;
766 int32_t data_type_4 P;
772 /* Components of the version number, from major to minor. */
773 int version_component[3];
775 /* Used to step through the version string. */
778 /* Parses the version string, which is assumed to be of the form
779 #.#x, where each # is a string of digits, and x is a single
   letter. */
781 version_component[0] = strtol (bare_version, &p, 10);
/* The second number is parsed from where the first conversion stopped.
   Parsing bare_version again from its start would just yield the
   first number a second time.
   NOTE(review): this assumes the statements between the two
   conversions, which are not visible in this excerpt, step P over the
   '.' separator. */
784 version_component[1] = strtol (p, &p, 10);
/* A trailing letter is mapped to its distance from 'a'; anything else
   gives 0. */
785 version_component[2] = (isalpha ((unsigned char) *p)
786 ? tolower ((unsigned char) *p) - 'a' : 0);
788 rec_7.rec_type_3 = 7;
790 rec_7.data_type_3 = sizeof (int32_t);
792 rec_7.elem_3[0] = version_component[0];
793 rec_7.elem_3[1] = version_component[1];
794 rec_7.elem_3[2] = version_component[2];
795 rec_7.elem_3[3] = -1;
797 /* PORTME: 1=IEEE754, 2=IBM 370, 3=DEC VAX E. */
804 /* PORTME: 1=big-endian, 2=little-endian. */
811 /* PORTME: 1=EBCDIC, 2=7-bit ASCII, 3=8-bit ASCII, 4=DEC Kanji. */
814 rec_7.rec_type_4 = 7;
816 rec_7.data_type_4 = sizeof (flt64);
818 rec_7.elem_4[0] = -FLT64_MAX;
819 rec_7.elem_4[1] = FLT64_MAX;
820 rec_7.elem_4[2] = second_lowest_flt64;
822 buf_write (w, &rec_7, sizeof rec_7);
825 /* Write NBYTES starting at BUF to the system file represented by
828 buf_write (struct sfm_writer *w, const void *buf, size_t nbytes)
830 assert (buf != NULL);
831 fwrite (buf, nbytes, 1, w->file);
/* Copies as much of string SRC to DEST as fits without writing to or
   past the byte at END; no null terminator is copied.  Returns a
   pointer to the byte after the last byte copied, which is DEST
   itself when SRC is empty or no room is left (END at or before
   DEST). */
static char *
append_string_max (char *dest, const char *src, const char *end)
{
  /* Lengths are kept as size_t so that neither a long SRC nor an END
     that lies before DEST can turn into a negative or wrapped count
     for memcpy(). */
  size_t room = end > dest ? (size_t) (end - dest) : 0;
  size_t src_len = strlen (src);
  size_t nbytes = src_len < room ? src_len : room;

  memcpy (dest, src, nbytes);
  return dest + nbytes;
}
845 /* Makes certain that the compression buffer of W has room for another
846 element. If there's not room, pads out the current instruction
847 octet with zero and dumps out the buffer. */
849 ensure_buf_space (struct sfm_writer *w)
/* The buffer is full once the write position has reached its end. */
851 if (w->ptr >= w->end)
/* Zero the unused remainder of the instruction octet, then write out
   all 128 elements of the buffer.
   NOTE(review): the statements that reset the buffer position are not
   visible in this excerpt. */
853 memset (w->x, 0, w->y - w->x);
856 buf_write (w, w->buf, sizeof *w->buf * 128);
860 static void write_compressed_data (struct sfm_writer *w, const flt64 *elem);
862 /* Writes case C to system file W.
863 Returns 1 if successful, 0 if an I/O error occurred. */
865 sfm_write_case (struct sfm_writer *w, const struct ccase *c)
/* An error already pending on the stream is checked for before any
   work is done. */
867 if (ferror (w->file))
872 if (!w->needs_translation && !w->compress
873 && sizeof (flt64) == sizeof (union value))
875 /* Fast path: external and internal representations are the
876 same and the dictionary is properly ordered. Write
   the case data to the file as it is. */
878 buf_write (w, case_data_all (c), sizeof (union value) * w->flt64_cnt);
882 /* Slow path: internal and external representations differ.
883 Write into a bounce buffer, then write to W. */
890 bounce_size = sizeof *bounce * w->flt64_cnt;
891 bounce = bounce_cur = local_alloc (bounce_size);
/* BOUNCE points to flt64 elements, so its end lies W->flt64_cnt
   elements past the start.  Adding the size in bytes instead would
   form a pointer far beyond the allocation. */
892 bounce_end = bounce + w->flt64_cnt;
894 for (i = 0; i < w->var_cnt; i++)
896 struct sfm_var *v = &w->vars[i];
/* Pre-fill the variable's slot with spaces, then store either the
   numeric value or the space-padded string data.
   NOTE(review): the test that chooses between the two forms is not
   visible in this excerpt. */
898 memset(bounce_cur, ' ', v->flt64_cnt * sizeof (flt64));
901 *bounce_cur = case_num (c, v->fv);
904 buf_copy_rpad((char*)bounce_cur, v->flt64_cnt * sizeof (flt64),
905 case_data(c, v->fv)->s,
908 bounce_cur += v->flt64_cnt;
/* The assembled case goes out either as it is or through the
   compressor. */
912 buf_write (w, bounce, bounce_size);
914 write_compressed_data (w, bounce);
919 return !sfm_write_error (w);
/* Appends the one-byte compression INSTRUCTION to the current
   instruction octet of W.
   NOTE(review): the visible statements claim the next flt64 slot of
   the buffer as a fresh instruction octet (x at its first byte, y one
   past its last), presumably only once the current octet is full;
   the guarding condition is not visible in this excerpt. */
923 put_instruction (struct sfm_writer *w, unsigned char instruction)
927 ensure_buf_space (w);
928 w->x = (unsigned char *) w->ptr++;
929 w->y = (unsigned char *) w->ptr;
931 *w->x++ = instruction;
935 put_element (struct sfm_writer *w, const flt64 *elem)
937 ensure_buf_space (w);
938 memcpy (w->ptr++, elem, sizeof *elem);
/* Writes the case stored at ELEM, laid out variable by variable as
   described by W->vars, to W in compressed form.  Every flt64 element
   yields one instruction byte, and instruction 253 is additionally
   followed by the element itself. */
942 write_compressed_data (struct sfm_writer *w, const flt64 *elem)
946 for (i = 0; i < w->var_cnt; i++)
948 struct sfm_var *v = &w->vars[i];
/* Numeric element: 255 stands for -FLT64_MAX; an integral value
   between 1 - COMPRESSION_BIAS and 251 - COMPRESSION_BIAS is encoded
   as the value plus the bias; anything else is written out in full
   after instruction 253.
   NOTE(review): the test that separates numeric from string variables
   is not visible in this excerpt. */
952 if (*elem == -FLT64_MAX)
953 put_instruction (w, 255);
954 else if (*elem >= 1 - COMPRESSION_BIAS
955 && *elem <= 251 - COMPRESSION_BIAS
956 && *elem == (int) *elem)
957 put_instruction (w, (int) *elem + COMPRESSION_BIAS);
960 put_instruction (w, 253);
961 put_element (w, elem);
/* String variable: each flt64-sized segment that consists entirely of
   spaces becomes instruction 254; any other segment is written out in
   full after instruction 253. */
969 for (j = 0; j < v->flt64_cnt; j++, elem++)
/* The comparison covers sizeof (flt64) bytes, so the literal must
   supply that many spaces (eight, given that sizeof (flt64) ==
   MAX_SHORT_STRING is asserted elsewhere in this file and flt64 is
   assumed to be 8 bytes); a shorter literal would be read past its
   end. */
971 if (!memcmp (elem, "        ", sizeof (flt64)))
972 put_instruction (w, 254);
975 put_instruction (w, 253);
976 put_element (w, elem);
983 /* Returns true if an I/O error has occurred on WRITER, false otherwise. */
985 sfm_write_error (const struct sfm_writer *writer)
987 return ferror (writer->file);
990 /* Closes a system file after we're done with it.
991 Returns true if successful, false if an I/O error occurred. */
993 sfm_close_writer (struct sfm_writer *w)
1001 if (w->file != NULL)
/* Flush whatever is left in the compression buffer: the unused part
   of the current instruction octet is zeroed and only the elements
   actually used are written. */
1004 if (w->buf != NULL && w->ptr > w->buf)
1006 memset (w->x, 0, w->y - w->x);
1007 buf_write (w, w->buf, (w->ptr - w->buf) * sizeof *w->buf);
1011 ok = !sfm_write_error (w);
1013 /* Seek back to the beginning and update the number of cases.
1014 This is just a courtesy to later readers, so there's no need
1015 to check return values or report errors. */
/* The count is stored at the offset of the case_cnt member of struct
   sysfile_header, and only if no error has occurred so far and the
   seek succeeds. */
1016 if (ok && !fseek (w->file, offsetof (struct sysfile_header, case_cnt),
1019 int32_t case_cnt = w->case_cnt;
1020 fwrite (&case_cnt, sizeof case_cnt, 1, w->file);
/* The result of closing the stream is checked as well. */
1024 if (fclose (w->file) == EOF)
1028 msg (ME, _("An I/O error occurred writing system file \"%s\"."),
1029 fh_get_file_name (w->fh));
/* The file handle is released with the same type and mode strings
   that were used to open it. */
1032 fh_close (w->fh, "system file", "we");