1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 #include "sfm-write.h"
29 #include <unistd.h> /* Required by SunOS4. */
33 #include "dictionary.h"
35 #include "file-handle.h"
41 #include "value-labels.h"
45 #include "debug-print.h"
/* The bias is added to small integral values so that they can be stored as
   a single instruction octet in a compressed case: write_compressed_data
   emits (int) value + COMPRESSION_BIAS for integral values in the range
   named below.  write_header also stores it in the header's bias field.
   NOTE(review): the original comment that follows is cut off after
   "can be" in this copy of the file. */
47 /* Compression bias used by PSPP. Values between (1 -
48 COMPRESSION_BIAS) and (251 - COMPRESSION_BIAS) inclusive can be
50 #define COMPRESSION_BIAS 100
52 /* System file writer: the state carried from sfm_open_writer through
   sfm_write_case to sfm_close_writer for one output file. */
55 struct file_handle *fh; /* File handle; supplies the file name for messages and is passed to fh_close. */
56 FILE *file; /* File stream, obtained from fopen (..., "wb"). */
58 int needs_translation; /* 0=use fast path, 1=translation needed; set from does_dict_need_translation. */
59 int compress; /* 1=compressed, 0=not compressed. */
60 int case_cnt; /* Number of cases written so far; copied into the header's case_cnt field on close. */
61 size_t flt64_cnt; /* Number of flt64 elements in case; accumulated in write_header. */
63 /* Compression buffering.  buf is NULL until sfm_open_writer allocates it
   with room for 128 elements. */
64 flt64 *buf; /* Buffered data. */
65 flt64 *end; /* Buffer end, i.e. &buf[128]. */
66 flt64 *ptr; /* Current location in buffer: the next element to fill. */
67 unsigned char *x; /* Location in current instruction octet: next byte put_instruction stores to. */
68 unsigned char *y; /* End of instruction octet: one past the last byte of the element x points into. */
71 struct sfm_var *vars; /* Variables: var_cnt entries filled in by sfm_open_writer. */
72 size_t var_cnt; /* Number of variables. */
75 /* A variable in a system file.  sfm_open_writer fills in one of these per
   dictionary variable, which is why the writer does not need to keep a
   reference to the dictionary. */
78 int width; /* 0=numeric, otherwise string width. */
79 int fv; /* Index into case, as passed to case_num and case_data. */
80 size_t flt64_cnt; /* Number of flt64 elements, as computed by var_flt64_cnt. */
83 static char *append_string_max (char *, const char *, const char *);
84 static int write_header (struct sfm_writer *, const struct dictionary *);
85 static int buf_write (struct sfm_writer *, const void *, size_t);
86 static int write_variable (struct sfm_writer *, struct variable *);
87 static int write_value_labels (struct sfm_writer *,
88 struct variable *, int idx);
89 static int write_rec_7_34 (struct sfm_writer *);
90 static int write_documents (struct sfm_writer *, const struct dictionary *);
91 static int does_dict_need_translation (const struct dictionary *);
/* Returns the number of flt64 elements that variable V occupies in a
   system file case: 1 for a numeric variable; otherwise V's width divided
   by sizeof (flt64) using DIV_RND_UP (presumably rounding up, so that a
   partly filled final element is still counted). */
94 var_flt64_cnt (const struct variable *v)
96 return v->type == NUMERIC ? 1 : DIV_RND_UP (v->width, sizeof (flt64));
99 /* Opens the system file designated by file handle FH for writing
   cases from dictionary D.  If COMPRESS is nonzero, the system file
   will be compressed.

   The whole dictionary is written before this function returns, in
   this order: file header, one variable record per variable, value
   labels, the document record (only if D has documents), record 7
   subtypes 3 and 4, and finally record 999.

   No reference to D is retained, so it may be modified or
   destroyed at will after this function returns.

   On failure the partially built writer is handed to
   sfm_close_writer.  NOTE(review): the return statements are not
   visible in this copy; presumably the new writer is returned on
   success and a null pointer on failure. */
106 sfm_open_writer (struct file_handle *fh,
107 const struct dictionary *d, int compress)
109 struct sfm_writer *w = NULL;
113 if (!fh_open (fh, "system file", "we"))
116 /* Create and initialize writer. */
117 w = xmalloc (sizeof *w);
119 w->file = fopen (handle_get_filename (fh), "wb");
121 w->needs_translation = does_dict_need_translation (d);
122 w->compress = compress;
126 w->buf = w->end = w->ptr = NULL;
/* Copy the per-variable facts needed later by sfm_write_case. */
129 w->var_cnt = dict_get_var_cnt (d);
130 w->vars = xmalloc (sizeof *w->vars * w->var_cnt);
131 for (i = 0; i < w->var_cnt; i++)
133 const struct variable *dv = dict_get_var (d, i);
134 struct sfm_var *sv = &w->vars[i];
135 sv->width = dv->width;
137 sv->flt64_cnt = var_flt64_cnt (dv);
140 /* Check that file create succeeded.
   NOTE(review): errno is read here, after the xmalloc and dictionary
   calls that follow the fopen above; confirm none of them can change
   it before the message is formatted. */
143 msg (ME, _("Error opening \"%s\" for writing "
144 "as a system file: %s."),
145 handle_get_filename (w->fh), strerror (errno));
150 /* Write the file header. */
151 if (!write_header (w, d))
154 /* Write basic variable info.
   NOTE(review): the result of write_variable is not checked, unlike
   the other record writers called from this function. */
155 for (i = 0; i < dict_get_var_cnt (d); i++)
156 write_variable (w, dict_get_var (d, i));
158 /* Write out value labels.  idx tracks the flt64 element index of the
   current variable, advanced by each variable's element count. */
159 for (idx = i = 0; i < dict_get_var_cnt (d); i++)
161 struct variable *v = dict_get_var (d, i);
163 if (!write_value_labels (w, v, idx))
165 idx += var_flt64_cnt (v);
168 if (dict_get_documents (d) != NULL && !write_documents (w, d))
170 if (!write_rec_7_34 (w))
173 /* Write record 999. */
182 rec_999.rec_type = 999;
185 if (!buf_write (w, &rec_999, sizeof rec_999))
/* Set up the 128-element buffer and claim one element as the first
   block of instruction octets (x is its start, y its end).
   NOTE(review): the statement that points ptr at buf, and any
   condition guarding this set-up, are not visible in this copy. */
191 w->buf = xmalloc (sizeof *w->buf * 128);
193 w->end = &w->buf[128];
194 w->x = (unsigned char *) w->ptr++;
195 w->y = (unsigned char *) w->ptr;
201 sfm_close_writer (w);
/* Decides whether cases of dictionary D must go through the slow path in
   sfm_write_case (the result is stored in the writer's needs_translation
   field).  Walks D's variables in dictionary order and compares each
   variable's case index (fv) with the running index case_idx.
   NOTE(review): the initialization and advancement of case_idx and the
   return statements are not visible in this copy; presumably a mismatch
   yields nonzero and a fully in-order dictionary yields zero. */
206 does_dict_need_translation (const struct dictionary *d)
212 for (i = 0; i < dict_get_var_cnt (d); i++)
214 struct variable *v = dict_get_var (d, i);
215 if (v->fv != case_idx)
222 /* Returns value of X truncated to two least-significant digits. */
233 /* Write the sysfile_header header to system file W.

   Fills in the record type, product name, case size, compression flag,
   weight index, compression bias, creation date and time, file label
   and padding from dictionary D and from W, then writes the header
   with buf_write.  As a side effect W's flt64_cnt is accumulated here.
   NOTE(review): localtime's result is dereferenced without checking
   for a null pointer. */
235 write_header (struct sfm_writer *w, const struct dictionary *d)
237 struct sysfile_header hdr;
243 memcpy (hdr.rec_type, "$FL2", 4);
/* Product name: fixed prefix, then version, " - " and host_system,
   each cut off at byte 60, with any remainder filled with spaces. */
245 p = stpcpy (hdr.prod_name, "@(#) SPSS DATA FILE ");
246 p = append_string_max (p, version, &hdr.prod_name[60]);
247 p = append_string_max (p, " - ", &hdr.prod_name[60]);
248 p = append_string_max (p, host_system, &hdr.prod_name[60]);
249 memset (p, ' ', &hdr.prod_name[60] - p);
/* Case size is the total flt64 element count over all variables.
   NOTE(review): this adds to w->flt64_cnt, so it relies on the field
   being zero beforehand; that initialization is not visible here. */
254 for (i = 0; i < dict_get_var_cnt (d); i++)
255 w->flt64_cnt += var_flt64_cnt (dict_get_var (d, i));
256 hdr.case_size = w->flt64_cnt;
258 hdr.compress = w->compress;
/* The weight index counts flt64 elements and starts at 1, so it is a
   1-based element position.  The loop header is not visible in this
   copy; presumably it stops at the weighting variable. */
260 if (dict_get_weight (d) != NULL)
262 struct variable *weight_var;
263 int recalc_weight_idx = 1;
266 weight_var = dict_get_weight (d);
269 struct variable *v = dict_get_var (d, i);
272 recalc_weight_idx += var_flt64_cnt (v);
274 hdr.weight_idx = recalc_weight_idx;
280 hdr.bias = COMPRESSION_BIAS;
/* If the current time is unavailable, fall back to a fixed date and
   time. */
282 if (time (&t) == (time_t) -1)
284 memcpy (hdr.creation_date, "01 Jan 70", 9);
285 memcpy (hdr.creation_time, "00:00:00", 8);
289 static const char *month_name[12] =
291 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
292 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
294 struct tm *tmp = localtime (&t);
295 int day = rerange (tmp->tm_mday);
296 int mon = rerange (tmp->tm_mon + 1);
297 int year = rerange (tmp->tm_year);
298 int hour = rerange (tmp->tm_hour + 1);
299 int min = rerange (tmp->tm_min + 1);
300 int sec = rerange (tmp->tm_sec + 1);
/* hour, min and sec were passed through rerange with 1 added, so 1
   is subtracted again when they are formatted.  The size of buf is
   not visible in this copy; the date format needs 10 bytes including
   the terminator when every number has two digits. */
303 sprintf (buf, "%02d %s %02d", day, month_name[mon - 1], year);
304 memcpy (hdr.creation_date, buf, sizeof hdr.creation_date);
305 sprintf (buf, "%02d:%02d:%02d", hour - 1, min - 1, sec - 1);
306 memcpy (hdr.creation_time, buf, sizeof hdr.creation_time);
310 const char *label = dict_get_label (d);
314 st_bare_pad_copy (hdr.file_label, label, sizeof hdr.file_label);
317 memset (hdr.padding, 0, sizeof hdr.padding);
319 if (!buf_write (w, &hdr, sizeof hdr))
/* Packs format specification SRC into the single 32-bit value *DEST: the
   system-file code for the format type (formats[src->type].spss) shifted
   left by 16 bits, src->w shifted left by 8 bits, and src->d in the low
   bits (presumably width and number of decimals, respectively).
   NOTE(review): the original comment that follows is cut off in this copy
   of the file. */
324 /* Translates format spec from internal form in SRC to system file
327 write_format_spec (struct fmt_spec *src, int32 *dest)
329 *dest = (formats[src->type].spss << 16) | (src->w << 8) | src->d;
332 /* Writes the variable record(s) for variable V to system file W.

   The main record carries the label flag, the missing-value count,
   the print and write formats and the space-padded name.  It is
   followed by the variable label, if any, and then by the missing
   values, if any.  For a string variable wider than one flt64, extra
   records with zeroed label flag, missing-value count, formats and
   name are then written, one for each additional flt64 element.

   NOTE(review): most case labels of the switch and all return
   statements are not visible in this copy; callers treat the result
   like the other writers' nonzero-on-success value. */
335 write_variable (struct sfm_writer *w, struct variable *v)
337 struct sysfile_variable sv;
339 /* Missing values. */
340 flt64 m[3]; /* Missing value values. */
341 int nm; /* Number of missing values, possibly negative. */
345 sv.has_var_label = (v->label != NULL);
/* Translate V's missing-value specification into at most three values
   in m.  second_lowest_flt64 is substituted in some of the range
   forms. */
347 switch (v->miss_type)
355 for (nm = 0; nm < v->miss_type; nm++)
356 m[nm] = v->missing[nm].f;
359 m[0] = v->missing[0].f;
360 m[1] = v->missing[1].f;
364 m[0] = second_lowest_flt64;
365 m[1] = v->missing[0].f;
369 m[0] = v->missing[0].f;
373 case MISSING_RANGE_1:
374 m[0] = v->missing[0].f;
375 m[1] = v->missing[1].f;
376 m[2] = v->missing[2].f;
380 m[0] = second_lowest_flt64;
381 m[1] = v->missing[0].f;
382 m[2] = v->missing[1].f;
386 m[0] = v->missing[0].f;
387 m[1] = second_lowest_flt64;
388 m[2] = v->missing[1].f;
396 sv.n_missing_values = nm;
397 write_format_spec (&v->print, &sv.print);
398 write_format_spec (&v->write, &sv.write);
/* The name field is 8 bytes, padded on the right with spaces.
   NOTE(review): this assumes strlen (v->name) <= 8; a longer name
   would overrun sv.name and make the pad length wrap around. */
399 memcpy (sv.name, v->name, strlen (v->name));
400 memset (&sv.name[strlen (v->name)], ' ', 8 - strlen (v->name));
401 if (!buf_write (w, &sv, sizeof sv))
/* Variable label: at most 255 bytes are kept, padded with spaces up
   to a multiple of sizeof l.label_len. */
415 l.label_len = min (strlen (v->label), 255);
416 ext_len = ROUND_UP (l.label_len, sizeof l.label_len);
417 memcpy (l.label, v->label, l.label_len);
418 memset (&l.label[l.label_len], ' ', ext_len - l.label_len);
420 if (!buf_write (w, &l, offsetof (struct label, label) + ext_len))
/* NOTE(review): nm is documented above as possibly negative; if it is
   negative here, sizeof *m * nm converts to a very large size_t.
   Confirm whether the magnitude of nm was intended. */
424 if (nm && !buf_write (w, m, sizeof *m * nm))
427 if (v->type == ALPHA && v->width > (int) sizeof (flt64))
433 sv.has_var_label = 0;
434 sv.n_missing_values = 0;
435 memset (&sv.print, 0, sizeof sv.print);
436 memset (&sv.write, 0, sizeof sv.write);
437 memset (&sv.name, 0, sizeof sv.name);
439 pad_count = DIV_RND_UP (v->width, (int) sizeof (flt64)) - 1;
440 for (i = 0; i < pad_count; i++)
441 if (!buf_write (w, &sv, sizeof sv))
448 /* Writes the value labels for variable V having system file variable
   index IDX to system file W.  Returns nonzero only if successful.

   Nothing is written when V has no value labels.  Otherwise two
   records are written: a value label record holding, for each label,
   the value as one flt64 followed by a length byte, the label text and
   space padding up to a multiple of sizeof (flt64); then a variable
   index record whose single entry is IDX + 1.

   NOTE(review): the label length is stored in a single byte, so this
   relies on labels being at most 255 bytes long.  The release of the
   allocated record is not visible in this copy; confirm it is freed
   on both the success and the failure path. */
452 write_value_labels (struct sfm_writer *w, struct variable *v, int idx)
454 struct value_label_rec
468 struct val_labs_iterator *i;
469 struct value_label_rec *vlr;
470 struct var_idx_rec vir;
475 if (!val_labs_count (v->val_labs))
478 /* Pass 1: Count bytes.  Starts from the record size, adds one flt64
   per label beyond the first, then the padded size of each length
   byte plus label text. */
479 vlr_size = (sizeof (struct value_label_rec)
480 + sizeof (flt64) * (val_labs_count (v->val_labs) - 1));
481 for (vl = val_labs_first (v->val_labs, &i); vl != NULL;
482 vl = val_labs_next (v->val_labs, &i))
483 vlr_size += ROUND_UP (strlen (vl->label) + 1, sizeof (flt64));
485 /* Pass 2: Copy bytes.  This pass iterates in sorted order, whereas
   pass 1 used the unsorted iterator; only the total size must agree. */
486 vlr = xmalloc (vlr_size);
488 vlr->n_labels = val_labs_count (v->val_labs);
490 for (vl = val_labs_first_sorted (v->val_labs, &i); vl != NULL;
491 vl = val_labs_next (v->val_labs, &i))
493 size_t len = strlen (vl->label);
495 *loc++ = vl->value.f;
496 *(unsigned char *) loc = len;
497 memcpy (&((unsigned char *) loc)[1], vl->label, len);
498 memset (&((unsigned char *) loc)[1 + len], ' ',
499 REM_RND_UP (len + 1, sizeof (flt64)));
500 loc += DIV_RND_UP (len + 1, sizeof (flt64));
503 if (!buf_write (w, vlr, vlr_size))
512 vir.vars[0] = idx + 1;
513 if (!buf_write (w, &vir, sizeof vir))
519 /* Writes record type 6, document record, to system file W.

   The documents string of dictionary D is treated as a sequence of
   80-byte lines: the record gives the number of complete lines and is
   followed by exactly that many lines of text.  Any trailing partial
   line is not written.  sfm_open_writer only calls this when D has
   documents.  If the string is shorter than 80 bytes the line count
   is 0 and a zero-length write is requested for the text. */
521 write_documents (struct sfm_writer *w, const struct dictionary *d)
525 int32 rec_type P; /* Always 6. */
526 int32 n_lines P; /* Number of lines of documents. */
530 const char *documents;
533 documents = dict_get_documents (d);
534 n_lines = strlen (documents) / 80;
537 rec_6.n_lines = n_lines;
538 if (!buf_write (w, &rec_6, sizeof rec_6))
540 if (!buf_write (w, documents, 80 * n_lines))
546 /* Writes record type 7, subtypes 3 and 4, to system file W.

   Subtype 3 holds int32 elements: the three components of the
   version number parsed from bare_version, then -1, followed by
   elements described by the PORTME comments in the body.  Subtype 4
   holds three flt64 elements: -FLT64_MAX, FLT64_MAX and
   second_lowest_flt64.  Both are written with one buf_write call.

   NOTE(review): both strtol calls in the body parse from the start
   of bare_version.  The second call therefore re-reads the major
   number instead of continuing from P, so elem_3[1] would equal
   elem_3[0].  It looks like the second call was meant to start at P;
   confirm and fix. */
548 write_rec_7_34 (struct sfm_writer *w)
565 /* Components of the version number, from major to minor. */
566 int version_component[3];
568 /* Used to step through the version string. */
571 /* Parses the version string, which is assumed to be of the form
572 #.#x, where each # is a string of digits, and x is a single
574 version_component[0] = strtol (bare_version, &p, 10);
577 version_component[1] = strtol (bare_version, &p, 10);
578 version_component[2] = (isalpha ((unsigned char) *p)
579 ? tolower ((unsigned char) *p) - 'a' : 0);
581 rec_7.rec_type_3 = 7;
583 rec_7.data_type_3 = sizeof (int32);
585 rec_7.elem_3[0] = version_component[0];
586 rec_7.elem_3[1] = version_component[1];
587 rec_7.elem_3[2] = version_component[2];
588 rec_7.elem_3[3] = -1;
590 /* PORTME: 1=IEEE754, 2=IBM 370, 3=DEC VAX E. */
597 /* PORTME: 1=big-endian, 2=little-endian. */
604 /* PORTME: 1=EBCDIC, 2=7-bit ASCII, 3=8-bit ASCII, 4=DEC Kanji. */
607 rec_7.rec_type_4 = 7;
609 rec_7.data_type_4 = sizeof (flt64);
611 rec_7.elem_4[0] = -FLT64_MAX;
612 rec_7.elem_4[1] = FLT64_MAX;
613 rec_7.elem_4[2] = second_lowest_flt64;
615 if (!buf_write (w, &rec_7, sizeof rec_7))
620 /* Write NBYTES starting at BUF to the system file represented by
623 buf_write (struct sfm_writer *w, const void *buf, size_t nbytes)
625 assert (buf != NULL);
626 if (fwrite (buf, nbytes, 1, w->file) != 1)
628 msg (ME, _("%s: Writing system file: %s."),
629 handle_get_filename (w->fh), strerror (errno));
/* Copies string SRC to DEST with the proviso that the copy does not
   reach byte END; no null terminator is copied.  If SRC does not fit,
   only the leading part that fits before END is copied.  If DEST is
   already at or beyond END, nothing is copied and DEST is returned.
   Returns a pointer to the byte after the last byte copied. */
static char *
append_string_max (char *dest, const char *src, const char *end)
{
  size_t room;
  size_t nbytes;

  /* No room at all.  Checking this first also keeps the pointer
     difference below from ever being negative. */
  if (dest >= end)
    return dest;

  room = (size_t) (end - dest);
  nbytes = strlen (src);
  if (nbytes > room)
    nbytes = room;

  memcpy (dest, src, nbytes);
  return dest + nbytes;
}
646 /* Makes certain that the compression buffer of W has room for another
   element.  If there's not room, pads out the unused remainder of the
   current block of instruction octets with zeros and dumps out the
   whole buffer (all 128 elements) with buf_write.
   NOTE(review): the statements that reset the buffer position after
   the dump, and the return statements, are not visible in this copy;
   callers test the result as nonzero on success. */
650 ensure_buf_space (struct sfm_writer *w)
652 if (w->ptr >= w->end)
654 memset (w->x, 0, w->y - w->x);
657 if (!buf_write (w, w->buf, sizeof *w->buf * 128))
663 static void write_compressed_data (struct sfm_writer *w, const flt64 *elem);
665 /* Writes case C to system file W.
   Returns nonzero if successful.

   When W needs no translation, is not compressed, and flt64 has the
   same size as union value, the case data is written directly.
   Otherwise each variable is first copied into a bounce buffer of
   W's flt64_cnt elements, numeric values through case_num and string
   values with memcpy.  The buffer is then either written as is or
   passed to write_compressed_data.

   NOTE(review): the results of the buf_write calls in the body are
   not used, so as far as the visible code shows a write failure is
   not reflected in the return value.  For strings only v->width bytes
   are copied while the position advances by whole flt64 elements;
   the visible code does not fill any remaining bytes of the last
   element.  Confirm whether local_alloc or lines not shown here
   initialize them. */
668 sfm_write_case (struct sfm_writer *w, struct ccase *c)
672 if (!w->needs_translation && !w->compress
673 && sizeof (flt64) == sizeof (union value))
675 /* Fast path: external and internal representations are the
676 same and the dictionary is properly ordered. Write
678 buf_write (w, case_data_all (c), sizeof (union value) * w->flt64_cnt);
682 /* Slow path: internal and external representations differ.
683 Write into a bounce buffer, then write to W. */
689 bounce_size = sizeof *bounce * w->flt64_cnt;
690 bounce = bounce_cur = local_alloc (bounce_size);
692 for (i = 0; i < w->var_cnt; i++)
694 struct sfm_var *v = &w->vars[i];
697 *bounce_cur = case_num (c, v->fv);
699 memcpy (bounce_cur, case_data (c, v->fv)->s, v->width);
700 bounce_cur += v->flt64_cnt;
704 buf_write (w, bounce, bounce_size);
706 write_compressed_data (w, bounce);
/* Appends the one-byte compression code INSTRUCTION to W's current block
   of instruction octets and advances the position within it.  The body
   also contains a refill step: once ensure_buf_space succeeds, the next
   buffer element is claimed as a fresh block, with x at its start and y
   at its end.
   NOTE(review): the condition guarding the refill step and the return
   statements are not visible in this copy; presumably the refill happens
   only when x has reached y, and zero is returned if ensure_buf_space
   fails. */
715 put_instruction (struct sfm_writer *w, unsigned char instruction)
719 if (!ensure_buf_space (w))
721 w->x = (unsigned char *) w->ptr++;
722 w->y = (unsigned char *) w->ptr;
724 *w->x++ = instruction;
/* Copies the single flt64 at ELEM into the next free element of W's
   compression buffer, after calling ensure_buf_space to make room, and
   advances the buffer position.
   NOTE(review): the return statements are not visible in this copy;
   presumably zero is returned when ensure_buf_space fails and nonzero
   otherwise. */
728 put_element (struct sfm_writer *w, const flt64 *elem)
730 if (!ensure_buf_space (w))
732 memcpy (w->ptr++, elem, sizeof *elem);
736 write_compressed_data (struct sfm_writer *w, const flt64 *elem)
740 for (i = 0; i < w->var_cnt; i++)
742 struct sfm_var *v = &w->vars[i];
746 if (*elem == -FLT64_MAX)
747 put_instruction (w, 255);
748 else if (*elem >= 1 - COMPRESSION_BIAS
749 && *elem <= 251 - COMPRESSION_BIAS
750 && *elem == (int) *elem)
751 put_instruction (w, (int) *elem + COMPRESSION_BIAS);
754 put_instruction (w, 253);
755 put_element (w, elem);
763 for (j = 0; j < v->flt64_cnt; j++, elem++)
765 if (!memcmp (elem, " ", sizeof (flt64)))
766 put_instruction (w, 254);
769 put_instruction (w, 253);
770 put_element (w, elem);
777 /* Closes a system file after we're done with it.

   Closes the file handle, writes out whatever is still held in the
   compression buffer (after zero-filling the unused instruction
   octets), updates the case count stored in the file header, and
   closes the stream, reporting an error if fclose fails.

   NOTE(review): sfm_open_writer calls this on its failure path, where
   W may still be a null pointer or W's stream may not have been
   opened; the guards for those cases, and the release of W's memory,
   are not visible in this copy.  Also confirm that W's file handle
   may still be passed to handle_get_filename after fh_close. */
779 sfm_close_writer (struct sfm_writer *w)
784 fh_close (w->fh, "system file", "we");
/* Flush the part of the buffer that is in use; the result of this
   buf_write is not checked. */
789 if (w->buf != NULL && w->ptr > w->buf)
791 memset (w->x, 0, w->y - w->x);
792 buf_write (w, w->buf, (w->ptr - w->buf) * sizeof *w->buf);
795 /* Seek back to the beginning and update the number of cases.
   This is just a courtesy to later readers, so there's no need
   to check return values or report errors. */
798 if (!fseek (w->file, offsetof (struct sysfile_header, case_cnt), SEEK_SET))
800 int32 case_cnt = w->case_cnt;
802 /* I don't really care about the return value: it doesn't
   matter whether this data is written. */
804 fwrite (&case_cnt, sizeof case_cnt, 1, w->file);
807 if (fclose (w->file) == EOF)
808 msg (ME, _("%s: Closing system file: %s."),
809 handle_get_filename (w->fh), strerror (errno));