1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 /* AIX requires this to be the first thing in the file. */
23 #define alloca __builtin_alloca
31 #ifndef alloca /* predefined by HP cc +Olibcalls */
46 #include <unistd.h> /* Required by SunOS4. */
51 #include "file-handle.h"
57 #include "value-labels.h"
61 #include "debug-print.h"
63 /* PORTME: This file may require substantial revision for those
64 systems that don't meet the typical 32-bit integer/64-bit double
65 model. It's kinda hard to tell without having one of them on my
68 /* Compression bias used by PSPP. Values between (1 -
69 COMPRESSION_BIAS) and (251 - COMPRESSION_BIAS) inclusive can be
71 #define COMPRESSION_BIAS 100
73 /* sfm writer file_handle extension. */
76 FILE *file; /* Actual file. */
78 int compressed; /* 1=compressed, 0=not compressed. */
79 flt64 *buf; /* Buffered data. */
80 flt64 *end; /* Buffer end. */
81 flt64 *ptr; /* Current location in buffer. */
82 unsigned char *x; /* Location in current instruction octet. */
83 unsigned char *y; /* End of instruction octet. */
84 int n_cases; /* Number of cases written so far. */
86 char *elem_type; /* ALPHA or NUMERIC for each flt64 element. */
89 static struct fh_ext_class sfm_w_class;
91 static char *append_string_max (char *, const char *, const char *);
92 static int write_header (struct sfm_write_info *inf);
93 static int bufwrite (struct file_handle *h, const void *buf, size_t nbytes);
94 static int write_variable (struct sfm_write_info *inf, struct variable *v);
95 static int write_value_labels (struct sfm_write_info *inf, struct variable * s, int index);
96 static int write_rec_7_34 (struct sfm_write_info *inf);
97 static int write_documents (struct sfm_write_info *inf);
99 /* Writes the dictionary INF->dict to system file INF->h. The system
100 file is compressed if INF->compress is nonzero. INF->case_size is
101 set to the number of flt64 elements in a single case. Returns
102 nonzero only if successful. */
104 sfm_write_dictionary (struct sfm_write_info *inf)
106 struct dictionary *d = inf->dict;
107 struct sfm_fhuser_ext *ext;
111 if (inf->h->class != NULL)
113 msg (ME, _("Cannot write file %s as system file: already opened for %s."),
114 fh_handle_name (inf->h), inf->h->class->name);
118 msg (VM (1), _("%s: Opening system-file handle %s for writing."),
119 fh_handle_filename (inf->h), fh_handle_name (inf->h));
121 /* Open the physical disk file. */
122 inf->h->class = &sfm_w_class;
123 inf->h->ext = ext = xmalloc (sizeof (struct sfm_fhuser_ext));
124 ext->file = fopen (inf->h->norm_fn, "wb");
125 ext->elem_type = NULL;
126 if (ext->file == NULL)
128 msg (ME, _("An error occurred while opening \"%s\" for writing "
129 "as a system file: %s."), inf->h->fn, strerror (errno));
135 /* Initialize the sfm_fhuser_ext structure. */
136 ext->compressed = inf->compress;
137 ext->buf = ext->ptr = NULL;
138 ext->x = ext->y = NULL;
141 /* Write the file header. */
142 if (!write_header (inf))
145 /* Write basic variable info. */
146 for (i = 0; i < dict_get_var_cnt (d); i++)
147 write_variable (inf, dict_get_var (d, i));
149 /* Write out value labels. */
150 for (index = i = 0; i < dict_get_var_cnt (d); i++)
152 struct variable *v = dict_get_var (d, i);
154 if (!write_value_labels (inf, v, index))
156 index += (v->type == NUMERIC ? 1
157 : DIV_RND_UP (v->width, sizeof (flt64)));
160 if (dict_get_documents (d) != NULL && !write_documents (inf))
162 if (!write_rec_7_34 (inf))
165 /* Write record 999. */
174 rec_999.rec_type = 999;
177 if (!bufwrite (inf->h, &rec_999, sizeof rec_999))
181 msg (VM (2), _("Wrote system-file header successfully."));
186 msg (VM (1), _("Error writing system-file header."));
188 inf->h->class = NULL;
190 free (ext->elem_type);
191 ext->elem_type = NULL;
195 /* Returns value of X truncated to two least-significant digits. */
206 /* Write the sysfile_header header to the system file represented by
209 write_header (struct sfm_write_info *inf)
211 struct dictionary *d = inf->dict;
212 struct sfm_fhuser_ext *ext = inf->h->ext;
213 struct sysfile_header hdr;
219 memcpy (hdr.rec_type, "$FL2", 4);
221 p = stpcpy (hdr.prod_name, "@(#) SPSS DATA FILE ");
222 p = append_string_max (p, version, &hdr.prod_name[60]);
223 p = append_string_max (p, " - ", &hdr.prod_name[60]);
224 p = append_string_max (p, host_system, &hdr.prod_name[60]);
225 memset (p, ' ', &hdr.prod_name[60] - p);
230 for (i = 0; i < dict_get_var_cnt (d); i++)
232 struct variable *v = dict_get_var (d, i);
233 hdr.case_size += (v->type == NUMERIC ? 1
234 : DIV_RND_UP (v->width, sizeof (flt64)));
236 inf->case_size = hdr.case_size;
238 p = ext->elem_type = xmalloc (inf->case_size);
239 for (i = 0; i < dict_get_var_cnt (d); i++)
241 struct variable *v = dict_get_var (d, i);
242 int count = (v->type == NUMERIC ? 1
243 : DIV_RND_UP (v->width, sizeof (flt64)));
248 hdr.compressed = inf->compress;
250 if (dict_get_weight (d) != NULL)
252 struct variable *weight_var;
253 int recalc_weight_index = 1;
256 weight_var = dict_get_weight (d);
259 struct variable *v = dict_get_var (d, i);
262 recalc_weight_index += (v->type == NUMERIC ? 1
263 : DIV_RND_UP (v->width, sizeof (flt64)));
265 hdr.weight_index = recalc_weight_index;
268 hdr.weight_index = 0;
271 hdr.bias = COMPRESSION_BIAS;
273 if ((time_t) - 1 == time (&t))
275 memcpy (hdr.creation_date, "01 Jan 70", 9);
276 memcpy (hdr.creation_time, "00:00:00", 8);
280 static const char *month_name[12] =
282 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
283 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
285 struct tm *tmp = localtime (&t);
286 int day = rerange (tmp->tm_mday);
287 int mon = rerange (tmp->tm_mon + 1);
288 int year = rerange (tmp->tm_year);
289 int hour = rerange (tmp->tm_hour + 1);
290 int min = rerange (tmp->tm_min + 1);
291 int sec = rerange (tmp->tm_sec + 1);
294 sprintf (buf, "%02d %s %02d", day, month_name[mon - 1], year);
295 memcpy (hdr.creation_date, buf, sizeof hdr.creation_date);
296 sprintf (buf, "%02d:%02d:%02d", hour - 1, min - 1, sec - 1);
297 memcpy (hdr.creation_time, buf, sizeof hdr.creation_time);
301 const char *label = dict_get_label (d);
305 st_bare_pad_copy (hdr.file_label, label, sizeof hdr.file_label);
308 memset (hdr.padding, 0, sizeof hdr.padding);
310 if (!bufwrite (inf->h, &hdr, sizeof hdr))
315 /* Translates format spec from internal form in SRC to system file
318 write_format_spec (struct fmt_spec *src, int32 *dest)
320 *dest = (formats[src->type].spss << 16) | (src->w << 8) | src->d;
323 /* Writes the variable record(s) for variable V to the system file
324 represented by INF. */
326 write_variable (struct sfm_write_info *inf, struct variable *v)
328 struct sysfile_variable sv;
330 /* Missing values. */
331 flt64 m[3]; /* Missing value values. */
332 int nm; /* Number of missing values, possibly negative. */
335 sv.type = (v->type == NUMERIC ? 0 : v->width);
336 sv.has_var_label = (v->label != NULL);
338 switch (v->miss_type)
346 for (nm = 0; nm < v->miss_type; nm++)
347 m[nm] = v->missing[nm].f;
350 m[0] = v->missing[0].f;
351 m[1] = v->missing[1].f;
355 m[0] = second_lowest_flt64;
356 m[1] = v->missing[0].f;
360 m[0] = v->missing[0].f;
364 case MISSING_RANGE_1:
365 m[0] = v->missing[0].f;
366 m[1] = v->missing[1].f;
367 m[2] = v->missing[2].f;
371 m[0] = second_lowest_flt64;
372 m[1] = v->missing[0].f;
373 m[2] = v->missing[1].f;
377 m[0] = v->missing[0].f;
378 m[1] = second_lowest_flt64;
379 m[2] = v->missing[1].f;
386 sv.n_missing_values = nm;
387 write_format_spec (&v->print, &sv.print);
388 write_format_spec (&v->write, &sv.write);
389 memcpy (sv.name, v->name, strlen (v->name));
390 memset (&sv.name[strlen (v->name)], ' ', 8 - strlen (v->name));
391 if (!bufwrite (inf->h, &sv, sizeof sv))
405 l.label_len = min (strlen (v->label), 120);
406 ext_len = ROUND_UP (l.label_len, sizeof l.label_len);
407 memcpy (l.label, v->label, l.label_len);
408 memset (&l.label[l.label_len], ' ', ext_len - l.label_len);
410 if (!bufwrite (inf->h, &l, offsetof (struct label, label) + ext_len))
414 if (nm && !bufwrite (inf->h, m, sizeof *m * nm))
417 if (v->type == ALPHA && v->width > (int) sizeof (flt64))
423 sv.has_var_label = 0;
424 sv.n_missing_values = 0;
425 memset (&sv.print, 0, sizeof sv.print);
426 memset (&sv.write, 0, sizeof sv.write);
427 memset (&sv.name, 0, sizeof sv.name);
429 pad_count = DIV_RND_UP (v->width, (int) sizeof (flt64)) - 1;
430 for (i = 0; i < pad_count; i++)
431 if (!bufwrite (inf->h, &sv, sizeof sv))
438 /* Writes the value labels for variable V having system file variable
439 index INDEX to the system file associated with INF. Returns
440 nonzero only if successful. */
442 write_value_labels (struct sfm_write_info * inf, struct variable *v, int index)
444 struct value_label_rec
451 struct variable_index_rec
458 struct val_labs_iterator *i;
459 struct value_label_rec *vlr;
460 struct variable_index_rec vir;
465 if (!val_labs_count (v->val_labs))
468 /* Pass 1: Count bytes. */
469 vlr_size = (sizeof (struct value_label_rec)
470 + sizeof (flt64) * (val_labs_count (v->val_labs) - 1));
471 for (vl = val_labs_first (v->val_labs, &i); vl != NULL;
472 vl = val_labs_next (v->val_labs, &i))
473 vlr_size += ROUND_UP (strlen (vl->label) + 1, sizeof (flt64));
475 /* Pass 2: Copy bytes. */
476 vlr = xmalloc (vlr_size);
478 vlr->n_labels = val_labs_count (v->val_labs);
480 for (vl = val_labs_first_sorted (v->val_labs, &i); vl != NULL;
481 vl = val_labs_next (v->val_labs, &i))
483 size_t len = strlen (vl->label);
485 *loc++ = vl->value.f;
486 *(unsigned char *) loc = len;
487 memcpy (&((unsigned char *) loc)[1], vl->label, len);
488 memset (&((unsigned char *) loc)[1 + len], ' ',
489 REM_RND_UP (len + 1, sizeof (flt64)));
490 loc += DIV_RND_UP (len + 1, sizeof (flt64));
493 if (!bufwrite (inf->h, vlr, vlr_size))
502 vir.vars[0] = index + 1;
503 if (!bufwrite (inf->h, &vir, sizeof vir))
509 /* Writes record type 6, document record. */
511 write_documents (struct sfm_write_info * inf)
513 struct dictionary *d = inf->dict;
516 int32 rec_type P; /* Always 6. */
517 int32 n_lines P; /* Number of lines of documents. */
521 const char *documents;
524 documents = dict_get_documents (d);
525 n_lines = strlen (documents) / 80;
528 rec_6.n_lines = n_lines;
529 if (!bufwrite (inf->h, &rec_6, sizeof rec_6))
531 if (!bufwrite (inf->h, documents, 80 * n_lines))
537 /* Writes record type 7, subtypes 3 and 4. */
539 write_rec_7_34 (struct sfm_write_info * inf)
556 /* Components of the version number, from major to minor. */
557 int version_component[3];
559 /* Used to step through the version string. */
562 /* Parses the version string, which is assumed to be of the form
563 #.#x, where each # is a string of digits, and x is a single
565 version_component[0] = strtol (bare_version, &p, 10);
568 version_component[1] = strtol (bare_version, &p, 10);
569 version_component[2] = (isalpha ((unsigned char) *p)
570 ? tolower ((unsigned char) *p) - 'a' : 0);
572 rec_7.rec_type_3 = 7;
574 rec_7.data_type_3 = sizeof (int32);
576 rec_7.elem_3[0] = version_component[0];
577 rec_7.elem_3[1] = version_component[1];
578 rec_7.elem_3[2] = version_component[2];
579 rec_7.elem_3[3] = -1;
581 /* PORTME: 1=IEEE754, 2=IBM 370, 3=DEC VAX E. */
588 /* PORTME: 1=big-endian, 2=little-endian. */
595 /* PORTME: 1=EBCDIC, 2=7-bit ASCII, 3=8-bit ASCII, 4=DEC Kanji. */
598 rec_7.rec_type_4 = 7;
600 rec_7.data_type_4 = sizeof (flt64);
602 rec_7.elem_4[0] = -FLT64_MAX;
603 rec_7.elem_4[1] = FLT64_MAX;
604 rec_7.elem_4[2] = second_lowest_flt64;
606 if (!bufwrite (inf->h, &rec_7, sizeof rec_7))
611 /* Write NBYTES starting at BUF to the system file represented by
614 bufwrite (struct file_handle * h, const void *buf, size_t nbytes)
616 struct sfm_fhuser_ext *ext = h->ext;
619 if (1 != fwrite (buf, nbytes, 1, ext->file))
621 msg (ME, _("%s: Writing system file: %s."), h->fn, strerror (errno));
627 /* Copies string SRC to DEST, truncating the copy so that it stops
628 before byte END; no null terminator is copied. Returns a pointer to the
629 byte after the last byte copied. */
631 append_string_max (char *dest, const char *src, const char *end)
633 int nbytes = min (end - dest, (int) strlen (src));
634 memcpy (dest, src, nbytes);
635 return dest + nbytes;
638 /* Makes certain that the compression buffer of H has room for another
639 element. If there's not room, pads out the current instruction
640 octet with zero and dumps out the buffer. */
642 ensure_buf_space (struct file_handle *h)
644 struct sfm_fhuser_ext *ext = h->ext;
646 if (ext->ptr >= ext->end)
648 memset (ext->x, 0, ext->y - ext->x);
651 if (!bufwrite (h, ext->buf, sizeof *ext->buf * 128))
657 /* Writes case ELEM consisting of N_ELEM flt64 elements to the system
658 file represented by H. Returns nonzero only if successful. */
660 sfm_write_case (struct file_handle * h, const flt64 *elem, int n_elem)
662 struct sfm_fhuser_ext *ext = h->ext;
663 const flt64 *end_elem = &elem[n_elem];
664 char *elem_type = ext->elem_type;
668 if (ext->compressed == 0)
669 return bufwrite (h, elem, sizeof *elem * n_elem);
671 if (ext->buf == NULL)
673 ext->buf = xmalloc (sizeof *ext->buf * 128);
675 ext->end = &ext->buf[128];
676 ext->x = (unsigned char *) (ext->ptr++);
677 ext->y = (unsigned char *) (ext->ptr);
679 for (; elem < end_elem; elem++, elem_type++)
681 if (ext->x >= ext->y)
683 if (!ensure_buf_space (h))
685 ext->x = (unsigned char *) (ext->ptr++);
686 ext->y = (unsigned char *) (ext->ptr);
689 if (*elem_type == NUMERIC)
691 if (*elem == -FLT64_MAX)
698 int value = *elem < 0 ? *elem - EPSILON : *elem + EPSILON;
700 if (value >= 1 - COMPRESSION_BIAS
701 && value <= 251 - COMPRESSION_BIAS
702 && approx_eq (value, *elem))
704 *ext->x++ = value + COMPRESSION_BIAS;
711 if (0 == memcmp ((char *) elem,
721 if (!ensure_buf_space (h))
729 /* Closes a system file after we're done with it. */
731 sfm_close (struct file_handle * h)
733 struct sfm_fhuser_ext *ext = h->ext;
735 if (ext->buf != NULL && ext->ptr > ext->buf)
737 memset (ext->x, 0, ext->y - ext->x);
738 bufwrite (h, ext->buf, (ext->ptr - ext->buf) * sizeof *ext->buf);
741 /* Attempt to seek back to the beginning in order to write the
742 number of cases. If that's not possible (i.e., we're writing to
743 a tty or a pipe), then it's not a big deal because we wrote the
744 code that indicates an unknown number of cases. */
745 if (0 == fseek (ext->file, offsetof (struct sysfile_header, ncases),
748 int32 n_cases = ext->n_cases;
750 /* The return value is deliberately ignored: the header already
751 holds the code for an unknown number of cases, so it does not
752 matter whether this count is actually written. */
753 fwrite (&n_cases, sizeof n_cases, 1, ext->file);
756 if (EOF == fclose (ext->file))
757 msg (ME, _("%s: Closing system file: %s."), h->fn, strerror (errno));
760 free (ext->elem_type);
764 static struct fh_ext_class sfm_w_class =
767 N_("writing as a system file"),