1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
21 #include "sfm-write.h"
29 #include <unistd.h> /* Required by SunOS4. */
33 #include "dictionary.h"
35 #include "file-handle.h"
41 #include "value-labels.h"
45 #include "debug-print.h"
47 /* Compression bias used by PSPP. Values between (1 -
48 COMPRESSION_BIAS) and (251 - COMPRESSION_BIAS) inclusive can be
50 #define COMPRESSION_BIAS 100
52 /* System file writer. */
/* Per-file writer state. sfm_open_writer fills it in;
   sfm_close_writer flushes any buffered data and closes the
   stream. */
55 struct file_handle *fh; /* File handle. */
56 FILE *file; /* File stream. */
58 int needs_translation; /* 0=use fast path, 1=translation needed. */
59 int compress; /* 1=compressed, 0=not compressed. */
60 int case_cnt; /* Number of cases written so far. */
61 size_t flt64_cnt; /* Number of flt64 elements in case. */
63 /* Compression buffering. */
/* sfm_open_writer allocates buf with room for 128 flt64 elements.
   One element at a time serves as the "instruction octet": x steps
   through its bytes and y marks its end (see put_instruction),
   while put_element stores data elements at ptr. */
64 flt64 *buf; /* Buffered data. */
65 flt64 *end; /* Buffer end. */
66 flt64 *ptr; /* Current location in buffer. */
67 unsigned char *x; /* Location in current instruction octet. */
68 unsigned char *y; /* End of instruction octet. */
/* Array of var_cnt entries allocated by sfm_open_writer, one per
   dictionary variable. */
71 struct sfm_var *vars; /* Variables. */
72 size_t var_cnt; /* Number of variables. */
75 /* A variable in a system file. */
/* sfm_open_writer builds an array of these from the dictionary's
   variables; sfm_write_case and write_compressed_data iterate over
   that array when writing a case. */
78 int width; /* 0=numeric, otherwise string width. */
79 int fv; /* Index into case. */
80 size_t flt64_cnt; /* Number of flt64 elements. */
/* Forward declarations of file-local helpers that are defined
   further down in this file. The write_* helpers and buf_write
   return int; callers test the result with `!', i.e. they treat
   zero as failure. */
83 static char *append_string_max (char *, const char *, const char *);
84 static int write_header (struct sfm_writer *, const struct dictionary *);
85 static int buf_write (struct sfm_writer *, const void *, size_t);
86 static int write_variable (struct sfm_writer *, struct variable *);
87 static int write_value_labels (struct sfm_writer *,
88 struct variable *, int idx);
89 static int write_rec_7_34 (struct sfm_writer *);
91 static int write_longvar_table (struct sfm_writer *w,
92 const struct dictionary *dict);
94 static int write_variable_display_parameters (struct sfm_writer *w,
95 const struct dictionary *dict);
98 static int write_documents (struct sfm_writer *, const struct dictionary *);
99 static int does_dict_need_translation (const struct dictionary *);
/* Returns the number of flt64 elements that variable V takes up: 1
   for a numeric variable, otherwise V's width divided by
   sizeof (flt64). DIV_RND_UP is defined elsewhere; presumably it
   rounds the quotient up -- confirm. */
102 var_flt64_cnt (const struct variable *v)
104 return v->type == NUMERIC ? 1 : DIV_RND_UP (v->width, sizeof (flt64));
107 /* Opens the system file designated by file handle FH for writing
108 cases from dictionary D. If COMPRESS is nonzero, the
109 system file will be compressed. If OMIT_LONGNAMES is nonzero, the
110 long name table will be omitted.
The dictionary records are written here, in this order: file header,
variable records, value labels, documents (only if D has any), record
7 subtypes 3 and 4, variable display parameters, long variable names
(unless omitted), and finally record 999.
112 No reference to D is retained, so it may be modified or
113 destroyed at will after this function returns. */
115 sfm_open_writer (struct file_handle *fh,
116 const struct dictionary *d, int compress,
117 short omit_longnames)
119 struct sfm_writer *w = NULL;
123 if (!fh_open (fh, "system file", "we"))
126 /* Create and initialize writer. */
127 w = xmalloc (sizeof *w);
129 w->file = fopen (handle_get_filename (fh), "wb");
131 w->needs_translation = does_dict_need_translation (d);
132 w->compress = compress;
/* No compression buffer yet; one is allocated near the end of this
   function. */
136 w->buf = w->end = w->ptr = NULL;
/* Copy what is needed from each dictionary variable, so that D need
   not be consulted when cases are written. */
139 w->var_cnt = dict_get_var_cnt (d);
140 w->vars = xmalloc (sizeof *w->vars * w->var_cnt);
141 for (i = 0; i < w->var_cnt; i++)
143 const struct variable *dv = dict_get_var (d, i);
144 struct sfm_var *sv = &w->vars[i];
145 sv->width = dv->width;
147 sv->flt64_cnt = var_flt64_cnt (dv);
150 /* Check that file create succeeded. */
153 msg (ME, _("Error opening \"%s\" for writing "
154 "as a system file: %s."),
155 handle_get_filename (w->fh), strerror (errno));
160 /* Write the file header. */
161 if (!write_header (w, d))
164 /* Write basic variable info. */
/* NOTE(review): unlike the other write_* calls in this function,
   the result of write_variable is not checked here. */
165 for (i = 0; i < dict_get_var_cnt (d); i++)
166 write_variable (w, dict_get_var (d, i));
168 /* Write out value labels. */
/* idx is the running total of flt64 elements of the variables
   before V; write_value_labels records idx + 1 as the variable's
   index. */
169 for (idx = i = 0; i < dict_get_var_cnt (d); i++)
171 struct variable *v = dict_get_var (d, i);
173 if (!write_value_labels (w, v, idx))
175 idx += var_flt64_cnt (v);
/* Documents are written only when the dictionary has some. */
178 if (dict_get_documents (d) != NULL && !write_documents (w, d))
181 if (!write_rec_7_34 (w))
185 /* Write variable display info. */
186 if ( !write_variable_display_parameters(w, d))
190 if ( ! omit_longnames )
192 if (!write_longvar_table (w, d))
196 /* Write record 999. */
205 rec_999.rec_type = 999;
208 if (!buf_write (w, &rec_999, sizeof rec_999))
/* Compression buffer of 128 flt64 elements. The element at ptr
   becomes the first instruction octet: x points at its first byte,
   y just past its last, and ptr moves on to the next element. */
214 w->buf = xmalloc (sizeof *w->buf * 128);
216 w->end = &w->buf[128];
217 w->x = (unsigned char *) w->ptr++;
218 w->y = (unsigned char *) w->ptr;
/* NOTE(review): presumably the shared failure exit -- confirm. */
224 sfm_close_writer (w);
/* Examines the variables of dictionary D in order, comparing each
   variable's case index (fv) with the running index case_idx.
   sfm_open_writer stores the result in the writer's
   needs_translation member (0=use fast path, 1=translation
   needed). */
229 does_dict_need_translation (const struct dictionary *d)
235 for (i = 0; i < dict_get_var_cnt (d); i++)
237 struct variable *v = dict_get_var (d, i);
238 if (v->fv != case_idx)
245 /* Returns value of X truncated to two least-significant digits. */
256 /* Write the sysfile_header header to system file W. */
/* The header fields are taken from dictionary D, from W, and from
   the current time. As a side effect this adds the flt64 count of
   every variable in D to W->flt64_cnt. */
258 write_header (struct sfm_writer *w, const struct dictionary *d)
260 struct sysfile_header hdr;
266 memcpy (hdr.rec_type, "$FL2", 4);
/* Product name: fixed prefix, version, " - ", host system. Each
   piece is cut off at byte 60 of prod_name and whatever room is
   left is filled with spaces. */
268 p = stpcpy (hdr.prod_name, "@(#) SPSS DATA FILE ");
269 p = append_string_max (p, version, &hdr.prod_name[60]);
270 p = append_string_max (p, " - ", &hdr.prod_name[60]);
271 p = append_string_max (p, host_system, &hdr.prod_name[60]);
272 memset (p, ' ', &hdr.prod_name[60] - p);
/* Case size is counted in flt64 elements, summed over all
   variables. */
277 for (i = 0; i < dict_get_var_cnt (d); i++)
278 w->flt64_cnt += var_flt64_cnt (dict_get_var (d, i));
279 hdr.case_size = w->flt64_cnt;
281 hdr.compress = w->compress;
/* Weight index: starts at 1 and grows by the flt64 count of each
   variable visited before it is stored. */
283 if (dict_get_weight (d) != NULL)
285 struct variable *weight_var;
286 int recalc_weight_idx = 1;
289 weight_var = dict_get_weight (d);
292 struct variable *v = dict_get_var (d, i);
295 recalc_weight_idx += var_flt64_cnt (v);
297 hdr.weight_idx = recalc_weight_idx;
303 hdr.bias = COMPRESSION_BIAS;
/* If the current time cannot be obtained, fall back on a fixed
   date and time. */
305 if (time (&t) == (time_t) -1)
307 memcpy (hdr.creation_date, "01 Jan 70", 9);
308 memcpy (hdr.creation_time, "00:00:00", 8);
312 static const char *month_name[12] =
314 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
315 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
/* Each field goes through rerange before formatting. tm_mon is
   zero-based, hence the + 1 here and the - 1 when indexing
   month_name. Hour, minute and second are also passed in with 1
   added, and the 1 is subtracted again when they are formatted. */
317 struct tm *tmp = localtime (&t);
318 int day = rerange (tmp->tm_mday);
319 int mon = rerange (tmp->tm_mon + 1);
320 int year = rerange (tmp->tm_year);
321 int hour = rerange (tmp->tm_hour + 1);
322 int min = rerange (tmp->tm_min + 1);
323 int sec = rerange (tmp->tm_sec + 1);
/* Only sizeof hdr.creation_date / creation_time bytes of the
   formatted text are copied into the header. */
326 sprintf (buf, "%02d %s %02d", day, month_name[mon - 1], year);
327 memcpy (hdr.creation_date, buf, sizeof hdr.creation_date);
328 sprintf (buf, "%02d:%02d:%02d", hour - 1, min - 1, sec - 1);
329 memcpy (hdr.creation_time, buf, sizeof hdr.creation_time);
333 const char *label = dict_get_label (d);
337 st_bare_pad_copy (hdr.file_label, label, sizeof hdr.file_label);
340 memset (hdr.padding, 0, sizeof hdr.padding);
342 if (!buf_write (w, &hdr, sizeof hdr))
347 /* Translates format spec from internal form in SRC to system file
format in *DEST: formats[SRC->type].spss shifted left by 16 bits,
ORed with SRC->w shifted left by 8 bits, ORed with SRC->d. */
350 write_format_spec (struct fmt_spec *src, int32 *dest)
352 *dest = (formats[src->type].spss << 16) | (src->w << 8) | src->d;
355 /* Write the variable record(s) for variable V to system file W:
first the main record, then V's label, then V's missing values if
there are any, and finally, for a string variable wider than one
flt64, one blanked-out continuation record for each additional
356 flt64 element the variable needs. */
358 write_variable (struct sfm_writer *w, struct variable *v)
360 struct sysfile_variable sv;
362 /* Missing values. */
363 flt64 m[3]; /* Missing value values. */
364 int nm; /* Number of missing values, possibly negative. */
368 sv.has_var_label = (v->label != NULL);
/* Fill m[] with the values to be written after the record,
   according to V's missing-value type. In several cases one entry
   is second_lowest_flt64 rather than one of V's own values. */
370 switch (v->miss_type)
378 for (nm = 0; nm < v->miss_type; nm++)
379 m[nm] = v->missing[nm].f;
382 m[0] = v->missing[0].f;
383 m[1] = v->missing[1].f;
387 m[0] = second_lowest_flt64;
388 m[1] = v->missing[0].f;
392 m[0] = v->missing[0].f;
396 case MISSING_RANGE_1:
397 m[0] = v->missing[0].f;
398 m[1] = v->missing[1].f;
399 m[2] = v->missing[2].f;
403 m[0] = second_lowest_flt64;
404 m[1] = v->missing[0].f;
405 m[2] = v->missing[1].f;
409 m[0] = v->missing[0].f;
410 m[1] = second_lowest_flt64;
411 m[2] = v->missing[1].f;
419 sv.n_missing_values = nm;
420 write_format_spec (&v->print, &sv.print);
421 write_format_spec (&v->write, &sv.write);
/* The name is stored without a null terminator and padded with
   spaces to SHORT_NAME_LEN bytes. */
422 memcpy (sv.name, v->name, strlen (v->name));
423 memset (&sv.name[strlen (v->name)], ' ', SHORT_NAME_LEN - strlen (v->name));
424 if (!buf_write (w, &sv, sizeof sv))
/* Variable label: at most 255 bytes of it are kept. ext_len is the
   padded length (ROUND_UP presumably rounds up to a multiple of its
   second argument -- confirm); the padding is spaces, and only the
   part of the label structure up to ext_len bytes of text is
   written. */
438 l.label_len = min (strlen (v->label), 255);
439 ext_len = ROUND_UP (l.label_len, sizeof l.label_len);
440 memcpy (l.label, v->label, l.label_len);
441 memset (&l.label[l.label_len], ' ', ext_len - l.label_len);
443 if (!buf_write (w, &l, offsetof (struct label, label) + ext_len))
/* Missing values follow, when there are any. */
447 if (nm && !buf_write (w, m, sizeof *m * nm))
/* A string wider than one flt64 needs extra records: sv is reused
   with its label flag, missing count, formats and name zeroed, and
   written once per additional flt64 element. */
450 if (v->type == ALPHA && v->width > (int) sizeof (flt64))
456 sv.has_var_label = 0;
457 sv.n_missing_values = 0;
458 memset (&sv.print, 0, sizeof sv.print);
459 memset (&sv.write, 0, sizeof sv.write);
460 memset (&sv.name, 0, sizeof sv.name);
462 pad_count = DIV_RND_UP (v->width, (int) sizeof (flt64)) - 1;
463 for (i = 0; i < pad_count; i++)
464 if (!buf_write (w, &sv, sizeof sv))
471 /* Writes the value labels for variable V having system file variable
472 index IDX to system file W. Returns
473 nonzero only if successful.
Two records are written: a value label record built in a heap
buffer, then a variable index record that names the variable as
IDX + 1. */
475 write_value_labels (struct sfm_writer *w, struct variable *v, int idx)
477 struct value_label_rec
491 struct val_labs_iterator *i;
492 struct value_label_rec *vlr;
493 struct var_idx_rec vir;
/* Variables without any value labels are special-cased up front. */
498 if (!val_labs_count (v->val_labs))
501 /* Pass 1: Count bytes. */
/* Size of the fixed part plus one flt64 per label beyond the first
   (presumably the structure itself already has room for one --
   confirm), plus, for each label, its text and one length byte
   rounded up to whole flt64 units. */
502 vlr_size = (sizeof (struct value_label_rec)
503 + sizeof (flt64) * (val_labs_count (v->val_labs) - 1));
504 for (vl = val_labs_first (v->val_labs, &i); vl != NULL;
505 vl = val_labs_next (v->val_labs, &i))
506 vlr_size += ROUND_UP (strlen (vl->label) + 1, sizeof (flt64));
508 /* Pass 2: Copy bytes. */
509 vlr = xmalloc (vlr_size);
511 vlr->n_labels = val_labs_count (v->val_labs);
/* Unlike pass 1, this pass starts from val_labs_first_sorted. */
513 for (vl = val_labs_first_sorted (v->val_labs, &i); vl != NULL;
514 vl = val_labs_next (v->val_labs, &i))
516 size_t len = strlen (vl->label);
/* Each entry is the value as one flt64, then a single length byte,
   the label text without terminator, and space padding; loc then
   advances past the length byte and text in whole flt64 units.
   NOTE(review): the length is stored in one unsigned char, so this
   assumes labels no longer than 255 bytes -- confirm that is
   enforced elsewhere. */
518 *loc++ = vl->value.f;
519 *(unsigned char *) loc = len;
520 memcpy (&((unsigned char *) loc)[1], vl->label, len);
521 memset (&((unsigned char *) loc)[1 + len], ' ',
522 REM_RND_UP (len + 1, sizeof (flt64)));
523 loc += DIV_RND_UP (len + 1, sizeof (flt64));
526 if (!buf_write (w, vlr, vlr_size))
/* The variable index written to the file is one more than IDX. */
535 vir.vars[0] = idx + 1;
536 if (!buf_write (w, &vir, sizeof vir))
542 /* Writes record type 6, document record. */
/* The documents of dictionary D are treated as lines of 80 bytes:
   the line count is the string length divided by 80, and exactly
   80 * n_lines bytes of document text follow the record header.
   sfm_open_writer calls this only when D has documents. */
544 write_documents (struct sfm_writer *w, const struct dictionary *d)
548 int32 rec_type P; /* Always 6. */
549 int32 n_lines P; /* Number of lines of documents. */
553 const char *documents;
556 documents = dict_get_documents (d);
/* Integer division: any trailing partial line is not counted. */
557 n_lines = strlen (documents) / 80;
560 rec_6.n_lines = n_lines;
561 if (!buf_write (w, &rec_6, sizeof rec_6))
563 if (!buf_write (w, documents, 80 * n_lines))
569 /* Write the alignment, width and scale values */
/* This is record type 7, subtype 11. Its header announces
   W->var_cnt * 3 elements of size 4; the body is one (measure,
   display width, alignment) triple for each variable of DICT, in
   dictionary order. */
571 write_variable_display_parameters (struct sfm_writer *w,
572 const struct dictionary *dict)
584 vdp_hdr.rec_type = 7;
585 vdp_hdr.subtype = 11;
586 vdp_hdr.elem_size = 4;
587 vdp_hdr.n_elem = w->var_cnt * 3;
589 if (!buf_write (w, &vdp_hdr, sizeof vdp_hdr))
592 for ( i = 0 ; i < w->var_cnt ; ++i )
603 v = dict_get_var(dict, i);
605 params.measure = v->measure;
606 params.width = v->display_width;
607 params.align = v->alignment;
/* Write the triple just filled in. */
609 if (!buf_write (w, &params, sizeof(params)))
616 /* Writes the long variable name table */
/* The table body is the block that dict_get_varname_block returns
   for DICT (buf, bufsize). The header declares it as bufsize
   elements of size 1, and the block follows the header as is. */
618 write_longvar_table (struct sfm_writer *w, const struct dictionary *dict)
633 lv_hdr.elem_size = 1;
636 dict_get_varname_block(dict, &buf, &bufsize);
641 lv_hdr.n_elem = bufsize ;
643 if (!buf_write (w, &lv_hdr, sizeof(lv_hdr) ))
646 if (!buf_write (w, buf, bufsize))
657 /* Writes record type 7, subtypes 3 and 4. */
/* Both subtypes are filled into one structure and written with a
   single buf_write. Subtype 3 has int32-sized elements, beginning
   with the three version components followed by -1. Subtype 4 has
   flt64-sized elements: -FLT64_MAX, FLT64_MAX and
   second_lowest_flt64. */
659 write_rec_7_34 (struct sfm_writer *w)
676 /* Components of the version number, from major to minor. */
677 int version_component[3];
679 /* Used to step through the version string. */
682 /* Parses the version string, which is assumed to be of the form
683 #.#x, where each # is a string of digits, and x is a single
letter. The letter becomes the third component, counted from 'a'
as 0 and ignoring case; without a letter the component is 0. */
685 version_component[0] = strtol (bare_version, &p, 10);
/* Carry on from where the first number ended. Parsing bare_version
   from its start again would just yield the first number a second
   time. */
688 version_component[1] = strtol (p, &p, 10);
689 version_component[2] = (isalpha ((unsigned char) *p)
690 ? tolower ((unsigned char) *p) - 'a' : 0);
692 rec_7.rec_type_3 = 7;
694 rec_7.data_type_3 = sizeof (int32);
696 rec_7.elem_3[0] = version_component[0];
697 rec_7.elem_3[1] = version_component[1];
698 rec_7.elem_3[2] = version_component[2];
699 rec_7.elem_3[3] = -1;
701 /* PORTME: 1=IEEE754, 2=IBM 370, 3=DEC VAX E. */
708 /* PORTME: 1=big-endian, 2=little-endian. */
715 /* PORTME: 1=EBCDIC, 2=7-bit ASCII, 3=8-bit ASCII, 4=DEC Kanji. */
718 rec_7.rec_type_4 = 7;
720 rec_7.data_type_4 = sizeof (flt64);
722 rec_7.elem_4[0] = -FLT64_MAX;
723 rec_7.elem_4[1] = FLT64_MAX;
724 rec_7.elem_4[2] = second_lowest_flt64;
726 if (!buf_write (w, &rec_7, sizeof rec_7))
731 /* Write NBYTES starting at BUF to the system file represented by
W. BUF must not be null. A failed write is reported through msg
with the file name and strerror (errno); callers treat a zero
return value as failure. A request to write zero bytes is not an
error. */
734 buf_write (struct sfm_writer *w, const void *buf, size_t nbytes)
736 assert (buf != NULL);
/* fwrite returns 0 when the element size is 0, so an empty write
   must be excluded here or it would be reported as an I/O error. */
737 if (nbytes > 0 && fwrite (buf, nbytes, 1, w->file) != 1)
739 msg (ME, _("%s: Writing system file: %s."),
740 handle_get_filename (w->fh), strerror (errno));
746 /* Copies string SRC to DEST with the proviso that DEST does not reach
747 byte END; no null terminator is copied. Returns a pointer to the
748 byte after the last byte copied. */
750 append_string_max (char *dest, const char *src, const char *end)
/* Copy the whole of SRC, or only as much as fits before END. */
752 int nbytes = min (end - dest, (int) strlen (src));
753 memcpy (dest, src, nbytes);
754 return dest + nbytes;
757 /* Makes certain that the compression buffer of W has room for another
758 element. If there's not room, pads out the current instruction
759 octet with zero and dumps out the buffer. */
761 ensure_buf_space (struct sfm_writer *w)
/* The buffer is full once ptr has reached end. */
763 if (w->ptr >= w->end)
/* Zero the bytes of the instruction octet that were never used. */
765 memset (w->x, 0, w->y - w->x);
/* 128 is the element count that sfm_open_writer allocates. */
768 if (!buf_write (w, w->buf, sizeof *w->buf * 128))
/* Forward declaration: sfm_write_case calls this function, which is
   defined later in the file. */
774 static void write_compressed_data (struct sfm_writer *w, const flt64 *elem);
776 /* Writes case C to system file W.
777 Returns nonzero if successful. */
779 sfm_write_case (struct sfm_writer *w, struct ccase *c)
/* The fast path requires no translation, no compression, and a
   union value of the same size as a flt64. */
783 if (!w->needs_translation && !w->compress
784 && sizeof (flt64) == sizeof (union value))
786 /* Fast path: external and internal representations are the
787 same and the dictionary is properly ordered. Write the case data
directly to the file.
NOTE(review): the result of buf_write is not checked here. */
789 buf_write (w, case_data_all (c), sizeof (union value) * w->flt64_cnt);
793 /* Slow path: internal and external representations differ.
794 Write into a bounce buffer, then write to W. */
/* The bounce buffer holds one case as W->flt64_cnt flt64 elements. */
800 bounce_size = sizeof *bounce * w->flt64_cnt;
801 bounce = bounce_cur = local_alloc (bounce_size);
/* Lay the variables out one after another: a number is stored as a
   single element, string data is copied byte for byte, and the
   cursor then advances by the variable's flt64 count. */
803 for (i = 0; i < w->var_cnt; i++)
805 struct sfm_var *v = &w->vars[i];
808 *bounce_cur = case_num (c, v->fv);
810 memcpy (bounce_cur, case_data (c, v->fv)->s, v->width);
811 bounce_cur += v->flt64_cnt;
/* The assembled case is written either as is or through the
   compressor. */
815 buf_write (w, bounce, bounce_size);
817 write_compressed_data (w, bounce);
/* Appends the byte INSTRUCTION to the current instruction octet of
   W. When a fresh octet is started, buffer space is checked with
   ensure_buf_space and the element at W->ptr is claimed for it: x
   points at its first byte, y just past its last, and ptr moves on
   to the next element. */
826 put_instruction (struct sfm_writer *w, unsigned char instruction)
830 if (!ensure_buf_space (w))
832 w->x = (unsigned char *) w->ptr++;
833 w->y = (unsigned char *) w->ptr;
/* Store the instruction and step to the next byte of the octet. */
835 *w->x++ = instruction;
/* Appends the flt64 at ELEM to the compression buffer of W as a data
   element, after checking buffer space with ensure_buf_space. */
839 put_element (struct sfm_writer *w, const flt64 *elem)
841 if (!ensure_buf_space (w))
/* Copy the element into the buffer and advance ptr past it. */
843 memcpy (w->ptr++, elem, sizeof *elem);
/* Writes one case, laid out as consecutive flt64 elements starting
   at ELEM, to W in compressed form, going through W's variables in
   order. Two encodings appear below; which one applies to a
   variable presumably depends on whether it is numeric -- confirm.
   Number: instruction 255 for -FLT64_MAX; a single instruction
   holding the value plus COMPRESSION_BIAS for an integer between
   1 - COMPRESSION_BIAS and 251 - COMPRESSION_BIAS; otherwise
   instruction 253 followed by the element itself.
   String: for each of the variable's flt64 elements, instruction
   254 if the element consists entirely of spaces, otherwise
   instruction 253 followed by the element itself. */
847 write_compressed_data (struct sfm_writer *w, const flt64 *elem)
851 for (i = 0; i < w->var_cnt; i++)
853 struct sfm_var *v = &w->vars[i];
857 if (*elem == -FLT64_MAX)
858 put_instruction (w, 255);
859 else if (*elem >= 1 - COMPRESSION_BIAS
860 && *elem <= 251 - COMPRESSION_BIAS
861 && *elem == (int) *elem)
862 put_instruction (w, (int) *elem + COMPRESSION_BIAS);
865 put_instruction (w, 253);
866 put_element (w, elem);
874 for (j = 0; j < v->flt64_cnt; j++, elem++)
/* The literal must supply at least sizeof (flt64) bytes of spaces
   for memcmp to read; eight are given here, which assumes an
   8-byte flt64. */
876 if (!memcmp (elem, "        ", sizeof (flt64)))
877 put_instruction (w, 254);
880 put_instruction (w, 253);
881 put_element (w, elem);
888 /* Closes a system file after we're done with it. */
/* Any data still in the compression buffer is written out, the case
   count in the file header is updated on a best-effort basis, and
   the stream is closed; a failure to close is reported through
   msg. */
890 sfm_close_writer (struct sfm_writer *w)
895 fh_close (w->fh, "system file", "we");
/* Flush a partly filled compression buffer: zero the unused bytes
   of the current instruction octet, then write only the elements
   in use (buf up to ptr). The result of buf_write is not checked. */
900 if (w->buf != NULL && w->ptr > w->buf)
902 memset (w->x, 0, w->y - w->x);
903 buf_write (w, w->buf, (w->ptr - w->buf) * sizeof *w->buf);
906 /* Seek back to the beginning and update the number of cases.
907 This is just a courtesy to later readers, so there's no need
908 to check return values or report errors. */
/* fseek returns 0 on success, so the count is rewritten only if the
   seek to the header's case_cnt field worked. */
909 if (!fseek (w->file, offsetof (struct sysfile_header, case_cnt), SEEK_SET))
911 int32 case_cnt = w->case_cnt;
913 /* I don't really care about the return value: it doesn't
914 matter whether this data is written. */
915 fwrite (&case_cnt, sizeof case_cnt, 1, w->file);
918 if (fclose (w->file) == EOF)
919 msg (ME, _("%s: Closing system file: %s."),
920 handle_get_filename (w->fh), strerror (errno));