1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 1997-2000, 2006-2007, 2009-2014 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
25 #include "data/any-reader.h"
26 #include "data/case.h"
27 #include "data/casereader-provider.h"
28 #include "data/casereader.h"
29 #include "data/dictionary.h"
30 #include "data/file-handle-def.h"
31 #include "data/file-name.h"
32 #include "data/format.h"
33 #include "data/identifier.h"
34 #include "data/missing-values.h"
35 #include "data/value-labels.h"
36 #include "data/value.h"
37 #include "data/variable.h"
38 #include "libpspp/float-format.h"
39 #include "libpspp/i18n.h"
40 #include "libpspp/integer-format.h"
41 #include "libpspp/message.h"
42 #include "libpspp/misc.h"
43 #include "libpspp/pool.h"
44 #include "libpspp/str.h"
46 #include "gl/localcharset.h"
47 #include "gl/minmax.h"
48 #include "gl/xalloc.h"
52 #define _(msgid) gettext (msgid)
53 #define N_(msgid) (msgid)
63 struct pcp_dir_entry main;
64 struct pcp_dir_entry variables;
65 struct pcp_dir_entry labels;
66 struct pcp_dir_entry data;
69 struct pcp_main_header
71 char product[63]; /* "PCSPSS SYSTEM FILE..." */
72 unsigned int nominal_case_size; /* Number of var positions. */
73 char creation_date[9]; /* "[m]m/dd/yy". */
74 char creation_time[9]; /* "[H]H:MM:SS". */
75 char file_label[65]; /* File label. */
84 struct fmt_spec format;
88 struct pcp_value_label *val_labs;
94 struct pcp_value_label
100 /* System file reader. */
103 struct any_reader any_reader;
105 /* Resource tracking. */
106 struct pool *pool; /* All system file state. */
109 unsigned int file_size;
110 struct any_read_info info;
111 struct pcp_directory directory;
112 struct pcp_main_header header;
113 struct pcp_var_record *vars;
117 struct file_handle *fh; /* File handle. */
118 struct fh_lock *lock; /* Mutual exclusion for file handle. */
119 FILE *file; /* File stream. */
120 unsigned int pos; /* Position in file. */
121 bool error; /* I/O or corruption error? */
122 struct caseproto *proto; /* Format of output cases. */
125 unsigned int n_cases; /* Number of cases */
126 const char *encoding; /* String encoding. */
130 uint8_t opcodes[8]; /* Current block of opcodes. */
131 size_t opcode_idx; /* Next opcode to interpret, 8 if none left. */
132 bool corruption_warning; /* Warned about possible corruption? */
135 static struct pcp_reader *
136 pcp_reader_cast (const struct any_reader *r_)
138 assert (r_->klass == &pcp_file_reader_class);
139 return UP_CAST (r_, struct pcp_reader, any_reader);
142 static const struct casereader_class pcp_file_casereader_class;
144 static bool pcp_close (struct any_reader *);
146 static bool read_variables_record (struct pcp_reader *);
148 static void pcp_msg (struct pcp_reader *r, off_t, int class,
149 const char *format, va_list args)
150 PRINTF_FORMAT (4, 0);
151 static void pcp_warn (struct pcp_reader *, off_t, const char *, ...)
152 PRINTF_FORMAT (3, 4);
153 static void pcp_error (struct pcp_reader *, off_t, const char *, ...)
154 PRINTF_FORMAT (3, 4);
156 static bool read_bytes (struct pcp_reader *, void *, size_t)
158 static int try_read_bytes (struct pcp_reader *, void *, size_t)
160 static bool read_uint16 (struct pcp_reader *, unsigned int *)
162 static bool read_uint32 (struct pcp_reader *, unsigned int *)
164 static bool read_float (struct pcp_reader *, double *)
166 static double parse_float (const uint8_t number[8]);
167 static bool read_string (struct pcp_reader *, char *, size_t)
169 static bool skip_bytes (struct pcp_reader *, size_t) WARN_UNUSED_RESULT;
171 static bool pcp_seek (struct pcp_reader *, off_t);
173 static bool pcp_is_sysmis(const uint8_t *);
175 /* Dictionary reader. */
177 static bool read_dictionary (struct pcp_reader *);
178 static bool read_main_header (struct pcp_reader *, struct pcp_main_header *);
179 static void parse_header (struct pcp_reader *,
180 const struct pcp_main_header *,
181 struct any_read_info *, struct dictionary *);
182 static bool parse_variable_records (struct pcp_reader *, struct dictionary *,
183 struct pcp_var_record *, size_t n);
185 /* Tries to open FH for reading as an SPSS/PC+ system file. Returns a
186 pcp_reader if successful, otherwise NULL. */
187 static struct any_reader *
188 pcp_open (struct file_handle *fh)
190 struct pcp_reader *r;
193 /* Create and initialize reader. */
194 r = xzalloc (sizeof *r);
195 r->any_reader.klass = &pcp_file_reader_class;
196 r->pool = pool_create ();
197 pool_register (r->pool, free, r);
199 r->opcode_idx = sizeof r->opcodes;
201 /* TRANSLATORS: this fragment will be interpolated into
202 messages in fh_lock() that identify types of files. */
203 r->lock = fh_lock (fh, FH_REF_FILE, N_("SPSS/PC+ system file"),
209 r->file = fn_open (fh_get_file_name (fh), "rb");
212 msg (ME, _("Error opening `%s' for reading as an SPSS/PC+ "
214 fh_get_file_name (r->fh), strerror (errno));
218 /* Fetch file size. */
219 if (fstat (fileno (r->file), &s))
221 pcp_error (ME, 0, _("%s: stat failed (%s)."),
222 fh_get_file_name (r->fh), strerror (errno));
225 if (s.st_size > UINT_MAX)
227 pcp_error (ME, 0, _("%s: file too large."), fh_get_file_name (r->fh));
230 r->file_size = s.st_size;
232 /* Read dictionary. */
233 if (!read_dictionary (r))
236 if (!pcp_seek (r, r->directory.data.ofs))
239 return &r->any_reader;
242 pcp_close (&r->any_reader);
247 pcp_read_dir_entry (struct pcp_reader *r, struct pcp_dir_entry *de)
249 if (!read_uint32 (r, &de->ofs) || !read_uint32 (r, &de->len))
252 if (de->len > r->file_size || de->ofs > r->file_size - de->len)
254 pcp_error (r, r->pos - 8, _("Directory entry is for a %u-byte record "
255 "starting at offset %u but file is only "
257 de->ofs, de->len, r->file_size);
265 read_dictionary (struct pcp_reader *r)
267 unsigned int two, zero;
269 if (!read_uint32 (r, &two) || !read_uint32 (r, &zero))
271 if (two != 2 || zero != 0)
272 pcp_warn (r, 0, _("Directory fields have unexpected values "
273 "(%u,%u)."), two, zero);
275 if (!pcp_read_dir_entry (r, &r->directory.main)
276 || !pcp_read_dir_entry (r, &r->directory.variables)
277 || !pcp_read_dir_entry (r, &r->directory.labels)
278 || !pcp_read_dir_entry (r, &r->directory.data))
281 if (!read_main_header (r, &r->header))
284 read_variables_record (r);
289 struct get_strings_aux
300 add_string__ (struct get_strings_aux *aux,
301 const char *string, bool id, char *title)
303 if (aux->n >= aux->allocated)
305 aux->allocated = 2 * (aux->allocated + 1);
306 aux->titles = pool_realloc (aux->pool, aux->titles,
307 aux->allocated * sizeof *aux->titles);
308 aux->strings = pool_realloc (aux->pool, aux->strings,
309 aux->allocated * sizeof *aux->strings);
310 aux->ids = pool_realloc (aux->pool, aux->ids,
311 aux->allocated * sizeof *aux->ids);
314 aux->titles[aux->n] = title;
315 aux->strings[aux->n] = pool_strdup (aux->pool, string);
316 aux->ids[aux->n] = id;
320 static void PRINTF_FORMAT (3, 4)
321 add_string (struct get_strings_aux *aux,
322 const char *string, const char *title, ...)
326 va_start (args, title);
327 add_string__ (aux, string, false, pool_vasprintf (aux->pool, title, args));
331 static void PRINTF_FORMAT (3, 4)
332 add_id (struct get_strings_aux *aux, const char *id, const char *title, ...)
336 va_start (args, title);
337 add_string__ (aux, id, true, pool_vasprintf (aux->pool, title, args));
341 /* Retrieves significant string data from R in its raw format, to allow the
342 caller to try to detect the encoding in use.
344 Returns the number of strings retrieved N. Sets each of *TITLESP, *IDSP,
345 and *STRINGSP to an array of N elements allocated from POOL. For each I in
346 0...N-1, UTF-8 string *TITLESP[I] describes *STRINGSP[I], which is in
347 whatever encoding system file R uses. *IDS[I] is true if *STRINGSP[I] must
348 be a valid PSPP language identifier, false if *STRINGSP[I] is free-form
351 pcp_get_strings (const struct any_reader *r_, struct pool *pool,
352 char ***titlesp, bool **idsp, char ***stringsp)
354 struct pcp_reader *r = pcp_reader_cast (r_);
355 struct get_strings_aux aux;
367 for (i = 0; i < r->n_vars; i++)
368 if (r->vars[i].width != -1)
369 add_id (&aux, r->vars[i].name, _("Variable %zu"), ++var_idx);
372 for (i = 0; i < r->n_vars; i++)
373 if (r->vars[i].width != -1)
376 if (r->vars[i].label)
377 add_string (&aux, r->vars[i].label, _("Variable %zu Label"),
380 for (j = 0; j < r->vars[i].n_val_labs; j++)
381 add_string (&aux, r->vars[i].label,
382 _("Variable %zu Value Label %zu"), var_idx, j);
385 add_string (&aux, r->header.creation_date, _("Creation Date"));
386 add_string (&aux, r->header.creation_time, _("Creation Time"));
387 add_string (&aux, r->header.product, _("Product"));
388 add_string (&aux, r->header.file_label, _("File Label"));
390 *titlesp = aux.titles;
392 *stringsp = aux.strings;
/* Deletes the variable named NAME from DICT, if there is one. */
static void
find_and_delete_var (struct dictionary *dict, const char *name)
{
  struct variable *victim = dict_lookup_var (dict, name);

  if (victim == NULL)
    return;
  dict_delete_var (dict, victim);
}
/* Decodes the dictionary read from R, saving it into *DICTP.  Character
   strings in R are decoded using ENCODING, or using the locale's default
   encoding (with a warning) if ENCODING is null.

   If INFOP is non-null, then it receives additional info about the system
   file, which the caller must eventually free with any_read_info_destroy()
   when it is no longer needed.

   This function consumes R.  The caller must not use it again later, even to
   destroy it with pcp_close(). */
414 static struct casereader *
415 pcp_decode (struct any_reader *r_, const char *encoding,
416 struct dictionary **dictp, struct any_read_info *infop)
418 struct pcp_reader *r = pcp_reader_cast (r_);
419 struct dictionary *dict;
421 if (encoding == NULL)
423 encoding = locale_charset ();
424 pcp_warn (r, -1, _("Using default encoding %s to read this SPSS/PC+ "
425 "system file. For best results, specify an "
426 "encoding explicitly. Use SYSFILE INFO with "
427 "ENCODING=\"DETECT\" to analyze the possible "
432 dict = dict_create (encoding);
433 r->encoding = dict_get_encoding (dict);
435 parse_header (r, &r->header, &r->info, dict);
436 if (!parse_variable_records (r, dict, r->vars, r->n_vars))
439 /* Create an index of dictionary variable widths for
440 pcp_read_case to use. We cannot use the `struct variable's
441 from the dictionary we created, because the caller owns the
442 dictionary and may destroy or modify its variables. */
443 r->proto = caseproto_ref_pool (dict_get_proto (dict), r->pool);
445 find_and_delete_var (dict, "CASENUM_");
446 find_and_delete_var (dict, "DATE_");
447 find_and_delete_var (dict, "WEIGHT_");
453 memset (&r->info, 0, sizeof r->info);
456 return casereader_create_sequential
457 (NULL, r->proto, r->n_cases, &pcp_file_casereader_class, r);
460 pcp_close (&r->any_reader);
466 /* Closes R, which should have been returned by pcp_open() but not already
467 closed with pcp_decode() or this function.
468 Returns true if an I/O error has occurred on READER, false
471 pcp_close (struct any_reader *r_)
473 struct pcp_reader *r = pcp_reader_cast (r_);
478 if (fn_close (fh_get_file_name (r->fh), r->file) == EOF)
480 msg (ME, _("Error closing system file `%s': %s."),
481 fh_get_file_name (r->fh), strerror (errno));
487 any_read_info_destroy (&r->info);
492 pool_destroy (r->pool);
497 /* Destroys READER. */
499 pcp_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
501 struct pcp_reader *r = r_;
502 pcp_close (&r->any_reader);
/* Returns 1 if FILE is an SPSS/PC+ system file, 0 if it is not, or a
   negative errno value if FILE could not be examined.

   The test looks for the 4-byte signature "SPSS" at offset 0x104. */
static int
pcp_detect (FILE *file)
{
  static const char signature[4] = "SPSS";
  char buf[sizeof signature];

  if (fseek (file, 0x104, SEEK_SET))
    return -errno;

  if (fread (buf, sizeof buf, 1, file) != 1)
    {
      /* A short read at end of file only means that the file is too small
         to hold the signature.  BUF is not fully initialized in that case,
         so it must not be compared. */
      return ferror (file) ? -errno : 0;
    }

  return !memcmp (buf, signature, sizeof buf);
}
/* Reads the main header (record 0) of the SPSS/PC+ system file.  Stores the
   string fields and the nominal case size in *HEADER, and the case count and
   compression flag in R.  The strings are left in the file's own encoding;
   parse_header() converts them later, once the encoding is known. */
524 read_main_header (struct pcp_reader *r, struct pcp_main_header *header)
526 unsigned int base_ofs = r->directory.main.ofs;
527 size_t min_values, min_data_size;
528 unsigned int zero0, zero1, zero2;
529 unsigned int one0, one1;
530 unsigned int compressed;
531 unsigned int n_cases1;
534 if (!pcp_seek (r, base_ofs))
537 if (r->directory.main.len < 0xb0)
539 pcp_error (r, r->pos, _("This is not an SPSS/PC+ system file."));
542 else if (r->directory.main.len > 0xb0)
543 pcp_warn (r, r->pos, _("Record 0 has unexpected length %u."),
544 r->directory.main.len);
546 if (!read_uint16 (r, &one0)
547 || !read_string (r, header->product, sizeof header->product)
548 || !read_bytes (r, sysmis, sizeof sysmis)
549 || !read_uint32 (r, &zero0)
550 || !read_uint32 (r, &zero1)
551 || !read_uint16 (r, &one1)
552 || !read_uint16 (r, &compressed)
553 || !read_uint16 (r, &header->nominal_case_size)
554 || !read_uint32 (r, &r->n_cases)
555 || !read_uint16 (r, &zero2)
556 || !read_uint32 (r, &n_cases1)
557 || !read_string (r, header->creation_date, sizeof header->creation_date)
558 || !read_string (r, header->creation_time, sizeof header->creation_time)
559 || !read_string (r, header->file_label, sizeof header->file_label))
562 if (!pcp_is_sysmis (sysmis))
564 double d = parse_float (sysmis);
565 pcp_warn (r, base_ofs, _("Record 0 specifies unexpected system missing "
566 "value %g (%a)."), d, d);
568 if (one0 != 1 || one1 != 1 || zero0 != 0 || zero1 != 0 || zero2 != 0)
569 pcp_warn (r, base_ofs, _("Record 0 reserved fields have unexpected values "
570 "(%u,%u,%u,%u,%u)."),
571 one0, one1, zero0, zero1, zero2);
572 if (n_cases1 != r->n_cases)
573 pcp_warn (r, base_ofs, _("Record 0 case counts differ (%u versus %u)."),
574 r->n_cases, n_cases1);
575 if (compressed != 0 && compressed != 1)
577 pcp_error (r, base_ofs, _("Invalid compression type %u."), compressed);
581 r->compressed = compressed != 0;
583 min_values = xtimes (header->nominal_case_size, r->n_cases);
584 min_data_size = xtimes (compressed ? 1 : 8, min_values);
585 if (r->directory.data.len < min_data_size
586 || size_overflow_p (min_data_size))
588 pcp_warn (r, base_ofs, _("Record 0 claims %u cases with %u values per "
589 "case (requiring at least %zu bytes) but data "
590 "record is only %u bytes long."),
591 r->n_cases, header->nominal_case_size, min_data_size,
592 r->directory.data.len);
600 read_value_labels (struct pcp_reader *r, struct pcp_var_record *var,
601 unsigned int start, unsigned int end)
603 size_t allocated_val_labs = 0;
607 if (end > r->directory.labels.len)
609 pcp_warn (r, r->pos - 32,
610 _("Value labels claimed to end at offset %u in labels record "
611 "but labels record is only %u bytes."),
612 end, r->directory.labels.len);
616 start += r->directory.labels.ofs;
617 end += r->directory.labels.ofs;
618 if (start > end || end > r->file_size)
620 pcp_warn (r, r->pos - 32,
621 _("Value labels claimed to be at offset %u with length %u "
622 "but file size is only %u bytes."),
623 start, end - start, r->file_size);
627 if (!pcp_seek (r, start))
630 while (r->pos < end && end - r->pos > 8)
632 struct pcp_value_label *vl;
635 if (var->n_val_labs >= allocated_val_labs)
636 var->val_labs = x2nrealloc (var->val_labs, &allocated_val_labs,
637 sizeof *var->val_labs);
638 vl = &var->val_labs[var->n_val_labs];
640 if (!read_bytes (r, vl->value, sizeof vl->value)
641 || !read_bytes (r, &len, 1))
644 if (end - r->pos < len)
647 _("Value labels end with partial label (%u bytes left in "
648 "record, label length %"PRIu8")."),
652 vl->label = pool_malloc (r->pool, len + 1);
653 if (!read_bytes (r, vl->label, len))
656 vl->label[len] = '\0';
660 pcp_warn (r, r->pos, _("%u leftover bytes following value labels."),
667 read_var_label (struct pcp_reader *r, struct pcp_var_record *var,
673 if (ofs >= r->directory.labels.len)
675 pcp_warn (r, r->pos - 32,
676 _("Variable label claimed to start at offset %u in labels "
677 "record but labels record is only %u bytes."),
678 ofs, r->directory.labels.len);
682 if (!pcp_seek (r, ofs + r->directory.labels.ofs) || !read_bytes (r, &len, 1))
685 if (len >= r->directory.labels.len - ofs)
687 pcp_warn (r, r->pos - 1,
688 _("Variable label with length %u starting at offset %u in "
689 "labels record overruns end of %u-byte labels record."),
690 len, ofs + 1, r->directory.labels.len);
694 var->label = pool_malloc (r->pool, len + 1);
695 var->label[len] = '\0';
696 return read_bytes (r, var->label, len);
699 /* Reads the variables record (record 1) into R. */
701 read_variables_record (struct pcp_reader *r)
705 if (!pcp_seek (r, r->directory.variables.ofs))
707 if (r->directory.variables.len != r->header.nominal_case_size * 32)
709 pcp_error (r, r->pos, _("Record 1 has length %u (expected %u)."),
710 r->directory.variables.len, r->header.nominal_case_size * 32);
714 r->vars = pool_calloc (r->pool,
715 r->header.nominal_case_size, sizeof *r->vars);
716 for (i = 0; i < r->header.nominal_case_size; i++)
718 struct pcp_var_record *var = &r->vars[r->n_vars++];
719 unsigned int value_label_start, value_label_end;
720 unsigned int var_label_ofs;
725 if (!read_uint32 (r, &value_label_start)
726 || !read_uint32 (r, &value_label_end)
727 || !read_uint32 (r, &var_label_ofs)
728 || !read_uint32 (r, &format)
729 || !read_string (r, var->name, sizeof var->name)
730 || !read_bytes (r, var->missing, sizeof var->missing))
733 raw_type = format >> 16;
734 if (!fmt_from_io (raw_type, &var->format.type))
736 pcp_error (r, var->pos, _("Variable %u has invalid type %"PRIu8"."),
741 var->format.w = (format >> 8) & 0xff;
742 var->format.d = format & 0xff;
743 fmt_fix_output (&var->format);
744 var->width = fmt_var_width (&var->format);
748 unsigned int save_pos = r->pos;
749 if (!read_var_label (r, var, var_label_ofs)
750 || !pcp_seek (r, save_pos))
754 if (value_label_end > value_label_start && var->width <= 8)
756 unsigned int save_pos = r->pos;
757 if (!read_value_labels (r, var, value_label_start, value_label_end)
758 || !pcp_seek (r, save_pos))
764 int extra = DIV_RND_UP (var->width - 8, 8);
766 if (!skip_bytes (r, 32 * extra))
775 recode_and_trim_string (struct pool *pool, const char *from, const char *in)
777 struct substring out;
779 out = recode_substring_pool ("UTF-8", from, ss_cstr (in), pool);
780 ss_trim (&out, ss_cstr (" "));
781 return ss_xstrdup (out);
785 parse_header (struct pcp_reader *r, const struct pcp_main_header *header,
786 struct any_read_info *info, struct dictionary *dict)
788 const char *dict_encoding = dict_get_encoding (dict);
791 memset (info, 0, sizeof *info);
793 info->integer_format = INTEGER_LSB_FIRST;
794 info->float_format = FLOAT_IEEE_DOUBLE_LE;
795 info->compression = r->compressed ? ANY_COMP_SIMPLE : ANY_COMP_NONE;
796 info->case_cnt = r->n_cases;
798 /* Convert file label to UTF-8 and put it into DICT. */
799 label = recode_and_trim_string (r->pool, dict_encoding, header->file_label);
800 dict_set_label (dict, label);
803 /* Put creation date, time, and product in UTF-8 into INFO. */
804 info->creation_date = recode_and_trim_string (r->pool, dict_encoding,
805 header->creation_date);
806 info->creation_time = recode_and_trim_string (r->pool, dict_encoding,
807 header->creation_time);
808 info->product = recode_and_trim_string (r->pool, dict_encoding,
812 /* Reads a variable (type 2) record from R and adds the
813 corresponding variable to DICT.
814 Also skips past additional variable records for long string
817 parse_variable_records (struct pcp_reader *r, struct dictionary *dict,
818 struct pcp_var_record *var_recs, size_t n_var_recs)
820 const char *dict_encoding = dict_get_encoding (dict);
821 struct pcp_var_record *rec;
823 for (rec = var_recs; rec < &var_recs[n_var_recs]; rec++)
825 struct variable *var;
830 name = recode_string_pool ("UTF-8", dict_encoding,
831 rec->name, -1, r->pool);
832 name[strcspn (name, " ")] = '\0';
833 weight = !strcmp (name, "$WEIGHT") && rec->width == 0;
835 /* Transform $DATE => DATE_, $WEIGHT => WEIGHT_, $CASENUM => CASENUM_. */
837 name = pool_asprintf (r->pool, "%s_", name + 1);
839 if (!dict_id_is_valid (dict, name, false) || name[0] == '#')
841 pcp_error (r, rec->pos, _("Invalid variable name `%s'."), name);
845 var = rec->var = dict_create_var (dict, name, rec->width);
848 char *new_name = dict_make_unique_var_name (dict, NULL, NULL);
849 pcp_warn (r, rec->pos, _("Renaming variable with duplicate name "
852 var = rec->var = dict_create_var_assert (dict, new_name, rec->width);
856 dict_set_weight (dict, var);
858 /* Set the short name the same as the long name. */
859 var_set_short_name (var, 0, name);
861 /* Get variable label, if any. */
866 utf8_label = recode_string ("UTF-8", dict_encoding, rec->label, -1);
867 var_set_label (var, utf8_label);
871 /* Add value labels. */
872 for (i = 0; i < rec->n_val_labs; i++)
877 value_init (&value, rec->width);
878 if (var_is_numeric (var))
879 value.f = parse_float (rec->val_labs[i].value);
881 memcpy (value_str_rw (&value, rec->width),
882 rec->val_labs[i].value, rec->width);
884 utf8_label = recode_string ("UTF-8", dict_encoding,
885 rec->val_labs[i].label, -1);
886 var_add_value_label (var, &value, utf8_label);
889 value_destroy (&value, rec->width);
892 /* Set missing values. */
893 if (rec->width <= 8 && !pcp_is_sysmis (rec->missing))
895 int width = var_get_width (var);
896 struct missing_values mv;
898 mv_init_pool (r->pool, &mv, width);
899 if (var_is_numeric (var))
900 mv_add_num (&mv, parse_float (rec->missing));
902 mv_add_str (&mv, rec->missing, MIN (width, 8));
903 var_set_missing_values (var, &mv);
907 var_set_both_formats (var, &rec->format);
915 static void read_error (struct casereader *, const struct pcp_reader *);
917 static bool read_case_number (struct pcp_reader *, double *);
918 static int read_case_string (struct pcp_reader *, uint8_t *, size_t);
919 static int read_opcode (struct pcp_reader *);
920 static bool read_compressed_number (struct pcp_reader *, double *);
921 static int read_compressed_string (struct pcp_reader *, uint8_t *);
922 static int read_whole_strings (struct pcp_reader *, uint8_t *, size_t);
924 /* Reads and returns one case from READER's file. Returns a null
925 pointer if not successful. */
926 static struct ccase *
927 pcp_file_casereader_read (struct casereader *reader, void *r_)
929 struct pcp_reader *r = r_;
930 unsigned int start_pos = r->pos;
935 if (r->error || !r->n_cases)
939 c = case_create (r->proto);
940 for (i = 0; i < r->n_vars; i++)
942 struct pcp_var_record *var = &r->vars[i];
943 union value *v = case_data_rw_idx (c, i);
946 retval = read_case_number (r, &v->f);
948 retval = read_case_string (r, value_str_rw (v, var->width),
953 pcp_error (r, r->pos, _("File ends in partial case."));
957 if (r->pos > r->directory.data.ofs + r->directory.data.len)
959 pcp_error (r, r->pos, _("Case beginning at offset 0x%08x extends past "
960 "end of data record at offset 0x%08x."),
961 start_pos, r->directory.data.ofs + r->directory.data.len);
968 read_error (reader, r);
973 /* Issues an error that an unspecified error occurred PCP, and
976 read_error (struct casereader *r, const struct pcp_reader *pcp)
978 msg (ME, _("Error reading case from file %s."), fh_get_name (pcp->fh));
979 casereader_force_error (r);
982 /* Reads a number from R and stores its value in *D.
983 If R is compressed, reads a compressed number;
984 otherwise, reads a number in the regular way.
985 Returns true if successful, false if end of file is
986 reached immediately. */
988 read_case_number (struct pcp_reader *r, double *d)
993 if (!try_read_bytes (r, number, sizeof number))
995 *d = parse_float (number);
999 return read_compressed_number (r, d);
/* Reads LENGTH string bytes from R into S.  Always reads a multiple of 8
   bytes; if LENGTH is not a multiple of 8, then extra bytes are read and
   discarded without being written to S.  Reads compressed strings if R is
   compressed.  Returns 1 if successful, 0 if end of file is reached
   immediately, or -1 for some kind of error. */
1008 read_case_string (struct pcp_reader *r, uint8_t *s, size_t length)
1010 size_t whole = ROUND_DOWN (length, 8);
1011 size_t partial = length % 8;
1015 int retval = read_whole_strings (r, s, whole);
1023 int retval = read_whole_strings (r, bounce, sizeof bounce);
1026 memcpy (s + whole, bounce, partial);
1032 /* Reads and returns the next compression opcode from R. */
1034 read_opcode (struct pcp_reader *r)
1036 assert (r->compressed);
1037 if (r->opcode_idx >= sizeof r->opcodes)
1039 int retval = try_read_bytes (r, r->opcodes, sizeof r->opcodes);
1044 return r->opcodes[r->opcode_idx++];
1047 /* Reads a compressed number from R and stores its value in D.
1048 Returns true if successful, false if end of file is
1049 reached immediately. */
1051 read_compressed_number (struct pcp_reader *r, double *d)
1053 int opcode = read_opcode (r);
1064 return read_float (r, d);
1067 *d = opcode - 105.0;
1072 /* Reads a compressed 8-byte string segment from R and stores it in DST. */
1074 read_compressed_string (struct pcp_reader *r, uint8_t *dst)
1079 opcode = read_opcode (r);
1086 retval = read_bytes (r, dst, 8);
1087 return retval == 1 ? 1 : -1;
1090 if (!r->corruption_warning)
1092 r->corruption_warning = true;
1093 pcp_warn (r, r->pos,
1094 _("Possible compressed data corruption: "
1095 "string contains compressed integer (opcode %d)."),
1098 memset (dst, ' ', 8);
/* Reads LENGTH string bytes from R into S.  LENGTH must be a multiple of 8.
   Reads compressed strings if R is compressed.  Returns 1 if successful, 0 if
   end of file is reached immediately, or -1 for some kind of error. */
1107 read_whole_strings (struct pcp_reader *r, uint8_t *s, size_t length)
1109 assert (length % 8 == 0);
1111 return try_read_bytes (r, s, length);
1116 for (ofs = 0; ofs < length; ofs += 8)
1118 int retval = read_compressed_string (r, s + ofs);
1128 /* Displays a corruption message. */
1130 pcp_msg (struct pcp_reader *r, off_t offset,
1131 int class, const char *format, va_list args)
1136 ds_init_empty (&text);
1138 ds_put_format (&text, _("`%s' near offset 0x%llx: "),
1139 fh_get_file_name (r->fh), (long long int) offset);
1141 ds_put_format (&text, _("`%s': "), fh_get_file_name (r->fh));
1142 ds_put_vformat (&text, format, args);
1144 m.category = msg_class_to_category (class);
1145 m.severity = msg_class_to_severity (class);
1151 m.text = ds_cstr (&text);
1156 /* Displays a warning for offset OFFSET in the file. */
1158 pcp_warn (struct pcp_reader *r, off_t offset, const char *format, ...)
1162 va_start (args, format);
1163 pcp_msg (r, offset, MW, format, args);
/* Displays an error for offset OFFSET in the file and
   marks R as being in an error state.
   Returns normally; the caller must still bail out itself. */
1171 pcp_error (struct pcp_reader *r, off_t offset, const char *format, ...)
1175 va_start (args, format);
1176 pcp_msg (r, offset, ME, format, args);
1182 /* Reads BYTE_CNT bytes into BUF.
1183 Returns 1 if exactly BYTE_CNT bytes are successfully read.
1184 Returns -1 if an I/O error or a partial read occurs.
1185 Returns 0 for an immediate end-of-file and, if EOF_IS_OK is false, reports
1188 read_bytes_internal (struct pcp_reader *r, bool eof_is_ok,
1189 void *buf, size_t byte_cnt)
1191 size_t bytes_read = fread (buf, 1, byte_cnt, r->file);
1192 r->pos += bytes_read;
1193 if (bytes_read == byte_cnt)
1195 else if (ferror (r->file))
1197 pcp_error (r, r->pos, _("System error: %s."), strerror (errno));
1200 else if (!eof_is_ok || bytes_read != 0)
1202 pcp_error (r, r->pos, _("Unexpected end of file."));
/* Reads BYTE_CNT bytes into BUF.
   Returns true only if all BYTE_CNT bytes were read.
   Returns false upon I/O error or if end-of-file is encountered. */
static bool
read_bytes (struct pcp_reader *r, void *buf, size_t byte_cnt)
{
  const int status = read_bytes_internal (r, false, buf, byte_cnt);

  return status == 1;
}
/* Reads BYTE_CNT bytes into BUF, tolerating an immediate end-of-file.
   Returns 1 if exactly BYTE_CNT bytes are successfully read,
   0 if an immediate end-of-file is encountered,
   or -1 if an I/O error or a partial read occurs. */
static int
try_read_bytes (struct pcp_reader *r, void *buf, size_t byte_cnt)
{
  const int status = read_bytes_internal (r, true, buf, byte_cnt);

  return status;
}
/* Reads a 16-bit unsigned integer from R and stores its value in host format
   in *X.  Returns true if successful, otherwise false. */
1231 read_uint16 (struct pcp_reader *r, unsigned int *x)
1234 if (read_bytes (r, integer, sizeof integer) != 1)
1236 *x = integer_get (INTEGER_LSB_FIRST, integer, sizeof integer);
/* Reads a 32-bit unsigned integer from R and stores its value in host format
   in *X.  Returns true if successful, otherwise false. */
1243 read_uint32 (struct pcp_reader *r, unsigned int *x)
1246 if (read_bytes (r, integer, sizeof integer) != 1)
1248 *x = integer_get (INTEGER_LSB_FIRST, integer, sizeof integer);
1252 /* Reads exactly SIZE - 1 bytes into BUFFER
1253 and stores a null byte into BUFFER[SIZE - 1]. */
1255 read_string (struct pcp_reader *r, char *buffer, size_t size)
1260 ok = read_bytes (r, buffer, size - 1);
1262 buffer[size - 1] = '\0';
1266 /* Skips BYTES bytes forward in R. */
1268 skip_bytes (struct pcp_reader *r, size_t bytes)
1273 size_t chunk = MIN (sizeof buffer, bytes);
1274 if (!read_bytes (r, buffer, chunk))
1283 pcp_seek (struct pcp_reader *r, off_t offset)
1285 if (fseeko (r->file, offset, SEEK_SET))
1287 pcp_error (r, 0, _("%s: seek failed (%s)."),
1288 fh_get_file_name (r->fh), strerror (errno));
/* Reads a 64-bit floating-point number from R and stores its value, in
   host format, in *D.  Returns true if successful; on failure returns
   false and leaves *D untouched. */
static bool
read_float (struct pcp_reader *r, double *d)
{
  uint8_t raw[8];
  const bool ok = read_bytes (r, raw, sizeof raw);

  if (ok)
    *d = parse_float (raw);
  return ok;
}
1312 parse_float (const uint8_t number[8])
1314 return (pcp_is_sysmis (number)
1316 : float_get_double (FLOAT_IEEE_DOUBLE_LE, number));
/* Returns true if the 8 bytes at P are exactly the byte pattern that this
   reader treats as the system-missing value, false otherwise. */
static bool
pcp_is_sysmis(const uint8_t *p)
{
  static const uint8_t pattern[8]
    = { 0xf5, 0x1e, 0x26, 0x02, 0x8a, 0x8c, 0xed, 0xff };
  size_t i;

  for (i = 0; i < sizeof pattern; i++)
    if (p[i] != pattern[i])
      return false;
  return true;
}
1327 static const struct casereader_class pcp_file_casereader_class =
1329 pcp_file_casereader_read,
1330 pcp_file_casereader_destroy,
1335 const struct any_reader_class pcp_file_reader_class =
1337 N_("SPSS/PC+ System File"),