X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=23acd96fcaa1d7ad33d4a70f11c18842db4dfdc0;hb=0c335df036f26ba66ad3d6d4fd0fef6b7f8bf359;hp=d9d26d0a2d782aa726e8b69ceb1d6a6ac4bf1bde;hpb=b8f75b2ac6bc701ecacaa248d630918d7a7346e2;p=pspp-builds.git diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index d9d26d0a..23acd96f 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,8 +16,8 @@ #include -#include -#include +#include "data/sys-file-reader.h" +#include "data/sys-file-private.h" #include #include @@ -25,36 +25,36 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "c-ctype.h" -#include "inttostr.h" -#include "minmax.h" -#include "unlocked-io.h" -#include "xalloc.h" -#include "xsize.h" +#include "data/attributes.h" +#include "data/case.h" +#include "data/casereader-provider.h" +#include "data/casereader.h" +#include "data/dictionary.h" +#include "data/file-handle-def.h" +#include "data/file-name.h" +#include "data/format.h" +#include "data/missing-values.h" +#include "data/mrset.h" +#include "data/short-names.h" +#include "data/value-labels.h" +#include "data/value.h" +#include "data/variable.h" +#include "libpspp/array.h" +#include "libpspp/assertion.h" +#include "libpspp/compiler.h" +#include "libpspp/i18n.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "libpspp/pool.h" +#include "libpspp/str.h" +#include "libpspp/stringi-set.h" + +#include "gl/c-ctype.h" +#include "gl/inttostr.h" +#include "gl/minmax.h" +#include "gl/unlocked-io.h" +#include "gl/xalloc.h" +#include "gl/xsize.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -100,6 +100,8 @@ static struct variable **make_var_by_value_idx (struct sfm_reader *, static struct variable *lookup_var_by_value_idx (struct sfm_reader *, struct variable **, int value_idx); +static struct variable *lookup_var_by_short_name (struct dictionary *, + const char *short_name); static void sys_msg (struct sfm_reader *r, int class, const char *format, va_list args) @@ -128,12 +130,19 @@ static void text_warn (struct sfm_reader *r, struct text_record *text, const char *format, ...) PRINTF_FORMAT (3, 4); static char *text_get_token (struct text_record *, - struct substring delimiters); + struct substring delimiters, char *delimiter); static bool text_match (struct text_record *, char c); +static bool text_read_variable_name (struct sfm_reader *, struct dictionary *, + struct text_record *, + struct substring delimiters, + struct variable **); static bool text_read_short_name (struct sfm_reader *, struct dictionary *, struct text_record *, struct substring delimiters, struct variable **); +static const char *text_parse_counted_string (struct sfm_reader *, + struct text_record *); +static size_t text_pos (const struct text_record *); static bool close_reader (struct sfm_reader *r); @@ -169,6 +178,8 @@ static void read_machine_integer_info (struct sfm_reader *, ); static void read_machine_float_info (struct sfm_reader *, size_t size, size_t count); +static void read_mrsets (struct sfm_reader *, size_t size, size_t count, + struct dictionary *); static void read_display_parameters (struct sfm_reader *, size_t size, size_t count, struct dictionary *); @@ -282,7 +293,7 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, r->file = fn_open (fh_get_file_name (fh), "rb"); if (r->file == NULL) { - msg (ME, _("Error opening \"%s\" for reading as a system file: %s."), + msg (ME, _("Error opening `%s' for reading as a system file: %s."), fh_get_file_name (r->fh), strerror (errno)); goto error; } @@ -413,7 +424,7 @@ close_reader (struct sfm_reader *r) { if (fn_close (fh_get_file_name (r->fh), r->file) == EOF) { - msg (ME, _("Error closing system file \"%s\": %s."), + msg (ME, _("Error closing system file `%s': %s."), fh_get_file_name (r->fh), strerror (errno)); r->error = true; } @@ -584,7 +595,7 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, /* Check variable name. */ if (name[0] == '$' || name[0] == '#') - sys_error (r, "Variable name begins with invalid character `%c'.", + sys_error (r, _("Variable name begins with invalid character `%c'."), name[0]); if (!var_is_plausible_name (name, false)) sys_error (r, _("Invalid variable name `%s'."), name); @@ -606,16 +617,20 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, sys_error (r, _("Variable label indicator field is not 0 or 1.")); if (has_variable_label == 1) { - size_t len; + size_t len, read_len; char label[255 + 1]; len = read_int (r); - if (len >= sizeof label) - sys_error (r, _("Variable %s has label of invalid length %zu."), - name, len); - read_string (r, label, len + 1); + + /* Read up to 255 bytes of label. */ + read_len = MIN (sizeof label - 1, len); + read_string (r, label, read_len + 1); var_set_label (var, label); + /* Skip unread label bytes. */ + skip_bytes (r, len - read_len); + + /* Skip label padding up to multiple of 4 bytes. */ skip_bytes (r, ROUND_UP (len, 4) - len); } @@ -729,7 +744,7 @@ parse_format_spec (struct sfm_reader *r, unsigned int s, else var_set_write_format (v, &f); } - else if (*++format_warning_cnt <= max_format_warnings) + else if (++*format_warning_cnt <= max_format_warnings) { char fmt_string[FMT_STRING_LEN_MAX + 1]; sys_warn (r, _("%s variable %s has invalid %s format %s."), @@ -825,8 +840,9 @@ read_extension_record (struct sfm_reader *r, struct dictionary *dict, break; case 7: - /* Used by the MRSETS command. */ - break; + case 19: + read_mrsets (r, size, count, dict); + return; case 8: /* Used by the SPSS Data Entry software. */ @@ -873,7 +889,9 @@ read_extension_record (struct sfm_reader *r, struct dictionary *dict, return; default: - sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"), + sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send " + "a copy of this file, and the syntax which created it " + "to %s."), subtype, PACKAGE_BUGREPORT); break; } @@ -1007,6 +1025,168 @@ read_machine_float_info (struct sfm_reader *r, size_t size, size_t count) lowest, "LOWEST"); } +/* Read record type 7, subtype 7 or 19. */ +static void +read_mrsets (struct sfm_reader *r, size_t size, size_t count, + struct dictionary *dict) +{ + struct text_record *text; + struct mrset *mrset; + + text = open_text_record (r, size * count); + for (;;) + { + const char *name, *label, *counted; + struct stringi_set var_names; + size_t allocated_vars; + char delimiter; + int width; + + mrset = xzalloc (sizeof *mrset); + + name = text_get_token (text, ss_cstr ("="), NULL); + if (name == NULL) + break; + mrset->name = xstrdup (name); + + if (mrset->name[0] != '$') + { + sys_warn (r, _("`%s' does not begin with `$' at offset %zu " + "in MRSETS record."), mrset->name, text_pos (text)); + break; + } + + if (text_match (text, 'C')) + { + mrset->type = MRSET_MC; + if (!text_match (text, ' ')) + { + sys_warn (r, _("Missing space following `%c' at offset %zu " + "in MRSETS record."), 'C', text_pos (text)); + break; + } + } + else if (text_match (text, 'D')) + { + mrset->type = MRSET_MD; + mrset->cat_source = MRSET_VARLABELS; + } + else if (text_match (text, 'E')) + { + char *number; + + mrset->type = MRSET_MD; + mrset->cat_source = MRSET_COUNTEDVALUES; + if (!text_match (text, ' ')) + { + sys_warn (r, _("Missing space following `%c' at offset %zu " + "in MRSETS record."), 'E', text_pos (text)); + break; + } + + number = text_get_token (text, ss_cstr (" "), NULL); + if (!strcmp (number, "11")) + mrset->label_from_var_label = true; + else if (strcmp (number, "1")) + sys_warn (r, _("Unexpected label source value `%s' " + "following `E' at offset %zu in MRSETS record."), + number, text_pos (text)); + } + else + { + sys_warn (r, _("Missing `C', `D', or `E' at offset %zu " + "in MRSETS record."), + text_pos (text)); + break; + } + + if (mrset->type == MRSET_MD) + { + counted = text_parse_counted_string (r, text); + if (counted == NULL) + break; + } + + label = text_parse_counted_string (r, text); + if (label == NULL) + break; + mrset->label = label[0] != '\0' ? xstrdup (label) : NULL; + + stringi_set_init (&var_names); + allocated_vars = 0; + width = INT_MAX; + do + { + struct variable *var; + const char *var_name; + + var_name = text_get_token (text, ss_cstr (" \n"), &delimiter); + if (var_name == NULL) + { + sys_warn (r, _("Missing new-line parsing variable names " + "at offset %zu in MRSETS record."), + text_pos (text)); + break; + } + + var = lookup_var_by_short_name (dict, var_name); + if (var == NULL) + continue; + if (!stringi_set_insert (&var_names, var_name)) + { + sys_warn (r, _("Duplicate variable name %s " + "at offset %zu in MRSETS record."), + var_name, text_pos (text)); + continue; + } + + if (mrset->label == NULL && mrset->label_from_var_label + && var_has_label (var)) + mrset->label = xstrdup (var_get_label (var)); + + if (mrset->n_vars + && var_get_type (var) != var_get_type (mrset->vars[0])) + { + sys_warn (r, _("MRSET %s contains both string and " + "numeric variables."), name); + continue; + } + width = MIN (width, var_get_width (var)); + + if (mrset->n_vars >= allocated_vars) + mrset->vars = x2nrealloc (mrset->vars, &allocated_vars, + sizeof *mrset->vars); + mrset->vars[mrset->n_vars++] = var; + } + while (delimiter != '\n'); + + if (mrset->n_vars < 2) + { + sys_warn (r, _("MRSET %s has only %zu variables."), mrset->name, + mrset->n_vars); + mrset_destroy (mrset); + continue; + } + + if (mrset->type == MRSET_MD) + { + mrset->width = width; + value_init (&mrset->counted, width); + if (width == 0) + mrset->counted.f = strtod (counted, NULL); + else + value_copy_str_rpad (&mrset->counted, width, + (const uint8_t *) counted, ' '); + } + + dict_add_mrset (dict, mrset); + mrset = NULL; + stringi_set_destroy (&var_names); + } + mrset_destroy (mrset); + close_text_record (r, text); +} + /* Read record type 7, subtype 11, which specifies how variables should be displayed in GUI environments. */ static void @@ -1161,7 +1341,7 @@ read_long_string_map (struct sfm_reader *r, size_t size, size_t count, if (length < 1 || length > MAX_STRING) { sys_warn (r, _("%s listed as string of invalid length %s " - "in very length string record."), + "in very long string record."), var_get_name (var), length_s); continue; } @@ -1191,7 +1371,7 @@ read_long_string_map (struct sfm_reader *r, size_t size, size_t count, var_set_short_name (var, i, var_get_short_name (seg, 0)); if (ROUND_UP (width, 8) != ROUND_UP (alloc_width, 8)) sys_error (r, _("Very long string with width %ld has segment %d " - "of width %d (expected %d)"), + "of width %d (expected %d)."), length, i, width, alloc_width); } dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1); @@ -1331,7 +1511,7 @@ read_value_labels (struct sfm_reader *r, sys_warn (r, _("Duplicate value label for %g on %s."), label->value.f, var_get_name (v)); else - sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."), + sys_warn (r, _("Duplicate value label for `%.*s' on %s."), max_width, value_str (&label->value, max_width), var_get_name (v)); } @@ -1355,7 +1535,7 @@ read_attributes (struct sfm_reader *r, struct text_record *text, int index; /* Parse the key. */ - key = text_get_token (text, ss_cstr ("(")); + key = text_get_token (text, ss_cstr ("("), NULL); if (key == NULL) return; @@ -1366,10 +1546,10 @@ read_attributes (struct sfm_reader *r, struct text_record *text, char *value; size_t length; - value = text_get_token (text, ss_cstr ("\n")); + value = text_get_token (text, ss_cstr ("\n"), NULL); if (value == NULL) { - text_warn (r, text, _("Error parsing attribute value %s[%d]"), + text_warn (r, text, _("Error parsing attribute value %s[%d]."), key, index); break; } @@ -1383,7 +1563,7 @@ read_attributes (struct sfm_reader *r, struct text_record *text, else { text_warn (r, text, - _("Attribute value %s[%d] is not quoted: %s"), + _("Attribute value %s[%d] is not quoted: %s."), key, index, value); attribute_add_value (attr, value); } @@ -1472,7 +1652,7 @@ read_long_string_value_labels (struct sfm_reader *r, { sys_warn (r, _("Ignoring long string value record for variable %s " "because the record's width (%d) does not match the " - "variable's width (%d)"), + "variable's width (%d)."), var_name, width, var_get_width (v)); skip_long_string_value_labels (r, n_labels); continue; @@ -1508,11 +1688,11 @@ read_long_string_value_labels (struct sfm_reader *r, first 255 bytes. The maximum documented length of a label is 120 bytes so this is more than generous. */ - skip_bytes (r, sizeof label - (label_length + 1)); + skip_bytes (r, (label_length + 1) - sizeof label); } if (!skip && !var_add_value_label (v, &value, label)) - sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."), + sys_warn (r, _("Duplicate value label for `%.*s' on %s."), width, value_str (&value, width), var_get_name (v)); } } @@ -1530,7 +1710,7 @@ read_variable_attributes (struct sfm_reader *r, for (;;) { struct variable *var; - if (!text_read_short_name (r, dict, text, ss_cstr (":"), &var)) + if (!text_read_variable_name (r, dict, text, ss_cstr (":"), &var)) break; read_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL); } @@ -1595,11 +1775,11 @@ sys_file_casereader_read (struct casereader *reader, void *r_) return c; eof: - case_unref (c); if (i != 0) partial_record (r); if (r->case_cnt != -1) read_error (reader, r); + case_unref (c); return NULL; } @@ -1770,7 +1950,7 @@ read_compressed_string (struct sfm_reader *r, uint8_t *dst) { r->corruption_warning = true; sys_warn (r, _("Possible compressed data corruption: " - "string contains compressed integer (opcode %d)"), + "string contains compressed integer (opcode %d)."), opcode); } } @@ -1952,7 +2132,7 @@ read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict, if (!text_read_short_name (r, dict, text, ss_cstr ("="), var)) return false; - *value = text_get_token (text, ss_buffer ("\t\0", 2)); + *value = text_get_token (text, ss_buffer ("\t\0", 2), NULL); if (*value == NULL) return false; @@ -1964,18 +2144,39 @@ read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict, } } +static bool +text_read_variable_name (struct sfm_reader *r, struct dictionary *dict, + struct text_record *text, struct substring delimiters, + struct variable **var) +{ + char *name; + + name = text_get_token (text, delimiters, NULL); + if (name == NULL) + return false; + + *var = dict_lookup_var (dict, name); + if (*var != NULL) + return true; + + text_warn (r, text, _("Dictionary record refers to unknown variable %s."), + name); + return false; +} + + static bool text_read_short_name (struct sfm_reader *r, struct dictionary *dict, struct text_record *text, struct substring delimiters, struct variable **var) { - char *short_name = text_get_token (text, delimiters); + char *short_name = text_get_token (text, delimiters, NULL); if (short_name == NULL) return false; *var = lookup_var_by_short_name (dict, short_name); if (*var == NULL) - text_warn (r, text, _("Variable map refers to unknown variable %s."), + text_warn (r, text, _("Dictionary record refers to unknown variable %s."), short_name); return true; } @@ -1997,16 +2198,78 @@ text_warn (struct sfm_reader *r, struct text_record *text, } static char * -text_get_token (struct text_record *text, struct substring delimiters) +text_get_token (struct text_record *text, struct substring delimiters, + char *delimiter) { struct substring token; + char *end; if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token)) return NULL; - ss_data (token)[ss_length (token)] = '\0'; + + end = &ss_data (token)[ss_length (token)]; + if (delimiter != NULL) + *delimiter = *end; + *end = '\0'; return ss_data (token); } +/* Reads a integer value expressed in decimal, then a space, then a string that + consists of exactly as many bytes as specified by the integer, then a space, + from TEXT. Returns the string, null-terminated, as a subset of TEXT's + buffer (so the caller should not free the string). */ +static const char * +text_parse_counted_string (struct sfm_reader *r, struct text_record *text) +{ + size_t start; + size_t n; + char *s; + + start = text->pos; + n = 0; + for (;;) + { + int c = text->buffer.string[text->pos]; + if (c < '0' || c > '9') + break; + n = (n * 10) + (c - '0'); + text->pos++; + } + if (start == text->pos) + { + sys_warn (r, _("Expecting digit at offset %zu in MRSETS record."), + text->pos); + return NULL; + } + + if (!text_match (text, ' ')) + { + sys_warn (r, _("Expecting space at offset %zu in MRSETS record."), + text->pos); + return NULL; + } + + if (text->pos + n > text->buffer.length) + { + sys_warn (r, _("%zu-byte string starting at offset %zu " + "exceeds record length %zu."), + n, text->pos, text->buffer.length); + return NULL; + } + + s = &text->buffer.string[text->pos]; + if (s[n] != ' ') + { + sys_warn (r, + _("Expecting space at offset %zu following %zu-byte string."), + text->pos + n, n); + return NULL; + } + s[n] = '\0'; + text->pos += n + 1; + return s; +} + static bool text_match (struct text_record *text, char c) { @@ -2018,6 +2281,13 @@ text_match (struct text_record *text, char c) else return false; } + +/* Returns the current byte offset inside the TEXT's string. */ +static size_t +text_pos (const struct text_record *text) +{ + return text->pos; +} /* Messages. */ @@ -2029,14 +2299,16 @@ sys_msg (struct sfm_reader *r, int class, const char *format, va_list args) struct string text; ds_init_empty (&text); - ds_put_format (&text, "\"%s\" near offset 0x%lx: ", - fh_get_file_name (r->fh), (unsigned long) ftell (r->file)); + ds_put_format (&text, "`%s' near offset 0x%llx: ", + fh_get_file_name (r->fh), (long long int) ftello (r->file)); ds_put_vformat (&text, format, args); m.category = msg_class_to_category (class); m.severity = msg_class_to_severity (class); m.where.file_name = NULL; m.where.line_number = 0; + m.where.first_column = 0; + m.where.last_column = 0; m.text = ds_cstr (&text); msg_emit (&m);