X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=07471e8831990ae4d757b2e5a424e3182e5ef013;hb=refs%2Fbuilds%2F20120402030503%2Fpspp;hp=336549b7614248bee9ebaf7077dbcd1b0f2add1d;hpb=261869b71aa60b8974c4a6b98e35b74af5d11de5;p=pspp diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 336549b761..07471e8831 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -88,6 +88,7 @@ enum /* Fields from the top-level header record. */ struct sfm_header_record { + char magic[5]; /* First 4 bytes of file, then null. */ int weight_idx; /* 0 if unweighted, otherwise a var index. */ int nominal_case_size; /* Number of var positions. */ @@ -213,6 +214,7 @@ static void skip_extension_record (struct sfm_reader *, int subtype); static const char *choose_encoding ( struct sfm_reader *, + const struct sfm_header_record *, const struct sfm_extension_record *ext_integer, const struct sfm_extension_record *ext_encoding); @@ -310,12 +312,17 @@ sfm_read_info_destroy (struct sfm_read_info *info) /* Opens the system file designated by file handle FH for reading. Reads the system file's dictionary into *DICT. + Ordinarily the reader attempts to automatically detect the character + encoding based on the file's contents. This isn't always possible, + especially for files written by old versions of SPSS or PSPP, so specifying + a nonnull ENCODING overrides the choice of character encoding. + If INFO is non-null, then it receives additional info about the system file, which the caller must eventually free with sfm_read_info_destroy() when it is no longer needed. */ struct casereader * -sfm_open_reader (struct file_handle *fh, struct dictionary **dictp, - struct sfm_read_info *infop) +sfm_open_reader (struct file_handle *fh, const char *volatile encoding, + struct dictionary **dictp, struct sfm_read_info *infop) { struct sfm_reader *volatile r = NULL; struct sfm_read_info info; @@ -452,8 +459,10 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dictp, First, figure out the correct character encoding, because this determines how the rest of the header data is to be interpreted. */ - dict = dict_create (choose_encoding (r, extensions[EXT_INTEGER], - extensions[EXT_ENCODING])); + dict = dict_create (encoding + ? encoding + : choose_encoding (r, &header, extensions[EXT_INTEGER], + extensions[EXT_ENCODING])); r->encoding = dict_get_encoding (dict); /* These records don't use variables at all. */ @@ -598,13 +607,13 @@ sys_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) bool sfm_detect (FILE *file) { - char rec_type[5]; + char magic[5]; - if (fread (rec_type, 4, 1, file) != 1) + if (fread (magic, 4, 1, file) != 1) return false; - rec_type[4] = '\0'; + magic[4] = '\0'; - return !strcmp ("$FL2", rec_type); + return !strcmp (ASCII_MAGIC, magic) || !strcmp (EBCDIC_MAGIC, magic); } /* Reads the global header of the system file. Initializes *HEADER and *INFO, @@ -614,14 +623,14 @@ static void read_header (struct sfm_reader *r, struct sfm_read_info *info, struct sfm_header_record *header) { - char rec_type[5]; uint8_t raw_layout_code[4]; uint8_t raw_bias[8]; - read_string (r, rec_type, sizeof rec_type); + read_string (r, header->magic, sizeof header->magic); read_string (r, header->eye_catcher, sizeof header->eye_catcher); - if (strcmp ("$FL2", rec_type) != 0) + if (strcmp (ASCII_MAGIC, header->magic) + && strcmp (EBCDIC_MAGIC, header->magic)) sys_error (r, 0, _("This is not an SPSS system file.")); /* Identify integer format. */ @@ -1185,6 +1194,7 @@ parse_machine_integer_info (struct sfm_reader *r, static const char * choose_encoding (struct sfm_reader *r, + const struct sfm_header_record *header, const struct sfm_extension_record *ext_integer, const struct sfm_extension_record *ext_encoding) { @@ -1223,6 +1233,10 @@ choose_encoding (struct sfm_reader *r, } } + /* If the file magic number is EBCDIC then its character data is too. */ + if (!strcmp (header->magic, EBCDIC_MAGIC)) + return "EBCDIC-US"; + return locale_charset (); } @@ -2425,7 +2439,7 @@ text_parse_counted_string (struct sfm_reader *r, struct text_record *text) start = text->pos; n = 0; - for (;;) + while (text->pos < text->buffer.length) { int c = text->buffer.string[text->pos]; if (c < '0' || c > '9') @@ -2433,7 +2447,7 @@ text_parse_counted_string (struct sfm_reader *r, struct text_record *text) n = (n * 10) + (c - '0'); text->pos++; } - if (start == text->pos) + if (text->pos >= text->buffer.length || start == text->pos) { sys_warn (r, text->start, _("Expecting digit at offset %zu in MRSETS record."),