X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fsys-file-reader.c;h=3369edf738fd2cb10191fbf6e9856f987a25b4ab;hb=refs%2Fbuilds%2F20120730001945%2Fpspp;hp=024b4ae1827994d1d6d60a7820e4535295190f5d;hpb=dff37440177a355bfc0cf9ff56428114e29f5106;p=pspp diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 024b4ae182..3369edf738 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -312,12 +312,17 @@ sfm_read_info_destroy (struct sfm_read_info *info) /* Opens the system file designated by file handle FH for reading. Reads the system file's dictionary into *DICT. + Ordinarily the reader attempts to automatically detect the character + encoding based on the file's contents. This isn't always possible, + especially for files written by old versions of SPSS or PSPP, so specifying + a nonnull ENCODING overrides the choice of character encoding. + If INFO is non-null, then it receives additional info about the system file, which the caller must eventually free with sfm_read_info_destroy() when it is no longer needed. */ struct casereader * -sfm_open_reader (struct file_handle *fh, struct dictionary **dictp, - struct sfm_read_info *infop) +sfm_open_reader (struct file_handle *fh, const char *volatile encoding, + struct dictionary **dictp, struct sfm_read_info *infop) { struct sfm_reader *volatile r = NULL; struct sfm_read_info info; @@ -454,8 +459,10 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dictp, First, figure out the correct character encoding, because this determines how the rest of the header data is to be interpreted. */ - dict = dict_create (choose_encoding (r, &header, extensions[EXT_INTEGER], - extensions[EXT_ENCODING])); + dict = dict_create (encoding + ? encoding + : choose_encoding (r, &header, extensions[EXT_INTEGER], + extensions[EXT_ENCODING])); r->encoding = dict_get_encoding (dict); /* These records don't use variables at all. */ @@ -1584,7 +1591,6 @@ parse_long_var_name_map (struct sfm_reader *r, while (read_variable_to_value_pair (r, dict, text, &var, &long_name)) { /* Validate long name. */ - /* XXX need to reencode name to UTF-8 */ if (!dict_id_is_valid (dict, long_name, false)) { sys_warn (r, record->pos, @@ -2432,7 +2438,7 @@ text_parse_counted_string (struct sfm_reader *r, struct text_record *text) start = text->pos; n = 0; - for (;;) + while (text->pos < text->buffer.length) { int c = text->buffer.string[text->pos]; if (c < '0' || c > '9') @@ -2440,7 +2446,7 @@ text_parse_counted_string (struct sfm_reader *r, struct text_record *text) n = (n * 10) + (c - '0'); text->pos++; } - if (start == text->pos) + if (text->pos >= text->buffer.length || start == text->pos) { sys_warn (r, text->start, _("Expecting digit at offset %zu in MRSETS record."),