X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fpor-file-reader.c;h=94a2faf33b1ff4849c1d19a7f4801f6ec28d4b28;hb=refs%2Fheads%2Fctables12;hp=cd8b213e8d74062bffd5b6e344a1d29047b7f9a9;hpb=8f04b0ced35a66cfdebefbcb53c81979add36ca3;p=pspp diff --git a/src/data/por-file-reader.c b/src/data/por-file-reader.c index cd8b213e8d..94a2faf33b 100644 --- a/src/data/por-file-reader.c +++ b/src/data/por-file-reader.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012, 2013, 2014, 2015 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -15,7 +15,6 @@ along with this program. If not, see . */ #include -#include "por-file-reader.h" #include #include @@ -26,25 +25,27 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "minmax.h" -#include "xalloc.h" +#include "data/any-reader.h" +#include "data/casereader-provider.h" +#include "data/casereader.h" +#include "data/dictionary.h" +#include "data/file-handle-def.h" +#include "data/file-name.h" +#include "data/format.h" +#include "data/missing-values.h" +#include "data/short-names.h" +#include "data/value-labels.h" +#include "data/variable.h" +#include "libpspp/compiler.h" +#include "libpspp/i18n.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "libpspp/pool.h" +#include "libpspp/str.h" + +#include "gl/minmax.h" +#include "gl/xalloc.h" +#include "gl/xmemdup0.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -63,17 +64,20 @@ static const char portable_to_local[256] = /* Portable file reader. */ struct pfm_reader { + struct any_reader any_reader; struct pool *pool; /* All the portable file state. */ jmp_buf bail_out; /* longjmp() target for error handling. */ + struct dictionary *dict; + struct any_read_info info; struct file_handle *fh; /* File handle. */ struct fh_lock *lock; /* Read lock for file. */ FILE *file; /* File stream. */ int line_length; /* Number of characters so far on this line. */ char cc; /* Current character. */ char *trans; /* 256-byte character set translation table. */ - int var_cnt; /* Number of variables. */ + int n_vars; /* Number of variables. */ int weight_index; /* 0-based index of weight variable, or -1. */ struct caseproto *proto; /* Format of output cases. */ bool ok; /* Set false on I/O error. */ @@ -81,6 +85,13 @@ struct pfm_reader static const struct casereader_class por_file_casereader_class; +static struct pfm_reader * +pfm_reader_cast (const struct any_reader *r_) +{ + assert (r_->klass == &por_file_reader_class); + return UP_CAST (r_, struct pfm_reader, any_reader); +} + static void error (struct pfm_reader *r, const char *msg,...) PRINTF_FORMAT (2, 3) @@ -91,24 +102,23 @@ error (struct pfm_reader *r, const char *msg,...) static void error (struct pfm_reader *r, const char *msg, ...) { - struct msg m; struct string text; va_list args; ds_init_empty (&text); - ds_put_format (&text, _("portable file %s corrupt at offset 0x%lx: "), - fh_get_file_name (r->fh), ftell (r->file)); + ds_put_format (&text, _("portable file %s corrupt at offset 0x%llx: "), + fh_get_file_name (r->fh), (long long int) ftello (r->file)); va_start (args, msg); ds_put_vformat (&text, msg, args); va_end (args); - m.category = MSG_GENERAL; - m.severity = MSG_ERROR; - m.where.file_name = NULL; - m.where.line_number = 0; - m.text = ds_cstr (&text); - - msg_emit (&m); + struct msg *m = xmalloc (sizeof *m); + *m = (struct msg) { + .category = MSG_C_GENERAL, + .severity = MSG_S_ERROR, + .text = ds_steal_cstr (&text), + }; + msg_emit (m); r->ok = false; @@ -120,40 +130,40 @@ error (struct pfm_reader *r, const char *msg, ...) static void warning (struct pfm_reader *r, const char *msg, ...) { - struct msg m; struct string text; va_list args; ds_init_empty (&text); - ds_put_format (&text, _("reading portable file %s at offset 0x%lx: "), - fh_get_file_name (r->fh), ftell (r->file)); + ds_put_format (&text, _("reading portable file %s at offset 0x%llx: "), + fh_get_file_name (r->fh), (long long int) ftello (r->file)); va_start (args, msg); ds_put_vformat (&text, msg, args); va_end (args); - m.category = MSG_GENERAL; - m.severity = MSG_WARNING; - m.where.file_name = NULL; - m.where.line_number = 0; - m.text = ds_cstr (&text); - - msg_emit (&m); + struct msg *m = xmalloc (sizeof *m); + *m = (struct msg) { + .category = MSG_C_GENERAL, + .severity = MSG_S_WARNING, + .text = ds_steal_cstr (&text), + }; + msg_emit (m); } /* Close and destroy R. Returns false if an error was detected on R, true otherwise. */ static bool -close_reader (struct pfm_reader *r) +pfm_close (struct any_reader *r_) { + struct pfm_reader *r = pfm_reader_cast (r_); bool ok; - if (r == NULL) - return true; + dict_unref (r->dict); + any_read_info_destroy (&r->info); if (r->file) { - if (fn_close (fh_get_file_name (r->fh), r->file) == EOF) + if (fn_close (r->fh, r->file) == EOF) { - msg (ME, _("Error closing portable file \"%s\": %s."), + msg (ME, _("Error closing portable file `%s': %s."), fh_get_file_name (r->fh), strerror (errno)); r->ok = false; } @@ -174,7 +184,7 @@ static void por_file_casereader_destroy (struct casereader *reader, void *r_) { struct pfm_reader *r = r_; - if (!close_reader (r)) + if (!pfm_close (&r->any_reader)) casereader_force_error (reader); } @@ -228,7 +238,7 @@ match (struct pfm_reader *r, int c) } static void read_header (struct pfm_reader *); -static void read_version_data (struct pfm_reader *, struct pfm_read_info *); +static void read_version_data (struct pfm_reader *, struct any_read_info *); static void read_variables (struct pfm_reader *, struct dictionary *); static void read_value_label (struct pfm_reader *, struct dictionary *); static void read_documents (struct pfm_reader *, struct dictionary *); @@ -236,18 +246,18 @@ static void read_documents (struct pfm_reader *, struct dictionary *); /* Reads the dictionary from file with handle H, and returns it in a dictionary structure. This dictionary may be modified in order to rename, reorder, and delete variables, etc. */ -struct casereader * -pfm_open_reader (struct file_handle *fh, struct dictionary **dict, - struct pfm_read_info *info) +static struct any_reader * +pfm_open (struct file_handle *fh) { struct pool *volatile pool = NULL; struct pfm_reader *volatile r = NULL; - *dict = dict_create (); - /* Create and initialize reader. */ pool = pool_create (); r = pool_alloc (pool, sizeof *r); + r->any_reader.klass = &por_file_reader_class; + r->dict = dict_create (get_default_encoding ()); + memset (&r->info, 0, sizeof r->info); r->pool = pool; r->fh = fh_ref (fh); r->lock = NULL; @@ -255,7 +265,7 @@ pfm_open_reader (struct file_handle *fh, struct dictionary **dict, r->line_length = 0; r->weight_index = -1; r->trans = NULL; - r->var_cnt = 0; + r->n_vars = 0; r->proto = NULL; r->ok = true; if (setjmp (r->bail_out)) @@ -269,10 +279,10 @@ pfm_open_reader (struct file_handle *fh, struct dictionary **dict, goto error; /* Open file. */ - r->file = fn_open (fh_get_file_name (r->fh), "rb"); + r->file = fn_open (r->fh, "rb"); if (r->file == NULL) { - msg (ME, _("An error occurred while opening \"%s\" for reading " + msg (ME, _("An error occurred while opening `%s' for reading " "as a portable file: %s."), fh_get_file_name (r->fh), strerror (errno)); goto error; @@ -280,31 +290,47 @@ pfm_open_reader (struct file_handle *fh, struct dictionary **dict, /* Read header, version, date info, product id, variables. */ read_header (r); - read_version_data (r, info); - read_variables (r, *dict); + read_version_data (r, &r->info); + read_variables (r, r->dict); /* Read value labels. */ while (match (r, 'D')) - read_value_label (r, *dict); + read_value_label (r, r->dict); /* Read documents. */ if (match (r, 'E')) - read_documents (r, *dict); + read_documents (r, r->dict); /* Check that we've made it to the data. */ if (!match (r, 'F')) error (r, _("Data record expected.")); - r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool); - return casereader_create_sequential (NULL, r->proto, CASENUMBER_MAX, - &por_file_casereader_class, r); + r->proto = caseproto_ref_pool (dict_get_proto (r->dict), r->pool); + return &r->any_reader; error: - close_reader (r); - dict_destroy (*dict); - *dict = NULL; + pfm_close (&r->any_reader); return NULL; } + +static struct casereader * +pfm_decode (struct any_reader *r_, const char *encoding UNUSED, + struct dictionary **dictp, struct any_read_info *info) +{ + struct pfm_reader *r = pfm_reader_cast (r_); + + *dictp = r->dict; + r->dict = NULL; + + if (info) + { + *info = r->info; + memset (&r->info, 0, sizeof r->info); + } + + return casereader_create_sequential (NULL, r->proto, CASENUMBER_MAX, + &por_file_casereader_class, r); +} /* Returns the value of base-30 digit C, or -1 if C is not a base-30 digit. */ @@ -528,11 +554,11 @@ read_header (struct pfm_reader *r) /* Reads the version and date info record, as well as product and subproduct identification records if present. */ static void -read_version_data (struct pfm_reader *r, struct pfm_read_info *info) +read_version_data (struct pfm_reader *r, struct any_read_info *info) { static const char empty_string[] = ""; char *date, *time; - const char *product, *author, *subproduct; + const char *product, *subproduct; int i; /* Read file. */ @@ -541,7 +567,11 @@ read_version_data (struct pfm_reader *r, struct pfm_read_info *info) date = read_pool_string (r); time = read_pool_string (r); product = match (r, '1') ? read_pool_string (r) : empty_string; - author = match (r, '2') ? read_pool_string (r) : empty_string; + if (match (r, '2')) + { + /* Skip "author" field. */ + read_pool_string (r); + } subproduct = match (r, '3') ? read_pool_string (r) : empty_string; /* Validate file. */ @@ -553,16 +583,25 @@ read_version_data (struct pfm_reader *r, struct pfm_read_info *info) /* Save file info. */ if (info != NULL) { + memset (info, 0, sizeof *info); + + info->float_format = FLOAT_NATIVE_DOUBLE; + info->integer_format = INTEGER_NATIVE; + info->compression = ANY_COMP_NONE; + info->n_cases = -1; + /* Date. */ + info->creation_date = xmalloc (11); for (i = 0; i < 8; i++) { static const int map[] = {6, 7, 8, 9, 3, 4, 0, 1}; info->creation_date[map[i]] = date[i]; } info->creation_date[2] = info->creation_date[5] = ' '; - info->creation_date[10] = 0; + info->creation_date[10] = '\0'; /* Time. */ + info->creation_time = xmalloc (9); for (i = 0; i < 6; i++) { static const int map[] = {0, 1, 3, 4, 6, 7}; @@ -572,8 +611,8 @@ read_version_data (struct pfm_reader *r, struct pfm_read_info *info) info->creation_time[8] = 0; /* Product. */ - str_copy_trunc (info->product, sizeof info->product, product); - str_copy_trunc (info->subproduct, sizeof info->subproduct, subproduct); + info->product = xstrdup (product); + info->product_ext = xstrdup (subproduct); } } @@ -641,12 +680,12 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) if (!match (r, '4')) error (r, _("Expected variable count record.")); - r->var_cnt = read_int (r); - if (r->var_cnt <= 0) - error (r, _("Invalid number of variables %d."), r->var_cnt); + r->n_vars = read_int (r); + if (r->n_vars <= 0) + error (r, _("Invalid number of variables %d."), r->n_vars); - /* Purpose of this value is unknown. It is typically 161. */ - read_int (r); + if (match (r, '5')) + read_int (r); if (match (r, '6')) { @@ -655,7 +694,7 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) error (r, _("Weight variable name (%s) truncated."), weight_name); } - for (i = 0; i < r->var_cnt; i++) + for (i = 0; i < r->n_vars; i++) { int width; char name[256]; @@ -677,7 +716,8 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) for (j = 0; j < 6; j++) fmt[j] = read_int (r); - if (!var_is_valid_name (name, false) || *name == '#' || *name == '$') + if (!dict_id_is_valid (dict, name, false) + || *name == '#' || *name == '$') error (r, _("Invalid variable name `%s' in position %d."), name, i); str_uppercase (name); @@ -687,17 +727,15 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) v = dict_create_var (dict, name, width); if (v == NULL) { - int i; - for (i = 1; i < 100000; i++) + unsigned long int i; + for (i = 1; ; i++) { - char try_name[VAR_NAME_LEN + 1]; - sprintf (try_name, "%.*s_%d", VAR_NAME_LEN - 6, name, i); + char *try_name = xasprintf ("%s_%lu", name, i); v = dict_create_var (dict, try_name, width); + free (try_name); if (v != NULL) break; } - if (v == NULL) - error (r, _("Duplicate variable name %s in position %d."), name, i); warning (r, _("Duplicate variable name %s in position %d renamed " "to %s."), name, i, var_get_name (v)); } @@ -739,7 +777,7 @@ read_variables (struct pfm_reader *r, struct dictionary *dict) { char label[256]; read_string (r, label); - var_set_label (v, label); + var_set_label (v, label); /* XXX */ } } @@ -821,15 +859,12 @@ read_value_label (struct pfm_reader *r, struct dictionary *dict) static void read_documents (struct pfm_reader *r, struct dictionary *dict) { - int line_cnt; - int i; - - line_cnt = read_int (r); - for (i = 0; i < line_cnt; i++) + int n_lines = read_int (r); + for (int i = 0; i < n_lines; i++) { char line[256]; read_string (r, line); - dict_add_document_line (dict, line); + dict_add_document_line (dict, line, false); } } @@ -858,12 +893,12 @@ por_file_casereader_read (struct casereader *reader, void *r_) return NULL; } - for (i = 0; i < r->var_cnt; i++) + for (i = 0; i < r->n_vars; i++) { int width = caseproto_get_width (r->proto, i); if (width == 0) - case_data_rw_idx (c, i)->f = read_float (r); + *case_num_rw_idx (c, i) = read_float (r); else { uint8_t buf[256]; @@ -875,24 +910,37 @@ por_file_casereader_read (struct casereader *reader, void *r_) return c; } -/* Returns true if FILE is an SPSS portable file, - false otherwise. */ -bool +/* Detects whether FILE is an SPSS portable file. Returns 1 if so, 0 if not, + and a negative errno value if there is an error reading FILE. */ +static int pfm_detect (FILE *file) { unsigned char header[464]; char trans[256]; - int cooked_cnt, raw_cnt; + int n_cooked, n_raws, line_len; int i; - cooked_cnt = raw_cnt = 0; - while (cooked_cnt < sizeof header) + n_cooked = n_raws = 0; + line_len = 0; + while (n_cooked < sizeof header) { int c = getc (file); - if (c == EOF || raw_cnt++ > 512) - return false; - else if (c != '\n' && c != '\r') - header[cooked_cnt++] = c; + if (c == EOF || n_raws++ > 512) + return ferror (file) ? -errno : 0; + else if (c == '\n') + { + while (line_len < 80 && n_cooked < sizeof header) + { + header[n_cooked++] = ' '; + line_len++; + } + line_len = 0; + } + else if (c != '\r') + { + header[n_cooked++] = c; + line_len++; + } } memset (trans, 0, 256); @@ -905,9 +953,9 @@ pfm_detect (FILE *file) for (i = 0; i < 8; i++) if (trans[header[i + 456]] != "SPSSPORT"[i]) - return false; + return 0; - return true; + return 1; } static const struct casereader_class por_file_casereader_class = @@ -917,3 +965,13 @@ static const struct casereader_class por_file_casereader_class = NULL, NULL, }; + +const struct any_reader_class por_file_reader_class = + { + N_("SPSS Portable File"), + pfm_detect, + pfm_open, + pfm_close, + pfm_decode, + NULL, /* get_strings */ + };