X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=src%2Fdata%2Fgnumeric-reader.c;h=5fee6919c0a51891f5240328606086cf331eb15b;hb=c64c9e72a7040c8b36aa8709848efc5c37b7b72e;hp=197c1d1a59f208981cbb9c75a008b995bde24e89;hpb=47d2b8fc4c255ea1bc2c2874f853a20895ed0494;p=pspp diff --git a/src/data/gnumeric-reader.c b/src/data/gnumeric-reader.c index 197c1d1a59..5fee6919c0 100644 --- a/src/data/gnumeric-reader.c +++ b/src/data/gnumeric-reader.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. + Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013, 2016 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -16,31 +16,8 @@ #include -#include "libpspp/message.h" -#include "libpspp/misc.h" - -#include "gl/minmax.h" -#include "gl/c-strtod.h" - -#include "gettext.h" -#define _(msgid) gettext (msgid) -#define N_(msgid) (msgid) - -#include "spreadsheet-reader.h" - -#if !GNM_SUPPORT - -struct casereader * -gnumeric_open_reader (const struct spreadsheet_read_options *opts, struct dictionary **dict) -{ - msg (ME, _("Support for %s files was not compiled into this installation of PSPP"), "Gnumeric"); - - return NULL; -} - -#else - #include "data/gnumeric-reader.h" +#include "spreadsheet-reader.h" #include #include @@ -50,15 +27,43 @@ gnumeric_open_reader (const struct spreadsheet_read_options *opts, struct dictio #include "data/case.h" #include "data/casereader-provider.h" +#include "data/data-in.h" #include "data/dictionary.h" +#include "data/format.h" #include "data/identifier.h" #include "data/value.h" #include "data/variable.h" #include "libpspp/i18n.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" #include "libpspp/str.h" +#include "gl/c-strtod.h" +#include "gl/minmax.h" #include "gl/xalloc.h" +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) (msgid) + +/* Shamelessly lifted from the Gnumeric sources: + https://git.gnome.org/browse/gnumeric/tree/src/value.h + */ +enum gnm_value_type +{ + VALUE_EMPTY = 10, + VALUE_BOOLEAN = 20, + VALUE_INTEGER = 30, /* Note, this was removed from gnumeric in 2006 - old versions may of + course still be around. New ones are supposed to use float.*/ + VALUE_FLOAT = 40, + VALUE_ERROR = 50, + VALUE_STRING = 60, + VALUE_CELLRANGE = 70, + VALUE_ARRAY = 80 +}; + + + static void gnm_file_casereader_destroy (struct casereader *, void *); static struct ccase *gnm_file_casereader_read (struct casereader *, void *); @@ -100,7 +105,7 @@ struct sheet_detail int maxrow; }; -struct state_data +struct state_data { /* The libxml reader for this instance */ xmlTextReaderPtr xtr; @@ -128,7 +133,6 @@ state_data_destroy (struct state_data *sd) struct gnumeric_reader { struct spreadsheet spreadsheet; - int ref_cnt; struct state_data rsd; struct state_data msd; @@ -137,7 +141,7 @@ struct gnumeric_reader int stop_col; int start_row; int stop_row; - + struct sheet_detail *sheets; const xmlChar *target_sheet; @@ -147,15 +151,17 @@ struct gnumeric_reader struct dictionary *dict; struct ccase *first_case; bool used_first_case; + + enum gnm_value_type vtype; }; void -gnumeric_destroy (struct spreadsheet *s) +gnumeric_unref (struct spreadsheet *s) { struct gnumeric_reader *r = (struct gnumeric_reader *) s; - if (0 == --r->ref_cnt) + if (0 == --s->ref_cnt) { int i; @@ -163,10 +169,15 @@ gnumeric_destroy (struct spreadsheet *s) { xmlFree (r->sheets[i].name); } - + + free (r->sheets); state_data_destroy (&r->msd); + dict_unref (r->dict); + + free (s->file_name); + free (r); } } @@ -178,7 +189,7 @@ gnumeric_get_sheet_name (struct spreadsheet *s, int n) struct gnumeric_reader *gr = (struct gnumeric_reader *) s; assert (n < s->n_sheets); - return gr->sheets[n].name; + return gr->sheets[n].name; } @@ -191,19 +202,19 @@ gnumeric_get_sheet_range (struct spreadsheet *s, int n) { int ret; struct gnumeric_reader *gr = (struct gnumeric_reader *) s; - + assert (n < s->n_sheets); - while ( + while ( (gr->sheets[n].stop_col == -1) - && + && (1 == (ret = xmlTextReaderRead (gr->msd.xtr))) ) { process_node (gr, &gr->msd); } - return create_cell_ref ( + return create_cell_range ( gr->sheets[n].start_col, gr->sheets[n].start_row, gr->sheets[n].stop_col, @@ -224,10 +235,10 @@ gnm_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) if (r->first_case && ! r->used_first_case ) case_unref (r->first_case); - if (r->proto) + if (r->proto) caseproto_unref (r->proto); - gnumeric_destroy (&r->spreadsheet); + gnumeric_unref (&r->spreadsheet); } @@ -429,7 +440,7 @@ process_node (struct gnumeric_reader *r, struct state_data *sd) */ static void convert_xml_string_to_value (struct ccase *c, const struct variable *var, - const xmlChar *xv) + const xmlChar *xv, enum gnm_value_type type, int col, int row) { union value *v = case_data_rw (c, var); @@ -437,7 +448,7 @@ convert_xml_string_to_value (struct ccase *c, const struct variable *var, value_set_missing (v, var_get_width (var)); else if ( var_is_alpha (var)) value_copy_str_rpad (v, var_get_width (var), xv, ' '); - else + else if (type == VALUE_FLOAT || type == VALUE_INTEGER) { const char *text = CHAR_CAST (const char *, xv); char *endptr; @@ -447,6 +458,29 @@ convert_xml_string_to_value (struct ccase *c, const struct variable *var, if ( errno != 0 || endptr == text) v->f = SYSMIS; } + else + { + const char *text = CHAR_CAST (const char *, xv); + + const struct fmt_spec *fmt = var_get_write_format (var); + + char *m = data_in (ss_cstr (text), "UTF-8", + fmt->type, + v, + var_get_width (var), + "UTF-8"); + + if (m) + { + char buf [FMT_STRING_LEN_MAX + 1]; + char *cell = create_cell_ref (col, row); + + msg (MW, _("Cannot convert the value in the spreadsheet cell %s to format (%s): %s"), + cell, fmt_to_string (fmt, buf), m); + free (cell); + } + free (m); + } } struct var_spec @@ -454,6 +488,7 @@ struct var_spec char *name; int width; xmlChar *first_value; + int first_type; }; @@ -462,7 +497,7 @@ gnumeric_error_handler (void *ctx, const char *mesg, UNUSED xmlParserSeverities sev, xmlTextReaderLocatorPtr loc) { struct gnumeric_reader *r = ctx; - + msg (MW, _("There was a problem whilst reading the %s file `%s' (near line %d): `%s'"), "Gnumeric", r->spreadsheet.file_name, @@ -472,8 +507,8 @@ gnumeric_error_handler (void *ctx, const char *mesg, static struct gnumeric_reader * gnumeric_reopen (struct gnumeric_reader *r, const char *filename, bool show_errors) -{ - int ret; +{ + int ret = -1; struct state_data *sd; xmlTextReaderPtr xtr; @@ -509,15 +544,15 @@ gnumeric_reopen (struct gnumeric_reader *r, const char *filename, bool show_erro { r = xzalloc (sizeof *r); r->spreadsheet.n_sheets = -1; - r->spreadsheet.file_name = filename; + r->spreadsheet.file_name = strdup (filename); sd = &r->msd; } else { sd = &r->rsd; } - - if (show_errors) + + if (show_errors) xmlTextReaderSetErrorHandler (xtr, gnumeric_error_handler, r); r->target_sheet = NULL; @@ -526,7 +561,8 @@ gnumeric_reopen (struct gnumeric_reader *r, const char *filename, bool show_erro sd->row = sd->col = -1; sd->state = STATE_PRE_INIT; sd->xtr = xtr; - r->ref_cnt++; + r->spreadsheet.ref_cnt++; + /* Advance to the start of the workbook. This gives us some confidence that we are actually dealing with a gnumeric @@ -542,8 +578,7 @@ gnumeric_reopen (struct gnumeric_reader *r, const char *filename, bool show_erro if ( ret != 1) { /* Does not seem to be a gnumeric file */ - xmlFreeTextReader (sd->xtr); - free (r); + gnumeric_unref (&r->spreadsheet); return NULL; } @@ -556,7 +591,7 @@ gnumeric_reopen (struct gnumeric_reader *r, const char *filename, bool show_erro if ( XML_CHAR_ENCODING_UTF8 != xce) { - /* I have been told that ALL gnumeric files are UTF8 encoded. If that is correct, this + /* I have been told that ALL gnumeric files are UTF8 encoded. If that is correct, this can never happen. */ msg (MW, _("The gnumeric file `%s' is encoded as %s instead of the usual UTF-8 encoding. " "Any non-ascii characters will be incorrectly imported."), @@ -582,6 +617,7 @@ struct casereader * gnumeric_make_reader (struct spreadsheet *spreadsheet, const struct spreadsheet_read_options *opts) { + int type = 0; int x = 0; struct gnumeric_reader *r = NULL; unsigned long int vstart = 0; @@ -649,6 +685,7 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet, n_cases --; } + /* Read in the first row of cells, including the headers if read_names was set */ while ( @@ -657,9 +694,29 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet, ) { int idx; + + if (r->rsd.state == STATE_CELL && r->rsd.node_type == XML_READER_TYPE_TEXT) + { + xmlChar *attr = + xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("ValueType")); + + type = _xmlchar_to_int (attr); + + xmlFree (attr); + } + process_node (r, &r->rsd); - if ( r->rsd.row > r->start_row ) break; + if ( r->rsd.row > r->start_row ) + { + xmlChar *attr = + xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("ValueType")); + + r->vtype = _xmlchar_to_int (attr); + + xmlFree (attr); + break; + } if ( r->rsd.col < r->start_col || (r->stop_col != -1 && r->rsd.col > r->stop_col)) @@ -676,10 +733,13 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet, var_spec [i].name = NULL; var_spec [i].width = -1; var_spec [i].first_value = NULL; + var_spec [i].first_type = -1; } n_var_specs = idx + 1 ; } + var_spec [idx].first_type = type; + if ( r->rsd.node_type == XML_READER_TYPE_TEXT ) { xmlChar *value = xmlTextReaderValue (r->rsd.xtr); @@ -711,7 +771,7 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet, xmlChar *attr = xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("ValueType")); - if ( NULL == attr || 60 != _xmlchar_to_int (attr)) + if ( NULL == attr || VALUE_STRING != _xmlchar_to_int (attr)) var_spec [idx].width = 0; free (attr); @@ -769,7 +829,10 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet, var = dict_get_var (r->dict, x++); convert_xml_string_to_value (r->first_case, var, - var_spec[i].first_value); + var_spec[i].first_value, + var_spec[i].first_type, + r->rsd.col + i - 1, + r->rsd.row - 1); } for ( i = 0 ; i < n_var_specs ; ++i ) @@ -779,7 +842,7 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet, } free (var_spec); - + return casereader_create_sequential (NULL, @@ -796,8 +859,6 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet, } free (var_spec); - dict_destroy (spreadsheet->dict); - spreadsheet->dict = NULL; gnm_file_casereader_destroy (NULL, r); @@ -828,11 +889,22 @@ gnm_file_casereader_read (struct casereader *reader UNUSED, void *r_) if (r->start_col == -1) r->start_col = r->rsd.min_col; + while ((r->rsd.state == STATE_CELL || r->rsd.state == STATE_CELLS_START ) && r->rsd.row == current_row && (ret = xmlTextReaderRead (r->rsd.xtr))) { process_node (r, &r->rsd); + if (r->rsd.state == STATE_CELL && r->rsd.node_type == XML_READER_TYPE_ELEMENT) + { + xmlChar *attr = + xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("ValueType")); + + r->vtype = _xmlchar_to_int (attr); + + xmlFree (attr); + } + if ( r->rsd.col < r->start_col || (r->stop_col != -1 && r->rsd.col > r->stop_col)) continue; @@ -843,17 +915,17 @@ gnm_file_casereader_read (struct casereader *reader UNUSED, void *r_) if ( r->stop_row != -1 && r->rsd.row > r->stop_row) break; + if ( r->rsd.node_type == XML_READER_TYPE_TEXT ) { xmlChar *value = xmlTextReaderValue (r->rsd.xtr); - const int idx = r->rsd.col - r->start_col; - const struct variable *var = dict_get_var (r->dict, idx); - convert_xml_string_to_value (c, var, value); + convert_xml_string_to_value (c, var, value, r->vtype, + r->rsd.col, r->rsd.row); - free (value); + xmlFree (value); } } @@ -865,6 +937,3 @@ gnm_file_casereader_read (struct casereader *reader UNUSED, void *r_) return NULL; } } - - -#endif /* GNM_SUPPORT */