X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fgnumeric-reader.c;h=07c1814f8e4c2c7b349332b3abf021477824464e;hb=c7037d42254bb3c0e1dac2e1bd6ef95c6db8ba27;hp=4ddd80be00c6cc8225bbeacf97eb5656022c16ea;hpb=f758c0c7097e197d7b6fb9bb3c3d9d6e9138e0a7;p=pspp diff --git a/src/data/gnumeric-reader.c b/src/data/gnumeric-reader.c index 4ddd80be00..07c1814f8e 100644 --- a/src/data/gnumeric-reader.c +++ b/src/data/gnumeric-reader.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -31,7 +31,7 @@ #if !GNM_SUPPORT struct casereader * -gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dict) +gnumeric_open_reader (const struct spreadsheet_read_options *opts, struct dictionary **dict) { msg (ME, _("Support for %s files was not compiled into this installation of PSPP"), "Gnumeric"); @@ -48,6 +48,8 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic #include #include +#include "data/format.h" +#include "data/data-in.h" #include "data/case.h" #include "data/casereader-provider.h" #include "data/dictionary.h" @@ -59,10 +61,30 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic #include "gl/xalloc.h" + +/* Shamelessly lifted from the Gnumeric sources: + https://git.gnome.org/browse/gnumeric/tree/src/value.h + */ +enum gnm_value_type +{ + VALUE_EMPTY = 10, + VALUE_BOOLEAN = 20, + VALUE_INTEGER = 30, /* Note, this was removed from gnumeric in 2006 - old versions may of + course still be around. New ones are supposed to use float.*/ + VALUE_FLOAT = 40, + VALUE_ERROR = 50, + VALUE_STRING = 60, + VALUE_CELLRANGE = 70, + VALUE_ARRAY = 80 +}; + + + static void gnm_file_casereader_destroy (struct casereader *, void *); static struct ccase *gnm_file_casereader_read (struct casereader *, void *); + static const struct casereader_class gnm_file_casereader_class = { gnm_file_casereader_read, @@ -73,159 +95,348 @@ static const struct casereader_class gnm_file_casereader_class = enum reader_state { - STATE_INIT = 0, /* Initial state */ + STATE_PRE_INIT = 0, /* Initial state */ + STATE_SHEET_COUNT, /* Found the sheet index */ + STATE_INIT , /* Other Initial state */ STATE_SHEET_START, /* Found the start of a sheet */ STATE_SHEET_NAME, /* Found the sheet name */ STATE_MAXROW, + STATE_MAXCOL, STATE_SHEET_FOUND, /* Found the sheet that we actually want */ STATE_CELLS_START, /* Found the start of the cell array */ STATE_CELL /* Found a cell */ }; +struct sheet_detail +{ + /* The name of the sheet (utf8 encoding) */ + char *name; -struct gnumeric_reader + int start_col; + int stop_col; + int start_row; + int stop_row; + + int maxcol; + int maxrow; +}; + +struct state_data { + /* The libxml reader for this instance */ xmlTextReaderPtr xtr; + /* An internal state variable */ enum reader_state state; + + int node_type; + int current_sheet; + int row; int col; - int node_type; - int sheet_index; + int min_col; +}; - const xmlChar *target_sheet; - int target_sheet_index; - int start_row; +static void +state_data_destroy (struct state_data *sd) +{ + xmlFreeTextReader (sd->xtr); +} + + +struct gnumeric_reader +{ + struct spreadsheet spreadsheet; + int ref_cnt; + + struct state_data rsd; + struct state_data msd; + int start_col; - int stop_row; int stop_col; + int start_row; + int stop_row; + + struct sheet_detail *sheets; + + const xmlChar *target_sheet; + int target_sheet_index; struct caseproto *proto; struct dictionary *dict; struct ccase *first_case; bool used_first_case; + + enum gnm_value_type vtype; }; -static void process_node (struct gnumeric_reader *r); + +void +gnumeric_destroy (struct spreadsheet *s) +{ + struct gnumeric_reader *r = (struct gnumeric_reader *) s; + + if (0 == --r->ref_cnt) + { + int i; + + for (i = 0; i < s->n_sheets; ++i) + { + xmlFree (r->sheets[i].name); + } + + free (r->sheets); + state_data_destroy (&r->msd); + + free (r); + } +} + + +const char * +gnumeric_get_sheet_name (struct spreadsheet *s, int n) +{ + struct gnumeric_reader *gr = (struct gnumeric_reader *) s; + assert (n < s->n_sheets); + + return gr->sheets[n].name; +} + + +static void process_node (struct gnumeric_reader *r, struct state_data *sd); + + + +char * +gnumeric_get_sheet_range (struct spreadsheet *s, int n) +{ + int ret; + struct gnumeric_reader *gr = (struct gnumeric_reader *) s; + + assert (n < s->n_sheets); + + while ( + (gr->sheets[n].stop_col == -1) + && + (1 == (ret = xmlTextReaderRead (gr->msd.xtr))) + ) + { + process_node (gr, &gr->msd); + } + + return create_cell_range ( + gr->sheets[n].start_col, + gr->sheets[n].start_row, + gr->sheets[n].stop_col, + gr->sheets[n].stop_row); +} static void gnm_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) { struct gnumeric_reader *r = r_; + if ( r == NULL) return ; - if ( r->xtr) - xmlFreeTextReader (r->xtr); + state_data_destroy (&r->rsd); - if ( ! r->used_first_case ) + if (r->first_case && ! r->used_first_case ) case_unref (r->first_case); - caseproto_unref (r->proto); + if (r->proto) + caseproto_unref (r->proto); - free (r); + gnumeric_destroy (&r->spreadsheet); } + static void -process_node (struct gnumeric_reader *r) +process_node (struct gnumeric_reader *r, struct state_data *sd) { - xmlChar *name = xmlTextReaderName (r->xtr); + xmlChar *name = xmlTextReaderName (sd->xtr); if (name == NULL) name = xmlStrdup (_xml ("--")); + sd->node_type = xmlTextReaderNodeType (sd->xtr); - r->node_type = xmlTextReaderNodeType (r->xtr); - - switch ( r->state) + switch (sd->state) { + case STATE_PRE_INIT: + sd->current_sheet = -1; + if (0 == xmlStrcasecmp (name, _xml("gnm:SheetNameIndex")) && + XML_READER_TYPE_ELEMENT == sd->node_type) + { + sd->state = STATE_SHEET_COUNT; + } + break; + + case STATE_SHEET_COUNT: + if (0 == xmlStrcasecmp (name, _xml("gnm:SheetName")) && + XML_READER_TYPE_ELEMENT == sd->node_type) + { + ++sd->current_sheet; + if (sd->current_sheet + 1 > r->spreadsheet.n_sheets) + { + struct sheet_detail *detail ; + r->sheets = xrealloc (r->sheets, (sd->current_sheet + 1) * sizeof *r->sheets); + detail = &r->sheets[sd->current_sheet]; + detail->start_col = detail->stop_col = detail->start_row = detail->stop_row = -1; + detail->name = NULL; + r->spreadsheet.n_sheets = sd->current_sheet + 1; + } + } + else if (0 == xmlStrcasecmp (name, _xml("gnm:SheetNameIndex")) && + XML_READER_TYPE_END_ELEMENT == sd->node_type) + { + sd->state = STATE_INIT; + sd->current_sheet = -1; + } + else if (XML_READER_TYPE_TEXT == sd->node_type) + { + if ( r->sheets [r->spreadsheet.n_sheets - 1].name == NULL) + r->sheets [r->spreadsheet.n_sheets - 1].name = CHAR_CAST (char *, xmlTextReaderValue (sd->xtr)); + } + break; + case STATE_INIT: if (0 == xmlStrcasecmp (name, _xml("gnm:Sheet")) && - XML_READER_TYPE_ELEMENT == r->node_type) + XML_READER_TYPE_ELEMENT == sd->node_type) { - ++r->sheet_index; - r->state = STATE_SHEET_START; + ++sd->current_sheet; + sd->state = STATE_SHEET_START; } break; case STATE_SHEET_START: if (0 == xmlStrcasecmp (name, _xml("gnm:Name")) && - XML_READER_TYPE_ELEMENT == r->node_type) + XML_READER_TYPE_ELEMENT == sd->node_type) { - r->state = STATE_SHEET_NAME; + sd->state = STATE_SHEET_NAME; } break; case STATE_SHEET_NAME: if (0 == xmlStrcasecmp (name, _xml("gnm:Name")) && - XML_READER_TYPE_END_ELEMENT == r->node_type) + XML_READER_TYPE_END_ELEMENT == sd->node_type) { - r->state = STATE_INIT; + sd->state = STATE_INIT; } - else if (XML_READER_TYPE_TEXT == r->node_type) + else if (0 == xmlStrcasecmp (name, _xml("gnm:Sheet")) && + XML_READER_TYPE_END_ELEMENT == sd->node_type) + { + sd->state = STATE_INIT; + } + else if (XML_READER_TYPE_TEXT == sd->node_type) { - if ( r->target_sheet != NULL) + if ( r->target_sheet != NULL) { - xmlChar *value = xmlTextReaderValue (r->xtr); + xmlChar *value = xmlTextReaderValue (sd->xtr); if ( 0 == xmlStrcmp (value, r->target_sheet)) - r->state = STATE_SHEET_FOUND; + sd->state = STATE_SHEET_FOUND; free (value); } - else if (r->target_sheet_index == r->sheet_index) + else if (r->target_sheet_index == sd->current_sheet + 1) { - r->state = STATE_SHEET_FOUND; + sd->state = STATE_SHEET_FOUND; + } + else if (r->target_sheet_index == -1) + { + sd->state = STATE_SHEET_FOUND; } } break; case STATE_SHEET_FOUND: if (0 == xmlStrcasecmp (name, _xml("gnm:Cells")) && - XML_READER_TYPE_ELEMENT == r->node_type) + XML_READER_TYPE_ELEMENT == sd->node_type) { - if (! xmlTextReaderIsEmptyElement (r->xtr)) - r->state = STATE_CELLS_START; + sd->min_col = INT_MAX; + if (! xmlTextReaderIsEmptyElement (sd->xtr)) + sd->state = STATE_CELLS_START; } else if (0 == xmlStrcasecmp (name, _xml("gnm:MaxRow")) && - XML_READER_TYPE_ELEMENT == r->node_type) + XML_READER_TYPE_ELEMENT == sd->node_type) + { + sd->state = STATE_MAXROW; + } + else if (0 == xmlStrcasecmp (name, _xml("gnm:MaxCol")) && + XML_READER_TYPE_ELEMENT == sd->node_type) { - r->state = STATE_MAXROW; + sd->state = STATE_MAXCOL; } else if (0 == xmlStrcasecmp (name, _xml("gnm:Sheet")) && - XML_READER_TYPE_END_ELEMENT == r->node_type) + XML_READER_TYPE_END_ELEMENT == sd->node_type) { - r->state = STATE_INIT; + sd->state = STATE_INIT; } break; case STATE_MAXROW: if (0 == xmlStrcasecmp (name, _xml("gnm:MaxRow")) && - XML_READER_TYPE_END_ELEMENT == r->node_type) + XML_READER_TYPE_END_ELEMENT == sd->node_type) + { + sd->state = STATE_SHEET_FOUND; + } + else if (sd->node_type == XML_READER_TYPE_TEXT) { - r->state = STATE_SHEET_FOUND; + xmlChar *value = xmlTextReaderValue (sd->xtr); + r->sheets[sd->current_sheet].maxrow = _xmlchar_to_int (value); + xmlFree (value); } + break; + case STATE_MAXCOL: + if (0 == xmlStrcasecmp (name, _xml("gnm:MaxCol")) && + XML_READER_TYPE_END_ELEMENT == sd->node_type) + { + sd->state = STATE_SHEET_FOUND; + } + else if (sd->node_type == XML_READER_TYPE_TEXT) + { + xmlChar *value = xmlTextReaderValue (sd->xtr); + r->sheets[sd->current_sheet].maxcol = _xmlchar_to_int (value); + xmlFree (value); + } + break; case STATE_CELLS_START: if (0 == xmlStrcasecmp (name, _xml ("gnm:Cell")) && - XML_READER_TYPE_ELEMENT == r->node_type) + XML_READER_TYPE_ELEMENT == sd->node_type) { xmlChar *attr = NULL; - r->state = STATE_CELL; - attr = xmlTextReaderGetAttribute (r->xtr, _xml ("Col")); - r->col = _xmlchar_to_int (attr); + attr = xmlTextReaderGetAttribute (sd->xtr, _xml ("Col")); + sd->col = _xmlchar_to_int (attr); free (attr); - attr = xmlTextReaderGetAttribute (r->xtr, _xml ("Row")); - r->row = _xmlchar_to_int (attr); + if (sd->col < sd->min_col) + sd->min_col = sd->col; + + attr = xmlTextReaderGetAttribute (sd->xtr, _xml ("Row")); + sd->row = _xmlchar_to_int (attr); free (attr); - } - else if (0 == xmlStrcasecmp (name, _xml("gnm:Cells")) && - XML_READER_TYPE_END_ELEMENT == r->node_type) - r->state = STATE_SHEET_NAME; + if (r->sheets[sd->current_sheet].start_row == -1) + { + r->sheets[sd->current_sheet].start_row = sd->row; + } + + if (r->sheets[sd->current_sheet].start_col == -1) + { + r->sheets[sd->current_sheet].start_col = sd->col; + } + if (! xmlTextReaderIsEmptyElement (sd->xtr)) + sd->state = STATE_CELL; + } + else if ( (0 == xmlStrcasecmp (name, _xml("gnm:Cells"))) && (XML_READER_TYPE_END_ELEMENT == sd->node_type) ) + { + r->sheets[sd->current_sheet].stop_col = sd->col; + r->sheets[sd->current_sheet].stop_row = sd->row; + sd->state = STATE_SHEET_NAME; + } break; case STATE_CELL: - if (0 == xmlStrcasecmp (name, _xml("gnm:Cell")) && - XML_READER_TYPE_END_ELEMENT == r->node_type) + if (0 == xmlStrcasecmp (name, _xml("gnm:Cell")) && XML_READER_TYPE_END_ELEMENT == sd->node_type) { - r->state = STATE_CELLS_START; + sd->state = STATE_CELLS_START; } break; default: @@ -241,7 +452,7 @@ process_node (struct gnumeric_reader *r) */ static void convert_xml_string_to_value (struct ccase *c, const struct variable *var, - const xmlChar *xv) + const xmlChar *xv, enum gnm_value_type type, int col, int row) { union value *v = case_data_rw (c, var); @@ -249,7 +460,7 @@ convert_xml_string_to_value (struct ccase *c, const struct variable *var, value_set_missing (v, var_get_width (var)); else if ( var_is_alpha (var)) value_copy_str_rpad (v, var_get_width (var), xv, ' '); - else + else if (type == VALUE_FLOAT || type == VALUE_INTEGER) { const char *text = CHAR_CAST (const char *, xv); char *endptr; @@ -259,6 +470,29 @@ convert_xml_string_to_value (struct ccase *c, const struct variable *var, if ( errno != 0 || endptr == text) v->f = SYSMIS; } + else + { + const char *text = CHAR_CAST (const char *, xv); + + const struct fmt_spec *fmt = var_get_write_format (var); + + char *m = data_in (ss_cstr (text), "UTF-8", + fmt->type, + v, + var_get_width (var), + "UTF-8"); + + if (m) + { + char buf [FMT_STRING_LEN_MAX + 1]; + char *cell = create_cell_ref (col, row); + + msg (MW, _("Cannot convert the value in the spreadsheet cell %s to format (%s): %s"), + cell, fmt_to_string (fmt, buf), m); + free (cell); + } + free (m); + } } struct var_spec @@ -266,128 +500,266 @@ struct var_spec char *name; int width; xmlChar *first_value; + int first_type; }; -struct casereader * -gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dict) + +static void +gnumeric_error_handler (void *ctx, const char *mesg, + UNUSED xmlParserSeverities sev, xmlTextReaderLocatorPtr loc) { - unsigned long int vstart = 0; - int ret; - casenumber n_cases = CASENUMBER_MAX; - int i; - struct var_spec *var_spec = NULL; - int n_var_specs = 0; + struct gnumeric_reader *r = ctx; + + msg (MW, _("There was a problem whilst reading the %s file `%s' (near line %d): `%s'"), + "Gnumeric", + r->spreadsheet.file_name, + xmlTextReaderLocatorLineNumber (loc), + mesg); +} - struct gnumeric_reader *r = NULL; +static struct gnumeric_reader * +gnumeric_reopen (struct gnumeric_reader *r, const char *filename, bool show_errors) +{ + int ret = -1; + struct state_data *sd; - gzFile gz = gzopen (gri->file_name, "r"); + xmlTextReaderPtr xtr; + gzFile gz; + + assert (r == NULL || filename == NULL); - if ( NULL == gz) + if (filename) { - msg (ME, _("Error opening `%s' for reading as a Gnumeric file: %s."), - gri->file_name, strerror (errno)); + gz = gzopen (filename, "r"); + } + else + { + gz = gzopen (r->spreadsheet.file_name, "r"); + } - goto error; + if (NULL == gz) + return NULL; + + + xtr = xmlReaderForIO ((xmlInputReadCallback) gzread, + (xmlInputCloseCallback) gzclose, gz, + NULL, NULL, + show_errors ? 0 : (XML_PARSE_NOERROR | XML_PARSE_NOWARNING) ); + + if (xtr == NULL) + { + gzclose (gz); + return NULL; + } + + if (r == NULL) + { + r = xzalloc (sizeof *r); + r->spreadsheet.n_sheets = -1; + r->spreadsheet.file_name = filename; + sd = &r->msd; + } + else + { + sd = &r->rsd; + } + + if (show_errors) + xmlTextReaderSetErrorHandler (xtr, gnumeric_error_handler, r); + + r->target_sheet = NULL; + r->target_sheet_index = -1; + + sd->row = sd->col = -1; + sd->state = STATE_PRE_INIT; + sd->xtr = xtr; + r->ref_cnt++; + + /* Advance to the start of the workbook. + This gives us some confidence that we are actually dealing with a gnumeric + spreadsheet. + */ + while ( (sd->state != STATE_INIT ) + && 1 == (ret = xmlTextReaderRead (sd->xtr))) + { + process_node (r, sd); } - r = xzalloc (sizeof *r); - r->xtr = xmlReaderForIO ((xmlInputReadCallback) gzread, - (xmlInputCloseCallback) gzclose, gz, - NULL, NULL, 0); + if ( ret != 1) + { + /* Does not seem to be a gnumeric file */ + xmlFreeTextReader (sd->xtr); + free (r); + return NULL; + } - if ( r->xtr == NULL ) - goto error; + r->spreadsheet.type = SPREADSHEET_GNUMERIC; - if ( gri->cell_range ) + if (show_errors) { - if ( ! convert_cell_ref (gri->cell_range, + const xmlChar *enc = xmlTextReaderConstEncoding (sd->xtr); + xmlCharEncoding xce = xmlParseCharEncoding (CHAR_CAST (const char *, enc)); + + if ( XML_CHAR_ENCODING_UTF8 != xce) + { + /* I have been told that ALL gnumeric files are UTF8 encoded. If that is correct, this + can never happen. */ + msg (MW, _("The gnumeric file `%s' is encoded as %s instead of the usual UTF-8 encoding. " + "Any non-ascii characters will be incorrectly imported."), + r->spreadsheet.file_name, + enc); + } + } + + return r; +} + + +struct spreadsheet * +gnumeric_probe (const char *filename, bool report_errors) +{ + struct gnumeric_reader *r = gnumeric_reopen (NULL, filename, report_errors); + + return &r->spreadsheet; +} + + +struct casereader * +gnumeric_make_reader (struct spreadsheet *spreadsheet, + const struct spreadsheet_read_options *opts) +{ + int type = 0; + int x = 0; + struct gnumeric_reader *r = NULL; + unsigned long int vstart = 0; + int ret; + casenumber n_cases = CASENUMBER_MAX; + int i; + struct var_spec *var_spec = NULL; + int n_var_specs = 0; + + r = (struct gnumeric_reader *) (spreadsheet); + + r = gnumeric_reopen (r, NULL, true); + + if ( opts->cell_range ) + { + if ( ! convert_cell_ref (opts->cell_range, &r->start_col, &r->start_row, &r->stop_col, &r->stop_row)) { msg (SE, _("Invalid cell range `%s'"), - gri->cell_range); + opts->cell_range); goto error; } } else { - r->start_col = 0; + r->start_col = -1; r->start_row = 0; r->stop_col = -1; r->stop_row = -1; } - r->state = STATE_INIT; - r->target_sheet = BAD_CAST gri->sheet_name; - r->target_sheet_index = gri->sheet_index; - r->row = r->col = -1; - r->sheet_index = 0; + r->target_sheet = BAD_CAST opts->sheet_name; + r->target_sheet_index = opts->sheet_index; + r->rsd.row = r->rsd.col = -1; + r->rsd.current_sheet = -1; + r->first_case = NULL; + r->proto = NULL; /* Advance to the start of the cells for the target sheet */ - while ( (r->state != STATE_CELL || r->row < r->start_row ) - && 1 == (ret = xmlTextReaderRead (r->xtr))) + while ( (r->rsd.state != STATE_CELL || r->rsd.row < r->start_row ) + && 1 == (ret = xmlTextReaderRead (r->rsd.xtr))) { xmlChar *value ; - process_node (r); - value = xmlTextReaderValue (r->xtr); + process_node (r, &r->rsd); + value = xmlTextReaderValue (r->rsd.xtr); - if ( r->state == STATE_MAXROW && r->node_type == XML_READER_TYPE_TEXT) + if ( r->rsd.state == STATE_MAXROW && r->rsd.node_type == XML_READER_TYPE_TEXT) { n_cases = 1 + _xmlchar_to_int (value) ; } free (value); } - /* If a range has been given, then use that to calculate the number of cases */ - if ( gri->cell_range) + if ( opts->cell_range) { n_cases = MIN (n_cases, r->stop_row - r->start_row + 1); } - if ( gri->read_names ) + if ( opts->read_names ) { r->start_row++; n_cases --; } + /* Read in the first row of cells, including the headers if read_names was set */ while ( - (( r->state == STATE_CELLS_START && r->row <= r->start_row) || r->state == STATE_CELL ) - && (ret = xmlTextReaderRead (r->xtr)) + (( r->rsd.state == STATE_CELLS_START && r->rsd.row <= r->start_row) || r->rsd.state == STATE_CELL ) + && (ret = xmlTextReaderRead (r->rsd.xtr)) ) { int idx; - process_node (r); - if ( r->row > r->start_row ) break; + if (r->rsd.state == STATE_CELL && r->rsd.node_type == XML_READER_TYPE_TEXT) + { + xmlChar *attr = + xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("ValueType")); - if ( r->col < r->start_col || - (r->stop_col != -1 && r->col > r->stop_col)) + type = _xmlchar_to_int (attr); + + xmlFree (attr); + } + + process_node (r, &r->rsd); + + if ( r->rsd.row > r->start_row ) + { + xmlChar *attr = + xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("ValueType")); + + r->vtype = _xmlchar_to_int (attr); + + xmlFree (attr); + break; + } + + if ( r->rsd.col < r->start_col || + (r->stop_col != -1 && r->rsd.col > r->stop_col)) continue; - idx = r->col - r->start_col; + idx = r->rsd.col - r->start_col; if ( idx >= n_var_specs ) { + int i; + var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + 1)); + for (i = n_var_specs; i <= idx; ++i) + { + var_spec [i].name = NULL; + var_spec [i].width = -1; + var_spec [i].first_value = NULL; + var_spec [i].first_type = -1; + } n_var_specs = idx + 1 ; - var_spec = xrealloc (var_spec, sizeof (*var_spec) * n_var_specs); - var_spec [idx].name = NULL; - var_spec [idx].width = -1; - var_spec [idx].first_value = NULL; } - if ( r->node_type == XML_READER_TYPE_TEXT ) + var_spec [idx].first_type = type; + + if ( r->rsd.node_type == XML_READER_TYPE_TEXT ) { - xmlChar *value = xmlTextReaderValue (r->xtr); + xmlChar *value = xmlTextReaderValue (r->rsd.xtr); const char *text = CHAR_CAST (const char *, value); - if ( r->row < r->start_row) + if ( r->rsd.row < r->start_row) { - if ( gri->read_names ) + if ( opts->read_names ) { var_spec [idx].name = xstrdup (text); } @@ -397,21 +769,21 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic var_spec [idx].first_value = xmlStrdup (value); if (-1 == var_spec [idx].width ) - var_spec [idx].width = (gri->asw == -1) ? - ROUND_UP (strlen(text), SPREADSHEET_DEFAULT_WIDTH) : gri->asw; + var_spec [idx].width = (opts->asw == -1) ? + ROUND_UP (strlen(text), SPREADSHEET_DEFAULT_WIDTH) : opts->asw; } free (value); } - else if ( r->node_type == XML_READER_TYPE_ELEMENT - && r->state == STATE_CELL) + else if ( r->rsd.node_type == XML_READER_TYPE_ELEMENT + && r->rsd.state == STATE_CELL) { - if ( r->row == r->start_row ) + if ( r->rsd.row == r->start_row ) { xmlChar *attr = - xmlTextReaderGetAttribute (r->xtr, _xml ("ValueType")); + xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("ValueType")); - if ( NULL == attr || 60 != _xmlchar_to_int (attr)) + if ( NULL == attr || VALUE_STRING != _xmlchar_to_int (attr)) var_spec [idx].width = 0; free (attr); @@ -420,17 +792,20 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic } { - const xmlChar *enc = xmlTextReaderConstEncoding (r->xtr); + const xmlChar *enc = xmlTextReaderConstEncoding (r->rsd.xtr); if ( enc == NULL) goto error; /* Create the dictionary and populate it */ - *dict = r->dict = dict_create (CHAR_CAST (const char *, enc)); + spreadsheet->dict = r->dict = dict_create (CHAR_CAST (const char *, enc)); } for (i = 0 ; i < n_var_specs ; ++i ) { char *name; + if ( (var_spec[i].name == NULL) && (var_spec[i].first_value == NULL)) + continue; + /* Probably no data exists for this variable, so allocate a default width */ if ( var_spec[i].width == -1 ) @@ -447,7 +822,7 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic if ( n_var_specs == 0 ) { msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."), - gri->file_name); + spreadsheet->file_name); goto error; } @@ -455,12 +830,21 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic r->first_case = case_create (r->proto); case_set_missing (r->first_case); + for ( i = 0 ; i < n_var_specs ; ++i ) { - const struct variable *var = dict_get_var (r->dict, i); + const struct variable *var; + + if ( (var_spec[i].name == NULL) && (var_spec[i].first_value == NULL)) + continue; + var = dict_get_var (r->dict, x++); + convert_xml_string_to_value (r->first_case, var, - var_spec[i].first_value); + var_spec[i].first_value, + var_spec[i].first_type, + r->rsd.col + i - 1, + r->rsd.row - 1); } for ( i = 0 ; i < n_var_specs ; ++i ) @@ -470,6 +854,7 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic } free (var_spec); + return casereader_create_sequential (NULL, @@ -486,7 +871,8 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic } free (var_spec); - dict_destroy (*dict); + dict_destroy (spreadsheet->dict); + spreadsheet->dict = NULL; gnm_file_casereader_destroy (NULL, r); @@ -503,7 +889,7 @@ gnm_file_casereader_read (struct casereader *reader UNUSED, void *r_) int ret = 0; struct gnumeric_reader *r = r_; - int current_row = r->row; + int current_row = r->rsd.row; if ( !r->used_first_case ) { @@ -514,34 +900,47 @@ gnm_file_casereader_read (struct casereader *reader UNUSED, void *r_) c = case_create (r->proto); case_set_missing (c); - while ((r->state == STATE_CELL || r->state == STATE_CELLS_START ) - && r->row == current_row && (ret = xmlTextReaderRead (r->xtr))) + if (r->start_col == -1) + r->start_col = r->rsd.min_col; + + + while ((r->rsd.state == STATE_CELL || r->rsd.state == STATE_CELLS_START ) + && r->rsd.row == current_row && (ret = xmlTextReaderRead (r->rsd.xtr))) { - process_node (r); + process_node (r, &r->rsd); + + if (r->rsd.state == STATE_CELL && r->rsd.node_type == XML_READER_TYPE_ELEMENT) + { + xmlChar *attr = + xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("ValueType")); + + r->vtype = _xmlchar_to_int (attr); + + xmlFree (attr); + } - if ( r->col < r->start_col || (r->stop_col != -1 && - r->col > r->stop_col)) + if ( r->rsd.col < r->start_col || (r->stop_col != -1 && + r->rsd.col > r->stop_col)) continue; - if ( r->col - r->start_col >= caseproto_get_n_widths (r->proto)) + if ( r->rsd.col - r->start_col >= caseproto_get_n_widths (r->proto)) continue; - if ( r->stop_row != -1 && r->row > r->stop_row) + if ( r->stop_row != -1 && r->rsd.row > r->stop_row) break; - if ( r->node_type == XML_READER_TYPE_TEXT ) - { - xmlChar *value = xmlTextReaderValue (r->xtr); - - const int idx = r->col - r->start_col; + if ( r->rsd.node_type == XML_READER_TYPE_TEXT ) + { + xmlChar *value = xmlTextReaderValue (r->rsd.xtr); + const int idx = r->rsd.col - r->start_col; const struct variable *var = dict_get_var (r->dict, idx); - convert_xml_string_to_value (c, var, value); + convert_xml_string_to_value (c, var, value, r->vtype, + r->rsd.col, r->rsd.row); - free (value); + xmlFree (value); } - } if (ret == 1)