X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fgnumeric-reader.c;h=514616ace74b59749cef5930344533a7bda51683;hb=613619667f835ee621a23c130f4aab28dcfe42d7;hp=aa0e161d4aff067d8261706cef2ff51a35a768d9;hpb=e37c7ed26e2f4ee22e2648b72f7a4fc8a5c6fa7e;p=pspp diff --git a/src/data/gnumeric-reader.c b/src/data/gnumeric-reader.c index aa0e161d4a..514616ace7 100644 --- a/src/data/gnumeric-reader.c +++ b/src/data/gnumeric-reader.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,12 +28,10 @@ #include "spreadsheet-reader.h" -#include "c-xvasprintf.h" - #if !GNM_SUPPORT struct casereader * -gnumeric_open_reader (struct spreadsheet_read_info *gri, struct spreadsheet_read_options *opts, struct dictionary **dict) +gnumeric_open_reader (const struct spreadsheet_read_options *opts, struct dictionary **dict) { msg (ME, _("Support for %s files was not compiled into this installation of PSPP"), "Gnumeric"); @@ -89,7 +87,16 @@ enum reader_state struct sheet_detail { - xmlChar *name; + /* The name of the sheet (utf8 encoding) */ + char *name; + + int start_col; + int stop_col; + int start_row; + int stop_row; + + int maxcol; + int maxrow; }; @@ -97,15 +104,17 @@ struct gnumeric_reader { struct spreadsheet spreadsheet; + /* The libxml reader for this instance */ xmlTextReaderPtr xtr; + /* An internal state variable */ enum reader_state state; int row; int col; int min_col; int node_type; - int sheet_index; + int current_sheet; int start_col; int stop_col; @@ -128,48 +137,68 @@ const char * gnumeric_get_sheet_name (struct spreadsheet *s, int n) { struct gnumeric_reader *gr = (struct gnumeric_reader *) s; - assert (n < s->sheets); + assert (n < s->n_sheets); - return gr->sheets[n].name; + return gr->sheets[n].name; } -const char * + +static void process_node (struct gnumeric_reader *r); + + + +char * gnumeric_get_sheet_range (struct spreadsheet *s, int n) { + int ret; struct gnumeric_reader *gr = (struct gnumeric_reader *) s; - assert (n < s->sheets); - - return "I havent the fogiest idea"; -} + + assert (n < s->n_sheets); + while ( + (gr->sheets[n].stop_col == -1) + && + (1 == (ret = xmlTextReaderRead (gr->xtr))) + ) + { + process_node (gr); + } -static void process_node (struct gnumeric_reader *r); + return create_cell_ref ( + gr->sheets[n].start_col, + gr->sheets[n].start_row, + gr->sheets[n].stop_col, + gr->sheets[n].stop_row); +} static void gnm_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) { - int i; struct gnumeric_reader *r = r_; if ( r == NULL) return ; if ( r->xtr) xmlFreeTextReader (r->xtr); + r->xtr = NULL; if ( ! r->used_first_case ) case_unref (r->first_case); caseproto_unref (r->proto); - for (i = 0; i < r->spreadsheet.sheets; ++i) +#if 0 + for (i = 0; i < r->spreadsheet.n_sheets; ++i) { xmlFree (r->sheets[i].name); } free (r->sheets); + free (r); +#endif } static void @@ -179,17 +208,16 @@ process_node (struct gnumeric_reader *r) if (name == NULL) name = xmlStrdup (_xml ("--")); - r->node_type = xmlTextReaderNodeType (r->xtr); - switch ( r->state) + switch (r->state) { case STATE_PRE_INIT: + r->current_sheet = -1; if (0 == xmlStrcasecmp (name, _xml("gnm:SheetNameIndex")) && XML_READER_TYPE_ELEMENT == r->node_type) { r->state = STATE_SHEET_COUNT; - r->spreadsheet.sheets = 0; } break; @@ -197,17 +225,25 @@ process_node (struct gnumeric_reader *r) if (0 == xmlStrcasecmp (name, _xml("gnm:SheetName")) && XML_READER_TYPE_ELEMENT == r->node_type) { - r->spreadsheet.sheets++; - r->sheets = xrealloc (r->sheets, r->spreadsheet.sheets * sizeof *r->sheets); + ++r->current_sheet; + if (r->current_sheet + 1 > r->spreadsheet.n_sheets) + { + struct sheet_detail *sd ; + r->sheets = xrealloc (r->sheets, (r->current_sheet + 1) * sizeof *r->sheets); + sd = &r->sheets[r->current_sheet]; + sd->start_col = sd->stop_col = sd->start_row = sd->stop_row = -1; + r->spreadsheet.n_sheets = r->current_sheet + 1; + } } else if (0 == xmlStrcasecmp (name, _xml("gnm:SheetNameIndex")) && XML_READER_TYPE_END_ELEMENT == r->node_type) { r->state = STATE_INIT; + r->current_sheet = -1; } else if (XML_READER_TYPE_TEXT == r->node_type) { - r->sheets [r->spreadsheet.sheets - 1].name = xmlTextReaderValue (r->xtr); + r->sheets [r->spreadsheet.n_sheets - 1].name = CHAR_CAST (char *, xmlTextReaderValue (r->xtr)); } break; @@ -215,7 +251,7 @@ process_node (struct gnumeric_reader *r) if (0 == xmlStrcasecmp (name, _xml("gnm:Sheet")) && XML_READER_TYPE_ELEMENT == r->node_type) { - ++r->sheet_index; + ++r->current_sheet; r->state = STATE_SHEET_START; } break; @@ -232,16 +268,25 @@ process_node (struct gnumeric_reader *r) { r->state = STATE_INIT; } + else if (0 == xmlStrcasecmp (name, _xml("gnm:Sheet")) && + XML_READER_TYPE_END_ELEMENT == r->node_type) + { + r->state = STATE_INIT; + } else if (XML_READER_TYPE_TEXT == r->node_type) { - if ( r->target_sheet != NULL) + if ( r->target_sheet != NULL) { xmlChar *value = xmlTextReaderValue (r->xtr); if ( 0 == xmlStrcmp (value, r->target_sheet)) r->state = STATE_SHEET_FOUND; free (value); } - else if (r->target_sheet_index == r->sheet_index) + else if (r->target_sheet_index == r->current_sheet + 1) + { + r->state = STATE_SHEET_FOUND; + } + else if (r->target_sheet_index == -1) { r->state = STATE_SHEET_FOUND; } @@ -268,7 +313,7 @@ process_node (struct gnumeric_reader *r) else if (0 == xmlStrcasecmp (name, _xml("gnm:Sheet")) && XML_READER_TYPE_END_ELEMENT == r->node_type) { - r->state = STATE_INIT; + r->state = STATE_INIT; } break; case STATE_MAXROW: @@ -280,6 +325,7 @@ process_node (struct gnumeric_reader *r) else if (r->node_type == XML_READER_TYPE_TEXT) { xmlChar *value = xmlTextReaderValue (r->xtr); + r->sheets[r->current_sheet].maxrow = _xmlchar_to_int (value); xmlFree (value); } break; @@ -292,6 +338,7 @@ process_node (struct gnumeric_reader *r) else if (r->node_type == XML_READER_TYPE_TEXT) { xmlChar *value = xmlTextReaderValue (r->xtr); + r->sheets[r->current_sheet].maxcol = _xmlchar_to_int (value); xmlFree (value); } break; @@ -300,7 +347,6 @@ process_node (struct gnumeric_reader *r) XML_READER_TYPE_ELEMENT == r->node_type) { xmlChar *attr = NULL; - r->state = STATE_CELL; attr = xmlTextReaderGetAttribute (r->xtr, _xml ("Col")); r->col = _xmlchar_to_int (attr); @@ -312,15 +358,28 @@ process_node (struct gnumeric_reader *r) attr = xmlTextReaderGetAttribute (r->xtr, _xml ("Row")); r->row = _xmlchar_to_int (attr); free (attr); - } - else if (0 == xmlStrcasecmp (name, _xml("gnm:Cells")) && - XML_READER_TYPE_END_ELEMENT == r->node_type) - r->state = STATE_SHEET_NAME; + if (r->sheets[r->current_sheet].start_row == -1) + { + r->sheets[r->current_sheet].start_row = r->row; + } + + if (r->sheets[r->current_sheet].start_col == -1) + { + r->sheets[r->current_sheet].start_col = r->col; + } + if (! xmlTextReaderIsEmptyElement (r->xtr)) + r->state = STATE_CELL; + } + else if ( (0 == xmlStrcasecmp (name, _xml("gnm:Cells"))) && (XML_READER_TYPE_END_ELEMENT == r->node_type) ) + { + r->sheets[r->current_sheet].stop_col = r->col; + r->sheets[r->current_sheet].stop_row = r->row; + r->state = STATE_SHEET_NAME; + } break; case STATE_CELL: - if (0 == xmlStrcasecmp (name, _xml("gnm:Cell")) && - XML_READER_TYPE_END_ELEMENT == r->node_type) + if (0 == xmlStrcasecmp (name, _xml("gnm:Cell")) && XML_READER_TYPE_END_ELEMENT == r->node_type) { r->state = STATE_CELLS_START; } @@ -369,35 +428,72 @@ struct var_spec void gnumeric_destroy (struct spreadsheet *s) { - struct gnumeric_reader *r = (struct gnumeric *) s; gnm_file_casereader_destroy (NULL, s); } -struct spreadsheet * -gnumeric_probe (const char *filename) + +static void +gnumeric_error_handler (void *ctx, const char *mesg, + UNUSED xmlParserSeverities sev, xmlTextReaderLocatorPtr loc) { + struct gnumeric_reader *r = ctx; + + msg (MW, _("There was a problem whilst reading the %s file `%s' (near line %d): `%s'"), + "Gnumeric", + r->spreadsheet.file_name, + xmlTextReaderLocatorLineNumber (loc), + mesg); +} + +static struct gnumeric_reader * +gnumeric_reopen (struct gnumeric_reader *r, const char *filename, bool show_errors) +{ int ret; - struct gnumeric_reader *r = NULL; + xmlTextReaderPtr xtr; + gzFile gz; + + assert (r == NULL || filename == NULL); - gzFile gz = gzopen (filename, "r"); + if (r && r->xtr) + xmlFreeTextReader (r->xtr); + + if (filename) + gz = gzopen (filename, "r"); + else + gz = gzopen ( r->spreadsheet.file_name, "r"); if (NULL == gz) return NULL; + xtr = xmlReaderForIO ((xmlInputReadCallback) gzread, - (xmlInputCloseCallback) gzclose, gz, - NULL, NULL, 0); + (xmlInputCloseCallback) gzclose, gz, + NULL, NULL, + show_errors ? 0 : (XML_PARSE_NOERROR | XML_PARSE_NOWARNING) ); if (xtr == NULL) - return NULL; + { + gzclose (gz); + return NULL; + } - r = xzalloc (sizeof *r); + if (r == NULL) + { + r = xzalloc (sizeof *r); + r->spreadsheet.n_sheets = -1; + r->spreadsheet.file_name = filename; + } - r->xtr = xtr; - r->spreadsheet.sheets = -1; - r->state = STATE_PRE_INIT; + if (show_errors) + xmlTextReaderSetErrorHandler (xtr, gnumeric_error_handler, r); + r->target_sheet = NULL; + r->target_sheet_index = -1; + + r->row = r->col = -1; + r->state = STATE_PRE_INIT; + r->xtr = xtr; /* Advance to the start of the workbook. This gives us some confidence that we are actually dealing with a gnumeric @@ -409,26 +505,51 @@ gnumeric_probe (const char *filename) process_node (r); } - if (ret != 1) + + if ( ret != 1) { - /* Not a gnumeric spreadsheet */ + /* Does not seem to be a gnumeric file */ + xmlFreeTextReader (r->xtr); free (r); - gzclose (gz); return NULL; } - + r->spreadsheet.type = SPREADSHEET_GNUMERIC; - r->spreadsheet.file_name = filename; - + + if (show_errors) + { + const xmlChar *enc = xmlTextReaderConstEncoding (r->xtr); + xmlCharEncoding xce = xmlParseCharEncoding (CHAR_CAST (const char *, enc)); + + if ( XML_CHAR_ENCODING_UTF8 != xce) + { + /* I have been told that ALL gnumeric files are UTF8 encoded. If that is correct, this + can never happen. */ + msg (MW, _("The gnumeric file `%s' is encoded as %s instead of the usual UTF-8 encoding. " + "Any non-ascii characters will be incorrectly imported."), + r->spreadsheet.file_name, + enc); + } + } + + return r; +} + + +struct spreadsheet * +gnumeric_probe (const char *filename, bool report_errors) +{ + struct gnumeric_reader *r = gnumeric_reopen (NULL, filename, report_errors); + return &r->spreadsheet; } struct casereader * gnumeric_make_reader (struct spreadsheet *spreadsheet, - const struct spreadsheet_read_info *gri, - struct spreadsheet_read_options *opts) + const struct spreadsheet_read_options *opts) { + int x = 0; struct gnumeric_reader *r = NULL; unsigned long int vstart = 0; int ret; @@ -439,6 +560,9 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet, r = (struct gnumeric_reader *) (spreadsheet); + if (r->row != -1) + r = gnumeric_reopen (r, NULL, true); + if ( opts->cell_range ) { if ( ! convert_cell_ref (opts->cell_range, @@ -458,11 +582,10 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet, r->stop_row = -1; } - r->target_sheet = BAD_CAST opts->sheet_name; r->target_sheet_index = opts->sheet_index; r->row = r->col = -1; - r->sheet_index = 0; + r->current_sheet = -1; /* Advance to the start of the cells for the target sheet */ while ( (r->state != STATE_CELL || r->row < r->start_row ) @@ -479,7 +602,6 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet, free (value); } - /* If a range has been given, then use that to calculate the number of cases */ if ( opts->cell_range) @@ -487,7 +609,7 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet, n_cases = MIN (n_cases, r->stop_row - r->start_row + 1); } - if ( gri->read_names ) + if ( opts->read_names ) { r->start_row++; n_cases --; @@ -531,7 +653,7 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet, if ( r->row < r->start_row) { - if ( gri->read_names ) + if ( opts->read_names ) { var_spec [idx].name = xstrdup (text); } @@ -541,8 +663,8 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet, var_spec [idx].first_value = xmlStrdup (value); if (-1 == var_spec [idx].width ) - var_spec [idx].width = (gri->asw == -1) ? - ROUND_UP (strlen(text), SPREADSHEET_DEFAULT_WIDTH) : gri->asw; + var_spec [idx].width = (opts->asw == -1) ? + ROUND_UP (strlen(text), SPREADSHEET_DEFAULT_WIDTH) : opts->asw; } free (value); @@ -602,13 +724,15 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet, r->first_case = case_create (r->proto); case_set_missing (r->first_case); - int x = 0; + for ( i = 0 ; i < n_var_specs ; ++i ) { + const struct variable *var; + if ( (var_spec[i].name == NULL) && (var_spec[i].first_value == NULL)) continue; - const struct variable *var = dict_get_var (r->dict, x++); + var = dict_get_var (r->dict, x++); convert_xml_string_to_value (r->first_case, var, var_spec[i].first_value); @@ -622,16 +746,7 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet, free (var_spec); - - if (opts->cell_range == NULL) - { - opts->cell_range = c_xasprintf ("%c%d:%c%ld", - r->start_col + 'A', - r->start_row, - r->stop_col + 'A' + caseproto_get_n_widths (r->proto), - r->start_row + n_cases); - } - + return casereader_create_sequential (NULL, r->proto,