X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fgnumeric-reader.c;h=69dd23689fc6b979d54540c23ae65511b46a885f;hb=8f5194875a0a3d41fef91825fd8378bb004d6f51;hp=b9def31cb6d7ee9c9b728cb1a76125f08716ec18;hpb=f93823750113914727d641dd7e526349fe98cba0;p=pspp diff --git a/src/data/gnumeric-reader.c b/src/data/gnumeric-reader.c index b9def31cb6..69dd23689f 100644 --- a/src/data/gnumeric-reader.c +++ b/src/data/gnumeric-reader.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,6 +20,7 @@ #include "libpspp/misc.h" #include "gl/minmax.h" +#include "gl/c-strtod.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -27,10 +28,12 @@ #include "spreadsheet-reader.h" +#include "c-xvasprintf.h" + #if !GNM_SUPPORT struct casereader * -gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dict) +gnumeric_open_reader (struct spreadsheet_read_info *gri, struct spreadsheet_read_options *opts, struct dictionary **dict) { msg (ME, _("Support for %s files was not compiled into this installation of PSPP"), "Gnumeric"); @@ -72,47 +75,108 @@ static const struct casereader_class gnm_file_casereader_class = enum reader_state { - STATE_INIT = 0, /* Initial state */ + STATE_PRE_INIT = 0, /* Initial state */ + STATE_SHEET_COUNT, /* Found the sheet index */ + STATE_INIT , /* Other Initial state */ STATE_SHEET_START, /* Found the start of a sheet */ STATE_SHEET_NAME, /* Found the sheet name */ STATE_MAXROW, + STATE_MAXCOL, STATE_SHEET_FOUND, /* Found the sheet that we actually want */ STATE_CELLS_START, /* Found the start of the cell array */ STATE_CELL /* Found a cell */ }; +struct sheet_detail +{ + xmlChar *name; + + int start_col; + int stop_col; + int start_row; + int stop_row; + + int maxcol; + int maxrow; +}; + struct gnumeric_reader { + struct spreadsheet spreadsheet; + + /* The libxml reader for this instance */ xmlTextReaderPtr xtr; + /* An internal state variable */ enum reader_state state; + int row; int col; + int min_col; int node_type; int sheet_index; + int start_col; + int stop_col; + int start_row; + int stop_row; + + struct sheet_detail *sheets; const xmlChar *target_sheet; int target_sheet_index; - int start_row; - int start_col; - int stop_row; - int stop_col; - struct caseproto *proto; struct dictionary *dict; struct ccase *first_case; bool used_first_case; }; + +const char * +gnumeric_get_sheet_name (struct spreadsheet *s, int n) +{ + struct gnumeric_reader *gr = (struct gnumeric_reader *) s; + assert (n < s->sheets); + + return gr->sheets[n].name; +} + + static void process_node (struct gnumeric_reader *r); +char * +gnumeric_get_sheet_range (struct spreadsheet *s, int n) +{ + int ret; + struct gnumeric_reader *gr = (struct gnumeric_reader *) s; + + assert (n < s->sheets); + + while ( + (gr->sheets[n].stop_col == -1) + && + (1 == (ret = xmlTextReaderRead (gr->xtr))) + ) + { + process_node (gr); + } + + + return create_cell_ref ( + gr->sheets[n].start_col, + gr->sheets[n].start_row, + gr->sheets[n].stop_col, + gr->sheets[n].stop_row); +} + + static void gnm_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) { + int i; struct gnumeric_reader *r = r_; if ( r == NULL) return ; @@ -125,6 +189,13 @@ gnm_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) caseproto_unref (r->proto); + for (i = 0; i < r->spreadsheet.sheets; ++i) + { + xmlFree (r->sheets[i].name); + } + + free (r->sheets); + free (r); } @@ -140,10 +211,42 @@ process_node (struct gnumeric_reader *r) switch ( r->state) { + case STATE_PRE_INIT: + r->sheet_index = -1; + if (0 == xmlStrcasecmp (name, _xml("gnm:SheetNameIndex")) && + XML_READER_TYPE_ELEMENT == r->node_type) + { + r->state = STATE_SHEET_COUNT; + r->spreadsheet.sheets = 0; + } + break; + + case STATE_SHEET_COUNT: + if (0 == xmlStrcasecmp (name, _xml("gnm:SheetName")) && + XML_READER_TYPE_ELEMENT == r->node_type) + { + struct sheet_detail *sd ; + r->spreadsheet.sheets++; + r->sheets = xrealloc (r->sheets, r->spreadsheet.sheets * sizeof *r->sheets); + sd = &r->sheets[r->spreadsheet.sheets - 1]; + sd->start_col = sd->stop_col = sd->start_row = sd->stop_row = -1; + } + else if (0 == xmlStrcasecmp (name, _xml("gnm:SheetNameIndex")) && + XML_READER_TYPE_END_ELEMENT == r->node_type) + { + r->state = STATE_INIT; + } + else if (XML_READER_TYPE_TEXT == r->node_type) + { + r->sheets [r->spreadsheet.sheets - 1].name = xmlTextReaderValue (r->xtr); + } + break; + case STATE_INIT: if (0 == xmlStrcasecmp (name, _xml("gnm:Sheet")) && XML_READER_TYPE_ELEMENT == r->node_type) { + ++r->sheet_index; r->state = STATE_SHEET_START; } break; @@ -153,29 +256,32 @@ process_node (struct gnumeric_reader *r) { r->state = STATE_SHEET_NAME; } - else if (0 == xmlStrcasecmp (name, _xml("gnm:Name")) && - XML_READER_TYPE_END_ELEMENT == r->node_type) - { - r->state = STATE_INIT; - } break; case STATE_SHEET_NAME: if (0 == xmlStrcasecmp (name, _xml("gnm:Name")) && XML_READER_TYPE_END_ELEMENT == r->node_type) { - r->state = STATE_SHEET_START; + r->state = STATE_INIT; + } + else if (0 == xmlStrcasecmp (name, _xml("gnm:Sheet")) && + XML_READER_TYPE_END_ELEMENT == r->node_type) + { + r->state = STATE_INIT; } else if (XML_READER_TYPE_TEXT == r->node_type) { - ++r->sheet_index; - if ( r->target_sheet != NULL) + if ( r->target_sheet != NULL) { xmlChar *value = xmlTextReaderValue (r->xtr); if ( 0 == xmlStrcmp (value, r->target_sheet)) r->state = STATE_SHEET_FOUND; free (value); } - else if (r->target_sheet_index == r->sheet_index) + else if (r->target_sheet_index == r->sheet_index + 1) + { + r->state = STATE_SHEET_FOUND; + } + else if (r->target_sheet_index == -1) { r->state = STATE_SHEET_FOUND; } @@ -185,6 +291,7 @@ process_node (struct gnumeric_reader *r) if (0 == xmlStrcasecmp (name, _xml("gnm:Cells")) && XML_READER_TYPE_ELEMENT == r->node_type) { + r->min_col = INT_MAX; if (! xmlTextReaderIsEmptyElement (r->xtr)) r->state = STATE_CELLS_START; } @@ -193,10 +300,15 @@ process_node (struct gnumeric_reader *r) { r->state = STATE_MAXROW; } + else if (0 == xmlStrcasecmp (name, _xml("gnm:MaxCol")) && + XML_READER_TYPE_ELEMENT == r->node_type) + { + r->state = STATE_MAXCOL; + } else if (0 == xmlStrcasecmp (name, _xml("gnm:Sheet")) && XML_READER_TYPE_END_ELEMENT == r->node_type) { - r->state = STATE_INIT; + r->state = STATE_INIT; } break; case STATE_MAXROW: @@ -205,6 +317,26 @@ process_node (struct gnumeric_reader *r) { r->state = STATE_SHEET_FOUND; } + else if (r->node_type == XML_READER_TYPE_TEXT) + { + xmlChar *value = xmlTextReaderValue (r->xtr); + r->sheets[r->sheet_index].maxrow = _xmlchar_to_int (value); + xmlFree (value); + } + break; + case STATE_MAXCOL: + if (0 == xmlStrcasecmp (name, _xml("gnm:MaxCol")) && + XML_READER_TYPE_END_ELEMENT == r->node_type) + { + r->state = STATE_SHEET_FOUND; + } + else if (r->node_type == XML_READER_TYPE_TEXT) + { + xmlChar *value = xmlTextReaderValue (r->xtr); + r->sheets[r->sheet_index].maxcol = _xmlchar_to_int (value); + xmlFree (value); + } + break; case STATE_CELLS_START: if (0 == xmlStrcasecmp (name, _xml ("gnm:Cell")) && XML_READER_TYPE_ELEMENT == r->node_type) @@ -216,19 +348,36 @@ process_node (struct gnumeric_reader *r) r->col = _xmlchar_to_int (attr); free (attr); + if (r->col < r->min_col) + r->min_col = r->col; + attr = xmlTextReaderGetAttribute (r->xtr, _xml ("Row")); r->row = _xmlchar_to_int (attr); free (attr); + if (r->sheets[r->sheet_index].start_row == -1) + { + r->sheets[r->sheet_index].start_row = r->row; + } + + if (r->sheets[r->sheet_index].start_col == -1) + { + r->sheets[r->sheet_index].start_col = r->col; + } } else if (0 == xmlStrcasecmp (name, _xml("gnm:Cells")) && XML_READER_TYPE_END_ELEMENT == r->node_type) - r->state = STATE_SHEET_NAME; - + { + r->sheets[r->sheet_index].stop_col = r->col; + r->sheets[r->sheet_index].stop_row = r->row; + r->state = STATE_SHEET_NAME; + } break; case STATE_CELL: if (0 == xmlStrcasecmp (name, _xml("gnm:Cell")) && XML_READER_TYPE_END_ELEMENT == r->node_type) - r->state = STATE_CELLS_START; + { + r->state = STATE_CELLS_START; + } break; default: break; @@ -257,7 +406,7 @@ convert_xml_string_to_value (struct ccase *c, const struct variable *var, char *endptr; errno = 0; - v->f = strtod (text, &endptr); + v->f = c_strtod (text, &endptr); if ( errno != 0 || endptr == text) v->f = SYSMIS; } @@ -270,61 +419,126 @@ struct var_spec xmlChar *first_value; }; -struct casereader * -gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dict) + +void +gnumeric_destroy (struct spreadsheet *s) { - unsigned long int vstart = 0; + struct gnumeric_reader *r = (struct gnumeric *) s; + gnm_file_casereader_destroy (NULL, s); +} + + +static struct gnumeric_reader * +gnumeric_reopen (struct gnumeric_reader *r, const char *filename) +{ int ret; - casenumber n_cases = CASENUMBER_MAX; - int i; - struct var_spec *var_spec = NULL; - int n_var_specs = 0; - struct gnumeric_reader *r = NULL; + xmlTextReaderPtr xtr; + gzFile gz; - gzFile gz = gzopen (gri->file_name, "r"); + assert (r == NULL || filename == NULL); - if ( NULL == gz) - { - msg (ME, _("Error opening `%s' for reading as a Gnumeric file: %s."), - gri->file_name, strerror (errno)); + if (r && r->xtr) + xmlFreeTextReader (r->xtr); - goto error; - } + if (filename) + gz = gzopen (filename, "r"); + else + gz = gzopen ( r->spreadsheet.file_name, "r"); - r = xzalloc (sizeof *r); + if (NULL == gz) + return NULL; - r->xtr = xmlReaderForIO ((xmlInputReadCallback) gzread, + xtr = xmlReaderForIO ((xmlInputReadCallback) gzread, (xmlInputCloseCallback) gzclose, gz, NULL, NULL, 0); - if ( r->xtr == NULL) - goto error; + if (xtr == NULL) + { + gzclose (gz); + return NULL; + } + + if (r == NULL) + { + r = xzalloc (sizeof *r); + r->spreadsheet.sheets = -1; + r->spreadsheet.file_name = filename; + } + + r->target_sheet = NULL; + r->target_sheet_index = -1; - if ( gri->cell_range ) + r->row = r->col = -1; + r->state = STATE_PRE_INIT; + r->xtr = xtr; + + /* Advance to the start of the workbook. + This gives us some confidence that we are actually dealing with a gnumeric + spreadsheet. + */ + while ( (r->state != STATE_INIT ) + && 1 == (ret = xmlTextReaderRead (r->xtr))) { - if ( ! convert_cell_ref (gri->cell_range, + process_node (r); + } + + r->spreadsheet.type = SPREADSHEET_GNUMERIC; + + return r; +} + + +struct spreadsheet * +gnumeric_probe (const char *filename) +{ + struct gnumeric_reader *r = gnumeric_reopen (NULL, filename); + + return &r->spreadsheet; +} + + +struct casereader * +gnumeric_make_reader (struct spreadsheet *spreadsheet, + const struct spreadsheet_read_info *gri, + struct spreadsheet_read_options *opts) +{ + struct gnumeric_reader *r = NULL; + unsigned long int vstart = 0; + int ret; + casenumber n_cases = CASENUMBER_MAX; + int i; + struct var_spec *var_spec = NULL; + int n_var_specs = 0; + + r = (struct gnumeric_reader *) (spreadsheet); + + if (r->row != -1) + r = gnumeric_reopen (r, NULL); + + if ( opts->cell_range ) + { + if ( ! convert_cell_ref (opts->cell_range, &r->start_col, &r->start_row, &r->stop_col, &r->stop_row)) { msg (SE, _("Invalid cell range `%s'"), - gri->cell_range); + opts->cell_range); goto error; } } else { - r->start_col = 0; + r->start_col = -1; r->start_row = 0; r->stop_col = -1; r->stop_row = -1; } - r->state = STATE_INIT; - r->target_sheet = BAD_CAST gri->sheet_name; - r->target_sheet_index = gri->sheet_index; + r->target_sheet = BAD_CAST opts->sheet_name; + r->target_sheet_index = opts->sheet_index; r->row = r->col = -1; - r->sheet_index = 0; + r->sheet_index = -1; /* Advance to the start of the cells for the target sheet */ while ( (r->state != STATE_CELL || r->row < r->start_row ) @@ -341,10 +555,9 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic free (value); } - /* If a range has been given, then use that to calculate the number of cases */ - if ( gri->cell_range) + if ( opts->cell_range) { n_cases = MIN (n_cases, r->stop_row - r->start_row + 1); } @@ -375,11 +588,15 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic if ( idx >= n_var_specs ) { + int i; + var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + 1)); + for (i = n_var_specs; i <= idx; ++i) + { + var_spec [i].name = NULL; + var_spec [i].width = -1; + var_spec [i].first_value = NULL; + } n_var_specs = idx + 1 ; - var_spec = xrealloc (var_spec, sizeof (*var_spec) * n_var_specs); - var_spec [idx].name = NULL; - var_spec [idx].width = -1; - var_spec [idx].first_value = NULL; } if ( r->node_type == XML_READER_TYPE_TEXT ) @@ -421,15 +638,21 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic } } - - /* Create the dictionary and populate it */ - *dict = r->dict = dict_create ( - CHAR_CAST (const char *, xmlTextReaderConstEncoding (r->xtr))); + { + const xmlChar *enc = xmlTextReaderConstEncoding (r->xtr); + if ( enc == NULL) + goto error; + /* Create the dictionary and populate it */ + spreadsheet->dict = r->dict = dict_create (CHAR_CAST (const char *, enc)); + } for (i = 0 ; i < n_var_specs ; ++i ) { char *name; + if ( (var_spec[i].name == NULL) && (var_spec[i].first_value == NULL)) + continue; + /* Probably no data exists for this variable, so allocate a default width */ if ( var_spec[i].width == -1 ) @@ -446,7 +669,7 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic if ( n_var_specs == 0 ) { msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."), - gri->file_name); + spreadsheet->file_name); goto error; } @@ -454,9 +677,13 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic r->first_case = case_create (r->proto); case_set_missing (r->first_case); + int x = 0; for ( i = 0 ; i < n_var_specs ; ++i ) { - const struct variable *var = dict_get_var (r->dict, i); + if ( (var_spec[i].name == NULL) && (var_spec[i].first_value == NULL)) + continue; + + const struct variable *var = dict_get_var (r->dict, x++); convert_xml_string_to_value (r->first_case, var, var_spec[i].first_value); @@ -469,7 +696,17 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic } free (var_spec); - + + + if (opts->cell_range == NULL) + { + opts->cell_range = c_xasprintf ("%c%d:%c%ld", + r->start_col + 'A', + r->start_row, + r->stop_col + 'A' + caseproto_get_n_widths (r->proto), + r->start_row + n_cases); + } + return casereader_create_sequential (NULL, r->proto, @@ -485,7 +722,8 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic } free (var_spec); - dict_destroy (*dict); + dict_destroy (spreadsheet->dict); + spreadsheet->dict = NULL; gnm_file_casereader_destroy (NULL, r); @@ -513,6 +751,9 @@ gnm_file_casereader_read (struct casereader *reader UNUSED, void *r_) c = case_create (r->proto); case_set_missing (c); + if (r->start_col == -1) + r->start_col = r->min_col; + while ((r->state == STATE_CELL || r->state == STATE_CELLS_START ) && r->row == current_row && (ret = xmlTextReaderRead (r->xtr))) {