X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fgnumeric-reader.c;h=c87a749a1b4d983585a2e2667dacb48a928814a9;hb=1d05dd7fce36ca16a644705b0b05fd144f52463e;hp=cfdd45424d455481ecbd671777b7c4ef28eca8f9;hpb=7f8a1592964567db640915d3019af0e25e30bbb6;p=pspp diff --git a/src/data/gnumeric-reader.c b/src/data/gnumeric-reader.c index cfdd45424d..c87a749a1b 100644 --- a/src/data/gnumeric-reader.c +++ b/src/data/gnumeric-reader.c @@ -81,50 +81,103 @@ enum reader_state STATE_SHEET_START, /* Found the start of a sheet */ STATE_SHEET_NAME, /* Found the sheet name */ STATE_MAXROW, + STATE_MAXCOL, STATE_SHEET_FOUND, /* Found the sheet that we actually want */ STATE_CELLS_START, /* Found the start of the cell array */ STATE_CELL /* Found a cell */ }; +struct sheet_detail +{ + xmlChar *name; + + int start_col; + int stop_col; + int start_row; + int stop_row; + + int maxcol; + int maxrow; + + z_off_t offset; +}; + struct gnumeric_reader { struct spreadsheet spreadsheet; xmlTextReaderPtr xtr; + gzFile gz; enum reader_state state; - /* The total number of sheets in the "workbook" */ - int sheet_total ; - int row; int col; int min_col; int node_type; int sheet_index; + int start_col; + int stop_col; + int start_row; + int stop_row; + + struct sheet_detail *sheets; const xmlChar *target_sheet; int target_sheet_index; - int start_row; - int start_col; - int stop_row; - int stop_col; - struct caseproto *proto; struct dictionary *dict; struct ccase *first_case; bool used_first_case; }; + +const char * +gnumeric_get_sheet_name (struct spreadsheet *s, int n) +{ + struct gnumeric_reader *gr = (struct gnumeric_reader *) s; + assert (n < s->sheets); + + return gr->sheets[n].name; +} + + static void process_node (struct gnumeric_reader *r); +char * +gnumeric_get_sheet_range (struct spreadsheet *s, int n) +{ + int ret; + struct gnumeric_reader *gr = (struct gnumeric_reader *) s; + + assert (n < s->sheets); + + while ( + (gr->sheets[n].stop_col == -1) + && + (1 == (ret = xmlTextReaderRead (gr->xtr))) + ) + { + process_node (gr); + } + + + return create_cell_ref ( + gr->sheets[n].start_col, + gr->sheets[n].start_row, + gr->sheets[n].stop_col, + gr->sheets[n].stop_row); +} + + static void gnm_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) { + int i; struct gnumeric_reader *r = r_; if ( r == NULL) return ; @@ -137,6 +190,13 @@ gnm_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) caseproto_unref (r->proto); + for (i = 0; i < r->spreadsheet.sheets; ++i) + { + xmlFree (r->sheets[i].name); + } + + free (r->sheets); + free (r); } @@ -153,11 +213,12 @@ process_node (struct gnumeric_reader *r) switch ( r->state) { case STATE_PRE_INIT: + r->sheet_index = -1; if (0 == xmlStrcasecmp (name, _xml("gnm:SheetNameIndex")) && XML_READER_TYPE_ELEMENT == r->node_type) { r->state = STATE_SHEET_COUNT; - r->sheet_total = 0; + r->spreadsheet.sheets = 0; } break; @@ -165,13 +226,22 @@ process_node (struct gnumeric_reader *r) if (0 == xmlStrcasecmp (name, _xml("gnm:SheetName")) && XML_READER_TYPE_ELEMENT == r->node_type) { - r->sheet_total++; + struct sheet_detail *sd ; + r->spreadsheet.sheets++; + r->sheets = xrealloc (r->sheets, r->spreadsheet.sheets * sizeof *r->sheets); + sd = &r->sheets[r->spreadsheet.sheets - 1]; + sd->start_col = sd->stop_col = sd->start_row = sd->stop_row = -1; + sd->offset = -1; } else if (0 == xmlStrcasecmp (name, _xml("gnm:SheetNameIndex")) && XML_READER_TYPE_END_ELEMENT == r->node_type) { r->state = STATE_INIT; } + else if (XML_READER_TYPE_TEXT == r->node_type) + { + r->sheets [r->spreadsheet.sheets - 1].name = xmlTextReaderValue (r->xtr); + } break; case STATE_INIT: @@ -180,6 +250,7 @@ process_node (struct gnumeric_reader *r) { ++r->sheet_index; r->state = STATE_SHEET_START; + r->sheets[r->sheet_index].offset = gztell (r->gz); } break; case STATE_SHEET_START: @@ -195,16 +266,25 @@ process_node (struct gnumeric_reader *r) { r->state = STATE_INIT; } + else if (0 == xmlStrcasecmp (name, _xml("gnm:Sheet")) && + XML_READER_TYPE_END_ELEMENT == r->node_type) + { + r->state = STATE_INIT; + } else if (XML_READER_TYPE_TEXT == r->node_type) { - if ( r->target_sheet != NULL) + if ( r->target_sheet != NULL) { xmlChar *value = xmlTextReaderValue (r->xtr); if ( 0 == xmlStrcmp (value, r->target_sheet)) r->state = STATE_SHEET_FOUND; free (value); } - else if (r->target_sheet_index == r->sheet_index) + else if (r->target_sheet_index == r->sheet_index + 1) + { + r->state = STATE_SHEET_FOUND; + } + else if (r->target_sheet_index == -1) { r->state = STATE_SHEET_FOUND; } @@ -223,10 +303,15 @@ process_node (struct gnumeric_reader *r) { r->state = STATE_MAXROW; } + else if (0 == xmlStrcasecmp (name, _xml("gnm:MaxCol")) && + XML_READER_TYPE_ELEMENT == r->node_type) + { + r->state = STATE_MAXCOL; + } else if (0 == xmlStrcasecmp (name, _xml("gnm:Sheet")) && XML_READER_TYPE_END_ELEMENT == r->node_type) { - r->state = STATE_INIT; + r->state = STATE_INIT; } break; case STATE_MAXROW: @@ -235,6 +320,26 @@ process_node (struct gnumeric_reader *r) { r->state = STATE_SHEET_FOUND; } + else if (r->node_type == XML_READER_TYPE_TEXT) + { + xmlChar *value = xmlTextReaderValue (r->xtr); + r->sheets[r->sheet_index].maxrow = _xmlchar_to_int (value); + xmlFree (value); + } + break; + case STATE_MAXCOL: + if (0 == xmlStrcasecmp (name, _xml("gnm:MaxCol")) && + XML_READER_TYPE_END_ELEMENT == r->node_type) + { + r->state = STATE_SHEET_FOUND; + } + else if (r->node_type == XML_READER_TYPE_TEXT) + { + xmlChar *value = xmlTextReaderValue (r->xtr); + r->sheets[r->sheet_index].maxcol = _xmlchar_to_int (value); + xmlFree (value); + } + break; case STATE_CELLS_START: if (0 == xmlStrcasecmp (name, _xml ("gnm:Cell")) && XML_READER_TYPE_ELEMENT == r->node_type) @@ -252,11 +357,23 @@ process_node (struct gnumeric_reader *r) attr = xmlTextReaderGetAttribute (r->xtr, _xml ("Row")); r->row = _xmlchar_to_int (attr); free (attr); + if (r->sheets[r->sheet_index].start_row == -1) + { + r->sheets[r->sheet_index].start_row = r->row; + } + + if (r->sheets[r->sheet_index].start_col == -1) + { + r->sheets[r->sheet_index].start_col = r->col; + } } else if (0 == xmlStrcasecmp (name, _xml("gnm:Cells")) && XML_READER_TYPE_END_ELEMENT == r->node_type) - r->state = STATE_SHEET_NAME; - + { + r->sheets[r->sheet_index].stop_col = r->col; + r->sheets[r->sheet_index].stop_row = r->row; + r->state = STATE_SHEET_NAME; + } break; case STATE_CELL: if (0 == xmlStrcasecmp (name, _xml("gnm:Cell")) && @@ -305,6 +422,14 @@ struct var_spec xmlChar *first_value; }; + +void +gnumeric_destroy (struct spreadsheet *s) +{ + struct gnumeric_reader *r = (struct gnumeric *) s; + gnm_file_casereader_destroy (NULL, s); +} + struct spreadsheet * gnumeric_probe (const char *filename) { @@ -312,7 +437,8 @@ gnumeric_probe (const char *filename) struct gnumeric_reader *r = NULL; xmlTextReaderPtr xtr; - gzFile gz = gzopen (filename, "r"); + gzFile gz; + gz = gzopen (filename, "r"); if (NULL == gz) return NULL; @@ -322,14 +448,21 @@ gnumeric_probe (const char *filename) NULL, NULL, 0); if (xtr == NULL) - return NULL; + { + gzclose (gz); + return NULL; + } r = xzalloc (sizeof *r); + r->gz = gz; r->xtr = xtr; - r->sheet_total = -1; + r->spreadsheet.sheets = -1; r->state = STATE_PRE_INIT; + r->target_sheet = NULL; + r->target_sheet_index = -1; + /* Advance to the start of the workbook. This gives us some confidence that we are actually dealing with a gnumeric @@ -344,24 +477,24 @@ gnumeric_probe (const char *filename) if (ret != 1) { /* Not a gnumeric spreadsheet */ + xmlFreeTextReader (r->xtr); free (r); - gzclose (gz); return NULL; } r->spreadsheet.type = SPREADSHEET_GNUMERIC; - r->spreadsheet.sheets = r->sheet_total; - r->spreadsheet.make_reader = NULL; - + r->spreadsheet.file_name = filename; return &r->spreadsheet; } + struct casereader * -gnumeric_open_reader (const struct spreadsheet_read_info *gri, - struct spreadsheet_read_options *opts, - struct dictionary **dict) +gnumeric_make_reader (struct spreadsheet *spreadsheet, + const struct spreadsheet_read_info *gri, + struct spreadsheet_read_options *opts) { + struct gnumeric_reader *r = NULL; unsigned long int vstart = 0; int ret; casenumber n_cases = CASENUMBER_MAX; @@ -369,14 +502,6 @@ gnumeric_open_reader (const struct spreadsheet_read_info *gri, struct var_spec *var_spec = NULL; int n_var_specs = 0; - struct spreadsheet * spreadsheet = NULL; - struct gnumeric_reader *r = NULL; - - spreadsheet = gnumeric_probe (gri->file_name); - - if (spreadsheet == NULL) - goto error; - r = (struct gnumeric_reader *) (spreadsheet); if ( opts->cell_range ) @@ -398,11 +523,10 @@ gnumeric_open_reader (const struct spreadsheet_read_info *gri, r->stop_row = -1; } - r->target_sheet = BAD_CAST opts->sheet_name; r->target_sheet_index = opts->sheet_index; r->row = r->col = -1; - r->sheet_index = 0; + r->sheet_index = -1; /* Advance to the start of the cells for the target sheet */ while ( (r->state != STATE_CELL || r->row < r->start_row ) @@ -419,7 +543,6 @@ gnumeric_open_reader (const struct spreadsheet_read_info *gri, free (value); } - /* If a range has been given, then use that to calculate the number of cases */ if ( opts->cell_range) @@ -508,7 +631,7 @@ gnumeric_open_reader (const struct spreadsheet_read_info *gri, if ( enc == NULL) goto error; /* Create the dictionary and populate it */ - *dict = r->dict = dict_create (CHAR_CAST (const char *, enc)); + spreadsheet->dict = r->dict = dict_create (CHAR_CAST (const char *, enc)); } for (i = 0 ; i < n_var_specs ; ++i ) @@ -534,7 +657,7 @@ gnumeric_open_reader (const struct spreadsheet_read_info *gri, if ( n_var_specs == 0 ) { msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."), - gri->file_name); + spreadsheet->file_name); goto error; } @@ -565,7 +688,7 @@ gnumeric_open_reader (const struct spreadsheet_read_info *gri, if (opts->cell_range == NULL) { - opts->cell_range = c_xasprintf ("%c%d:%c%d", + opts->cell_range = c_xasprintf ("%c%d:%c%ld", r->start_col + 'A', r->start_row, r->stop_col + 'A' + caseproto_get_n_widths (r->proto), @@ -587,8 +710,8 @@ gnumeric_open_reader (const struct spreadsheet_read_info *gri, } free (var_spec); - dict_destroy (*dict); - *dict = NULL; + dict_destroy (spreadsheet->dict); + spreadsheet->dict = NULL; gnm_file_casereader_destroy (NULL, r);