X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fods-reader.c;h=a36a020a0b063eb4b31ab84c94838ec071240b5c;hb=261eabd9113876a12994f7b419cbf3cf1ee98453;hp=c48fbfc1efbacce3f25b1f55f81ddc6c0fda68a6;hpb=910e343b5b70eaac0ab27ab7f235c899392dcb00;p=pspp diff --git a/src/data/ods-reader.c b/src/data/ods-reader.c index c48fbfc1ef..a36a020a0b 100644 --- a/src/data/ods-reader.c +++ b/src/data/ods-reader.c @@ -22,6 +22,7 @@ #include "data/data-in.h" +#include "gl/c-strtod.h" #include "gl/minmax.h" #include "gettext.h" @@ -66,14 +67,15 @@ ods_open_reader (const struct spreadsheet_read_options *opts, #include "gl/xalloc.h" static void ods_file_casereader_destroy (struct casereader *, void *); - static struct ccase *ods_file_casereader_read (struct casereader *, void *); +static struct casereader *ods_file_casereader_clone (struct casereader *, void *); + static const struct casereader_class ods_file_casereader_class = { ods_file_casereader_read, ods_file_casereader_destroy, - NULL, + ods_file_casereader_clone, NULL, }; @@ -104,27 +106,23 @@ struct ods_reader struct spreadsheet spreadsheet; struct zip_reader *zreader; xmlTextReaderPtr xtr; + int ref_cnt; enum reader_state state; - bool sheet_found; int row; int col; int node_type; int current_sheet; + xmlChar *current_sheet_name; - const xmlChar *target_sheet_name; + xmlChar *target_sheet_name; int target_sheet_index; - int wanted_row_start; - int wanted_col_start; - -#if 0 int start_row; int start_col; int stop_row; int stop_col; -#endif int col_span; @@ -140,6 +138,42 @@ struct ods_reader struct string ods_errs; }; +void +ods_destroy (struct spreadsheet *s) +{ + struct ods_reader *r = s; + if (--r->ref_cnt == 0) + { + int i; + + for (i = 0; i < r->n_allocated_sheets; ++i) + { + xmlFree (r->sheets[i].name); + } + + zip_reader_destroy (r->zreader); + free (r->sheets); + free (r); + } +} + + + +static bool +reading_target_sheet (const struct ods_reader *r) +{ + if (r->target_sheet_name != NULL) + { + if ( 0 == xmlStrcmp (r->target_sheet_name, r->current_sheet_name)) + return true; + } + + if (r->target_sheet_index == r->current_sheet + 1) + return true; + + return false; +} + static void process_node (struct ods_reader *r); @@ -147,17 +181,19 @@ static void process_node (struct ods_reader *r); const char * ods_get_sheet_name (struct spreadsheet *s, int n) { - int ret; struct ods_reader *or = (struct ods_reader *) s; assert (n < s->n_sheets); while ( - (or->n_allocated_sheets <= n) - && - (1 == (ret = xmlTextReaderRead (or->xtr))) + (or->n_allocated_sheets <= n) + || or->state != STATE_SPREADSHEET ) { + int ret = xmlTextReaderRead (or->xtr); + if ( ret != 1) + break; + process_node (or); } @@ -167,23 +203,23 @@ ods_get_sheet_name (struct spreadsheet *s, int n) char * ods_get_sheet_range (struct spreadsheet *s, int n) { - int ret = -1; struct ods_reader *or = (struct ods_reader *) s; assert (n < s->n_sheets); while ( - ( (or->n_allocated_sheets <= n) - || (or->sheets[n].stop_row == -1) ) - && - (1 == (ret = xmlTextReaderRead (or->xtr))) + || (or->sheets[n].stop_row == -1) + || or->state != STATE_SPREADSHEET ) { + int ret = xmlTextReaderRead (or->xtr); + if ( ret != 1) + break; + process_node (or); } - return create_cell_ref ( or->sheets[n].start_col, or->sheets[n].start_row, @@ -192,6 +228,16 @@ ods_get_sheet_range (struct spreadsheet *s, int n) } +static struct casereader * +ods_file_casereader_clone (struct casereader *r_, void *s) +{ + struct ods_reader *r = r_; + + printf ("%s:%d CLONE reffing %p %d\n", __FILE__, __LINE__, s, r->ref_cnt); + + return r_; +} + static void ods_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) { @@ -213,9 +259,16 @@ ods_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) caseproto_unref (r->proto); - // free (r); + xmlFree (r->current_sheet_name); + xmlFree (r->target_sheet_name); + + ods_destroy (r); } + + + + static void process_node (struct ods_reader *r) { @@ -234,6 +287,7 @@ process_node (struct ods_reader *r) { r->state = STATE_SPREADSHEET; r->current_sheet = -1; + r->current_sheet_name = NULL; } break; case STATE_SPREADSHEET: @@ -241,42 +295,31 @@ process_node (struct ods_reader *r) && (XML_READER_TYPE_ELEMENT == r->node_type)) { - xmlChar *value = xmlTextReaderGetAttribute (r->xtr, _xml ("table:name")); + xmlFree (r->current_sheet_name); + r->current_sheet_name = xmlTextReaderGetAttribute (r->xtr, _xml ("table:name")); ++r->current_sheet; if (r->current_sheet >= r->n_allocated_sheets) { + assert (r->current_sheet == r->n_allocated_sheets); r->sheets = xrealloc (r->sheets, sizeof (*r->sheets) * ++r->n_allocated_sheets); r->sheets[r->n_allocated_sheets - 1].start_col = -1; r->sheets[r->n_allocated_sheets - 1].stop_col = -1; r->sheets[r->n_allocated_sheets - 1].start_row = -1; r->sheets[r->n_allocated_sheets - 1].stop_row = -1; - r->sheets[r->n_allocated_sheets - 1].name = value; + r->sheets[r->n_allocated_sheets - 1].name = CHAR_CAST (char *, xmlStrdup (r->current_sheet_name)); } r->col = 0; r->row = 0; - if ( r->target_sheet_name != NULL) - { - if ( 0 == xmlStrcmp (value, r->target_sheet_name)) - { - r->sheet_found = true; - } - } - else if (r->target_sheet_index == r->current_sheet) - { - r->sheet_found = true; - } r->state = STATE_TABLE; } else if (0 == xmlStrcasecmp (name, _xml("office:spreadsheet")) && XML_READER_TYPE_ELEMENT == r->node_type) { r->state = STATE_INIT; - printf ("%s:%d End of Workbook %d: Rows %d Cols %d\n", __FILE__, __LINE__, - r->current_sheet, r->row, r->col); } break; case STATE_TABLE: @@ -294,6 +337,8 @@ process_node (struct ods_reader *r) if (! xmlTextReaderIsEmptyElement (r->xtr)) r->state = STATE_ROW; + + xmlFree (value); } else if (0 == xmlStrcasecmp (name, _xml("table:table")) && (XML_READER_TYPE_END_ELEMENT == r->node_type)) @@ -315,6 +360,8 @@ process_node (struct ods_reader *r) if (! xmlTextReaderIsEmptyElement (r->xtr)) r->state = STATE_CELL; + + xmlFree (value); } else if ( (0 == xmlStrcasecmp (name, _xml ("table:table-row"))) && @@ -428,20 +475,27 @@ convert_xml_to_value (struct ccase *c, const struct variable *var, value_copy_str_rpad (v, var_get_width (var), xmv->text, ' '); else { - const char *text ; const struct fmt_spec *fmt = var_get_write_format (var); enum fmt_category fc = fmt_get_category (fmt->type); assert ( fc != FMT_CAT_STRING); - text = - xmv->value ? CHAR_CAST (const char *, xmv->value) : CHAR_CAST (const char *, xmv->text); + if ( 0 == xmlStrcmp (xmv->type, _xml("float"))) + { + v->f = c_strtod (CHAR_CAST (const char *, xmv->value), NULL); + } + else + { + const char *text = xmv->value ? + CHAR_CAST (const char *, xmv->value) : CHAR_CAST (const char *, xmv->text); - free (data_in (ss_cstr (text), "UTF-8", - fmt->type, - v, - var_get_width (var), - "UTF-8")); + + free (data_in (ss_cstr (text), "UTF-8", + fmt->type, + v, + var_get_width (var), + "UTF-8")); + } } } @@ -458,7 +512,7 @@ get_sheet_count (struct zip_reader *zreader) return -1; mxtr = xmlReaderForIO ((xmlInputReadCallback) zip_member_read, - (xmlInputCloseCallback) zip_member_finish, + (xmlInputCloseCallback) NULL, meta, NULL, NULL, 0); while (1 == xmlTextReaderRead (mxtr)) @@ -467,14 +521,21 @@ get_sheet_count (struct zip_reader *zreader) if ( 0 == xmlStrcmp (name, _xml("meta:document-statistic"))) { xmlChar *attr = xmlTextReaderGetAttribute (mxtr, _xml ("meta:table-count")); - + if ( attr != NULL) { int s = _xmlchar_to_int (attr); + xmlFreeTextReader (mxtr); + xmlFree (name); + xmlFree (attr); return s; } + xmlFree (attr); } + xmlFree (name); } + + xmlFreeTextReader (mxtr); return -1; } @@ -501,9 +562,10 @@ init_reader (struct ods_reader *r, bool report_errors) if ( content == NULL) return false; - zip_member_ref (content); - + if (r->xtr) + xmlFreeTextReader (r->xtr); + zip_member_ref (content); xtr = xmlReaderForIO ((xmlInputReadCallback) zip_member_read, (xmlInputCloseCallback) zip_member_finish, content, NULL, NULL, @@ -514,6 +576,10 @@ init_reader (struct ods_reader *r, bool report_errors) r->xtr = xtr; r->spreadsheet.type = SPREADSHEET_ODS; + r->row = 0; + r->col = 0; + r->current_sheet = 0; + r->state = STATE_INIT; if (report_errors) xmlTextReaderSetErrorHandler (xtr, ods_error_handler, r); @@ -531,12 +597,20 @@ ods_probe (const char *filename, bool report_errors) struct zip_reader *zr = zip_reader_create (filename, &errs); if (zr == NULL) - return NULL; + { + if (report_errors) + { + msg (ME, _("Cannot open %s as a OpenDocument file: %s"), + filename, ds_cstr (&errs)); + } + return NULL; + } sheet_count = get_sheet_count (zr); r = xzalloc (sizeof *r); r->zreader = zr; + r->ref_cnt = 1; if (! init_reader (r, report_errors)) { @@ -577,13 +651,12 @@ ods_make_reader (struct spreadsheet *spreadsheet, assert (r); r->read_names = opts->read_names; ds_init_empty (&r->ods_errs); - + ++r->ref_cnt; if ( !init_reader (r, true)) goto error; -#if 0 - if ( opts->cell_range ) + if (opts->cell_range) { if ( ! convert_cell_ref (opts->cell_range, &r->start_col, &r->start_row, @@ -601,28 +674,16 @@ ods_make_reader (struct spreadsheet *spreadsheet, r->stop_col = -1; r->stop_row = -1; } -#endif r->state = STATE_INIT; - r->target_sheet_name = BAD_CAST opts->sheet_name; + r->target_sheet_name = xmlStrdup (BAD_CAST opts->sheet_name); r->target_sheet_index = opts->sheet_index; r->row = r->col = 0; -#if 0 - /* If CELLRANGE was given, then we know how many variables should be read */ - if ( r->stop_col != -1 ) - { - assert (var_spec == NULL); - n_var_specs = r->stop_col - r->start_col + 1; - var_spec = xrealloc (var_spec, sizeof (*var_spec) * n_var_specs); - memset (var_spec, '\0', sizeof (*var_spec) * n_var_specs); - } -#endif /* Advance to the start of the cells for the target sheet */ - while ( r->current_sheet < r->target_sheet_index - 1 || - r->state != STATE_TABLE - ) + while ( ! reading_target_sheet (r) + || r->state != STATE_ROW || r->row <= r->start_row ) { if (1 != (ret = xmlTextReaderRead (r->xtr))) break; @@ -630,7 +691,6 @@ ods_make_reader (struct spreadsheet *spreadsheet, process_node (r); } - if (ret < 1) { msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."), @@ -647,10 +707,16 @@ ods_make_reader (struct spreadsheet *spreadsheet, process_node (r); /* If the row is finished then stop for now */ - if (r->state == STATE_TABLE && r->row > r->wanted_row_start) + if (r->state == STATE_TABLE && r->row > r->start_row) break; - idx = r->col - r->wanted_col_start - 1; + idx = r->col - r->start_col -1 ; + + if ( idx < 0) + continue; + + if (r->stop_col != -1 && idx > r->stop_col - r->start_col) + continue; if (r->state == STATE_CELL_CONTENT && @@ -665,7 +731,7 @@ ods_make_reader (struct spreadsheet *spreadsheet, /* xrealloc (unlike realloc) doesn't initialise its memory to 0 */ memset (var_spec + n_var_specs, 0, - (n_var_specs - idx + 1) * sizeof (*var_spec)); + (idx - n_var_specs + 1) * sizeof (*var_spec)); n_var_specs = idx + 1; } var_spec[idx].firstval.text = 0; @@ -673,6 +739,7 @@ ods_make_reader (struct spreadsheet *spreadsheet, var_spec[idx].firstval.type = 0; var_spec [idx].name = strdup (CHAR_CAST (const char *, value)); + xmlFree (value); } } @@ -684,11 +751,20 @@ ods_make_reader (struct spreadsheet *spreadsheet, int idx; process_node (r); + if ( ! reading_target_sheet (r) ) + break; + /* If the row is finished then stop for now */ - if (r->state == STATE_TABLE && r->row > r->wanted_row_start + (opts->read_names ? 1 : 0)) + if (r->state == STATE_TABLE && + r->row > r->start_row + (opts->read_names ? 1 : 0)) break; - idx = r->col - r->wanted_col_start - 1; + idx = r->col - r->start_col - 1; + if (idx < 0) + continue; + + if (r->stop_col != -1 && idx > r->stop_col - r->start_col) + continue; if ( r->state == STATE_CELL && XML_READER_TYPE_ELEMENT == r->node_type) @@ -700,13 +776,17 @@ ods_make_reader (struct spreadsheet *spreadsheet, if ( r->state == STATE_CELL_CONTENT && XML_READER_TYPE_TEXT == r->node_type) { - if ( idx >= n_var_specs) + if (idx >= n_var_specs) { var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + 1)); + memset (var_spec + n_var_specs, + 0, + (idx - n_var_specs + 1) * sizeof (*var_spec)); + var_spec [idx].name = NULL; n_var_specs = idx + 1; } - + var_spec [idx].firstval.type = type; var_spec [idx].firstval.text = xmlTextReaderValue (r->xtr); var_spec [idx].firstval.value = val_string; @@ -716,6 +796,7 @@ ods_make_reader (struct spreadsheet *spreadsheet, } } + /* Create the dictionary and populate it */ r->spreadsheet.dict = r->dict = dict_create ( CHAR_CAST (const char *, xmlTextReaderConstEncoding (r->xtr))); @@ -773,9 +854,7 @@ ods_make_reader (struct spreadsheet *spreadsheet, break; } - // zip_reader_destroy (zreader); -#if 0 for ( i = 0 ; i < n_var_specs ; ++i ) { free (var_spec[i].firstval.type); @@ -785,7 +864,7 @@ ods_make_reader (struct spreadsheet *spreadsheet, } free (var_spec); -#endif + return casereader_create_sequential (NULL, @@ -795,8 +874,7 @@ ods_make_reader (struct spreadsheet *spreadsheet, error: - // zip_reader_destroy (zreader); - +#if 0 for ( i = 0 ; i < n_var_specs ; ++i ) { free (var_spec[i].firstval.type); @@ -809,6 +887,7 @@ ods_make_reader (struct spreadsheet *spreadsheet, dict_destroy (r->spreadsheet.dict); r->spreadsheet.dict = NULL; +#endif ods_file_casereader_destroy (NULL, r); @@ -823,75 +902,84 @@ ods_file_casereader_read (struct casereader *reader UNUSED, void *r_) { struct ccase *c = NULL; xmlChar *val_string = NULL; + xmlChar *type = NULL; struct ods_reader *r = r_; - int current_row = r->row; - if ( !r->used_first_case ) + if (!r->used_first_case) { r->used_first_case = true; return r->first_case; } - if ( r->state > STATE_INIT) + + /* Advance to the start of a row. (If there is one) */ + while (r->state != STATE_ROW + && 1 == xmlTextReaderRead (r->xtr) + ) { - c = case_create (r->proto); - case_set_missing (c); + process_node (r); } - if (r->state == STATE_SPREADSHEET - && - r->current_sheet == r->target_sheet_index - 1 - ) + + if ( ! reading_target_sheet (r) + || r->state < STATE_TABLE + || (r->stop_row != -1 && r->row > r->stop_row + 1) + ) { return NULL; } + c = case_create (r->proto); + case_set_missing (c); + while (1 == xmlTextReaderRead (r->xtr)) { process_node (r); - if (r->row > current_row && r->state == STATE_ROW) + if ( r->stop_row != -1 && r->row > r->stop_row + 1) break; - if ( r->state == STATE_CELL && - r->node_type == XML_READER_TYPE_ELEMENT ) + if (r->state == STATE_CELL && + r->node_type == XML_READER_TYPE_ELEMENT) { + type = xmlTextReaderGetAttribute (r->xtr, _xml ("office:value-type")); val_string = xmlTextReaderGetAttribute (r->xtr, _xml ("office:value")); } - if ( r->state == STATE_CELL_CONTENT && - r->node_type == XML_READER_TYPE_TEXT ) + if (r->state == STATE_CELL_CONTENT && + r->node_type == XML_READER_TYPE_TEXT) { int col; struct xml_value *xmv = xzalloc (sizeof *xmv); xmv->text = xmlTextReaderValue (r->xtr); - xmv->value = val_string; + xmv->value = val_string; + xmv->type = type; val_string = NULL; for (col = 0; col < r->col_span; ++col) { - const int idx = r->col + col - r->wanted_col_start - 1; - const struct variable *var = dict_get_var (r->dict, idx); + const struct variable *var; + const int idx = r->col - col - r->start_col - 1; + if (idx < 0) + continue; + if (r->stop_col != -1 && idx > r->stop_col - r->start_col ) + break; + if (idx >= dict_get_var_cnt (r->dict)) + break; + + var = dict_get_var (r->dict, idx); convert_xml_to_value (c, var, xmv); } - free (xmv->text); - free (xmv->value); + xmlFree (xmv->text); + xmlFree (xmv->value); + xmlFree (xmv->type); free (xmv); } - - if ( r->state < STATE_TABLE) + if ( r->state <= STATE_TABLE) break; } - if (NULL == c) - { - case_unref (c); - return NULL; - } - else - { - return c; - } + return c; } #endif