X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fods-reader.c;h=a36a020a0b063eb4b31ab84c94838ec071240b5c;hb=261eabd9113876a12994f7b419cbf3cf1ee98453;hp=7c12af221562a00749738e05341b1f6dec366eed;hpb=ec31bc43b14f5882d16d5aba50f1e7f22fca089c;p=pspp diff --git a/src/data/ods-reader.c b/src/data/ods-reader.c index 7c12af2215..a36a020a0b 100644 --- a/src/data/ods-reader.c +++ b/src/data/ods-reader.c @@ -22,6 +22,7 @@ #include "data/data-in.h" +#include "gl/c-strtod.h" #include "gl/minmax.h" #include "gettext.h" @@ -66,14 +67,15 @@ ods_open_reader (const struct spreadsheet_read_options *opts, #include "gl/xalloc.h" static void ods_file_casereader_destroy (struct casereader *, void *); - static struct ccase *ods_file_casereader_read (struct casereader *, void *); +static struct casereader *ods_file_casereader_clone (struct casereader *, void *); + static const struct casereader_class ods_file_casereader_class = { ods_file_casereader_read, ods_file_casereader_destroy, - NULL, + ods_file_casereader_clone, NULL, }; @@ -104,6 +106,7 @@ struct ods_reader struct spreadsheet spreadsheet; struct zip_reader *zreader; xmlTextReaderPtr xtr; + int ref_cnt; enum reader_state state; int row; @@ -112,19 +115,14 @@ struct ods_reader int current_sheet; xmlChar *current_sheet_name; - const xmlChar *target_sheet_name; + xmlChar *target_sheet_name; int target_sheet_index; - int wanted_row_start; - int wanted_col_start; - -#if 0 int start_row; int start_col; int stop_row; int stop_col; -#endif int col_span; @@ -140,6 +138,26 @@ struct ods_reader struct string ods_errs; }; +void +ods_destroy (struct spreadsheet *s) +{ + struct ods_reader *r = s; + if (--r->ref_cnt == 0) + { + int i; + + for (i = 0; i < r->n_allocated_sheets; ++i) + { + xmlFree (r->sheets[i].name); + } + + zip_reader_destroy (r->zreader); + free (r->sheets); + free (r); + } +} + + static bool reading_target_sheet (const struct ods_reader *r) @@ -163,17 +181,19 @@ static void process_node (struct ods_reader *r); const char * ods_get_sheet_name (struct spreadsheet *s, int n) { - int ret; struct ods_reader *or = (struct ods_reader *) s; assert (n < s->n_sheets); while ( - (or->n_allocated_sheets <= n) - && - (1 == (ret = xmlTextReaderRead (or->xtr))) + (or->n_allocated_sheets <= n) + || or->state != STATE_SPREADSHEET ) { + int ret = xmlTextReaderRead (or->xtr); + if ( ret != 1) + break; + process_node (or); } @@ -183,23 +203,23 @@ ods_get_sheet_name (struct spreadsheet *s, int n) char * ods_get_sheet_range (struct spreadsheet *s, int n) { - int ret = -1; struct ods_reader *or = (struct ods_reader *) s; assert (n < s->n_sheets); while ( - ( (or->n_allocated_sheets <= n) - || (or->sheets[n].stop_row == -1) ) - && - (1 == (ret = xmlTextReaderRead (or->xtr))) + || (or->sheets[n].stop_row == -1) + || or->state != STATE_SPREADSHEET ) { + int ret = xmlTextReaderRead (or->xtr); + if ( ret != 1) + break; + process_node (or); } - return create_cell_ref ( or->sheets[n].start_col, or->sheets[n].start_row, @@ -208,6 +228,16 @@ ods_get_sheet_range (struct spreadsheet *s, int n) } +static struct casereader * +ods_file_casereader_clone (struct casereader *r_, void *s) +{ + struct ods_reader *r = r_; + + printf ("%s:%d CLONE reffing %p %d\n", __FILE__, __LINE__, s, r->ref_cnt); + + return r_; +} + static void ods_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) { @@ -229,9 +259,16 @@ ods_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) caseproto_unref (r->proto); - // free (r); + xmlFree (r->current_sheet_name); + xmlFree (r->target_sheet_name); + + ods_destroy (r); } + + + + static void process_node (struct ods_reader *r) { @@ -265,12 +302,13 @@ process_node (struct ods_reader *r) if (r->current_sheet >= r->n_allocated_sheets) { + assert (r->current_sheet == r->n_allocated_sheets); r->sheets = xrealloc (r->sheets, sizeof (*r->sheets) * ++r->n_allocated_sheets); r->sheets[r->n_allocated_sheets - 1].start_col = -1; r->sheets[r->n_allocated_sheets - 1].stop_col = -1; r->sheets[r->n_allocated_sheets - 1].start_row = -1; r->sheets[r->n_allocated_sheets - 1].stop_row = -1; - r->sheets[r->n_allocated_sheets - 1].name = xmlStrdup (r->current_sheet_name); + r->sheets[r->n_allocated_sheets - 1].name = CHAR_CAST (char *, xmlStrdup (r->current_sheet_name)); } r->col = 0; @@ -299,6 +337,8 @@ process_node (struct ods_reader *r) if (! xmlTextReaderIsEmptyElement (r->xtr)) r->state = STATE_ROW; + + xmlFree (value); } else if (0 == xmlStrcasecmp (name, _xml("table:table")) && (XML_READER_TYPE_END_ELEMENT == r->node_type)) @@ -307,7 +347,6 @@ process_node (struct ods_reader *r) } break; case STATE_ROW: - // printf ("%s:%d Name is %s\n", __FILE__, __LINE__, name); if ( (0 == xmlStrcasecmp (name, _xml ("table:table-cell"))) && (XML_READER_TYPE_ELEMENT == r->node_type)) @@ -319,10 +358,10 @@ process_node (struct ods_reader *r) r->col_span = value ? _xmlchar_to_int (value) : 1; r->col += r->col_span; - // printf ("%s:%d %s\n", __FILE__, __LINE__, value); - if (! xmlTextReaderIsEmptyElement (r->xtr)) r->state = STATE_CELL; + + xmlFree (value); } else if ( (0 == xmlStrcasecmp (name, _xml ("table:table-row"))) && @@ -436,20 +475,27 @@ convert_xml_to_value (struct ccase *c, const struct variable *var, value_copy_str_rpad (v, var_get_width (var), xmv->text, ' '); else { - const char *text ; const struct fmt_spec *fmt = var_get_write_format (var); enum fmt_category fc = fmt_get_category (fmt->type); assert ( fc != FMT_CAT_STRING); - text = - xmv->value ? CHAR_CAST (const char *, xmv->value) : CHAR_CAST (const char *, xmv->text); + if ( 0 == xmlStrcmp (xmv->type, _xml("float"))) + { + v->f = c_strtod (CHAR_CAST (const char *, xmv->value), NULL); + } + else + { + const char *text = xmv->value ? + CHAR_CAST (const char *, xmv->value) : CHAR_CAST (const char *, xmv->text); + - free (data_in (ss_cstr (text), "UTF-8", - fmt->type, - v, - var_get_width (var), - "UTF-8")); + free (data_in (ss_cstr (text), "UTF-8", + fmt->type, + v, + var_get_width (var), + "UTF-8")); + } } } @@ -466,7 +512,7 @@ get_sheet_count (struct zip_reader *zreader) return -1; mxtr = xmlReaderForIO ((xmlInputReadCallback) zip_member_read, - (xmlInputCloseCallback) zip_member_finish, + (xmlInputCloseCallback) NULL, meta, NULL, NULL, 0); while (1 == xmlTextReaderRead (mxtr)) @@ -475,14 +521,21 @@ get_sheet_count (struct zip_reader *zreader) if ( 0 == xmlStrcmp (name, _xml("meta:document-statistic"))) { xmlChar *attr = xmlTextReaderGetAttribute (mxtr, _xml ("meta:table-count")); - + if ( attr != NULL) { int s = _xmlchar_to_int (attr); + xmlFreeTextReader (mxtr); + xmlFree (name); + xmlFree (attr); return s; } + xmlFree (attr); } + xmlFree (name); } + + xmlFreeTextReader (mxtr); return -1; } @@ -509,9 +562,10 @@ init_reader (struct ods_reader *r, bool report_errors) if ( content == NULL) return false; - zip_member_ref (content); - + if (r->xtr) + xmlFreeTextReader (r->xtr); + zip_member_ref (content); xtr = xmlReaderForIO ((xmlInputReadCallback) zip_member_read, (xmlInputCloseCallback) zip_member_finish, content, NULL, NULL, @@ -522,6 +576,10 @@ init_reader (struct ods_reader *r, bool report_errors) r->xtr = xtr; r->spreadsheet.type = SPREADSHEET_ODS; + r->row = 0; + r->col = 0; + r->current_sheet = 0; + r->state = STATE_INIT; if (report_errors) xmlTextReaderSetErrorHandler (xtr, ods_error_handler, r); @@ -539,12 +597,20 @@ ods_probe (const char *filename, bool report_errors) struct zip_reader *zr = zip_reader_create (filename, &errs); if (zr == NULL) - return NULL; + { + if (report_errors) + { + msg (ME, _("Cannot open %s as a OpenDocument file: %s"), + filename, ds_cstr (&errs)); + } + return NULL; + } sheet_count = get_sheet_count (zr); r = xzalloc (sizeof *r); r->zreader = zr; + r->ref_cnt = 1; if (! init_reader (r, report_errors)) { @@ -585,13 +651,12 @@ ods_make_reader (struct spreadsheet *spreadsheet, assert (r); r->read_names = opts->read_names; ds_init_empty (&r->ods_errs); - + ++r->ref_cnt; if ( !init_reader (r, true)) goto error; -#if 0 - if ( opts->cell_range ) + if (opts->cell_range) { if ( ! convert_cell_ref (opts->cell_range, &r->start_col, &r->start_row, @@ -609,26 +674,16 @@ ods_make_reader (struct spreadsheet *spreadsheet, r->stop_col = -1; r->stop_row = -1; } -#endif r->state = STATE_INIT; - r->target_sheet_name = BAD_CAST opts->sheet_name; + r->target_sheet_name = xmlStrdup (BAD_CAST opts->sheet_name); r->target_sheet_index = opts->sheet_index; r->row = r->col = 0; -#if 0 - /* If CELLRANGE was given, then we know how many variables should be read */ - if ( r->stop_col != -1 ) - { - assert (var_spec == NULL); - n_var_specs = r->stop_col - r->start_col + 1; - var_spec = xrealloc (var_spec, sizeof (*var_spec) * n_var_specs); - memset (var_spec, '\0', sizeof (*var_spec) * n_var_specs); - } -#endif /* Advance to the start of the cells for the target sheet */ - while ( ! reading_target_sheet (r) || r->state != STATE_ROW ) + while ( ! reading_target_sheet (r) + || r->state != STATE_ROW || r->row <= r->start_row ) { if (1 != (ret = xmlTextReaderRead (r->xtr))) break; @@ -636,7 +691,6 @@ ods_make_reader (struct spreadsheet *spreadsheet, process_node (r); } - if (ret < 1) { msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."), @@ -653,10 +707,16 @@ ods_make_reader (struct spreadsheet *spreadsheet, process_node (r); /* If the row is finished then stop for now */ - if (r->state == STATE_TABLE && r->row > r->wanted_row_start) + if (r->state == STATE_TABLE && r->row > r->start_row) break; - idx = r->col - r->wanted_col_start - 1; + idx = r->col - r->start_col -1 ; + + if ( idx < 0) + continue; + + if (r->stop_col != -1 && idx > r->stop_col - r->start_col) + continue; if (r->state == STATE_CELL_CONTENT && @@ -671,7 +731,7 @@ ods_make_reader (struct spreadsheet *spreadsheet, /* xrealloc (unlike realloc) doesn't initialise its memory to 0 */ memset (var_spec + n_var_specs, 0, - (n_var_specs - idx + 1) * sizeof (*var_spec)); + (idx - n_var_specs + 1) * sizeof (*var_spec)); n_var_specs = idx + 1; } var_spec[idx].firstval.text = 0; @@ -679,6 +739,7 @@ ods_make_reader (struct spreadsheet *spreadsheet, var_spec[idx].firstval.type = 0; var_spec [idx].name = strdup (CHAR_CAST (const char *, value)); + xmlFree (value); } } @@ -690,11 +751,20 @@ ods_make_reader (struct spreadsheet *spreadsheet, int idx; process_node (r); + if ( ! reading_target_sheet (r) ) + break; + /* If the row is finished then stop for now */ - if (r->state == STATE_TABLE && r->row > r->wanted_row_start + (opts->read_names ? 1 : 0)) + if (r->state == STATE_TABLE && + r->row > r->start_row + (opts->read_names ? 1 : 0)) break; - idx = r->col - r->wanted_col_start - 1; + idx = r->col - r->start_col - 1; + if (idx < 0) + continue; + + if (r->stop_col != -1 && idx > r->stop_col - r->start_col) + continue; if ( r->state == STATE_CELL && XML_READER_TYPE_ELEMENT == r->node_type) @@ -706,13 +776,17 @@ ods_make_reader (struct spreadsheet *spreadsheet, if ( r->state == STATE_CELL_CONTENT && XML_READER_TYPE_TEXT == r->node_type) { - if ( idx >= n_var_specs) + if (idx >= n_var_specs) { var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + 1)); + memset (var_spec + n_var_specs, + 0, + (idx - n_var_specs + 1) * sizeof (*var_spec)); + var_spec [idx].name = NULL; n_var_specs = idx + 1; } - + var_spec [idx].firstval.type = type; var_spec [idx].firstval.text = xmlTextReaderValue (r->xtr); var_spec [idx].firstval.value = val_string; @@ -780,9 +854,7 @@ ods_make_reader (struct spreadsheet *spreadsheet, break; } - // zip_reader_destroy (zreader); -#if 0 for ( i = 0 ; i < n_var_specs ; ++i ) { free (var_spec[i].firstval.type); @@ -792,7 +864,7 @@ ods_make_reader (struct spreadsheet *spreadsheet, } free (var_spec); -#endif + return casereader_create_sequential (NULL, @@ -802,8 +874,7 @@ ods_make_reader (struct spreadsheet *spreadsheet, error: - // zip_reader_destroy (zreader); - +#if 0 for ( i = 0 ; i < n_var_specs ; ++i ) { free (var_spec[i].firstval.type); @@ -816,6 +887,7 @@ ods_make_reader (struct spreadsheet *spreadsheet, dict_destroy (r->spreadsheet.dict); r->spreadsheet.dict = NULL; +#endif ods_file_casereader_destroy (NULL, r); @@ -830,8 +902,8 @@ ods_file_casereader_read (struct casereader *reader UNUSED, void *r_) { struct ccase *c = NULL; xmlChar *val_string = NULL; + xmlChar *type = NULL; struct ods_reader *r = r_; - int current_row = r->row; if (!r->used_first_case) { @@ -841,13 +913,18 @@ ods_file_casereader_read (struct casereader *reader UNUSED, void *r_) /* Advance to the start of a row. (If there is one) */ - while (r->state != STATE_ROW && 1 == xmlTextReaderRead (r->xtr)) + while (r->state != STATE_ROW + && 1 == xmlTextReaderRead (r->xtr) + ) { process_node (r); } - if ( ! reading_target_sheet (r) || r->state < STATE_TABLE) + if ( ! reading_target_sheet (r) + || r->state < STATE_TABLE + || (r->stop_row != -1 && r->row > r->stop_row + 1) + ) { return NULL; } @@ -858,14 +935,14 @@ ods_file_casereader_read (struct casereader *reader UNUSED, void *r_) while (1 == xmlTextReaderRead (r->xtr)) { process_node (r); -#if 0 - if (r->row > current_row && r->state == STATE_ROW) + + if ( r->stop_row != -1 && r->row > r->stop_row + 1) break; -#endif - // printf ("%s:%d\n", __FILE__, __LINE__); + if (r->state == STATE_CELL && r->node_type == XML_READER_TYPE_ELEMENT) { + type = xmlTextReaderGetAttribute (r->xtr, _xml ("office:value-type")); val_string = xmlTextReaderGetAttribute (r->xtr, _xml ("office:value")); } @@ -875,18 +952,28 @@ ods_file_casereader_read (struct casereader *reader UNUSED, void *r_) int col; struct xml_value *xmv = xzalloc (sizeof *xmv); xmv->text = xmlTextReaderValue (r->xtr); - xmv->value = val_string; + xmv->value = val_string; + xmv->type = type; val_string = NULL; for (col = 0; col < r->col_span; ++col) { - const int idx = r->col + col - r->wanted_col_start - 1; - const struct variable *var = dict_get_var (r->dict, idx); + const struct variable *var; + const int idx = r->col - col - r->start_col - 1; + if (idx < 0) + continue; + if (r->stop_col != -1 && idx > r->stop_col - r->start_col ) + break; + if (idx >= dict_get_var_cnt (r->dict)) + break; + + var = dict_get_var (r->dict, idx); convert_xml_to_value (c, var, xmv); } - free (xmv->text); - free (xmv->value); + xmlFree (xmv->text); + xmlFree (xmv->value); + xmlFree (xmv->type); free (xmv); } if ( r->state <= STATE_TABLE)