From: John Darrington Date: Sun, 17 Feb 2013 17:30:14 +0000 (+0100) Subject: Got the ODS reader model (sort of) working. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5c7ef80c2934c94d9739940d4f62f0f9815b852e;p=pspp Got the ODS reader model (sort of) working. --- diff --git a/src/data/gnumeric-reader.c b/src/data/gnumeric-reader.c index aaa2f9e143..514616ace7 100644 --- a/src/data/gnumeric-reader.c +++ b/src/data/gnumeric-reader.c @@ -438,7 +438,8 @@ gnumeric_error_handler (void *ctx, const char *mesg, { struct gnumeric_reader *r = ctx; - msg (MW, _("There was a problem whilst reading the Gnumeric file `%s' (near line %d): `%s'"), + msg (MW, _("There was a problem whilst reading the %s file `%s' (near line %d): `%s'"), + "Gnumeric", r->spreadsheet.file_name, xmlTextReaderLocatorLineNumber (loc), mesg); @@ -504,6 +505,7 @@ gnumeric_reopen (struct gnumeric_reader *r, const char *filename, bool show_erro process_node (r); } + if ( ret != 1) { /* Does not seem to be a gnumeric file */ diff --git a/src/data/gnumeric-reader.h b/src/data/gnumeric-reader.h index a544569c68..e0a5f5cad0 100644 --- a/src/data/gnumeric-reader.h +++ b/src/data/gnumeric-reader.h @@ -27,7 +27,6 @@ struct spreadsheet_read_options; struct spreadsheet *gnumeric_probe (const char *filename, bool report_errors); const char * gnumeric_get_sheet_name (struct spreadsheet *s, int n); - char * gnumeric_get_sheet_range (struct spreadsheet *s, int n); void gnumeric_destroy (struct spreadsheet *); diff --git a/src/data/ods-reader.c b/src/data/ods-reader.c index 77da78ca51..7079027f3d 100644 --- a/src/data/ods-reader.c +++ b/src/data/ods-reader.c @@ -18,6 +18,7 @@ #include "libpspp/message.h" #include "libpspp/misc.h" +#include "libpspp/assertion.h" #include "data/data-in.h" @@ -76,6 +77,21 @@ static const struct casereader_class ods_file_casereader_class = NULL, }; +struct sheet_detail +{ + /* The name of the sheet (utf8 encoding) */ + char *name; + + int start_col; + int stop_col; + int start_row; + int stop_row; + + int maxcol; + int maxrow; +}; + + enum reader_state { STATE_INIT = 0, /* Initial state */ @@ -98,6 +114,8 @@ struct ods_reader int col; int node_type; int sheet_index; + int max_col; + int min_col; const xmlChar *target_sheet; int target_sheet_index; @@ -107,6 +125,9 @@ struct ods_reader int stop_row; int stop_col; + struct sheet_detail *sheets; + int n_allocated_sheets; + struct caseproto *proto; struct dictionary *dict; struct ccase *first_case; @@ -114,11 +135,57 @@ struct ods_reader bool read_names; struct string ods_errs; - int span; }; + static void process_node (struct ods_reader *r); + +const char * +ods_get_sheet_name (struct spreadsheet *s, int n) +{ + int ret; + struct ods_reader *or = (struct ods_reader *) s; + + assert (n < s->n_sheets); + + while ( + (or->n_allocated_sheets <= n) + && + (1 == (ret = xmlTextReaderRead (or->xtr))) + ) + { + process_node (or); + } + + return or->sheets[n].name; +} + +char * +ods_get_sheet_range (struct spreadsheet *s, int n) +{ + int ret; + struct ods_reader *or = (struct ods_reader *) s; + + assert (n < s->n_sheets); + + while ( + (or->n_allocated_sheets <= n || or->sheets[n].stop_col == -1) + && + (1 == (ret = xmlTextReaderRead (or->xtr))) + ) + { + process_node (or); + } + + return create_cell_ref ( + or->sheets[n].start_col, + or->sheets[n].start_row, + or->sheets[n].stop_col, + or->sheets[n].stop_row); +} + + static void ods_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) { @@ -149,111 +216,151 @@ process_node (struct ods_reader *r) if (name == NULL) name = xmlStrdup (_xml ("--")); + r->node_type = xmlTextReaderNodeType (r->xtr); - switch ( r->state) + switch (r->state) { case STATE_INIT: if (0 == xmlStrcasecmp (name, _xml("office:spreadsheet")) && XML_READER_TYPE_ELEMENT == r->node_type) { + printf ("%s:%d Start of Workbook %d: Rows %d\n", __FILE__, __LINE__, + r->sheet_index, r->row); + r->state = STATE_SPREADSHEET; } break; case STATE_SPREADSHEET: - if (0 == xmlStrcasecmp (name, _xml("table:table"))) + if (0 == xmlStrcasecmp (name, _xml("table:table")) + && + (XML_READER_TYPE_ELEMENT == r->node_type)) { - if (XML_READER_TYPE_ELEMENT == r->node_type) + xmlChar *value = xmlTextReaderGetAttribute (r->xtr, _xml ("table:name")); + r->sheets = xrealloc (r->sheets, sizeof (*r->sheets) * ++r->n_allocated_sheets); + r->sheets[r->n_allocated_sheets - 1].start_col = -1; + r->sheets[r->n_allocated_sheets - 1].stop_col = -1; + r->sheets[r->n_allocated_sheets - 1].start_row = -1; + r->sheets[r->n_allocated_sheets - 1].stop_row = -1; + r->sheets[r->n_allocated_sheets - 1].name = value; + r->col = -1; + r->row = -1; + r->max_col = -1; + r->min_col = INT_MAX; + ++r->sheet_index; + + printf ("%s:%d Start of SHEET %d: Rows %d\n", __FILE__, __LINE__, + r->sheet_index, r->row); + + if ( r->target_sheet != NULL) { - r->col = -1; - r->row = -1; - ++r->sheet_index; - if ( r->target_sheet != NULL) - { - xmlChar *value = xmlTextReaderGetAttribute (r->xtr, _xml ("table:name")); - if ( 0 == xmlStrcmp (value, r->target_sheet)) - { - r->sheet_found = true; - r->state = STATE_TABLE; - } - free (value); - } - else if (r->target_sheet_index == r->sheet_index) + if ( 0 == xmlStrcmp (value, r->target_sheet)) { r->sheet_found = true; - r->state = STATE_TABLE; } - else if ( r->target_sheet_index == -1) - r->state = STATE_TABLE; } + else if (r->target_sheet_index == r->sheet_index) + { + r->sheet_found = true; + } + r->state = STATE_TABLE; } - else if (XML_READER_TYPE_END_ELEMENT == r->node_type - && r->sheet_found) + else if (0 == xmlStrcasecmp (name, _xml("office:spreadsheet")) && + XML_READER_TYPE_ELEMENT == r->node_type) { r->state = STATE_INIT; + printf ("%s:%d End of Workbook %d: Rows %d Cols %d\n", __FILE__, __LINE__, + r->sheet_index, r->row, r->col); } - break; + break; case STATE_TABLE: - if (0 == xmlStrcasecmp (name, _xml("table:table-row")) ) + if (0 == xmlStrcasecmp (name, _xml("table:table-row")) && + (XML_READER_TYPE_ELEMENT == r->node_type)) { - if ( XML_READER_TYPE_ELEMENT == r->node_type) - { - if (! xmlTextReaderIsEmptyElement (r->xtr)) - { - r->state = STATE_ROW; - } - r->row++; - r->span = 1; - } + xmlChar *value = + xmlTextReaderGetAttribute (r->xtr, + _xml ("table:number-rows-repeated")); + + int row_span = value ? _xmlchar_to_int (value) : 1; + + printf ("%s:%d Start of Row %d Span %d\n", __FILE__, __LINE__, r->row, row_span); + r->row += row_span; + r->col = -1; + + if (! xmlTextReaderIsEmptyElement (r->xtr)) + r->state = STATE_ROW; } - else if (XML_READER_TYPE_END_ELEMENT == r->node_type) + else if (0 == xmlStrcasecmp (name, _xml("table:table")) && + (XML_READER_TYPE_END_ELEMENT == r->node_type)) { + printf ("%s:%d End of SHEET %d %d,%d\n", __FILE__, __LINE__, r->sheet_index, r->row, r->col); + r->sheets[r->sheet_index].stop_row = r->row; + r->sheets[r->sheet_index].stop_col = r->max_col - 1; + r->sheets[r->sheet_index].start_col = r->min_col; r->state = STATE_SPREADSHEET; + } break; case STATE_ROW: - if (0 == xmlStrcasecmp (name, _xml ("table:table-cell"))) + if ( (0 == xmlStrcasecmp (name, _xml ("table:table-cell"))) + && + (XML_READER_TYPE_ELEMENT == r->node_type)) { - if ( XML_READER_TYPE_ELEMENT == r->node_type) - { - xmlChar *value = - xmlTextReaderGetAttribute (r->xtr, - _xml ("table:number-columns-repeated")); - r->col += r->span; - r->span = value ? _xmlchar_to_int (value) : 1; - free (value); - if (! xmlTextReaderIsEmptyElement (r->xtr)) - { - r->state = STATE_CELL; - } - } + xmlChar *value = + xmlTextReaderGetAttribute (r->xtr, + _xml ("table:number-columns-repeated")); + + int col_span = value ? _xmlchar_to_int (value) : 0; + + r->col += col_span; + r->col ++; + + if (r->min_col > r->col) + r->min_col = r->col; + + printf ("%s:%d Start of Cell %d, %d\n", __FILE__, __LINE__, r->row, r->col); + if (! xmlTextReaderIsEmptyElement (r->xtr)) + r->state = STATE_CELL; } - else if (XML_READER_TYPE_END_ELEMENT == r->node_type) + else if ( (0 == xmlStrcasecmp (name, _xml ("table:table-row"))) + && + (XML_READER_TYPE_END_ELEMENT == r->node_type)) { - r->state = STATE_TABLE; - r->col = -1; /* Set the span back to the default */ - r->span = 1; + printf ("%s:%d End of Cell: %d, %d\n", __FILE__, __LINE__, r->row, r->col); + if ( r->max_col < r->col) + r->max_col = r->col; + r->state = STATE_TABLE; } break; case STATE_CELL: - if (0 == xmlStrcasecmp (name, _xml("text:p"))) + if ( (0 == xmlStrcasecmp (name, _xml("text:p"))) + && + ( XML_READER_TYPE_ELEMENT == r->node_type)) { - if ( XML_READER_TYPE_ELEMENT == r->node_type) - { - r->state = STATE_CELL_CONTENT; - } + // printf ("%s:%d Start of Cell Contents %d\n", __FILE__, __LINE__, r->row); + if (! xmlTextReaderIsEmptyElement (r->xtr)) + r->state = STATE_CELL_CONTENT; } - else if (XML_READER_TYPE_END_ELEMENT == r->node_type) + else if + ( (0 == xmlStrcasecmp (name, _xml("table:table-cell"))) + && + (XML_READER_TYPE_END_ELEMENT == r->node_type) + ) { + // printf ("%s:%d End of Cell contents: Rows %d\n", __FILE__, __LINE__, r->row); r->state = STATE_ROW; } break; case STATE_CELL_CONTENT: - if (XML_READER_TYPE_TEXT != r->node_type) - r->state = STATE_CELL; + if (r->sheets[r->sheet_index].start_row == -1) + r->sheets[r->sheet_index].start_row = r->row; + // printf ("%s:%d Cell contents: Rows %d\n", __FILE__, __LINE__, r->row); + /* if (XML_READER_TYPE_TEXT != r->node_type) */ + r->state = STATE_CELL; break; default: + NOT_REACHED (); break; }; @@ -349,7 +456,7 @@ get_sheet_count (struct zip_reader *zreader) mxtr = xmlReaderForIO ((xmlInputReadCallback) zip_member_read, (xmlInputCloseCallback) zip_member_finish, - meta, NULL, NULL, XML_PARSE_RECOVER); + meta, NULL, NULL, 0); while (1 == xmlTextReaderRead (mxtr)) { @@ -368,7 +475,22 @@ get_sheet_count (struct zip_reader *zreader) return -1; } -struct spreadsheet *ods_probe (const char *filename, bool report_errors) +static void +ods_error_handler (void *ctx, const char *mesg, + UNUSED xmlParserSeverities sev, xmlTextReaderLocatorPtr loc) +{ + struct ods_reader *r = ctx; + + msg (MW, _("There was a problem whilst reading the %s file `%s' (near line %d): `%s'"), + "ODF", + r->spreadsheet.file_name, + xmlTextReaderLocatorLineNumber (loc), + mesg); +} + + +struct spreadsheet * +ods_probe (const char *filename, bool report_errors) { struct ods_reader *r; struct string errs; @@ -395,8 +517,9 @@ struct spreadsheet *ods_probe (const char *filename, bool report_errors) sheet_count = get_sheet_count (zreader); xtr = xmlReaderForIO ((xmlInputReadCallback) zip_member_read, - (xmlInputCloseCallback) zip_member_finish, - content, NULL, NULL, XML_PARSE_RECOVER); + (xmlInputCloseCallback) zip_member_finish, + content, NULL, NULL, + report_errors ? 0 : (XML_PARSE_NOERROR | XML_PARSE_NOWARNING) ); if ( xtr == NULL) goto error; @@ -405,9 +528,18 @@ struct spreadsheet *ods_probe (const char *filename, bool report_errors) r->xtr = xtr; r->spreadsheet.type = SPREADSHEET_ODS; r->spreadsheet.n_sheets = sheet_count; + r->n_allocated_sheets = 0; + r->sheet_index = -1; + r->sheets = NULL; + + if (report_errors) + xmlTextReaderSetErrorHandler (xtr, ods_error_handler, r); + ds_destroy (&errs); + printf ("%s:%d\n", __FILE__, __LINE__); + r->spreadsheet.file_name = filename; return &r->spreadsheet; @@ -713,6 +845,7 @@ ods_file_casereader_read (struct casereader *reader UNUSED, void *r_) xmv->value = val_string; val_string = NULL; + /* for (col = 0; col < r->span ; ++col) { const int idx = r->col + col - r->start_col; @@ -721,6 +854,7 @@ ods_file_casereader_read (struct casereader *reader UNUSED, void *r_) convert_xml_to_value (c, var, xmv); } + */ free (xmv->text); free (xmv->value); free (xmv); diff --git a/src/data/ods-reader.h b/src/data/ods-reader.h index 1a5525d8f6..3d939a8048 100644 --- a/src/data/ods-reader.h +++ b/src/data/ods-reader.h @@ -19,9 +19,12 @@ struct casereader; struct dictionary; -struct spreadsheet_read_info; + struct spreadsheet_read_options; +struct spreadsheet; +const char * ods_get_sheet_name (struct spreadsheet *s, int n); +char * ods_get_sheet_range (struct spreadsheet *s, int n); struct spreadsheet *ods_probe (const char *filename, bool report_errors); diff --git a/src/data/spreadsheet-reader.c b/src/data/spreadsheet-reader.c index 29e84e6340..efaafa30c5 100644 --- a/src/data/spreadsheet-reader.c +++ b/src/data/spreadsheet-reader.c @@ -19,6 +19,7 @@ #include "spreadsheet-reader.h" #include "gnumeric-reader.h" +#include "ods-reader.h" #include #include @@ -32,7 +33,7 @@ spreadsheet_open (const char *filename) { struct spreadsheet *ss = NULL; - ss = gnumeric_probe (filename, true); + ss = ods_probe (filename, true); return ss; } diff --git a/src/ui/gui/psppire-spreadsheet-model.c b/src/ui/gui/psppire-spreadsheet-model.c index f64f35ad09..3a14ce6c46 100644 --- a/src/ui/gui/psppire-spreadsheet-model.c +++ b/src/ui/gui/psppire-spreadsheet-model.c @@ -23,6 +23,7 @@ #include "data/spreadsheet-reader.h" #include "data/gnumeric-reader.h" +#include "data/ods-reader.h" static void psppire_spreadsheet_model_init (PsppireSpreadsheetModel * spreadsheetModel); @@ -245,15 +246,16 @@ tree_model_get_value (GtkTreeModel * model, GtkTreeIter * iter, case PSPPIRE_SPREADSHEET_MODEL_COL_NAME: { const char *x = - gnumeric_get_sheet_name (spreadsheetModel->spreadsheet, + ods_get_sheet_name (spreadsheetModel->spreadsheet, (gint) iter->user_data); + g_value_set_string (value, x); } break; case PSPPIRE_SPREADSHEET_MODEL_COL_RANGE: { char *x = - gnumeric_get_sheet_range (spreadsheetModel->spreadsheet, + ods_get_sheet_range (spreadsheetModel->spreadsheet, (gint) iter->user_data); g_value_set_string (value, x); g_free (x); diff --git a/src/ui/gui/sheet-test.c b/src/ui/gui/sheet-test.c index b45a6c59d0..275dfb0210 100644 --- a/src/ui/gui/sheet-test.c +++ b/src/ui/gui/sheet-test.c @@ -78,11 +78,17 @@ main (int argc, char *argv[] ) if ( argc < 2) g_error ("Usage: prog file\n"); - sp = gnumeric_probe (argv[1], true); + sp = NULL; + + if (sp == NULL) + sp = gnumeric_probe (argv[1], false); + + if (sp == NULL) + sp = ods_probe (argv[1], false); if (sp == NULL) { - g_error ("%s is not a gnumeric file\n", argv[1]); + g_error ("%s is neither a gnumeric nor a ods file\n", argv[1]); return 0; } @@ -140,7 +146,7 @@ main (int argc, char *argv[] ) gtk_main (); - gnumeric_destroy (sp); + // gnumeric_destroy (sp); return 0; }