1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2012, 2013, 2016, 2020, 2021 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "ods-reader.h"
20 #include "spreadsheet-reader.h"
25 #include <libxml/xmlreader.h>
28 #include "data/case.h"
29 #include "data/casereader-provider.h"
30 #include "data/data-in.h"
31 #include "data/dictionary.h"
32 #include "data/format.h"
33 #include "data/identifier.h"
34 #include "data/value.h"
35 #include "data/variable.h"
36 #include "libpspp/assertion.h"
37 #include "libpspp/i18n.h"
38 #include "libpspp/message.h"
39 #include "libpspp/misc.h"
40 #include "libpspp/str.h"
41 #include "libpspp/zip-reader.h"
42 #include "libpspp/hmap.h"
43 #include "libpspp/hash-functions.h"
46 #include "gl/c-strtod.h"
47 #include "gl/minmax.h"
48 #include "gl/xalloc.h"
51 #define _(msgid) gettext (msgid)
53 /* Setting this to false can help with debugging and development.
54 Don't forget to set it back to true, or users will complain that
55 all but the smallest spreadsheets display VERY slowly. */
56 static const bool use_cache = true;
58 static void ods_file_casereader_destroy (struct casereader *, void *);
59 static struct ccase *ods_file_casereader_read (struct casereader *, void *);
/* Casereader class for ODS files.  The entries visible here install the
   read and destroy callbacks whose prototypes appear just above. */
61 static const struct casereader_class ods_file_casereader_class =
63 ods_file_casereader_read,
64 ods_file_casereader_destroy,
/* States of the XML parsing state machine advanced by process_node.
   The declaration order is significant: the readers compare states with
   `<' and `<=' (against STATE_TABLE), so do not reorder the members. */
71 STATE_INIT = 0, /* Initial state */
72 STATE_SPREADSHEET, /* Found the start of the spreadsheet doc */
73 STATE_TABLE, /* Found the sheet that we actually want */
74 STATE_ROW, /* Found the start of the cell array */
75 STATE_CELL, /* Found a cell */
76 STATE_CELL_CONTENT /* Found the text within a cell */
/* The zip member that the XML is read from; closed with
   zip_member_finish in state_data_destroy. */
82 struct zip_member *zm;
/* Current state of the parsing state machine. */
84 enum reader_state state;
/* Name of the sheet currently being read, as taken from the table:name
   attribute; owned by this struct and released with xmlFree. */
88 xmlChar *current_sheet_name;
/* Releases what SD owns: the current sheet name (which is also reset to
   NULL), the XML text reader, and the zip member. */
94 state_data_destroy (struct state_data *sd)
96 xmlFree (sd->current_sheet_name);
97 sd->current_sheet_name = NULL;
99 xmlFreeTextReader (sd->xtr);
102 zip_member_finish (sd->zm);
/* The generic spreadsheet part.  Code in this file casts a
   struct spreadsheet * directly to struct ods_reader *, which relies on
   this being the first member. */
108 struct spreadsheet spreadsheet;
/* The zip archive that holds the document. */
109 struct zip_reader *zreader;
/* The sheet to read, selected by index (compared against the current
   sheet number plus one) or, when the name is non-NULL, by name. */
111 int target_sheet_index;
112 xmlChar *target_sheet_name;
/* Number of entries currently held in spreadsheet.sheets. */
114 int n_allocated_sheets;
116 /* The total number of sheets in the "workbook" */
119 /* State data for the reader */
120 struct state_data rsd;
/* Accumulated error text; reported with msg (ME, ...) when the
   casereader is destroyed, if non-empty. */
122 struct string ods_errs;
127 /* A value to be kept in the hash table for cache purposes. */
130 struct hmap_node node;
132 /* The number of the sheet. */
135 /* The cell's row. */
138 /* The cell's column. */
141 /* The value of the cell (NULL when the cell had no content). */
/* Read callback passed to xmlReaderForIO.  ZM_ is the struct zip_member
   to read from; up to LEN bytes are stored into BUFFER.  Returns whatever
   zip_member_read returns. */
146 xml_reader_for_zip_member (void *zm_, char *buffer, int len)
148 struct zip_member *zm = zm_;
149 return zip_member_read (zm, buffer, len);
/* Destroys the ODS reader behind S: frees the name of every allocated
   sheet, drops the references held on the dictionary and the zip reader,
   frees the sheet array, and tears down the cell cache. */
153 ods_destroy (struct spreadsheet *s)
155 struct ods_reader *r = (struct ods_reader *) s;
159 for (i = 0; i < r->n_allocated_sheets; ++i)
/* Sheet names were allocated by libxml2 (xmlStrdup), hence xmlFree. */
161 xmlFree (r->spreadsheet.sheets[i].name);
164 dict_unref (r->spreadsheet.dict);
166 zip_reader_unref (r->zreader);
167 free (r->spreadsheet.sheets);
170 struct cache_datum *cell;
171 struct cache_datum *next;
/* The _SAFE iterator keeps NEXT so that the current entry can be
   disposed of while walking the cache (presumably what the loop body
   does -- confirm). */
172 HMAP_FOR_EACH_SAFE (cell, next, struct cache_datum, node, &r->cache)
178 hmap_destroy (&r->cache);
/* Tests whether SD is positioned in the sheet that R was asked to read.
   When R has a target sheet name, it is compared with SD's current sheet
   name first; the target sheet index is compared with SD's current sheet
   number plus one. */
184 reading_target_sheet (const struct ods_reader *r, const struct state_data *sd)
186 if (r->target_sheet_name != NULL)
188 if (0 == xmlStrcmp (r->target_sheet_name, sd->current_sheet_name))
192 if (r->target_sheet_index == sd->current_sheet + 1)
199 static void process_node (struct ods_reader *or, struct state_data *r);
202 /* Initialise SD using R */
/* SD is zeroed first; then "content.xml" is opened from R's zip archive
   and an XML reader is created on top of it, using
   xml_reader_for_zip_member as the read callback.  Parsing starts in
   STATE_INIT. */
204 state_data_init (const struct ods_reader *r, struct state_data *sd)
206 memset (sd, 0, sizeof (*sd));
208 char *error = zip_member_open (r->zreader, "content.xml", &sd->zm);
216 xmlReaderForIO (xml_reader_for_zip_member, NULL, sd->zm, NULL, NULL,
222 sd->state = STATE_INIT;
/* Returns the name of sheet N of S.  A fresh parse of the document is
   started and nodes are fed to process_node for as long as fewer than
   N + 1 sheets have been recorded or the parser is not in
   STATE_SPREADSHEET.  The returned pointer is the string stored in the
   sheet table itself, not a copy. */
228 ods_get_sheet_name (struct spreadsheet *s, int n)
230 struct ods_reader *r = (struct ods_reader *) s;
231 struct state_data sd;
232 state_data_init (r, &sd);
234 while ((r->n_allocated_sheets <= n)
235 || sd.state != STATE_SPREADSHEET)
237 int ret = xmlTextReaderRead (sd.xtr);
241 process_node (r, &sd);
243 state_data_destroy (&sd);
245 return r->spreadsheet.sheets[n].name;
/* Returns the cell range occupied by sheet N of S, built by
   create_cell_range from the sheet's recorded first/last column and row.
   The document is parsed from the start for as long as sheet N has not
   been recorded, its last row is still unknown (-1), or the parser is not
   in STATE_SPREADSHEET. */
249 ods_get_sheet_range (struct spreadsheet *s, int n)
251 struct ods_reader *r = (struct ods_reader *) s;
252 struct state_data sd;
253 state_data_init (r, &sd);
255 while ((r->n_allocated_sheets <= n)
256 || (r->spreadsheet.sheets[n].last_row == -1)
257 || sd.state != STATE_SPREADSHEET)
259 int ret = xmlTextReaderRead (sd.xtr);
263 process_node (r, &sd);
265 state_data_destroy (&sd);
267 return create_cell_range (
268 r->spreadsheet.sheets[n].first_col,
269 r->spreadsheet.sheets[n].first_row,
270 r->spreadsheet.sheets[n].last_col,
271 r->spreadsheet.sheets[n].last_row);
/* Returns the number of rows in sheet N of S, computed as the sheet's
   recorded last row plus one. */
275 ods_get_sheet_n_rows (struct spreadsheet *s, int n)
277 struct ods_reader *r = (struct ods_reader *) s;
278 struct state_data sd;
/* Fast path: the sheet has already been scanned, so its extent is
   known. */
280 if (r->n_allocated_sheets > n && r->spreadsheet.sheets[n].last_row != -1)
282 return r->spreadsheet.sheets[n].last_row + 1;
/* Otherwise parse the whole document so that process_node records the
   extents of every sheet. */
285 state_data_init (r, &sd);
287 while (1 == xmlTextReaderRead (sd.xtr))
289 process_node (r, &sd);
292 state_data_destroy (&sd);
294 return r->spreadsheet.sheets[n].last_row + 1;
/* Returns the number of columns in sheet N of S, computed as the sheet's
   recorded last column plus one. */
298 ods_get_sheet_n_columns (struct spreadsheet *s, int n)
300 struct ods_reader *r = (struct ods_reader *) s;
301 struct state_data sd;
/* Fast path: the sheet has already been scanned, so its extent is
   known. */
303 if (r->n_allocated_sheets > n && r->spreadsheet.sheets[n].last_col != -1)
304 return r->spreadsheet.sheets[n].last_col + 1;
/* Otherwise parse the whole document so that process_node records the
   extents of every sheet. */
306 state_data_init (r, &sd);
308 while (1 == xmlTextReaderRead (sd.xtr))
310 process_node (r, &sd);
313 state_data_destroy (&sd);
315 return r->spreadsheet.sheets[n].last_col + 1;
/* Looks up the text of the cell at ROW, COLUMN in sheet N of S.
   The cell cache is consulted first; on a hit the result is a strdup'd
   copy of the cached text, or NULL if the cached cell had no content.
   On a miss the document is parsed from the start, and every row that is
   completed on the way has its cells added to the cache, until the
   requested cell has been passed. */
319 ods_get_sheet_cell (struct spreadsheet *s, int n, int row, int column)
321 struct ods_reader *r = (struct ods_reader *) s;
322 struct state_data sd;
324 /* See if this cell is in the cache. If it is, then use it. */
327 struct cache_datum *lookup = NULL;
/* The hash combines sheet, row and column in that order; the insertion
   code further down must build it the same way. */
328 unsigned int hash = hash_int (n, 0);
329 hash = hash_int (row, hash);
330 hash = hash_int (column, hash);
332 HMAP_FOR_EACH_WITH_HASH (lookup, struct cache_datum, node, hash,
/* Different cells can share a hash, so verify the full key. */
335 if (lookup->row == row && lookup->col == column
336 && lookup->sheet == n)
343 return lookup->value ? strdup (lookup->value) : NULL;
/* Cache miss: parse the document from the beginning. */
347 state_data_init (r, &sd);
349 char *cell_content = NULL;
353 while (1 == xmlTextReaderRead (sd.xtr))
355 process_node (r, &sd);
356 if (sd.row > prev_row)
359 if (sd.state == STATE_CELL_CONTENT
360 && sd.current_sheet == n
361 && sd.node_type == XML_READER_TYPE_TEXT)
363 /* When cell contents are encountered, copy and save it, discarding
364 any older content. */
366 cell_content = CHAR_CAST (char *, xmlTextReaderValue (sd.xtr));
368 if (sd.state == STATE_ROW
369 && sd.current_sheet == n
370 && sd.node_type == XML_READER_TYPE_ELEMENT)
372 /* At the start of a row, free the cell contents and set it to NULL. */
/* A cell has just been completed (either its end tag was read or it was
   an empty element): cache its content for every column it covers. */
376 if (sd.state == STATE_ROW
377 && sd.current_sheet == n
379 (sd.node_type == XML_READER_TYPE_END_ELEMENT
381 xmlTextReaderIsEmptyElement (sd.xtr)))
385 for (int c = prev_col; c < sd.col; ++c)
387 /* See if this cell has already been cached ... */
388 unsigned int hash = hash_int (sd.current_sheet, 0);
389 hash = hash_int (sd.row - 1, hash);
390 hash = hash_int (c, hash);
391 struct cache_datum *probe = NULL;
392 struct cache_datum *next;
393 HMAP_FOR_EACH_WITH_HASH_SAFE (probe, next, struct cache_datum, node, hash,
396 if (probe->row == sd.row - 1 && probe->col == c
397 && probe->sheet == sd.current_sheet)
401 /* If not, then cache it. */
404 struct cache_datum *cell_data = XMALLOC (struct cache_datum);
405 cell_data->row = sd.row - 1;
407 cell_data->sheet = sd.current_sheet;
/* The cache keeps its own copy of the text; empty cells are stored with
   a NULL value. */
408 cell_data->value = cell_content ? strdup (cell_content) : NULL;
410 hmap_insert (&r->cache, &cell_data->node, hash);
/* The requested cell has been reached or passed on its row. */
415 if (sd.row == row + 1 && sd.col >= column + 1)
425 state_data_destroy (&sd);
/* Casereader destroy callback; R_ is the struct ods_reader and READER is
   not used.  Releases the reader's parse state, reports and frees any
   accumulated error text, releases the cached first case if it was never
   handed out, drops the case prototype and the target sheet name, and
   finally drops one reference on the spreadsheet. */
430 ods_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
432 struct ods_reader *r = r_;
436 state_data_destroy (&r->rsd);
438 if (! ds_is_empty (&r->ods_errs))
439 msg (ME, "%s", ds_cstr (&r->ods_errs));
441 ds_destroy (&r->ods_errs);
/* Once the first case has been returned by the read callback it is no
   longer released here. */
443 if (r->spreadsheet.first_case && ! r->spreadsheet.used_first_case)
444 case_unref (r->spreadsheet.first_case);
446 caseproto_unref (r->spreadsheet.proto);
447 r->spreadsheet.proto = NULL;
449 xmlFree (r->target_sheet_name);
450 r->target_sheet_name = NULL;
452 spreadsheet_unref (&r->spreadsheet);
/* Advances the parsing state machine by one XML node.  The name and type
   of the node that R's text reader is positioned on are examined and
   R->state is updated accordingly.  As a side effect, newly seen sheets
   are appended to OR's sheet table, and the row/column extents of the
   current sheet are widened whenever cell content is seen. */
456 process_node (struct ods_reader *or, struct state_data *r)
458 xmlChar *name = xmlTextReaderName (r->xtr);
/* Placeholder name, presumably for nodes for which the reader reports
   no name -- confirm against the guarding test. */
460 name = xmlStrdup (_xml ("--"));
463 r->node_type = xmlTextReaderNodeType (r->xtr);
/* Start of the spreadsheet document: no sheet has been entered yet. */
468 if (0 == xmlStrcasecmp (name, _xml("office:spreadsheet")) &&
469 XML_READER_TYPE_ELEMENT == r->node_type)
471 r->state = STATE_SPREADSHEET;
472 r->current_sheet = -1;
473 r->current_sheet_name = NULL;
476 case STATE_SPREADSHEET:
/* A <table:table> element starts a sheet: remember its name. */
477 if (0 == xmlStrcasecmp (name, _xml("table:table"))
479 (XML_READER_TYPE_ELEMENT == r->node_type))
481 xmlFree (r->current_sheet_name);
482 r->current_sheet_name = xmlTextReaderGetAttribute (r->xtr, _xml ("table:name"));
/* First time this sheet is seen: append an entry to the sheet table with
   all extents unknown (-1) and a private copy of the sheet name. */
486 if (r->current_sheet >= or->n_allocated_sheets)
488 assert (r->current_sheet == or->n_allocated_sheets);
489 or->spreadsheet.sheets = xrealloc (or->spreadsheet.sheets, sizeof (*or->spreadsheet.sheets) * ++or->n_allocated_sheets);
490 or->spreadsheet.sheets[or->n_allocated_sheets - 1].first_col = -1;
491 or->spreadsheet.sheets[or->n_allocated_sheets - 1].last_col = -1;
492 or->spreadsheet.sheets[or->n_allocated_sheets - 1].first_row = -1;
493 or->spreadsheet.sheets[or->n_allocated_sheets - 1].last_row = -1;
494 or->spreadsheet.sheets[or->n_allocated_sheets - 1].name = CHAR_CAST (char *, xmlStrdup (r->current_sheet_name));
/* Keep the sheet count at least as large as the number of sheets
   actually found. */
496 if (or->n_allocated_sheets > or->n_sheets)
497 or->n_sheets = or->n_allocated_sheets;
502 r->state = STATE_TABLE;
/* NOTE(review): this tests for an ELEMENT node, whereas leaving the
   spreadsheet would be signalled by an END_ELEMENT node -- confirm
   which was intended. */
504 else if (0 == xmlStrcasecmp (name, _xml("office:spreadsheet")) &&
505 XML_READER_TYPE_ELEMENT == r->node_type)
507 r->state = STATE_INIT;
/* A <table:table-row> element: a row may stand for several identical
   rows via table:number-rows-repeated (1 when the attribute is
   absent). */
511 if (0 == xmlStrcasecmp (name, _xml("table:table-row")) &&
512 (XML_READER_TYPE_ELEMENT == r->node_type))
515 xmlTextReaderGetAttribute (r->xtr,
516 _xml ("table:number-rows-repeated"));
518 int row_span = value ? _xmlchar_to_int (value) : 1;
/* Only a non-empty row element is entered. */
523 if (! xmlTextReaderIsEmptyElement (r->xtr))
524 r->state = STATE_ROW;
/* End of the current sheet. */
528 else if (0 == xmlStrcasecmp (name, _xml("table:table")) &&
529 (XML_READER_TYPE_END_ELEMENT == r->node_type))
531 r->state = STATE_SPREADSHEET;
/* A <table:table-cell> element: advance the column counter by the
   number of columns the cell covers (1 when
   table:number-columns-repeated is absent). */
535 if ((0 == xmlStrcasecmp (name, _xml ("table:table-cell")))
537 (XML_READER_TYPE_ELEMENT == r->node_type))
540 xmlTextReaderGetAttribute (r->xtr,
541 _xml ("table:number-columns-repeated"));
543 r->col_span = value ? _xmlchar_to_int (value) : 1;
544 r->col += r->col_span;
/* Only a non-empty cell element is entered. */
546 if (! xmlTextReaderIsEmptyElement (r->xtr))
547 r->state = STATE_CELL;
/* End of the current row. */
551 else if ((0 == xmlStrcasecmp (name, _xml ("table:table-row")))
553 (XML_READER_TYPE_END_ELEMENT == r->node_type))
555 r->state = STATE_TABLE;
/* A non-empty <text:p> element inside a cell holds the cell's text. */
559 if ((0 == xmlStrcasecmp (name, _xml("text:p")))
561 (XML_READER_TYPE_ELEMENT == r->node_type))
563 if (! xmlTextReaderIsEmptyElement (r->xtr))
564 r->state = STATE_CELL_CONTENT;
/* End of the current cell. */
567 ((0 == xmlStrcasecmp (name, _xml("table:table-cell")))
569 (XML_READER_TYPE_END_ELEMENT == r->node_type)
572 r->state = STATE_ROW;
575 case STATE_CELL_CONTENT:
576 assert (r->current_sheet >= 0);
577 assert (r->current_sheet < or->n_allocated_sheets);
/* Widen the recorded extents of the current sheet so that they include
   the cell at (R->row - 1, R->col - 1). */
579 if (or->spreadsheet.sheets[r->current_sheet].first_row == -1)
580 or->spreadsheet.sheets[r->current_sheet].first_row = r->row - 1;
583 (or->spreadsheet.sheets[r->current_sheet].first_col == -1)
585 (or->spreadsheet.sheets[r->current_sheet].first_col >= r->col - 1)
587 or->spreadsheet.sheets[r->current_sheet].first_col = r->col - 1;
589 if (or->spreadsheet.sheets[r->current_sheet].last_row < r->row - 1)
590 or->spreadsheet.sheets[r->current_sheet].last_row = r->row - 1;
592 if (or->spreadsheet.sheets[r->current_sheet].last_col < r->col - 1)
593 or->spreadsheet.sheets[r->current_sheet].last_col = r->col - 1;
/* Any end tag seen while in cell content drops back to the cell. */
595 if (XML_READER_TYPE_END_ELEMENT == r->node_type)
596 r->state = STATE_CELL;
607 A struct containing the parameters of a cell's value
620 struct xml_value firstval;
624 /* Determine the width that a xmv should probably have */
/* The starting point is SPREADSHEET_DEFAULT_WIDTH.  For string values the
   width is the length of the value (or of the text) rounded up to a
   multiple of SPREADSHEET_DEFAULT_WIDTH. */
626 xmv_to_width (const struct xml_value *xmv, int fallback)
628 int width = SPREADSHEET_DEFAULT_WIDTH;
630 /* Non-strings always have zero width */
631 if (xmv->type != NULL && 0 != xmlStrcmp (xmv->type, _xml("string")))
638 width = ROUND_UP (xmlStrlen (xmv->value),
639 SPREADSHEET_DEFAULT_WIDTH);
641 width = ROUND_UP (xmlStrlen (xmv->text),
642 SPREADSHEET_DEFAULT_WIDTH);
648 Sets the VAR of case C to the value corresponding to the XML data
/* Stores into variable VAR of case C the value described by XMV.
   COL and ROW identify the spreadsheet cell and are only used to build
   the cell reference shown in the conversion warning. */
651 convert_xml_to_value (struct ccase *c, const struct variable *var,
652 const struct xml_value *xmv, int col, int row)
654 union value *v = case_data_rw (c, var);
/* A cell with neither a value nor text becomes a missing value. */
656 if (xmv->value == NULL && xmv->text == NULL)
657 value_set_missing (v, var_get_width (var));
658 else if (var_is_alpha (var))
659 /* Use the text field, because it seems that there is no
660 value field for strings */
661 value_copy_str_rpad (v, var_get_width (var), xmv->text, ' ');
/* Numeric variable: convert according to its write format. */
664 const struct fmt_spec *fmt = var_get_write_format (var);
665 enum fmt_category fc = fmt_get_category (fmt->type);
667 assert (fc != FMT_CAT_STRING);
/* Floats are parsed with c_strtod, i.e. independently of the current
   locale. */
669 if (0 == xmlStrcmp (xmv->type, _xml("float")))
671 v->f = c_strtod (CHAR_CAST (const char *, xmv->value), NULL);
/* Anything else goes through data_in, preferring the value attribute
   over the displayed text. */
675 const char *text = xmv->value ?
676 CHAR_CAST (const char *, xmv->value) : CHAR_CAST (const char *, xmv->text);
678 char *m = data_in (ss_cstr (text), "UTF-8", fmt->type,
679 settings_get_fmt_settings (), v,
680 var_get_width (var), "UTF-8");
/* Report a failed conversion as a warning that names the cell. */
684 char buf [FMT_STRING_LEN_MAX + 1];
685 char *cell = create_cell_ref (col, row);
687 msg (MW, _("Cannot convert the value in the spreadsheet cell %s to format (%s): %s"),
688 cell, fmt_to_string (fmt, buf), m);
696 /* Try to find out how many sheets there are in the "workbook" */
/* The count is taken from "meta.xml" in ZREADER: the document is scanned
   for a node named meta:document-statistic, and the integer value of its
   meta:table-count attribute is used.  The XML reader and the zip member
   are released both when the attribute is found and after an
   unsuccessful scan. */
698 get_sheet_count (struct zip_reader *zreader)
700 xmlTextReaderPtr mxtr;
701 struct zip_member *meta = NULL;
702 char *error = zip_member_open (zreader, "meta.xml", &meta);
709 mxtr = xmlReaderForIO (xml_reader_for_zip_member, NULL, meta, NULL, NULL, 0);
711 while (1 == xmlTextReaderRead (mxtr))
713 xmlChar *name = xmlTextReaderName (mxtr);
714 if (0 == xmlStrcmp (name, _xml("meta:document-statistic")))
716 xmlChar *attr = xmlTextReaderGetAttribute (mxtr, _xml ("meta:table-count"));
720 int s = _xmlchar_to_int (attr);
721 xmlFreeTextReader (mxtr);
722 zip_member_finish (meta);
732 xmlFreeTextReader (mxtr);
733 zip_member_finish (meta);
/* Number of sheets in S.  A non-negative stored count is treated as
   already known; otherwise the count is obtained from get_sheet_count
   and stored in the reader. */
738 ods_get_sheet_n_sheets (struct spreadsheet *s)
740 struct ods_reader *r = (struct ods_reader *) s;
742 if (r->n_sheets >= 0)
745 r->n_sheets = get_sheet_count (r->zreader);
/* Error callback installed on the XML text reader.  CTX is the struct
   ods_reader; MESG is the parser's message and LOC its location.  The
   severity SEV is not used.  The problem is reported as a warning that
   names the file and the line number. */
752 ods_error_handler (void *ctx, const char *mesg,
753 xmlParserSeverities sev UNUSED,
754 xmlTextReaderLocatorPtr loc)
756 struct ods_reader *r = ctx;
758 msg (MW, _("There was a problem whilst reading the %s file `%s' (near line %d): `%s'"),
760 r->spreadsheet.file_name,
761 xmlTextReaderLocatorLineNumber (loc),
766 static bool init_reader (struct ods_reader *r, bool report_errors, struct state_data *state);
/* Creates a casereader for SPREADSHEET according to OPTS.
   The function takes a reference on the reader and initialises its
   parse state, works out the cell range to read, advances to the first
   requested row of the target sheet, optionally reads variable names from
   that row, reads the first data row to learn each variable's type and
   width, builds the dictionary, converts the first data row into a cached
   first case, and finally creates a sequential casereader.  On the failure
   path the collected var_spec strings are freed and the reader is
   destroyed with ods_file_casereader_destroy. */
768 static struct casereader *
769 ods_make_reader (struct spreadsheet *spreadsheet,
770 const struct spreadsheet_read_options *opts)
773 xmlChar *type = NULL;
774 unsigned long int vstart = 0;
775 casenumber n_cases = CASENUMBER_MAX;
777 struct var_spec *var_spec = NULL;
780 struct ods_reader *r = (struct ods_reader *) spreadsheet;
781 xmlChar *val_string = NULL;
784 ds_init_empty (&r->ods_errs);
/* The casereader holds its own reference; it is dropped again in
   ods_file_casereader_destroy. */
785 r = (struct ods_reader *) spreadsheet_ref (SPREADSHEET_CAST (r));
787 if (!init_reader (r, true, &r->rsd))
790 r->spreadsheet.used_first_case = false;
791 r->spreadsheet.first_case = NULL;
/* Use the requested cell range if there is one ... */
793 if (opts->cell_range)
795 if (! convert_cell_ref (opts->cell_range,
796 &r->spreadsheet.start_col, &r->spreadsheet.start_row,
797 &r->spreadsheet.stop_col, &r->spreadsheet.stop_row))
799 msg (SE, _("Invalid cell range `%s'"),
/* ... otherwise start at the origin; a stop value of -1 means that there
   is no limit in that direction. */
806 r->spreadsheet.start_col = 0;
807 r->spreadsheet.start_row = 0;
808 r->spreadsheet.stop_col = -1;
809 r->spreadsheet.stop_row = -1;
812 r->target_sheet_name = xmlStrdup (BAD_CAST opts->sheet_name);
813 r->target_sheet_index = opts->sheet_index;
815 /* Advance to the start of the cells for the target sheet */
816 while (! reading_target_sheet (r, &r->rsd)
817 || r->rsd.state != STATE_ROW || r->rsd.row <= r->spreadsheet.start_row)
819 if (1 != (ret = xmlTextReaderRead (r->rsd.xtr)))
822 process_node (r, &r->rsd);
827 msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."),
828 spreadsheet->file_name);
/* Optionally take the variable names from the first row. */
832 if (opts->read_names)
834 while (1 == xmlTextReaderRead (r->rsd.xtr))
836 process_node (r, &r->rsd);
838 /* If the row is finished then stop for now */
839 if (r->rsd.state == STATE_TABLE && r->rsd.row > r->spreadsheet.start_row)
/* Zero-based index of the current cell within the requested range. */
842 int idx = r->rsd.col - r->spreadsheet.start_col - 1;
847 if (r->spreadsheet.stop_col != -1 && idx > r->spreadsheet.stop_col - r->spreadsheet.start_col)
850 if (r->rsd.state == STATE_CELL_CONTENT
852 XML_READER_TYPE_TEXT == r->rsd.node_type)
854 xmlChar *value = xmlTextReaderValue (r->rsd.xtr);
/* Grow the var_spec array so that it covers IDX. */
855 if (idx >= n_var_specs)
857 var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + 1));
859 /* xrealloc (unlike realloc) doesn't initialise its memory to 0 */
860 memset (var_spec + n_var_specs,
862 (idx - n_var_specs + 1) * sizeof (*var_spec));
863 n_var_specs = idx + 1;
/* A cell repeated over several columns gives the same name to each of
   them. */
865 for (int i = 0; i < r->rsd.col_span; ++i)
867 var_spec[idx - i].firstval.text = 0;
868 var_spec[idx - i].firstval.value = 0;
869 var_spec[idx - i].firstval.type = 0;
870 var_spec[idx - i].name =
871 strdup (CHAR_CAST (const char *, value));
879 /* Read in the first row of data */
880 while (1 == xmlTextReaderRead (r->rsd.xtr))
883 process_node (r, &r->rsd);
885 if (! reading_target_sheet (r, &r->rsd))
888 /* If the row is finished then stop for now */
889 if (r->rsd.state == STATE_TABLE &&
890 r->rsd.row > r->spreadsheet.start_row + (opts->read_names ? 1 : 0))
893 idx = r->rsd.col - r->spreadsheet.start_col - 1;
897 if (r->spreadsheet.stop_col != -1 && idx > r->spreadsheet.stop_col - r->spreadsheet.start_col)
/* On entering a cell, pick up its declared type and raw value. */
900 if (r->rsd.state == STATE_CELL &&
901 XML_READER_TYPE_ELEMENT == r->rsd.node_type)
903 type = xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("office:value-type"));
904 val_string = xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("office:value"));
/* On reaching the cell's text, record type, text and value as the first
   value of every column that the cell covers. */
907 if (r->rsd.state == STATE_CELL_CONTENT &&
908 XML_READER_TYPE_TEXT == r->rsd.node_type)
910 if (idx >= n_var_specs)
912 var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + r->rsd.col_span));
913 memset (var_spec + n_var_specs,
915 (idx + r->rsd.col_span - n_var_specs) * sizeof (*var_spec));
917 var_spec [idx].name = NULL;
918 n_var_specs = idx + 1;
921 for (int x = 0; x < r->rsd.col_span; ++x)
923 var_spec [idx - x].firstval.type = xmlStrdup (type);
924 var_spec [idx - x].firstval.text = xmlTextReaderValue (r->rsd.xtr);
925 var_spec [idx - x].firstval.value = xmlStrdup (val_string);
933 /* Create the dictionary and populate it */
934 r->spreadsheet.dict = dict_create (
935 CHAR_CAST (const char *, xmlTextReaderConstEncoding (r->rsd.xtr)));
937 for (i = 0; i < n_var_specs ; ++i)
940 struct variable *var = NULL;
941 char *name = dict_make_unique_var_name (r->spreadsheet.dict, var_spec[i].name, &vstart);
942 int width = xmv_to_width (&var_spec[i].firstval, opts->asw);
943 dict_create_var (r->spreadsheet.dict, name, width);
946 var = dict_get_var (r->spreadsheet.dict, i);
/* Cells typed as dates get their own format; everything else gets the
   default format for the variable's width. */
948 if (0 == xmlStrcmp (var_spec[i].firstval.type, _xml("date")))
955 fmt = fmt_default_for_width (width);
957 var_set_both_formats (var, &fmt);
960 if (n_var_specs == 0)
962 msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."),
963 spreadsheet->file_name);
967 /* Create the first case, and cache it */
968 r->spreadsheet.proto = caseproto_ref (dict_get_proto (r->spreadsheet.dict));
969 r->spreadsheet.first_case = case_create (r->spreadsheet.proto);
970 case_set_missing (r->spreadsheet.first_case);
972 for (i = 0 ; i < n_var_specs; ++i)
974 const struct variable *var = dict_get_var (r->spreadsheet.dict, i);
976 convert_xml_to_value (r->spreadsheet.first_case, var, &var_spec[i].firstval,
977 r->rsd.col - n_var_specs + i,
981 /* Read in the first row of data */
982 while (1 == xmlTextReaderRead (r->rsd.xtr))
984 process_node (r, &r->rsd);
986 if (r->rsd.state == STATE_ROW)
/* NOTE(review): type, value and text were obtained from xmlStrdup and
   xmlTextReaderValue; libxml2 documents xmlFree as the matching
   deallocator -- confirm that plain free () is equivalent here. */
991 for (i = 0 ; i < n_var_specs ; ++i)
993 free (var_spec[i].firstval.type);
994 free (var_spec[i].firstval.value);
995 free (var_spec[i].firstval.text);
996 free (var_spec[i].name);
1002 return casereader_create_sequential
1004 r->spreadsheet.proto,
1006 &ods_file_casereader_class, r);
/* Failure path: release the collected specifications and the reader.
   NOTE(review): the same free ()/xmlFree question applies here. */
1010 for (i = 0 ; i < n_var_specs ; ++i)
1012 free (var_spec[i].firstval.type);
1013 free (var_spec[i].firstval.value);
1014 free (var_spec[i].firstval.text);
1015 free (var_spec[i].name);
1020 ods_file_casereader_destroy (NULL, r);
1026 /* Reads and returns one case from READER's file. Returns a null
1027 pointer on failure. */
/* R_ is the struct ods_reader.  The very first call hands out the case
   that ods_make_reader cached; later calls parse one further row of the
   target sheet into a newly created case. */
1028 static struct ccase *
1029 ods_file_casereader_read (struct casereader *reader UNUSED, void *r_)
1031 struct ccase *c = NULL;
1032 struct ods_reader *r = r_;
1034 xmlChar *val_string = NULL;
1035 xmlChar *type = NULL;
/* Hand out the cached first case exactly once. */
1037 if (!r->spreadsheet.used_first_case)
1039 r->spreadsheet.used_first_case = true;
1040 return r->spreadsheet.first_case;
1044 /* Advance to the start of a row. (If there is one) */
1045 while (r->rsd.state != STATE_ROW
1046 && 1 == xmlTextReaderRead (r->rsd.xtr)
1049 process_node (r, &r->rsd);
/* No more data: the target sheet has been left, the parser is outside
   any table, or the last requested row has been passed. */
1053 if (! reading_target_sheet (r, &r->rsd)
1054 || r->rsd.state < STATE_TABLE
1055 || (r->spreadsheet.stop_row != -1 && r->rsd.row > r->spreadsheet.stop_row + 1)
/* Every variable starts out missing; cells without content leave it
   that way. */
1061 c = case_create (r->spreadsheet.proto);
1062 case_set_missing (c);
1064 while (1 == xmlTextReaderRead (r->rsd.xtr))
1066 process_node (r, &r->rsd);
1068 if (r->spreadsheet.stop_row != -1 && r->rsd.row > r->spreadsheet.stop_row + 1)
/* On entering a cell, pick up its declared type and raw value. */
1071 if (r->rsd.state == STATE_CELL &&
1072 r->rsd.node_type == XML_READER_TYPE_ELEMENT)
1074 type = xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("office:value-type"));
1075 val_string = xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("office:value"));
/* On reaching the cell's text, convert it into the case. */
1078 if (r->rsd.state == STATE_CELL_CONTENT &&
1079 r->rsd.node_type == XML_READER_TYPE_TEXT)
1082 struct xml_value *xmv = XZALLOC (struct xml_value);
1083 xmv->text = xmlTextReaderValue (r->rsd.xtr);
1084 xmv->value = val_string;
/* A cell repeated over several columns supplies the same value to each
   of the corresponding variables. */
1089 for (col = 0; col < r->rsd.col_span; ++col)
1091 const struct variable *var;
1092 const int idx = r->rsd.col - col - r->spreadsheet.start_col - 1;
/* Columns beyond the requested range or beyond the dictionary are
   tested for here. */
1095 if (r->spreadsheet.stop_col != -1 && idx > r->spreadsheet.stop_col - r->spreadsheet.start_col)
1097 if (idx >= dict_get_var_cnt (r->spreadsheet.dict))
1100 var = dict_get_var (r->spreadsheet.dict, idx);
1101 convert_xml_to_value (c, var, xmv, idx + r->spreadsheet.start_col, r->rsd.row - 1);
1104 xmlFree (xmv->text);
1105 xmlFree (xmv->value);
1106 xmlFree (xmv->type);
/* The row (or the whole table) has ended. */
1109 if (r->rsd.state <= STATE_TABLE)
1114 xmlFree (val_string);
/* Prepares R for reading: opens "content.xml" from R's zip archive,
   creates an XML text reader on it, stores the fresh parse state in
   *STATE, installs ods_error_handler on the reader, and fills in the
   type string and the callbacks of the generic spreadsheet.
   REPORT_ERRORS selects the parser options: when it is false, libxml2's
   own errors and warnings are suppressed. */
1120 init_reader (struct ods_reader *r, bool report_errors,
1121 struct state_data *state)
1123 struct spreadsheet *s = SPREADSHEET_CAST (r);
1127 struct zip_member *content;
1128 char *error = zip_member_open (r->zreader, "content.xml", &content);
1129 if (content == NULL)
1135 xmlTextReaderPtr xtr = xmlReaderForIO (xml_reader_for_zip_member, NULL, content, NULL, NULL,
1138 : (XML_PARSE_NOERROR | XML_PARSE_NOWARNING));
/* NOTE(review): ods_probe calls this function with STATE == NULL, so
   this store has to be guarded -- confirm that it is. */
1143 *state = (struct state_data) { .xtr = xtr,
1145 .state = STATE_INIT };
1147 xmlTextReaderSetErrorHandler (xtr, ods_error_handler, r);
/* Install the ODS implementations of the spreadsheet interface. */
1150 strcpy (s->type, "ODS");
1151 s->destroy = ods_destroy;
1152 s->make_reader = ods_make_reader;
1153 s->get_sheet_name = ods_get_sheet_name;
1154 s->get_sheet_range = ods_get_sheet_range;
1155 s->get_sheet_n_sheets = ods_get_sheet_n_sheets;
1156 s->get_sheet_n_rows = ods_get_sheet_n_rows;
1157 s->get_sheet_n_columns = ods_get_sheet_n_columns;
1158 s->get_sheet_cell = ods_get_sheet_cell;
/* Tries to open FILENAME as an OpenDocument spreadsheet.  On success a
   new spreadsheet with a reference count of 1 is returned.
   REPORT_ERRORS is passed on to init_reader.  If the zip archive cannot
   be opened, an error message naming it as an OpenDocument file is
   issued. */
1163 struct spreadsheet *
1164 ods_probe (const char *filename, bool report_errors)
/* Zero-initialised, so every field not set below starts out as 0/NULL. */
1166 struct ods_reader *r = XZALLOC (struct ods_reader);
1168 struct zip_reader *zr;
1169 char *error = zip_reader_create (filename, &zr);
1174 msg (ME, _("Cannot open %s as a OpenDocument file: %s"),
1183 r->spreadsheet.ref_cnt = 1;
1184 hmap_init (&r->cache);
/* Probing only needs to know that the document can be opened; no parse
   state is kept, hence the NULL. */
1186 if (!init_reader (r, report_errors, NULL))
/* No sheets have been discovered yet. */
1190 r->n_allocated_sheets = 0;
1191 r->spreadsheet.sheets = NULL;
1193 r->spreadsheet.file_name = strdup (filename);
1194 return &r->spreadsheet;
/* Failure path: release the zip reader. */
1197 zip_reader_unref (r->zreader);