1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2012, 2013, 2016, 2020 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "ods-reader.h"
20 #include "spreadsheet-reader.h"
25 #include <libxml/xmlreader.h>
28 #include "data/case.h"
29 #include "data/casereader-provider.h"
30 #include "data/data-in.h"
31 #include "data/dictionary.h"
32 #include "data/format.h"
33 #include "data/identifier.h"
34 #include "data/value.h"
35 #include "data/variable.h"
36 #include "libpspp/assertion.h"
37 #include "libpspp/i18n.h"
38 #include "libpspp/message.h"
39 #include "libpspp/misc.h"
40 #include "libpspp/str.h"
41 #include "libpspp/zip-reader.h"
42 #include "libpspp/hmap.h"
43 #include "libpspp/hash-functions.h"
46 #include "gl/c-strtod.h"
47 #include "gl/minmax.h"
48 #include "gl/xalloc.h"
/* Shorthand that passes a message string through gettext. */
51 #define _(msgid) gettext (msgid)
53 /* Setting this to false can help with debugging and development.
54 Don't forget to set it back to true, or users will complain that
55 all but the smallest spreadsheets display VERY slowly. */
/* NOTE(review): no use of this flag is visible in this excerpt; presumably
   it gates the cell cache consulted in ods_get_sheet_cell -- confirm. */
56 static const bool use_cache = true;
/* Forward declarations of the casereader callbacks defined later in this
   file; they are needed here to build ods_file_casereader_class. */
58 static void ods_file_casereader_destroy (struct casereader *, void *);
59 static struct ccase *ods_file_casereader_read (struct casereader *, void *);
/* Casereader class for ODS files.  It names the read and destroy callbacks
   and is handed to casereader_create_sequential in ods_make_reader. */
61 static const struct casereader_class ods_file_casereader_class =
63 ods_file_casereader_read,
64 ods_file_casereader_destroy,
/* States of the content.xml parser.  process_node assigns these to the
   `state' member of struct state_data as elements open and close.
   NOTE(review): the line that opens this enum is not part of this excerpt. */
71 STATE_INIT = 0, /* Initial state */
72 STATE_SPREADSHEET, /* Found the start of the spreadsheet doc */
73 STATE_TABLE, /* Found the sheet that we actually want */
74 STATE_ROW, /* Found the start of the cell array */
75 STATE_CELL, /* Found a cell */
76 STATE_CELL_CONTENT /* Found the text within a cell */
/* Per-parse state for one pass over content.xml.
   NOTE(review): only some members are visible in this excerpt; other code
   in this file also uses xtr, node_type, current_sheet, row, col and
   col_span. */
/* The zip member being read (state_data_init opens "content.xml"). */
82 struct zip_member *zm;
/* Current position in the state machine driven by process_node. */
84 enum reader_state state;
/* Value of the table:name attribute of the most recently entered
   table:table element; allocated by libxml2 and released with xmlFree. */
88 xmlChar *current_sheet_name;
/* Releases what SD holds: the current sheet name, the XML text reader and
   the zip member.  The sheet name pointer is reset to NULL afterwards. */
94 state_data_destroy (struct state_data *sd)
96 xmlFree (sd->current_sheet_name);
97 sd->current_sheet_name = NULL;
99 xmlFreeTextReader (sd->xtr);
102 zip_member_finish (sd->zm);
/* An ODS spreadsheet reader.  The generic struct spreadsheet is the first
   member shown, and other code in this file casts between
   struct spreadsheet * and struct ods_reader *.
   NOTE(review): some members (e.g. n_sheets, cache) are used elsewhere in
   this file but their declarations are not part of this excerpt. */
108 struct spreadsheet spreadsheet;
/* Zip archive the document is read from (created in ods_probe). */
109 struct zip_reader *zreader;
/* Sheet selected by index; reading_target_sheet compares it with
   current_sheet + 1. */
111 int target_sheet_index;
/* Sheet selected by name, or NULL; copied from the read options in
   ods_make_reader. */
112 xmlChar *target_sheet_name;
/* Number of entries in spreadsheet.sheets (grown in process_node). */
114 int n_allocated_sheets;
116 /* The total number of sheets in the "workbook" */
119 /* State data for the reader */
120 struct state_data rsd;
/* Accumulated error text; reported with msg in
   ods_file_casereader_destroy when non-empty. */
122 struct string ods_errs;
/* Receives error text from zip_reader_create in ods_probe. */
124 struct string zip_errs;
128 /* A value to be kept in the hash table for cache purposes. */
/* Entries are keyed on (sheet, row, col); see the hash computation in
   ods_get_sheet_cell.
   NOTE(review): the member declarations under the comments below are not
   part of this excerpt. */
131 struct hmap_node node;
133 /* The number of the sheet. */
136 /* The cell's row. */
139 /* The cell's column. */
142 /* The value of the cell. */
/* Read callback given to xmlReaderForIO.  ZM_ is the I/O context, which the
   callers in this file set to a struct zip_member.  BUFFER and LEN are
   passed straight to zip_member_read and its result is returned. */
147 xml_reader_for_zip_member (void *zm_, char *buffer, int len)
149 struct zip_member *zm = zm_;
150 return zip_member_read (zm, buffer, len);
/* Destroy callback for an ODS spreadsheet (installed as s->destroy by
   init_reader).  Frees the sheet names and the sheets array, drops the
   dictionary reference, destroys the zip reader and tears down the cell
   cache. */
154 ods_destroy (struct spreadsheet *s)
156 struct ods_reader *r = (struct ods_reader *) s;
/* Sheet names were duplicated with xmlStrdup in process_node, hence
   xmlFree. */
160 for (i = 0; i < r->n_allocated_sheets; ++i)
162 xmlFree (r->spreadsheet.sheets[i].name);
165 dict_unref (r->spreadsheet.dict);
167 zip_reader_destroy (r->zreader);
168 free (r->spreadsheet.sheets);
/* Walk every cache entry with the _SAFE iterator, which keeps a separate
   `next' pointer.  NOTE(review): the loop body is not part of this excerpt;
   presumably it frees each entry -- confirm. */
171 struct cache_datum *cell;
172 struct cache_datum *next;
173 HMAP_FOR_EACH_SAFE (cell, next, struct cache_datum, node, &r->cache)
179 hmap_destroy (&r->cache);
/* Tests whether the parse state SD is positioned on the sheet selected in R.
   Two comparisons are made: the target sheet name (when one is set) against
   the current sheet name, and the target sheet index against
   current_sheet + 1.
   NOTE(review): the return statements are not part of this excerpt. */
185 reading_target_sheet (const struct ods_reader *r, const struct state_data *sd)
187 if (r->target_sheet_name != NULL)
189 if (0 == xmlStrcmp (r->target_sheet_name, sd->current_sheet_name))
193 if (r->target_sheet_index == sd->current_sheet + 1)
/* Defined further down; declared here because the sheet accessor functions
   below call it. */
200 static void process_node (struct ods_reader *or, struct state_data *r);
203 /* Initialise SD using R */
/* Zeroes SD, opens "content.xml" from R's zip reader and builds an XML
   reader over it using xml_reader_for_zip_member as the read callback.
   Parsing starts in STATE_INIT.
   NOTE(review): any failure handling is not part of this excerpt. */
205 state_data_init (const struct ods_reader *r, struct state_data *sd)
207 memset (sd, 0, sizeof (*sd));
209 sd->zm = zip_member_open (r->zreader, "content.xml");
215 xmlReaderForIO (xml_reader_for_zip_member, NULL, sd->zm, NULL, NULL,
221 sd->state = STATE_INIT;
/* Returns the name of sheet N of spreadsheet S.  The returned pointer is the
   one stored in the sheets array (it is freed in ods_destroy), so the caller
   must not free it.

   content.xml is parsed from the beginning with a private state_data until
   sheet N has an entry in the sheets array and the parser is in
   STATE_SPREADSHEET. */
227 ods_get_sheet_name (struct spreadsheet *s, int n)
229 struct ods_reader *r = (struct ods_reader *) s;
230 struct state_data sd;
231 state_data_init (r, &sd);
233 while ((r->n_allocated_sheets <= n)
234 || sd.state != STATE_SPREADSHEET)
/* NOTE(review): the handling of `ret' is not part of this excerpt;
   presumably the loop stops when the read does not return 1 -- confirm. */
236 int ret = xmlTextReaderRead (sd.xtr);
240 process_node (r, &sd);
242 state_data_destroy (&sd);
244 return r->spreadsheet.sheets[n].name;
/* Returns the cell range of sheet N of spreadsheet S, built by
   create_cell_range from the sheet's first/last column and row.

   content.xml is parsed from the beginning with a private state_data until
   sheet N has an entry, its last_row is known (not -1) and the parser is in
   STATE_SPREADSHEET. */
248 ods_get_sheet_range (struct spreadsheet *s, int n)
250 struct ods_reader *r = (struct ods_reader *) s;
251 struct state_data sd;
252 state_data_init (r, &sd);
254 while ((r->n_allocated_sheets <= n)
255 || (r->spreadsheet.sheets[n].last_row == -1)
256 || sd.state != STATE_SPREADSHEET)
/* NOTE(review): the handling of `ret' is not part of this excerpt;
   presumably the loop stops when the read does not return 1 -- confirm. */
258 int ret = xmlTextReaderRead (sd.xtr);
262 process_node (r, &sd);
264 state_data_destroy (&sd);
266 return create_cell_range (
267 r->spreadsheet.sheets[n].first_col,
268 r->spreadsheet.sheets[n].first_row,
269 r->spreadsheet.sheets[n].last_col,
270 r->spreadsheet.sheets[n].last_row);
/* Returns the number of rows in sheet N of spreadsheet S, computed as the
   sheet's last_row + 1. */
274 ods_get_sheet_n_rows (struct spreadsheet *s, int n)
276 struct ods_reader *r = (struct ods_reader *) s;
277 struct state_data sd;
/* Fast path: an earlier parse already recorded the extent of this sheet. */
279 if (r->n_allocated_sheets > n && r->spreadsheet.sheets[n].last_row != -1)
281 return r->spreadsheet.sheets[n].last_row + 1;
/* Otherwise read content.xml to its end so process_node can record the
   extents of every sheet. */
284 state_data_init (r, &sd);
286 while (1 == xmlTextReaderRead (sd.xtr))
288 process_node (r, &sd);
291 state_data_destroy (&sd);
/* NOTE(review): no check is visible here that N is below
   n_allocated_sheets after the parse -- confirm that callers only pass
   valid sheet numbers. */
293 return r->spreadsheet.sheets[n].last_row + 1;
/* Returns the number of columns in sheet N of spreadsheet S, computed as the
   sheet's last_col + 1. */
297 ods_get_sheet_n_columns (struct spreadsheet *s, int n)
299 struct ods_reader *r = (struct ods_reader *) s;
300 struct state_data sd;
/* Fast path: an earlier parse already recorded the extent of this sheet. */
302 if (r->n_allocated_sheets > n && r->spreadsheet.sheets[n].last_col != -1)
303 return r->spreadsheet.sheets[n].last_col + 1;
/* Otherwise read content.xml to its end so process_node can record the
   extents of every sheet. */
305 state_data_init (r, &sd);
307 while (1 == xmlTextReaderRead (sd.xtr))
309 process_node (r, &sd);
312 state_data_destroy (&sd);
/* NOTE(review): no check is visible here that N is below
   n_allocated_sheets after the parse -- confirm that callers only pass
   valid sheet numbers. */
314 return r->spreadsheet.sheets[n].last_col + 1;
/* Looks up the content of the cell at (ROW, COLUMN) on sheet N of
   spreadsheet S.  A cache hit returns a strdup'd copy of the cached text
   (or NULL when the cached value is NULL), so in that case the caller owns
   the result.  On a miss, content.xml is parsed from the beginning and
   cells are added to the cache as each row on sheet N ends.
   NOTE(review): several statements of this function, including the final
   return, are not part of this excerpt. */
318 ods_get_sheet_cell (struct spreadsheet *s, int n, int row, int column)
320 struct ods_reader *r = (struct ods_reader *) s;
321 struct state_data sd;
323 /* See if this cell is in the cache. If it is, then use it. */
/* The hash is folded in the order sheet, row, column.  The insertion code
   further down uses the same order, which is what makes lookups find the
   entries. */
326 struct cache_datum *lookup = NULL;
327 unsigned int hash = hash_int (n, 0);
328 hash = hash_int (row, hash);
329 hash = hash_int (column, hash);
331 HMAP_FOR_EACH_WITH_HASH (lookup, struct cache_datum, node, hash,
/* Equal hashes do not imply equal keys, so compare the key fields too. */
334 if (lookup->row == row && lookup->col == column
335 && lookup->sheet == n)
342 return lookup->value ? strdup (lookup->value) : NULL;
/* Cache miss: parse the document with a private state_data. */
346 state_data_init (r, &sd);
348 char *cell_content = NULL;
352 while (1 == xmlTextReaderRead (sd.xtr))
354 process_node (r, &sd);
355 if (sd.row > prev_row)
358 if (sd.state == STATE_CELL_CONTENT
359 && sd.current_sheet == n
360 && sd.node_type == XML_READER_TYPE_TEXT)
362 /* When cell contents are encountered, copy and save it, discarding
363 any older content. */
365 cell_content = CHAR_CAST (char *, xmlTextReaderValue (sd.xtr));
367 if (sd.state == STATE_ROW
368 && sd.current_sheet == n
369 && sd.node_type == XML_READER_TYPE_ELEMENT)
371 /* At the start of a row, free the cell contents and set it to NULL. */
/* A cell has just been completed on sheet N: either its end tag was read
   or it is an empty element. */
375 if (sd.state == STATE_ROW
376 && sd.current_sheet == n
378 (sd.node_type == XML_READER_TYPE_END_ELEMENT
380 xmlTextReaderIsEmptyElement (sd.xtr)))
/* One cache entry per column from prev_col up to sd.col; this loop is what
   gives every column of a repeated cell its own entry. */
384 for (int c = prev_col; c < sd.col; ++c)
386 /* See if this cell has already been cached ... */
/* The cache key row is sd.row - 1, the same adjustment used when matching
   against ROW below. */
387 unsigned int hash = hash_int (sd.current_sheet, 0);
388 hash = hash_int (sd.row - 1, hash);
389 hash = hash_int (c, hash);
390 struct cache_datum *probe = NULL;
391 struct cache_datum *next;
392 HMAP_FOR_EACH_WITH_HASH_SAFE (probe, next, struct cache_datum, node, hash,
395 if (probe->row == sd.row - 1 && probe->col == c
396 && probe->sheet == sd.current_sheet)
400 /* If not, then cache it. */
/* The cache stores its own strdup'd copy of the text. */
403 struct cache_datum *cell_data = XMALLOC (struct cache_datum);
404 cell_data->row = sd.row - 1;
406 cell_data->sheet = sd.current_sheet;
407 cell_data->value = cell_content ? strdup (cell_content) : NULL;
409 hmap_insert (&r->cache, &cell_data->node, hash);
/* The parser has reached or passed the requested cell on its row. */
414 if (sd.row == row + 1 && sd.col >= column + 1)
424 state_data_destroy (&sd);
/* Casereader destroy callback; R_ is the struct ods_reader.  Also called
   directly, with a NULL reader, from ods_make_reader.  Releases the reader's
   parse state, reports and frees any accumulated error text, and drops the
   unused first case, the case prototype, the target sheet name and one
   reference to the spreadsheet. */
429 ods_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
431 struct ods_reader *r = r_;
435 state_data_destroy (&r->rsd);
437 if (! ds_is_empty (&r->ods_errs))
438 msg (ME, "%s", ds_cstr (&r->ods_errs));
440 ds_destroy (&r->ods_errs);
/* The first case is only released here if ods_file_casereader_read never
   returned it to a caller. */
442 if (r->spreadsheet.first_case && ! r->spreadsheet.used_first_case)
443 case_unref (r->spreadsheet.first_case);
445 caseproto_unref (r->spreadsheet.proto);
446 r->spreadsheet.proto = NULL;
448 xmlFree (r->target_sheet_name);
449 r->target_sheet_name = NULL;
/* Balances the spreadsheet_ref taken in ods_make_reader. */
451 spreadsheet_unref (&r->spreadsheet);
/* Advances the state machine in R according to the node the XML reader
   R->xtr is currently positioned on.  Along the way it records sheet names
   and the extents of cell content in OR->spreadsheet.sheets, and keeps
   R->col and R->col_span up to date.
   NOTE(review): the switch statement, most case labels and several
   statements are not part of this excerpt. */
455 process_node (struct ods_reader *or, struct state_data *r)
457 xmlChar *name = xmlTextReaderName (r->xtr);
/* NOTE(review): the condition guarding this placeholder is not part of
   this excerpt; presumably it applies when no name was returned. */
459 name = xmlStrdup (_xml ("--"));
462 r->node_type = xmlTextReaderNodeType (r->xtr);
/* Start of <office:spreadsheet>: no sheet has been entered yet. */
467 if (0 == xmlStrcasecmp (name, _xml("office:spreadsheet")) &&
468 XML_READER_TYPE_ELEMENT == r->node_type)
470 r->state = STATE_SPREADSHEET;
471 r->current_sheet = -1;
472 r->current_sheet_name = NULL;
475 case STATE_SPREADSHEET:
/* Start of <table:table>: remember the sheet's name. */
476 if (0 == xmlStrcasecmp (name, _xml("table:table"))
478 (XML_READER_TYPE_ELEMENT == r->node_type))
480 xmlFree (r->current_sheet_name);
481 r->current_sheet_name = xmlTextReaderGetAttribute (r->xtr, _xml ("table:name"));
/* First time this sheet is seen: grow the sheets array by one entry and
   mark its extents as unknown (-1). */
485 if (r->current_sheet >= or->n_allocated_sheets)
487 assert (r->current_sheet == or->n_allocated_sheets);
488 or->spreadsheet.sheets = xrealloc (or->spreadsheet.sheets, sizeof (*or->spreadsheet.sheets) * ++or->n_allocated_sheets);
489 or->spreadsheet.sheets[or->n_allocated_sheets - 1].first_col = -1;
490 or->spreadsheet.sheets[or->n_allocated_sheets - 1].last_col = -1;
491 or->spreadsheet.sheets[or->n_allocated_sheets - 1].first_row = -1;
492 or->spreadsheet.sheets[or->n_allocated_sheets - 1].last_row = -1;
493 or->spreadsheet.sheets[or->n_allocated_sheets - 1].name = CHAR_CAST (char *, xmlStrdup (r->current_sheet_name));
/* Keep the sheet count at least as large as the number of sheets seen. */
495 if (or->n_allocated_sheets > or->n_sheets)
496 or->n_sheets = or->n_allocated_sheets;
501 r->state = STATE_TABLE;
503 else if (0 == xmlStrcasecmp (name, _xml("office:spreadsheet")) &&
504 XML_READER_TYPE_ELEMENT == r->node_type)
506 r->state = STATE_INIT;
/* Start of <table:table-row>; a row may stand for several repeated rows. */
510 if (0 == xmlStrcasecmp (name, _xml("table:table-row")) &&
511 (XML_READER_TYPE_ELEMENT == r->node_type))
514 xmlTextReaderGetAttribute (r->xtr,
515 _xml ("table:number-rows-repeated"));
/* A missing repeat attribute means a single row. */
517 int row_span = value ? _xmlchar_to_int (value) : 1;
/* An empty row element has no cells to descend into. */
522 if (! xmlTextReaderIsEmptyElement (r->xtr))
523 r->state = STATE_ROW;
/* End of <table:table>: back to looking for the next sheet. */
527 else if (0 == xmlStrcasecmp (name, _xml("table:table")) &&
528 (XML_READER_TYPE_END_ELEMENT == r->node_type))
530 r->state = STATE_SPREADSHEET;
/* Start of <table:table-cell>; advance the column counter by the cell's
   repeat count (1 when the attribute is missing). */
534 if ((0 == xmlStrcasecmp (name, _xml ("table:table-cell")))
536 (XML_READER_TYPE_ELEMENT == r->node_type))
539 xmlTextReaderGetAttribute (r->xtr,
540 _xml ("table:number-columns-repeated"));
542 r->col_span = value ? _xmlchar_to_int (value) : 1;
543 r->col += r->col_span;
545 if (! xmlTextReaderIsEmptyElement (r->xtr))
546 r->state = STATE_CELL;
/* End of <table:table-row>. */
550 else if ((0 == xmlStrcasecmp (name, _xml ("table:table-row")))
552 (XML_READER_TYPE_END_ELEMENT == r->node_type))
554 r->state = STATE_TABLE;
/* Start of a non-empty <text:p> inside a cell. */
558 if ((0 == xmlStrcasecmp (name, _xml("text:p")))
560 (XML_READER_TYPE_ELEMENT == r->node_type))
562 if (! xmlTextReaderIsEmptyElement (r->xtr))
563 r->state = STATE_CELL_CONTENT;
/* End of <table:table-cell>. */
566 ((0 == xmlStrcasecmp (name, _xml("table:table-cell")))
568 (XML_READER_TYPE_END_ELEMENT == r->node_type)
571 r->state = STATE_ROW;
574 case STATE_CELL_CONTENT:
575 assert (r->current_sheet >= 0);
576 assert (r->current_sheet < or->n_allocated_sheets);
/* Extend the sheet's recorded extents to include this cell.  The row and
   column stored are r->row - 1 and r->col - 1. */
578 if (or->spreadsheet.sheets[r->current_sheet].first_row == -1)
579 or->spreadsheet.sheets[r->current_sheet].first_row = r->row - 1;
582 (or->spreadsheet.sheets[r->current_sheet].first_col == -1)
584 (or->spreadsheet.sheets[r->current_sheet].first_col >= r->col - 1)
586 or->spreadsheet.sheets[r->current_sheet].first_col = r->col - 1;
588 if (or->spreadsheet.sheets[r->current_sheet].last_row < r->row - 1)
589 or->spreadsheet.sheets[r->current_sheet].last_row = r->row - 1;
591 if (or->spreadsheet.sheets[r->current_sheet].last_col < r->col - 1)
592 or->spreadsheet.sheets[r->current_sheet].last_col = r->col - 1;
/* Any end element while in cell content returns to STATE_CELL. */
594 if (XML_READER_TYPE_END_ELEMENT == r->node_type)
595 r->state = STATE_CELL;
/* NOTE(review): the declarations this line describes, and the delimiters of
   its comment, are not part of this excerpt. */
606 A struct containing the parameters of a cell's value
/* The first data value seen for a variable; ods_make_reader uses it to pick
   the variable's width and format and to fill the first case. */
619 struct xml_value firstval;
623 /* Determine the width that a xmv should probably have */
/* Starts from SPREADSHEET_DEFAULT_WIDTH.  A value whose type is set and is
   not "string" is treated as non-string.  For strings the width is derived
   from the length of the value or the text, passed through ROUND_UP with
   SPREADSHEET_DEFAULT_WIDTH.
   NOTE(review): the branch conditions, the use of FALLBACK and the return
   statements are not part of this excerpt. */
625 xmv_to_width (const struct xml_value *xmv, int fallback)
627 int width = SPREADSHEET_DEFAULT_WIDTH;
629 /* Non-strings always have zero width */
630 if (xmv->type != NULL && 0 != xmlStrcmp (xmv->type, _xml("string")))
637 width = ROUND_UP (xmlStrlen (xmv->value),
638 SPREADSHEET_DEFAULT_WIDTH);
640 width = ROUND_UP (xmlStrlen (xmv->text),
641 SPREADSHEET_DEFAULT_WIDTH);
647 Sets the VAR of case C, to the value corresponding to the xml data
/* COL and ROW identify the spreadsheet cell the data came from; they are
   only used to name the cell in the conversion warning below. */
650 convert_xml_to_value (struct ccase *c, const struct variable *var,
651 const struct xml_value *xmv, int col, int row)
653 union value *v = case_data_rw (c, var);
/* A cell with neither a value attribute nor text becomes missing. */
655 if (xmv->value == NULL && xmv->text == NULL)
656 value_set_missing (v, var_get_width (var));
657 else if (var_is_alpha (var))
658 /* Use the text field, because it seems that there is no
659 value field for strings */
660 value_copy_str_rpad (v, var_get_width (var), xmv->text, ' ');
/* Numeric variable. */
663 const struct fmt_spec *fmt = var_get_write_format (var);
664 enum fmt_category fc = fmt_get_category (fmt->type);
666 assert (fc != FMT_CAT_STRING);
/* Floats are parsed from the value attribute with c_strtod. */
668 if (0 == xmlStrcmp (xmv->type, _xml("float")))
670 v->f = c_strtod (CHAR_CAST (const char *, xmv->value), NULL);
/* Other types: parse the value attribute if there is one, else the text,
   through data_in. */
674 const char *text = xmv->value ?
675 CHAR_CAST (const char *, xmv->value) : CHAR_CAST (const char *, xmv->text);
677 char *m = data_in (ss_cstr (text), "UTF-8",
/* NOTE(review): the test on M is not part of this excerpt; presumably a
   non-null M is an error message from data_in -- confirm. */
685 char buf [FMT_STRING_LEN_MAX + 1];
686 char *cell = create_cell_ref (col, row);
688 msg (MW, _("Cannot convert the value in the spreadsheet cell %s to format (%s): %s"),
689 cell, fmt_to_string (fmt, buf), m);
697 /* Try to find out how many sheets there are in the "workbook" */
/* Opens "meta.xml" in ZREADER and scans it for a meta:document-statistic
   element, whose meta:table-count attribute is converted with
   _xmlchar_to_int.  The XML reader and zip member are released on both exit
   paths visible below.
   NOTE(review): the return statements and the handling of a missing
   meta.xml are not part of this excerpt. */
699 get_sheet_count (struct zip_reader *zreader)
701 xmlTextReaderPtr mxtr;
702 struct zip_member *meta = NULL;
703 meta = zip_member_open (zreader, "meta.xml");
708 mxtr = xmlReaderForIO (xml_reader_for_zip_member, NULL, meta, NULL, NULL, 0);
710 while (1 == xmlTextReaderRead (mxtr))
712 xmlChar *name = xmlTextReaderName (mxtr);
713 if (0 == xmlStrcmp (name, _xml("meta:document-statistic")))
715 xmlChar *attr = xmlTextReaderGetAttribute (mxtr, _xml ("meta:table-count"));
/* Found the count: convert it and clean up. */
719 int s = _xmlchar_to_int (attr);
720 xmlFreeTextReader (mxtr);
721 zip_member_finish (meta);
/* Reached the end of meta.xml without finding the count. */
731 xmlFreeTextReader (mxtr);
732 zip_member_finish (meta);
/* Number of sheets in spreadsheet S.  A non-negative r->n_sheets is treated
   as already known; otherwise the count is obtained from get_sheet_count
   and stored in r->n_sheets.
   NOTE(review): the return statements are not part of this excerpt. */
737 ods_get_sheet_n_sheets (struct spreadsheet *s)
739 struct ods_reader *r = (struct ods_reader *) s;
741 if (r->n_sheets >= 0)
744 r->n_sheets = get_sheet_count (r->zreader);
/* Error callback registered with xmlTextReaderSetErrorHandler in
   init_reader.  CTX is the struct ods_reader.  Emits a warning (MW) that
   names the spreadsheet file and the line number taken from LOC.
   NOTE(review): two arguments of the msg call are not part of this
   excerpt. */
751 ods_error_handler (void *ctx, const char *mesg,
752 xmlParserSeverities sev UNUSED,
753 xmlTextReaderLocatorPtr loc)
755 struct ods_reader *r = ctx;
757 msg (MW, _("There was a problem whilst reading the %s file `%s' (near line %d): `%s'"),
759 r->spreadsheet.file_name,
760 xmlTextReaderLocatorLineNumber (loc),
/* Defined near the end of this file; declared here because ods_make_reader
   calls it. */
765 static bool init_reader (struct ods_reader *r, bool report_errors, struct state_data *state);
/* Creates a casereader over SPREADSHEET according to OPTS (cell range, sheet
   name/index, whether the first row holds variable names).

   The steps visible below are:
   - take a reference to the spreadsheet and initialise the parse state;
   - advance to the first wanted row of the target sheet;
   - optionally read variable names from that row;
   - read the first row of data to decide each variable's width and format;
   - build the dictionary and the first case;
   - hand the reader to casereader_create_sequential.

   NOTE(review): many statements, including the error-path labels and
   returns, are not part of this excerpt. */
767 static struct casereader *
768 ods_make_reader (struct spreadsheet *spreadsheet,
769 const struct spreadsheet_read_options *opts)
772 xmlChar *type = NULL;
773 unsigned long int vstart = 0;
774 casenumber n_cases = CASENUMBER_MAX;
776 struct var_spec *var_spec = NULL;
779 struct ods_reader *r = (struct ods_reader *) spreadsheet;
780 xmlChar *val_string = NULL;
783 ds_init_empty (&r->ods_errs);
/* This reference is dropped again in ods_file_casereader_destroy. */
784 r = (struct ods_reader *) spreadsheet_ref (SPREADSHEET_CAST (r));
786 if (!init_reader (r, true, &r->rsd))
789 r->spreadsheet.used_first_case = false;
790 r->spreadsheet.first_case = NULL;
/* Either parse the requested range, or default to "everything": start at
   (0, 0) with -1 meaning no stop column/row. */
792 if (opts->cell_range)
794 if (! convert_cell_ref (opts->cell_range,
795 &r->spreadsheet.start_col, &r->spreadsheet.start_row,
796 &r->spreadsheet.stop_col, &r->spreadsheet.stop_row))
798 msg (SE, _("Invalid cell range `%s'"),
805 r->spreadsheet.start_col = 0;
806 r->spreadsheet.start_row = 0;
807 r->spreadsheet.stop_col = -1;
808 r->spreadsheet.stop_row = -1;
811 r->target_sheet_name = xmlStrdup (BAD_CAST opts->sheet_name);
812 r->target_sheet_index = opts->sheet_index;
814 /* Advance to the start of the cells for the target sheet */
815 while (! reading_target_sheet (r, &r->rsd)
816 || r->rsd.state != STATE_ROW || r->rsd.row <= r->spreadsheet.start_row)
818 if (1 != (ret = xmlTextReaderRead (r->rsd.xtr)))
821 process_node (r, &r->rsd);
826 msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."),
827 spreadsheet->file_name);
/* Optional header row: each text cell supplies a variable name. */
831 if (opts->read_names)
833 while (1 == xmlTextReaderRead (r->rsd.xtr))
835 process_node (r, &r->rsd);
837 /* If the row is finished then stop for now */
838 if (r->rsd.state == STATE_TABLE && r->rsd.row > r->spreadsheet.start_row)
/* Index of the current cell relative to the first wanted column. */
841 int idx = r->rsd.col - r->spreadsheet.start_col - 1;
846 if (r->spreadsheet.stop_col != -1 && idx > r->spreadsheet.stop_col - r->spreadsheet.start_col)
849 if (r->rsd.state == STATE_CELL_CONTENT
851 XML_READER_TYPE_TEXT == r->rsd.node_type)
853 xmlChar *value = xmlTextReaderValue (r->rsd.xtr);
854 if (idx >= n_var_specs)
856 var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + 1));
858 /* xrealloc (like realloc) leaves the newly added memory uninitialised, so zero it */
859 memset (var_spec + n_var_specs,
861 (idx - n_var_specs + 1) * sizeof (*var_spec));
862 n_var_specs = idx + 1;
/* A repeated cell names every column it covers, working backwards from
   IDX. */
864 for (int i = 0; i < r->rsd.col_span; ++i)
866 var_spec[idx - i].firstval.text = 0;
867 var_spec[idx - i].firstval.value = 0;
868 var_spec[idx - i].firstval.type = 0;
869 var_spec[idx - i].name =
870 strdup (CHAR_CAST (const char *, value));
878 /* Read in the first row of data */
879 while (1 == xmlTextReaderRead (r->rsd.xtr))
882 process_node (r, &r->rsd);
884 if (! reading_target_sheet (r, &r->rsd))
887 /* If the row is finished then stop for now */
888 if (r->rsd.state == STATE_TABLE &&
889 r->rsd.row > r->spreadsheet.start_row + (opts->read_names ? 1 : 0))
892 idx = r->rsd.col - r->spreadsheet.start_col - 1;
896 if (r->spreadsheet.stop_col != -1 && idx > r->spreadsheet.stop_col - r->spreadsheet.start_col)
/* On the cell's start element, remember its declared type and value
   attributes; they are attached to the variable once the cell's text
   arrives. */
899 if (r->rsd.state == STATE_CELL &&
900 XML_READER_TYPE_ELEMENT == r->rsd.node_type)
902 type = xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("office:value-type"));
903 val_string = xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("office:value"));
906 if (r->rsd.state == STATE_CELL_CONTENT &&
907 XML_READER_TYPE_TEXT == r->rsd.node_type)
909 if (idx >= n_var_specs)
911 var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + 1));
912 memset (var_spec + n_var_specs,
914 (idx - n_var_specs + 1) * sizeof (*var_spec));
916 var_spec [idx].name = NULL;
917 n_var_specs = idx + 1;
/* Ownership of TYPE, the text and VAL_STRING passes to var_spec here; they
   are released in the loops near the end of this function. */
920 var_spec [idx].firstval.type = type;
921 var_spec [idx].firstval.text = xmlTextReaderValue (r->rsd.xtr);
922 var_spec [idx].firstval.value = val_string;
930 /* Create the dictionary and populate it */
931 r->spreadsheet.dict = dict_create (
932 CHAR_CAST (const char *, xmlTextReaderConstEncoding (r->rsd.xtr)));
934 for (i = 0; i < n_var_specs ; ++i)
937 struct variable *var = NULL;
938 char *name = dict_make_unique_var_name (r->spreadsheet.dict, var_spec[i].name, &vstart);
939 int width = xmv_to_width (&var_spec[i].firstval, opts->asw);
940 dict_create_var (r->spreadsheet.dict, name, width);
943 var = dict_get_var (r->spreadsheet.dict, i);
/* Cells typed "date" get a different format from the width-based default.
   NOTE(review): the date branch is not part of this excerpt. */
945 if (0 == xmlStrcmp (var_spec[i].firstval.type, _xml("date")))
952 fmt = fmt_default_for_width (width);
954 var_set_both_formats (var, &fmt);
957 if (n_var_specs == 0)
959 msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."),
960 spreadsheet->file_name);
964 /* Create the first case, and cache it */
965 r->spreadsheet.proto = caseproto_ref (dict_get_proto (r->spreadsheet.dict));
966 r->spreadsheet.first_case = case_create (r->spreadsheet.proto);
967 case_set_missing (r->spreadsheet.first_case);
969 for (i = 0 ; i < n_var_specs; ++i)
971 const struct variable *var = dict_get_var (r->spreadsheet.dict, i);
973 convert_xml_to_value (r->spreadsheet.first_case, var, &var_spec[i].firstval,
974 r->rsd.col - n_var_specs + i,
978 /* Read ahead until the parser is in STATE_ROW (NOTE(review): the statement under the test is not part of this excerpt; presumably it stops the loop) */
979 while (1 == xmlTextReaderRead (r->rsd.xtr))
981 process_node (r, &r->rsd);
983 if (r->rsd.state == STATE_ROW)
/* Success path: the per-variable scratch data is no longer needed.
   NOTE(review): type, value and text came from libxml2 (xmlTextReader*
   calls) but are released with free rather than xmlFree; equivalent only
   while libxml2 uses the default allocator -- confirm. */
988 for (i = 0 ; i < n_var_specs ; ++i)
990 free (var_spec[i].firstval.type);
991 free (var_spec[i].firstval.value);
992 free (var_spec[i].firstval.text);
993 free (var_spec[i].name);
999 return casereader_create_sequential
1001 r->spreadsheet.proto,
1003 &ods_file_casereader_class, r);
/* Failure path: release the scratch data and tear the reader down. */
1007 for (i = 0 ; i < n_var_specs ; ++i)
1009 free (var_spec[i].firstval.type);
1010 free (var_spec[i].firstval.value);
1011 free (var_spec[i].firstval.text);
1012 free (var_spec[i].name);
1017 ods_file_casereader_destroy (NULL, r);
1023 /* Reads and returns one case from READER's file. Returns a null
1024 pointer on failure. */
/* R_ is the struct ods_reader.  The first call returns the case that
   ods_make_reader built in advance; later calls parse one more row from
   content.xml.
   NOTE(review): several statements, including the returns after the first
   one, are not part of this excerpt. */
1025 static struct ccase *
1026 ods_file_casereader_read (struct casereader *reader UNUSED, void *r_)
1028 struct ccase *c = NULL;
1029 struct ods_reader *r = r_;
1031 xmlChar *val_string = NULL;
1032 xmlChar *type = NULL;
/* Hand over the pre-built first case exactly once. */
1034 if (!r->spreadsheet.used_first_case)
1036 r->spreadsheet.used_first_case = true;
1037 return r->spreadsheet.first_case;
1041 /* Advance to the start of a row. (If there is one) */
1042 while (r->rsd.state != STATE_ROW
1043 && 1 == xmlTextReaderRead (r->rsd.xtr)
1046 process_node (r, &r->rsd);
/* Conditions under which there is no further row to read: off the target
   sheet, outside any table, or past the requested stop row. */
1050 if (! reading_target_sheet (r, &r->rsd)
1051 || r->rsd.state < STATE_TABLE
1052 || (r->spreadsheet.stop_row != -1 && r->rsd.row > r->spreadsheet.stop_row + 1)
/* Start from an all-missing case; cells found below overwrite it. */
1058 c = case_create (r->spreadsheet.proto);
1059 case_set_missing (c);
1061 while (1 == xmlTextReaderRead (r->rsd.xtr))
1063 process_node (r, &r->rsd);
1065 if (r->spreadsheet.stop_row != -1 && r->rsd.row > r->spreadsheet.stop_row + 1)
/* On the cell's start element, remember its declared type and value
   attributes until the cell's text arrives. */
1068 if (r->rsd.state == STATE_CELL &&
1069 r->rsd.node_type == XML_READER_TYPE_ELEMENT)
1071 type = xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("office:value-type"));
1072 val_string = xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("office:value"));
1075 if (r->rsd.state == STATE_CELL_CONTENT &&
1076 r->rsd.node_type == XML_READER_TYPE_TEXT)
1079 struct xml_value *xmv = xzalloc (sizeof *xmv);
1080 xmv->text = xmlTextReaderValue (r->rsd.xtr);
1081 xmv->value = val_string;
/* A repeated cell fills every column it covers, working backwards from
   the current column. */
1086 for (col = 0; col < r->rsd.col_span; ++col)
1088 const struct variable *var;
1089 const int idx = r->rsd.col - col - r->spreadsheet.start_col - 1;
/* Columns beyond the requested range or beyond the dictionary get special
   handling.  NOTE(review): the statements under these tests are not part
   of this excerpt. */
1092 if (r->spreadsheet.stop_col != -1 && idx > r->spreadsheet.stop_col - r->spreadsheet.start_col)
1094 if (idx >= dict_get_var_cnt (r->spreadsheet.dict))
1097 var = dict_get_var (r->spreadsheet.dict, idx);
1098 convert_xml_to_value (c, var, xmv, idx + r->spreadsheet.start_col, r->rsd.row - 1);
1101 xmlFree (xmv->text);
1102 xmlFree (xmv->value);
1103 xmlFree (xmv->type);
/* The parser has left the row. */
1106 if (r->rsd.state <= STATE_TABLE)
1111 xmlFree (val_string);
/* Prepares R for use.  Opens "content.xml" from R's zip reader and builds an
   XML reader over it, with libxml2's own error/warning output suppressed
   unless REPORT_ERRORS is set.  The result is stored in *STATE and
   ods_error_handler is registered.  It then labels the spreadsheet "ODS"
   and installs the ODS callbacks in it.

   NOTE(review): ods_probe passes NULL for STATE while the lines below
   assign through it, so the lines missing from this excerpt presumably
   guard the XML set-up with a test on STATE -- confirm.  The return
   statements are likewise not shown. */
1117 init_reader (struct ods_reader *r, bool report_errors,
1118 struct state_data *state)
1120 struct spreadsheet *s = SPREADSHEET_CAST (r);
1124 struct zip_member *content = zip_member_open (r->zreader, "content.xml");
1125 if (content == NULL)
1128 xmlTextReaderPtr xtr = xmlReaderForIO (xml_reader_for_zip_member, NULL, content, NULL, NULL,
1131 : (XML_PARSE_NOERROR | XML_PARSE_NOWARNING));
1136 *state = (struct state_data) { .xtr = xtr,
1138 .state = STATE_INIT };
1140 xmlTextReaderSetErrorHandler (xtr, ods_error_handler, r);
/* Callback table through which generic spreadsheet code reaches this
   reader. */
1143 strcpy (s->type, "ODS");
1144 s->destroy = ods_destroy;
1145 s->make_reader = ods_make_reader;
1146 s->get_sheet_name = ods_get_sheet_name;
1147 s->get_sheet_range = ods_get_sheet_range;
1148 s->get_sheet_n_sheets = ods_get_sheet_n_sheets;
1149 s->get_sheet_n_rows = ods_get_sheet_n_rows;
1150 s->get_sheet_n_columns = ods_get_sheet_n_columns;
1151 s->get_sheet_cell = ods_get_sheet_cell;
/* Tries to open FILENAME as an OpenDocument spreadsheet.  On success returns
   the embedded struct spreadsheet of a newly allocated ods_reader with a
   reference count of 1.
   NOTE(review): the failure-path conditions, labels and returns are not
   part of this excerpt. */
1156 struct spreadsheet *
1157 ods_probe (const char *filename, bool report_errors)
/* xzalloc zero-fills, so members not set below start out as 0/NULL. */
1159 struct ods_reader *r = xzalloc (sizeof *r);
1160 struct zip_reader *zr;
1162 ds_init_empty (&r->zip_errs);
1164 zr = zip_reader_create (filename, &r->zip_errs);
/* NOTE(review): the conditions around this message are not shown;
   presumably it is issued when ZR is null and REPORT_ERRORS is set. */
1170 msg (ME, _("Cannot open %s as a OpenDocument file: %s"),
1171 filename, ds_cstr (&r->zip_errs));
1173 ds_destroy (&r->zip_errs);
1179 r->spreadsheet.ref_cnt = 1;
1180 hmap_init (&r->cache);
/* STATE is NULL here: no parse state is being set up at probe time. */
1182 if (!init_reader (r, report_errors, NULL))
1186 r->n_allocated_sheets = 0;
1187 r->spreadsheet.sheets = NULL;
1189 r->spreadsheet.file_name = strdup (filename);
1190 return &r->spreadsheet;
/* Failure clean-up. */
1193 ds_destroy (&r->zip_errs);
1194 zip_reader_destroy (r->zreader);