1 /* PSPP - a program for statistical analysis.
2 Copyright (C) 2011, 2012, 2013, 2016, 2020 Free Software Foundation, Inc.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation, either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see <http://www.gnu.org/licenses/>. */
19 #include "ods-reader.h"
20 #include "spreadsheet-reader.h"
25 #include <libxml/xmlreader.h>
28 #include "data/case.h"
29 #include "data/casereader-provider.h"
30 #include "data/data-in.h"
31 #include "data/dictionary.h"
32 #include "data/format.h"
33 #include "data/identifier.h"
34 #include "data/value.h"
35 #include "data/variable.h"
36 #include "libpspp/assertion.h"
37 #include "libpspp/i18n.h"
38 #include "libpspp/message.h"
39 #include "libpspp/misc.h"
40 #include "libpspp/str.h"
41 #include "libpspp/zip-reader.h"
42 #include "libpspp/hmap.h"
43 #include "libpspp/hash-functions.h"
46 #include "gl/c-strtod.h"
47 #include "gl/minmax.h"
48 #include "gl/xalloc.h"
51 #define _(msgid) gettext (msgid)
53 /* Setting this to false can help with debugging and development.
54 Don't forget to set it back to true, or users will complain that
55 all but the smallest spreadsheets display VERY slowly. */
56 static const bool use_cache = true;
/* Forward declarations of the two casereader callbacks defined later in this
   file. */
58 static void ods_file_casereader_destroy (struct casereader *, void *);
59 static struct ccase *ods_file_casereader_read (struct casereader *, void *);
/* Casereader class for ODS files; it lists the read and destroy callbacks
   declared above. */
61 static const struct casereader_class ods_file_casereader_class =
63 ods_file_casereader_read,
64 ods_file_casereader_destroy,
/* States of the content.xml parser.  process_node() moves between them as
   XML nodes are read. */
71 STATE_INIT = 0, /* Initial state */
72 STATE_SPREADSHEET, /* Found the start of the spreadsheet doc */
73 STATE_TABLE, /* Found the sheet that we actually want */
74 STATE_ROW, /* Found the start of the cell array */
75 STATE_CELL, /* Found a cell */
76 STATE_CELL_CONTENT /* Found the text within a cell */
/* The zip archive member being parsed; state_data_destroy() finishes it. */
82 struct zip_member *zm;
/* Current position in the parser state machine. */
84 enum reader_state state;
/* Name of the sheet being read, taken from its table:name attribute by
   process_node(). */
88 xmlChar *current_sheet_name;
/* Releases what SD holds: the current sheet name, the XML text reader and
   the zip member.  SD itself is not freed here. */
94 state_data_destroy (struct state_data *sd)
96 xmlFree (sd->current_sheet_name);
/* Clear the pointer so a stale name cannot be freed again through SD. */
97 sd->current_sheet_name = NULL;
99 xmlFreeTextReader (sd->xtr);
102 zip_member_finish (sd->zm);
/* The generic spreadsheet object.  Functions in this file cast a
   struct spreadsheet * to a struct ods_reader *. */
108 struct spreadsheet spreadsheet;
/* Reader for the zip archive; content.xml and meta.xml are opened from it. */
109 struct zip_reader *zreader;
/* Index of the sheet to read.  reading_target_sheet() compares it against
   the current sheet counter plus one. */
111 int target_sheet_index;
/* Name of the sheet to read; when non-null it is compared against the
   current sheet name instead of the index. */
112 xmlChar *target_sheet_name;
/* Number of entries allocated in spreadsheet.sheets. */
114 int n_allocated_sheets;
116 /* The total number of sheets in the "workbook" */
119 /* State data for the reader */
120 struct state_data rsd;
/* Error text that ods_file_casereader_destroy() reports if non-empty. */
122 struct string ods_errs;
/* Receives error text from zip_reader_create() in ods_probe(). */
124 struct string zip_errs;
128 /* A value to be kept in the hash table for cache purposes. */
/* Hash map linkage; entries are hashed on sheet, row and column. */
131 struct hmap_node node;
133 /* The number of the sheet. */
136 /* The cell's row. */
139 /* The cell's column. */
142 /* The value of the cell. */
/* Read callback passed to xmlReaderForIO().  Reads up to LEN bytes from the
   zip member ZM_ into BUFFER and returns the result of zip_member_read(). */
147 xml_reader_for_zip_member (void *zm_, char *buffer, int len)
149 struct zip_member *zm = zm_;
150 return zip_member_read (zm, buffer, len);
/* The spreadsheet "destroy" callback installed by init_reader().  Releases
   the resources owned by the ods_reader behind S. */
154 ods_destroy (struct spreadsheet *s)
156 struct ods_reader *r = (struct ods_reader *) s;
/* Free the name of every sheet recorded so far. */
160 for (i = 0; i < r->n_allocated_sheets; ++i)
162 xmlFree (r->spreadsheet.sheets[i].name);
165 dict_unref (r->spreadsheet.dict);
167 zip_reader_destroy (r->zreader);
168 free (r->spreadsheet.sheets);
/* Walk the cell cache with the deletion-safe iterator.  The loop body is
   not visible in this view; presumably it frees each cached cell. */
171 struct cache_datum *cell;
172 struct cache_datum *next;
173 HMAP_FOR_EACH_SAFE (cell, next, struct cache_datum, node, &r->cache)
179 hmap_destroy (&r->cache);
/* Tests whether the sheet that SD is positioned on is the one R was asked
   to read.  The return statements are not visible in this view; presumably
   the result is true on a match. */
185 reading_target_sheet (const struct ods_reader *r, const struct state_data *sd)
/* A target name, when given, is matched against the current sheet name. */
187 if (r->target_sheet_name != NULL)
189 if (0 == xmlStrcmp (r->target_sheet_name, sd->current_sheet_name))
/* The target index is compared with current_sheet + 1, so it counts from
   one where current_sheet counts from zero. */
193 if (r->target_sheet_index == sd->current_sheet + 1)
200 static void process_node (struct ods_reader *or, struct state_data *r);
203 /* Initialise SD using R */
205 state_data_init (const struct ods_reader *r, struct state_data *sd)
/* Start from an all-zero state. */
207 memset (sd, 0, sizeof (*sd));
/* Open a fresh handle on content.xml and build an XML text reader that
   pulls its input from that zip member. */
209 sd->zm = zip_member_open (r->zreader, "content.xml");
215 xmlReaderForIO (xml_reader_for_zip_member, NULL, sd->zm, NULL, NULL,
221 sd->state = STATE_INIT;
/* Returns the name recorded for sheet N of S.  content.xml is parsed with a
   temporary parser state until sheet N has an entry in the sheets array. */
227 ods_get_sheet_name (struct spreadsheet *s, int n)
229 struct ods_reader *r = (struct ods_reader *) s;
230 struct state_data sd;
231 state_data_init (r, &sd);
/* Keep reading while sheet N has no entry yet or the parser is not back at
   spreadsheet level. */
233 while ((r->n_allocated_sheets <= n)
234 || sd.state != STATE_SPREADSHEET)
/* The handling of RET is not visible in this view; presumably the loop
   stops when the read does not succeed. */
236 int ret = xmlTextReaderRead (sd.xtr);
240 process_node (r, &sd);
242 state_data_destroy (&sd);
/* The returned pointer is the string stored in the sheets array, not a
   copy. */
244 return r->spreadsheet.sheets[n].name;
/* Returns the result of create_cell_range() on the first/last column and
   row recorded for sheet N of S, parsing content.xml first to fill them
   in. */
248 ods_get_sheet_range (struct spreadsheet *s, int n)
250 struct ods_reader *r = (struct ods_reader *) s;
251 struct state_data sd;
252 state_data_init (r, &sd);
/* Keep reading while sheet N has no entry, has no last row recorded, or the
   parser is not back at spreadsheet level. */
254 while ((r->n_allocated_sheets <= n)
255 || (r->spreadsheet.sheets[n].last_row == -1)
256 || sd.state != STATE_SPREADSHEET)
/* The handling of RET is not visible in this view; presumably the loop
   stops when the read does not succeed. */
258 int ret = xmlTextReaderRead (sd.xtr);
262 process_node (r, &sd);
264 state_data_destroy (&sd);
266 return create_cell_range (
267 r->spreadsheet.sheets[n].first_col,
268 r->spreadsheet.sheets[n].first_row,
269 r->spreadsheet.sheets[n].last_col,
270 r->spreadsheet.sheets[n].last_row);
/* Returns the number of rows in sheet N of S, computed as the zero-based
   index of the last row with content plus one. */
274 ods_get_sheet_n_rows (struct spreadsheet *s, int n)
276 struct ods_reader *r = (struct ods_reader *) s;
277 struct state_data sd;
/* Fast path: the last row of sheet N has already been recorded. */
279 if (r->n_allocated_sheets > n && r->spreadsheet.sheets[n].last_row != -1)
281 return r->spreadsheet.sheets[n].last_row + 1;
/* Otherwise parse the whole of content.xml so that process_node() records
   the extents of every sheet. */
284 state_data_init (r, &sd);
286 while (1 == xmlTextReaderRead (sd.xtr))
288 process_node (r, &sd);
291 state_data_destroy (&sd);
293 return r->spreadsheet.sheets[n].last_row + 1;
/* Returns the number of columns in sheet N of S, computed as the zero-based
   index of the last column with content plus one. */
297 ods_get_sheet_n_columns (struct spreadsheet *s, int n)
299 struct ods_reader *r = (struct ods_reader *) s;
300 struct state_data sd;
/* Fast path: the last column of sheet N has already been recorded. */
302 if (r->n_allocated_sheets > n && r->spreadsheet.sheets[n].last_col != -1)
303 return r->spreadsheet.sheets[n].last_col + 1;
/* Otherwise parse the whole of content.xml so that process_node() records
   the extents of every sheet. */
305 state_data_init (r, &sd);
307 while (1 == xmlTextReaderRead (sd.xtr))
309 process_node (r, &sd);
312 state_data_destroy (&sd);
314 return r->spreadsheet.sheets[n].last_col + 1;
/* Looks up the text of the cell at ROW, COLUMN of sheet N in S.  A cache
   hit is returned as a strdup() copy, or NULL if the cached value is null.
   On a miss content.xml is parsed from the start and completed rows are
   added to the cache along the way.  The final return is not visible in
   this view. */
318 ods_get_sheet_cell (struct spreadsheet *s, int n, int row, int column)
320 struct ods_reader *r = (struct ods_reader *) s;
321 struct state_data sd;
323 /* See if this cell is in the cache. If it is, then use it. */
326 struct cache_datum *lookup = NULL;
/* The cache key is a hash chained over sheet, row and column. */
327 unsigned int hash = hash_int (n, 0);
328 hash = hash_int (row, hash);
329 hash = hash_int (column, hash);
331 HMAP_FOR_EACH_WITH_HASH (lookup, struct cache_datum, node, hash,
/* Hashes can collide, so confirm the coordinates really match. */
334 if (lookup->row == row && lookup->col == column
335 && lookup->sheet == n)
342 return lookup->value ? strdup (lookup->value) : NULL;
/* Cache miss: parse content.xml with a temporary parser state. */
346 state_data_init (r, &sd);
348 char *cell_content = NULL;
352 while (1 == xmlTextReaderRead (sd.xtr))
354 process_node (r, &sd);
/* The body of this test is not visible in this view. */
355 if (sd.row > prev_row)
358 if (sd.state == STATE_CELL_CONTENT
359 && sd.current_sheet == n
360 && sd.node_type == XML_READER_TYPE_TEXT)
362 /* When cell contents are encountered, copy and save it, discarding
363 any older content. */
365 cell_content = CHAR_CAST (char *, xmlTextReaderValue (sd.xtr));
367 if (sd.state == STATE_ROW
368 && sd.current_sheet == n
369 && sd.node_type == XML_READER_TYPE_ELEMENT)
371 /* At the start of a row, free the cell contents and set it to NULL. */
/* In STATE_ROW on sheet N, at an end element or an empty element
   (process_node() returns to STATE_ROW when a cell ends): add the columns
   from prev_col up to sd.col to the cache. */
375 if (sd.state == STATE_ROW
376 && sd.current_sheet == n
378 (sd.node_type == XML_READER_TYPE_END_ELEMENT
380 xmlTextReaderIsEmptyElement (sd.xtr)))
384 for (int c = prev_col; c < sd.col; ++c)
386 /* See if this cell has already been cached ... */
/* sd.row is used minus one here, giving the zero-based row index. */
387 unsigned int hash = hash_int (sd.current_sheet, 0);
388 hash = hash_int (sd.row - 1, hash);
389 hash = hash_int (c, hash);
390 struct cache_datum *probe = NULL;
391 struct cache_datum *next;
392 HMAP_FOR_EACH_WITH_HASH_SAFE (probe, next, struct cache_datum, node, hash,
395 if (probe->row == sd.row - 1 && probe->col == c
396 && probe->sheet == sd.current_sheet)
400 /* If not, then cache it. */
/* The cache entry stores its own strdup() copy of the content. */
403 struct cache_datum *cell_data = XMALLOC (struct cache_datum);
404 cell_data->row = sd.row - 1;
406 cell_data->sheet = sd.current_sheet;
407 cell_data->value = cell_content ? strdup (cell_content) : NULL;
409 hmap_insert (&r->cache, &cell_data->node, hash);
/* True once the parser is on the requested row and has reached the
   requested column; the action taken is not visible in this view. */
414 if (sd.row == row + 1 && sd.col >= column + 1)
424 state_data_destroy (&sd);
/* Casereader "destroy" callback.  R_ is the struct ods_reader; READER is
   unused.  Releases the per-reader state and drops one reference on the
   spreadsheet. */
429 ods_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
431 struct ods_reader *r = r_;
435 state_data_destroy (&r->rsd);
/* Report any error text that was collected in ods_errs. */
437 if (! ds_is_empty (&r->ods_errs))
438 msg (ME, "%s", ds_cstr (&r->ods_errs));
440 ds_destroy (&r->ods_errs);
/* Unref the cached first case only if ods_file_casereader_read() has not
   already returned it to a caller. */
442 if (r->spreadsheet.first_case && ! r->spreadsheet.used_first_case)
443 case_unref (r->spreadsheet.first_case);
445 caseproto_unref (r->spreadsheet.proto);
446 r->spreadsheet.proto = NULL;
448 xmlFree (r->target_sheet_name);
449 r->target_sheet_name = NULL;
451 spreadsheet_unref (&r->spreadsheet);
/* Advances the parser state machine R by the XML node that R->xtr is
   currently positioned on, and records sheet names and extents in OR.
   Note that in this function R is the parser state and OR is the
   reader.  The switch and case labels for some states are not visible in
   this view. */
455 process_node (struct ods_reader *or, struct state_data *r)
457 xmlChar *name = xmlTextReaderName (r->xtr);
/* Substitute a placeholder name; the guarding condition is not visible in
   this view (presumably NAME was null). */
459 name = xmlStrdup (_xml ("--"));
462 r->node_type = xmlTextReaderNodeType (r->xtr);
/* Start of the spreadsheet document: reset the sheet counter and name. */
467 if (0 == xmlStrcasecmp (name, _xml("office:spreadsheet")) &&
468 XML_READER_TYPE_ELEMENT == r->node_type)
470 r->state = STATE_SPREADSHEET;
471 r->current_sheet = -1;
472 r->current_sheet_name = NULL;
475 case STATE_SPREADSHEET:
/* Start of a sheet: replace the current sheet name with its table:name
   attribute. */
476 if (0 == xmlStrcasecmp (name, _xml("table:table"))
478 (XML_READER_TYPE_ELEMENT == r->node_type))
480 xmlFree (r->current_sheet_name);
481 r->current_sheet_name = xmlTextReaderGetAttribute (r->xtr, _xml ("table:name"));
/* A sheet not seen before: grow the sheets array by one entry, mark its
   extents as unknown (-1) and store a copy of its name. */
485 if (r->current_sheet >= or->n_allocated_sheets)
487 assert (r->current_sheet == or->n_allocated_sheets);
488 or->spreadsheet.sheets = xrealloc (or->spreadsheet.sheets, sizeof (*or->spreadsheet.sheets) * ++or->n_allocated_sheets);
489 or->spreadsheet.sheets[or->n_allocated_sheets - 1].first_col = -1;
490 or->spreadsheet.sheets[or->n_allocated_sheets - 1].last_col = -1;
491 or->spreadsheet.sheets[or->n_allocated_sheets - 1].first_row = -1;
492 or->spreadsheet.sheets[or->n_allocated_sheets - 1].last_row = -1;
493 or->spreadsheet.sheets[or->n_allocated_sheets - 1].name = CHAR_CAST (char *, xmlStrdup (r->current_sheet_name));
/* Keep the sheet count at least as large as the number of sheets seen. */
495 if (or->n_allocated_sheets > or->n_sheets)
496 or->n_sheets = or->n_allocated_sheets;
501 r->state = STATE_TABLE;
/* NOTE(review): this tests for a start element of office:spreadsheet, not
   an end element, before returning to STATE_INIT; confirm that is
   intended. */
503 else if (0 == xmlStrcasecmp (name, _xml("office:spreadsheet")) &&
504 XML_READER_TYPE_ELEMENT == r->node_type)
506 r->state = STATE_INIT;
/* Start of a row.  The table:number-rows-repeated attribute gives the row
   span, defaulting to 1; how it is applied is not visible in this view. */
510 if (0 == xmlStrcasecmp (name, _xml("table:table-row")) &&
511 (XML_READER_TYPE_ELEMENT == r->node_type))
514 xmlTextReaderGetAttribute (r->xtr,
515 _xml ("table:number-rows-repeated"));
517 int row_span = value ? _xmlchar_to_int (value) : 1;
/* Only a non-empty row element moves the parser into STATE_ROW. */
522 if (! xmlTextReaderIsEmptyElement (r->xtr))
523 r->state = STATE_ROW;
/* End of the sheet: back to spreadsheet level. */
527 else if (0 == xmlStrcasecmp (name, _xml("table:table")) &&
528 (XML_READER_TYPE_END_ELEMENT == r->node_type))
530 r->state = STATE_SPREADSHEET;
/* Start of a cell: advance the column counter by the cell's
   table:number-columns-repeated span, defaulting to 1. */
534 if ((0 == xmlStrcasecmp (name, _xml ("table:table-cell")))
536 (XML_READER_TYPE_ELEMENT == r->node_type))
539 xmlTextReaderGetAttribute (r->xtr,
540 _xml ("table:number-columns-repeated"));
542 r->col_span = value ? _xmlchar_to_int (value) : 1;
543 r->col += r->col_span;
/* Only a non-empty cell element moves the parser into STATE_CELL. */
545 if (! xmlTextReaderIsEmptyElement (r->xtr))
546 r->state = STATE_CELL;
/* End of the row: back to table level. */
550 else if ((0 == xmlStrcasecmp (name, _xml ("table:table-row")))
552 (XML_READER_TYPE_END_ELEMENT == r->node_type))
554 r->state = STATE_TABLE;
/* A non-empty text:p element inside a cell holds the cell's text. */
558 if ((0 == xmlStrcasecmp (name, _xml("text:p")))
560 (XML_READER_TYPE_ELEMENT == r->node_type))
562 if (! xmlTextReaderIsEmptyElement (r->xtr))
563 r->state = STATE_CELL_CONTENT;
/* End of the cell: back to row level. */
566 ((0 == xmlStrcasecmp (name, _xml("table:table-cell")))
568 (XML_READER_TYPE_END_ELEMENT == r->node_type)
571 r->state = STATE_ROW;
574 case STATE_CELL_CONTENT:
575 assert (r->current_sheet >= 0);
576 assert (r->current_sheet < or->n_allocated_sheets);
/* This cell has content, so widen the extents recorded for the sheet.
   The values stored are r->row - 1 and r->col - 1.  first_row is set only
   once; first_col keeps the smallest value and last_row/last_col the
   largest. */
578 if (or->spreadsheet.sheets[r->current_sheet].first_row == -1)
579 or->spreadsheet.sheets[r->current_sheet].first_row = r->row - 1;
582 (or->spreadsheet.sheets[r->current_sheet].first_col == -1)
584 (or->spreadsheet.sheets[r->current_sheet].first_col >= r->col - 1)
586 or->spreadsheet.sheets[r->current_sheet].first_col = r->col - 1;
588 if (or->spreadsheet.sheets[r->current_sheet].last_row < r->row - 1)
589 or->spreadsheet.sheets[r->current_sheet].last_row = r->row - 1;
591 if (or->spreadsheet.sheets[r->current_sheet].last_col < r->col - 1)
592 or->spreadsheet.sheets[r->current_sheet].last_col = r->col - 1;
/* Any end element here returns the parser to cell level. */
594 if (XML_READER_TYPE_END_ELEMENT == r->node_type)
595 r->state = STATE_CELL;
606 A struct containing the parameters of a cell's value
619 struct xml_value firstval;
623 /* Determine the width that a xmv should probably have */
/* The use of FALLBACK and the return statements are not visible in this
   view. */
625 xmv_to_width (const struct xml_value *xmv, int fallback)
627 int width = SPREADSHEET_DEFAULT_WIDTH;
629 /* Non-strings always have zero width */
630 if (xmv->type != NULL && 0 != xmlStrcmp (xmv->type, _xml("string")))
/* Two alternatives whose selecting conditions are not visible here: the
   width is the length of the value, or of the text, rounded up to a
   multiple of SPREADSHEET_DEFAULT_WIDTH. */
637 width = ROUND_UP (xmlStrlen (xmv->value),
638 SPREADSHEET_DEFAULT_WIDTH);
640 width = ROUND_UP (xmlStrlen (xmv->text),
641 SPREADSHEET_DEFAULT_WIDTH);
647 Sets the VAR of case C, to the value corresponding to the xml data
/* Stores the cell data XMV into variable VAR of case C.  COL and ROW are
   used only to name the cell in the warning issued when conversion
   fails. */
650 convert_xml_to_value (struct ccase *c, const struct variable *var,
651 const struct xml_value *xmv, int col, int row)
653 union value *v = case_data_rw (c, var);
/* A cell with neither a value nor text becomes a missing value. */
655 if (xmv->value == NULL && xmv->text == NULL)
656 value_set_missing (v, var_get_width (var));
657 else if (var_is_alpha (var))
658 /* Use the text field, because it seems that there is no
659 value field for strings */
660 value_copy_str_rpad (v, var_get_width (var), xmv->text, ' ');
663 const struct fmt_spec *fmt = var_get_write_format (var);
664 enum fmt_category fc = fmt_get_category (fmt->type);
666 assert (fc != FMT_CAT_STRING);
/* Cells typed "float" are parsed from the value attribute with
   c_strtod(). */
668 if (0 == xmlStrcmp (xmv->type, _xml("float")))
670 v->f = c_strtod (CHAR_CAST (const char *, xmv->value), NULL);
/* Other types go through data_in() using the variable's write format,
   preferring the value attribute over the text. */
674 const char *text = xmv->value ?
675 CHAR_CAST (const char *, xmv->value) : CHAR_CAST (const char *, xmv->text);
677 char *m = data_in (ss_cstr (text), "UTF-8", fmt->type,
678 settings_get_fmt_settings (), v,
679 var_get_width (var), "UTF-8");
/* Warn, naming the cell and the format.  M is the string returned by
   data_in(); the test guarding this warning is not visible in this
   view. */
683 char buf [FMT_STRING_LEN_MAX + 1];
684 char *cell = create_cell_ref (col, row);
686 msg (MW, _("Cannot convert the value in the spreadsheet cell %s to format (%s): %s"),
687 cell, fmt_to_string (fmt, buf), m);
695 /* Try to find out how many sheets there are in the "workbook" */
/* Scans meta.xml in ZREADER for a meta:document-statistic element and reads
   its meta:table-count attribute.  The return statements are not visible
   in this view. */
697 get_sheet_count (struct zip_reader *zreader)
699 xmlTextReaderPtr mxtr;
700 struct zip_member *meta = NULL;
701 meta = zip_member_open (zreader, "meta.xml");
706 mxtr = xmlReaderForIO (xml_reader_for_zip_member, NULL, meta, NULL, NULL, 0);
708 while (1 == xmlTextReaderRead (mxtr))
710 xmlChar *name = xmlTextReaderName (mxtr);
711 if (0 == xmlStrcmp (name, _xml("meta:document-statistic")))
713 xmlChar *attr = xmlTextReaderGetAttribute (mxtr, _xml ("meta:table-count"));
/* The count has been read: release the reader and the zip member. */
717 int s = _xmlchar_to_int (attr);
718 xmlFreeTextReader (mxtr);
719 zip_member_finish (meta);
/* The loop ended without the early exit: release the same resources. */
729 xmlFreeTextReader (mxtr);
730 zip_member_finish (meta);
/* Reports the number of sheets in S.  A non-negative n_sheets is treated
   as already known; otherwise it is computed with get_sheet_count().  The
   return statements are not visible in this view. */
735 ods_get_sheet_n_sheets (struct spreadsheet *s)
737 struct ods_reader *r = (struct ods_reader *) s;
739 if (r->n_sheets >= 0)
742 r->n_sheets = get_sheet_count (r->zreader);
/* Error callback installed on the XML text reader by init_reader().  CTX is
   the struct ods_reader.  Issues a warning that includes the file name and
   the line number reported by LOC; SEV is unused. */
749 ods_error_handler (void *ctx, const char *mesg,
750 xmlParserSeverities sev UNUSED,
751 xmlTextReaderLocatorPtr loc)
753 struct ods_reader *r = ctx;
755 msg (MW, _("There was a problem whilst reading the %s file `%s' (near line %d): `%s'"),
757 r->spreadsheet.file_name,
758 xmlTextReaderLocatorLineNumber (loc),
763 static bool init_reader (struct ods_reader *r, bool report_errors, struct state_data *state);
/* Creates a casereader over SPREADSHEET using OPTS (cell range, sheet name
   or index, whether the first row holds variable names).  It positions the
   parser on the target sheet, optionally reads variable names, reads the
   first data row to choose variable widths and formats, builds the
   dictionary, and caches the first case.  Several error branches are not
   visible in this view. */
765 static struct casereader *
766 ods_make_reader (struct spreadsheet *spreadsheet,
767 const struct spreadsheet_read_options *opts)
770 xmlChar *type = NULL;
771 unsigned long int vstart = 0;
772 casenumber n_cases = CASENUMBER_MAX;
774 struct var_spec *var_spec = NULL;
777 struct ods_reader *r = (struct ods_reader *) spreadsheet;
778 xmlChar *val_string = NULL;
781 ds_init_empty (&r->ods_errs);
/* Take a reference on the spreadsheet; ods_file_casereader_destroy() drops
   one. */
782 r = (struct ods_reader *) spreadsheet_ref (SPREADSHEET_CAST (r));
784 if (!init_reader (r, true, &r->rsd))
787 r->spreadsheet.used_first_case = false;
788 r->spreadsheet.first_case = NULL;
/* Use the caller's cell range when one is given. */
790 if (opts->cell_range)
792 if (! convert_cell_ref (opts->cell_range,
793 &r->spreadsheet.start_col, &r->spreadsheet.start_row,
794 &r->spreadsheet.stop_col, &r->spreadsheet.stop_row))
796 msg (SE, _("Invalid cell range `%s'"),
/* Default range: start at the first cell.  A stop value of -1 disables the
   stop_col/stop_row checks made while reading. */
803 r->spreadsheet.start_col = 0;
804 r->spreadsheet.start_row = 0;
805 r->spreadsheet.stop_col = -1;
806 r->spreadsheet.stop_row = -1;
809 r->target_sheet_name = xmlStrdup (BAD_CAST opts->sheet_name);
810 r->target_sheet_index = opts->sheet_index;
812 /* Advance to the start of the cells for the target sheet */
813 while (! reading_target_sheet (r, &r->rsd)
814 || r->rsd.state != STATE_ROW || r->rsd.row <= r->spreadsheet.start_row)
816 if (1 != (ret = xmlTextReaderRead (r->rsd.xtr)))
819 process_node (r, &r->rsd);
824 msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."),
825 spreadsheet->file_name);
/* Optionally read the first row of the range as variable names. */
829 if (opts->read_names)
831 while (1 == xmlTextReaderRead (r->rsd.xtr))
833 process_node (r, &r->rsd);
835 /* If the row is finished then stop for now */
836 if (r->rsd.state == STATE_TABLE && r->rsd.row > r->spreadsheet.start_row)
/* Offset of the current column from the start of the range.  rsd.col has
   already been advanced past the cell, hence the -1. */
839 int idx = r->rsd.col - r->spreadsheet.start_col - 1;
844 if (r->spreadsheet.stop_col != -1 && idx > r->spreadsheet.stop_col - r->spreadsheet.start_col)
847 if (r->rsd.state == STATE_CELL_CONTENT
849 XML_READER_TYPE_TEXT == r->rsd.node_type)
851 xmlChar *value = xmlTextReaderValue (r->rsd.xtr);
852 if (idx >= n_var_specs)
854 var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + 1));
856 /* xrealloc, like realloc, leaves the newly added memory uninitialised, so the new entries are zeroed here. */
857 memset (var_spec + n_var_specs,
859 (idx - n_var_specs + 1) * sizeof (*var_spec));
860 n_var_specs = idx + 1;
/* A cell repeated over several columns gives the same name to each column
   it covers, working backwards from idx. */
862 for (int i = 0; i < r->rsd.col_span; ++i)
864 var_spec[idx - i].firstval.text = 0;
865 var_spec[idx - i].firstval.value = 0;
866 var_spec[idx - i].firstval.type = 0;
867 var_spec[idx - i].name =
868 strdup (CHAR_CAST (const char *, value));
876 /* Read in the first row of data */
877 while (1 == xmlTextReaderRead (r->rsd.xtr))
880 process_node (r, &r->rsd);
882 if (! reading_target_sheet (r, &r->rsd))
885 /* If the row is finished then stop for now */
886 if (r->rsd.state == STATE_TABLE &&
887 r->rsd.row > r->spreadsheet.start_row + (opts->read_names ? 1 : 0))
890 idx = r->rsd.col - r->spreadsheet.start_col - 1;
894 if (r->spreadsheet.stop_col != -1 && idx > r->spreadsheet.stop_col - r->spreadsheet.start_col)
/* On the cell's start element, fetch its type and value attributes. */
897 if (r->rsd.state == STATE_CELL &&
898 XML_READER_TYPE_ELEMENT == r->rsd.node_type)
900 type = xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("office:value-type"));
901 val_string = xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("office:value"));
/* On the cell's text, record type, text and value as this column's first
   value, growing and zeroing var_spec first if needed. */
904 if (r->rsd.state == STATE_CELL_CONTENT &&
905 XML_READER_TYPE_TEXT == r->rsd.node_type)
907 if (idx >= n_var_specs)
909 var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + 1));
910 memset (var_spec + n_var_specs,
912 (idx - n_var_specs + 1) * sizeof (*var_spec));
914 var_spec [idx].name = NULL;
915 n_var_specs = idx + 1;
918 var_spec [idx].firstval.type = type;
919 var_spec [idx].firstval.text = xmlTextReaderValue (r->rsd.xtr);
920 var_spec [idx].firstval.value = val_string;
928 /* Create the dictionary and populate it */
929 r->spreadsheet.dict = dict_create (
930 CHAR_CAST (const char *, xmlTextReaderConstEncoding (r->rsd.xtr)));
932 for (i = 0; i < n_var_specs ; ++i)
935 struct variable *var = NULL;
936 char *name = dict_make_unique_var_name (r->spreadsheet.dict, var_spec[i].name, &vstart);
937 int width = xmv_to_width (&var_spec[i].firstval, opts->asw);
938 dict_create_var (r->spreadsheet.dict, name, width);
941 var = dict_get_var (r->spreadsheet.dict, i);
/* Cells typed "date" are handled separately (that branch is not visible in
   this view); otherwise the default format for the width is used. */
943 if (0 == xmlStrcmp (var_spec[i].firstval.type, _xml("date")))
950 fmt = fmt_default_for_width (width);
952 var_set_both_formats (var, &fmt);
955 if (n_var_specs == 0)
957 msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."),
958 spreadsheet->file_name);
962 /* Create the first case, and cache it */
963 r->spreadsheet.proto = caseproto_ref (dict_get_proto (r->spreadsheet.dict));
964 r->spreadsheet.first_case = case_create (r->spreadsheet.proto);
965 case_set_missing (r->spreadsheet.first_case);
967 for (i = 0 ; i < n_var_specs; ++i)
969 const struct variable *var = dict_get_var (r->spreadsheet.dict, i);
971 convert_xml_to_value (r->spreadsheet.first_case, var, &var_spec[i].firstval,
972 r->rsd.col - n_var_specs + i,
976 /* Read on until the parser state is STATE_ROW; the action taken then is not visible in this view (presumably the loop stops). */
977 while (1 == xmlTextReaderRead (r->rsd.xtr))
979 process_node (r, &r->rsd);
981 if (r->rsd.state == STATE_ROW)
/* Release the temporary per-column specs.
   NOTE(review): firstval.type/value/text come from libxml2
   (xmlTextReaderGetAttribute / xmlTextReaderValue) but are released with
   free(), whereas ods_file_casereader_read() uses xmlFree(); confirm the
   two are interchangeable here. */
986 for (i = 0 ; i < n_var_specs ; ++i)
988 free (var_spec[i].firstval.type);
989 free (var_spec[i].firstval.value);
990 free (var_spec[i].firstval.text);
991 free (var_spec[i].name);
997 return casereader_create_sequential
999 r->spreadsheet.proto,
1001 &ods_file_casereader_class, r);
/* Cleanup that ends in ods_file_casereader_destroy(); presumably the error
   path, whose label is not visible in this view.  The free()/xmlFree()
   note above applies here too. */
1005 for (i = 0 ; i < n_var_specs ; ++i)
1007 free (var_spec[i].firstval.type);
1008 free (var_spec[i].firstval.value);
1009 free (var_spec[i].firstval.text);
1010 free (var_spec[i].name);
1015 ods_file_casereader_destroy (NULL, r);
1021 /* Reads and returns one case from READER's file. Returns a null
1022 pointer on failure. */
/* R_ is the struct ods_reader; READER itself is unused. */
1023 static struct ccase *
1024 ods_file_casereader_read (struct casereader *reader UNUSED, void *r_)
1026 struct ccase *c = NULL;
1027 struct ods_reader *r = r_;
1029 xmlChar *val_string = NULL;
1030 xmlChar *type = NULL;
/* The first call returns the case that ods_make_reader() cached. */
1032 if (!r->spreadsheet.used_first_case)
1034 r->spreadsheet.used_first_case = true;
1035 return r->spreadsheet.first_case;
1039 /* Advance to the start of a row. (If there is one) */
1040 while (r->rsd.state != STATE_ROW
1041 && 1 == xmlTextReaderRead (r->rsd.xtr)
1044 process_node (r, &r->rsd);
/* Conditions tested here: the parser is not on the target sheet, is above
   table level, or is past the stop row.  The action taken is not visible
   in this view (presumably no case is returned). */
1048 if (! reading_target_sheet (r, &r->rsd)
1049 || r->rsd.state < STATE_TABLE
1050 || (r->spreadsheet.stop_row != -1 && r->rsd.row > r->spreadsheet.stop_row + 1)
/* Start from a case in which every value is missing. */
1056 c = case_create (r->spreadsheet.proto);
1057 case_set_missing (c);
1059 while (1 == xmlTextReaderRead (r->rsd.xtr))
1061 process_node (r, &r->rsd);
1063 if (r->spreadsheet.stop_row != -1 && r->rsd.row > r->spreadsheet.stop_row + 1)
/* On the cell's start element, fetch its type and value attributes. */
1066 if (r->rsd.state == STATE_CELL &&
1067 r->rsd.node_type == XML_READER_TYPE_ELEMENT)
1069 type = xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("office:value-type"));
1070 val_string = xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("office:value"));
/* On the cell's text, build an xml_value and store it into every column
   that the cell spans. */
1073 if (r->rsd.state == STATE_CELL_CONTENT &&
1074 r->rsd.node_type == XML_READER_TYPE_TEXT)
1077 struct xml_value *xmv = xzalloc (sizeof *xmv);
1078 xmv->text = xmlTextReaderValue (r->rsd.xtr);
1079 xmv->value = val_string;
1084 for (col = 0; col < r->rsd.col_span; ++col)
1086 const struct variable *var;
/* Variable index for this column: the offset from the start of the range,
   working backwards from rsd.col across the span. */
1087 const int idx = r->rsd.col - col - r->spreadsheet.start_col - 1;
/* Two bounds tests on IDX: past the stop column, and past the last
   variable in the dictionary.  The action taken is not visible in this
   view. */
1090 if (r->spreadsheet.stop_col != -1 && idx > r->spreadsheet.stop_col - r->spreadsheet.start_col)
1092 if (idx >= dict_get_var_cnt (r->spreadsheet.dict))
1095 var = dict_get_var (r->spreadsheet.dict, idx);
1096 convert_xml_to_value (c, var, xmv, idx + r->spreadsheet.start_col, r->rsd.row - 1);
1099 xmlFree (xmv->text);
1100 xmlFree (xmv->value);
1101 xmlFree (xmv->type);
/* The parser has dropped back to table level or above; the action taken is
   not visible in this view (presumably the row is complete). */
1104 if (r->rsd.state <= STATE_TABLE)
1109 xmlFree (val_string);
/* Opens content.xml from R's zip archive, creates an XML text reader over
   it, stores the initial parser state in *STATE, and installs the ODS
   callbacks on R's spreadsheet.  The return statements are not visible in
   this view; callers test the result for failure. */
1115 init_reader (struct ods_reader *r, bool report_errors,
1116 struct state_data *state)
1118 struct spreadsheet *s = SPREADSHEET_CAST (r);
1122 struct zip_member *content = zip_member_open (r->zreader, "content.xml");
1123 if (content == NULL)
/* One alternative for the parser options suppresses libxml2 errors and
   warnings; presumably it is chosen when REPORT_ERRORS is false (the
   condition is not visible in this view). */
1126 xmlTextReaderPtr xtr = xmlReaderForIO (xml_reader_for_zip_member, NULL, content, NULL, NULL,
1129 : (XML_PARSE_NOERROR | XML_PARSE_NOWARNING));
/* NOTE(review): ods_probe() passes NULL for STATE.  Any guard before this
   assignment is not visible in this view; confirm that one exists. */
1134 *state = (struct state_data) { .xtr = xtr,
1136 .state = STATE_INIT };
1138 xmlTextReaderSetErrorHandler (xtr, ods_error_handler, r);
/* Mark the spreadsheet as ODS and install this file's callbacks. */
1141 strcpy (s->type, "ODS");
1142 s->destroy = ods_destroy;
1143 s->make_reader = ods_make_reader;
1144 s->get_sheet_name = ods_get_sheet_name;
1145 s->get_sheet_range = ods_get_sheet_range;
1146 s->get_sheet_n_sheets = ods_get_sheet_n_sheets;
1147 s->get_sheet_n_rows = ods_get_sheet_n_rows;
1148 s->get_sheet_n_columns = ods_get_sheet_n_columns;
1149 s->get_sheet_cell = ods_get_sheet_cell;
/* Tries to open FILENAME as an OpenDocument spreadsheet.  On success
   returns a new spreadsheet with a reference count of 1.  REPORT_ERRORS is
   passed on to init_reader().  The failure returns are not visible in this
   view. */
1154 struct spreadsheet *
1155 ods_probe (const char *filename, bool report_errors)
1157 struct ods_reader *r = xzalloc (sizeof *r);
1158 struct zip_reader *zr;
1160 ds_init_empty (&r->zip_errs);
1162 zr = zip_reader_create (filename, &r->zip_errs);
/* Report the text that zip_reader_create() left in zip_errs.  The
   conditions guarding this message are not visible in this view. */
1168 msg (ME, _("Cannot open %s as a OpenDocument file: %s"),
1169 filename, ds_cstr (&r->zip_errs));
1171 ds_destroy (&r->zip_errs);
1177 r->spreadsheet.ref_cnt = 1;
1178 hmap_init (&r->cache);
/* No parser state is requested here, so STATE is passed as NULL. */
1180 if (!init_reader (r, report_errors, NULL))
/* No sheets are known yet; process_node() grows the array as it finds
   them. */
1184 r->n_allocated_sheets = 0;
1185 r->spreadsheet.sheets = NULL;
1187 r->spreadsheet.file_name = strdup (filename);
1188 return &r->spreadsheet;
/* Cleanup of the error string and the zip reader; presumably the failure
   path, whose label is not visible in this view. */
1191 ds_destroy (&r->zip_errs);
1192 zip_reader_destroy (r->zreader);