From 2f7b367bf796c63c595ac837b716cce535ffd5bf Mon Sep 17 00:00:00 2001 From: John Darrington Date: Fri, 1 Jul 2011 17:27:26 +0200 Subject: [PATCH] Added support to read OpenDocument spreadsheet files --- configure.ac | 16 +- doc/files.texi | 41 +- src/data/automake.mk | 4 + src/data/gnumeric-reader.c | 86 +-- src/data/gnumeric-reader.h | 15 +- src/data/ods-reader.c | 681 ++++++++++++++++++ src/data/ods-reader.h | 27 + src/data/spreadsheet-reader.c | 89 +++ src/data/spreadsheet-reader.h | 47 ++ src/language/data-io/get-data.c | 115 ++- src/output/automake.mk | 2 +- tests/atlocal.in | 1 + tests/automake.mk | 5 +- ...et-data-gnm.at => get-data-spreadsheet.at} | 73 +- tests/language/data-io/test.ods | Bin 0 -> 9231 bytes 15 files changed, 1018 insertions(+), 184 deletions(-) create mode 100644 src/data/ods-reader.c create mode 100644 src/data/ods-reader.h create mode 100644 src/data/spreadsheet-reader.c create mode 100644 src/data/spreadsheet-reader.h rename tests/language/data-io/{get-data-gnm.at => get-data-spreadsheet.at} (59%) create mode 100644 tests/language/data-io/test.ods diff --git a/configure.ac b/configure.ac index eb10e560a2..719f4c7411 100644 --- a/configure.ac +++ b/configure.ac @@ -196,24 +196,30 @@ AC_DEFINE( [crc32], [gl_crc32], [Avoid making zlib call gnulib's crc32() instead of its own.]) -dnl Gnumeric support requires libxml2 and zlib. +dnl Gnumeric and OpenDocument (read) support requires libxml2 and zlib. if test $HAVE_LIBXML2 = yes && test $HAVE_ZLIB = yes; then GNM_SUPPORT=yes + ODF_READ_SUPPORT=yes AC_DEFINE( [GNM_SUPPORT], [1], [Define to 1 if building in support for reading Gnumeric files.]) + AC_DEFINE( + [ODF_READ_SUPPORT], [1], + [Define to 1 if building in support for reading OpenDocument files.]) else GNM_SUPPORT=no + ODF_READ_SUPPORT=no fi AC_SUBST([GNM_SUPPORT]) +AC_SUBST([ODF_READ_SUPPORT]) -dnl ODT support requires libxml2. +dnl ODF support requires libxml2 (zlib is optional). 
if test $HAVE_LIBXML2 = yes; then AC_DEFINE( - [ODT_SUPPORT], [1], - [Define to 1 if building in support for writing ODT files.]) + [ODF_WRITE_SUPPORT], [1], + [Define to 1 if building in support for writing OpenDocument files.]) fi -AM_CONDITIONAL([ODT_SUPPORT], [test $HAVE_LIBXML2 = yes]) +AM_CONDITIONAL([ODF_WRITE_SUPPORT], [test $HAVE_LIBXML2 = yes]) AC_ARG_WITH( gui_tools, diff --git a/doc/files.texi b/doc/files.texi index 89d043f876..cdce0a3c46 100644 --- a/doc/files.texi +++ b/doc/files.texi @@ -182,7 +182,7 @@ Use of @cmd{GET} to read a portable file is a PSPP extension. @display GET DATA - /TYPE=@{GNM,PSQL,TXT@} + /TYPE=@{GNM,ODS,PSQL,TXT@} @dots{}additional subcommands depending on TYPE@dots{} @end display @@ -199,6 +199,9 @@ PSPP currently supports the following file types: @item GNM Spreadsheet files created by Gnumeric (@url{http://gnumeric.org}). +@item ODS +Spreadsheet files in OpenDocument format. + @item PSQL Relations from PostgreSQL databases (@url{http://postgresql.org}). @@ -210,16 +213,16 @@ Each supported file type has additional subcommands, explained in separate sections below. @menu -* GET DATA /TYPE=GNM:: -* GET DATA /TYPE=PSQL:: -* GET DATA /TYPE=TXT:: +* GET DATA /TYPE=GNM/ODS:: Spreadsheets +* GET DATA /TYPE=PSQL:: Databases +* GET DATA /TYPE=TXT:: Delimited Text Files @end menu -@node GET DATA /TYPE=GNM -@subsection Gnumeric Spreadsheet Files +@node GET DATA /TYPE=GNM/ODS +@subsection Spreadsheet Files @display -GET DATA /TYPE=GNM +GET DATA /TYPE=@{GNM, ODS@} /FILE=@{'file-name'@} /SHEET=@{NAME 'sheet-name', INDEX n@} /CELLRANGE=@{RANGE 'range', FULL@} @@ -228,11 +231,19 @@ GET DATA /TYPE=GNM @end display @cindex Gnumeric +@cindex OpenDocument @cindex spreadsheet files -To use GET DATA to read a spreadsheet file created by Gnumeric -(@url{http://gnumeric.org}), specify TYPE=GNM to indicate the file's -format and use FILE to indicate the Gnumeric file to be read. All -other subcommands are optional. 
+ +Gnumeric spreadsheets (@url{http://gnumeric.org}), and spreadsheets +in OpenDocument format +(@url{http://libreplanet.org/wiki/Group:OpenDocument/Software}) +can be read using the GET DATA command. +Use the TYPE subcommand to indicate the file's format. +/TYPE=GNM indicates Gnumeric files, +/TYPE=ODS indicates OpenDocument. +The FILE subcommand is mandatory. +Use it to specify the name of the file to be read. +All other subcommands are optional. The format of each variable is determined by the format of the spreadsheet cell containing the first datum for the variable. @@ -240,10 +251,6 @@ If this cell is of string (text) format, then the width of the variable is determined from the length of the string it contains, unless the ASSUMEDVARWIDTH subcommand is given. - -The FILE subcommand is mandatory. Specify the name of the file -to be read. - The SHEET subcommand specifies the sheet within the spreadsheet file to read. There are two forms of the SHEET subcommand. In the first form, @@ -266,8 +273,8 @@ If no CELLRANGE subcommand is given, then the entire sheet is read. If @samp{/READNAMES=ON} is specified, then the contents of cells of the first row are used as the names of the variables in which to store -the data from subsequent rows. -If the READNAMES command is omitted, or if @samp{/READNAMES=OFF} is +the data from subsequent rows. This is the default. +If @samp{/READNAMES=OFF} is used, then the variables receive automatically assigned names.
The ASSUMEDVARWIDTH subcommand specifies the maximum width of string diff --git a/src/data/automake.mk b/src/data/automake.mk index 81a9d9c525..4385fd6d63 100644 --- a/src/data/automake.mk +++ b/src/data/automake.mk @@ -82,6 +82,8 @@ src_data_libdata_la_SOURCES = \ src/data/make-file.h \ src/data/mrset.c \ src/data/mrset.h \ + src/data/ods-reader.c \ + src/data/ods-reader.h \ src/data/por-file-reader.c \ src/data/por-file-reader.h \ src/data/por-file-writer.c \ @@ -94,6 +96,8 @@ src_data_libdata_la_SOURCES = \ src/data/settings.h \ src/data/short-names.c \ src/data/short-names.h \ + src/data/spreadsheet-reader.c \ + src/data/spreadsheet-reader.h \ src/data/subcase.c \ src/data/subcase.h \ src/data/sys-file-encoding.c \ diff --git a/src/data/gnumeric-reader.c b/src/data/gnumeric-reader.c index 61fbab899b..56ebc3062a 100644 --- a/src/data/gnumeric-reader.c +++ b/src/data/gnumeric-reader.c @@ -14,8 +14,6 @@ You should have received a copy of the GNU General Public License along with this program. If not, see . */ - - #include #include "libpspp/message.h" @@ -27,13 +25,14 @@ #define _(msgid) gettext (msgid) #define N_(msgid) (msgid) +#include "spreadsheet-reader.h" #if !GNM_SUPPORT struct casereader * -gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict) +gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dict) { - msg (ME, _("Support for Gnumeric files was not compiled into this installation of PSPP")); + msg (ME, _("Support for %s files was not compiled into this installation of PSPP"), "Gnumeric"); return NULL; } @@ -59,9 +58,6 @@ gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict) #include "gl/xalloc.h" -/* Default width of string variables. 
*/ -#define GNUMERIC_DEFAULT_WIDTH 8 - static void gnm_file_casereader_destroy (struct casereader *, void *); static struct ccase *gnm_file_casereader_read (struct casereader *, void *); @@ -74,73 +70,6 @@ static const struct casereader_class gnm_file_casereader_class = NULL, }; -/* Convert a string, which is an integer encoded in base26 - IE, A=0, B=1, ... Z=25 to the integer it represents. - ... except that in this scheme, digits with an exponent - greater than 1 are implicitly incremented by 1, so - AA = 0 + 1*26, AB = 1 + 1*26, - ABC = 2 + 2*26 + 1*26^2 .... -*/ -static int -pseudo_base26 (const char *str) -{ - int i; - int multiplier = 1; - int result = 0; - int len = strlen (str); - - for ( i = len - 1 ; i >= 0; --i) - { - int mantissa = (str[i] - 'A'); - - if ( mantissa < 0 || mantissa > 25 ) - return -1; - - if ( i != len - 1) - mantissa++; - - result += mantissa * multiplier; - - multiplier *= 26; - } - - return result; -} - - - -/* Convert a cell reference in the form "A1:B2", to - integers. A1 means column zero, row zero. - B1 means column 1 row 0. AA1 means column 26, row 0. 
-*/ -static bool -convert_cell_ref (const char *ref, - int *col0, int *row0, - int *coli, int *rowi) -{ - char startcol[5]; - char stopcol [5]; - - int startrow; - int stoprow; - - int n = sscanf (ref, "%4[a-zA-Z]%d:%4[a-zA-Z]%d", - startcol, &startrow, - stopcol, &stoprow); - if ( n != 4) - return false; - - str_uppercase (startcol); - *col0 = pseudo_base26 (startcol); - str_uppercase (stopcol); - *coli = pseudo_base26 (stopcol); - *row0 = startrow - 1; - *rowi = stoprow - 1 ; - - return true; -} - - enum reader_state { STATE_INIT = 0, /* Initial state */ @@ -180,9 +109,6 @@ struct gnumeric_reader static void process_node (struct gnumeric_reader *r); -#define _xml(X) (CHAR_CAST (const xmlChar *, X)) - -#define _xmlchar_to_int(X) (atoi(CHAR_CAST (const char *, X))) static void gnm_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) @@ -345,7 +271,7 @@ struct var_spec }; struct casereader * -gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict) +gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dict) { unsigned long int vstart = 0; int ret; @@ -473,7 +399,7 @@ gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict) if (-1 == var_spec [idx].width ) var_spec [idx].width = (gri->asw == -1) ? 
- ROUND_UP (strlen(text), GNUMERIC_DEFAULT_WIDTH) : gri->asw; + ROUND_UP (strlen(text), SPREADSHEET_DEFAULT_WIDTH) : gri->asw; } free (value); @@ -506,7 +432,7 @@ gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict) /* Probably no data exists for this variable, so allocate a default width */ if ( var_spec[i].width == -1 ) - var_spec[i].width = GNUMERIC_DEFAULT_WIDTH; + var_spec[i].width = SPREADSHEET_DEFAULT_WIDTH; name = dict_make_unique_var_name (r->dict, var_spec[i].name, &vstart); dict_create_var (r->dict, name, var_spec[i].width); diff --git a/src/data/gnumeric-reader.h b/src/data/gnumeric-reader.h index b313fc7876..fcd3385675 100644 --- a/src/data/gnumeric-reader.h +++ b/src/data/gnumeric-reader.h @@ -20,21 +20,10 @@ #include struct casereader; - - -struct gnumeric_read_info -{ - char *sheet_name ; /* In UTF-8. */ - char *file_name ; /* In filename encoding. */ - char *cell_range ; /* In UTF-8. */ - int sheet_index ; - bool read_names ; - int asw ; -}; - struct dictionary; +struct spreadsheet_read_info; -struct casereader * gnumeric_open_reader (struct gnumeric_read_info *, struct dictionary **); +struct casereader * gnumeric_open_reader (struct spreadsheet_read_info *, struct dictionary **); #endif diff --git a/src/data/ods-reader.c b/src/data/ods-reader.c new file mode 100644 index 0000000000..51ee5ac39a --- /dev/null +++ b/src/data/ods-reader.c @@ -0,0 +1,681 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2011 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "libpspp/message.h" +#include "libpspp/misc.h" + +#include "data/data-in.h" + +#include "gl/minmax.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) (msgid) + +#include "ods-reader.h" +#include "spreadsheet-reader.h" + +#if !ODF_READ_SUPPORT + +struct casereader * +ods_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dict) +{ + msg (ME, _("Support for %s files was not compiled into this installation of PSPP"), "OpenDocument"); + + return NULL; +} + +#else + +#include "libpspp/zip-reader.h" + + +#include +#include +#include +#include +#include + +#include "data/format.h" +#include "data/case.h" +#include "data/casereader-provider.h" +#include "data/dictionary.h" +#include "data/identifier.h" +#include "data/value.h" +#include "data/variable.h" +#include "libpspp/i18n.h" +#include "libpspp/str.h" + +#include "gl/xalloc.h" + +static void ods_file_casereader_destroy (struct casereader *, void *); + +static struct ccase *ods_file_casereader_read (struct casereader *, void *); + +static const struct casereader_class ods_file_casereader_class = + { + ods_file_casereader_read, + ods_file_casereader_destroy, + NULL, + NULL, + }; + +enum reader_state + { + STATE_INIT = 0, /* Initial state */ + STATE_SPREADSHEET, /* Found the start of the spreadsheet doc */ + STATE_TABLE, /* Found the sheet that we actually want */ + STATE_ROW, /* Found the start of the cell array */ + STATE_CELL, /* Found a cell */ + STATE_CELL_CONTENT /* Found a the text within a cell */ + }; + +struct ods_reader +{ + xmlTextReaderPtr xtr; + + enum reader_state state; + bool sheet_found; + int row; + int col; + int node_type; + int sheet_index; + + const xmlChar *target_sheet; + int target_sheet_index; + + int start_row; + int start_col; + int stop_row; + int stop_col; 
+ + struct caseproto *proto; + struct dictionary *dict; + struct ccase *first_case; + bool used_first_case; + bool read_names; + + struct string ods_errs; + int span; +}; + +static void process_node (struct ods_reader *r); +/* Casereader "destroy" callback. R_ is the struct ods_reader (may be NULL): frees the XML text reader, reports any text accumulated in ods_errs, drops the cached first case if it was never returned, then releases the prototype and the reader itself. */ +static void +ods_file_casereader_destroy (struct casereader *reader UNUSED, void *r_) +{ + struct ods_reader *r = r_; + if ( r == NULL) + return ; + + if (r->xtr) + xmlFreeTextReader (r->xtr); + + if ( ! ds_is_empty (&r->ods_errs)) + msg (ME, "%s", ds_cstr (&r->ods_errs)); /* error text is data, never a format string */ + + ds_destroy (&r->ods_errs); + + if ( ! r->used_first_case ) + case_unref (r->first_case); + + caseproto_unref (r->proto); + + free (r); +} + +static void +process_node (struct ods_reader *r) +{ + xmlChar *name = xmlTextReaderName (r->xtr); + if (name == NULL) + name = xmlStrdup (_xml ("--")); + + r->node_type = xmlTextReaderNodeType (r->xtr); + + switch ( r->state) + { + case STATE_INIT: + if (0 == xmlStrcasecmp (name, _xml("office:spreadsheet")) && + XML_READER_TYPE_ELEMENT == r->node_type) + { + r->state = STATE_SPREADSHEET; + } + break; + case STATE_SPREADSHEET: + if (0 == xmlStrcasecmp (name, _xml("table:table"))) + { + if (XML_READER_TYPE_ELEMENT == r->node_type) + { + r->col = -1; + r->row = -1; + ++r->sheet_index; + if ( r->target_sheet != NULL) + { + xmlChar *value = xmlTextReaderGetAttribute (r->xtr, _xml ("table:name")); + if ( 0 == xmlStrcmp (value, r->target_sheet)) + { + r->sheet_found = true; + r->state = STATE_TABLE; + } + free (value); + } + else if (r->target_sheet_index == r->sheet_index) + { + r->sheet_found = true; + r->state = STATE_TABLE; + } + else if ( r->target_sheet_index == -1) + r->state = STATE_TABLE; + } + } + else if (XML_READER_TYPE_END_ELEMENT == r->node_type + && r->sheet_found) + { + r->state = STATE_INIT; + } + break; + case STATE_TABLE: + if (0 == xmlStrcasecmp (name, _xml("table:table-row")) ) + { + if ( XML_READER_TYPE_ELEMENT == r->node_type) + { + if (!
xmlTextReaderIsEmptyElement (r->xtr)) + { + r->state = STATE_ROW; + } + r->row++; + r->span = 1; + } + } + else if (XML_READER_TYPE_END_ELEMENT == r->node_type) + { + r->state = STATE_SPREADSHEET; + } + break; + case STATE_ROW: + if (0 == xmlStrcasecmp (name, _xml ("table:table-cell"))) + { + if ( XML_READER_TYPE_ELEMENT == r->node_type) + { + xmlChar *value = + xmlTextReaderGetAttribute (r->xtr, + _xml ("table:number-columns-repeated")); + r->col += r->span; + r->span = value ? _xmlchar_to_int (value) : 1; + free (value); + if (! xmlTextReaderIsEmptyElement (r->xtr)) + { + r->state = STATE_CELL; + } + } + } + else if (XML_READER_TYPE_END_ELEMENT == r->node_type) + { + r->state = STATE_TABLE; + r->col = -1; + /* Set the span back to the default */ + r->span = 1; + } + break; + case STATE_CELL: + if (0 == xmlStrcasecmp (name, _xml("text:p"))) + { + if ( XML_READER_TYPE_ELEMENT == r->node_type) + { + r->state = STATE_CELL_CONTENT; + } + } + else if (XML_READER_TYPE_END_ELEMENT == r->node_type) + { + r->state = STATE_ROW; + } + break; + case STATE_CELL_CONTENT: + if (XML_READER_TYPE_TEXT != r->node_type) + r->state = STATE_CELL; + break; + default: + break; + }; + + xmlFree (name); +} + +/* + A struct containing the parameters of a cell's value + parsed from the xml +*/ +struct xml_value +{ + xmlChar *type; + xmlChar *value; + xmlChar *text; +}; + +struct var_spec +{ + char *name; + struct xml_value firstval; +}; + + +/* Determine the width that a xmv should probably have */ +static int +xmv_to_width (const struct xml_value *xmv, int fallback) +{ + int width = SPREADSHEET_DEFAULT_WIDTH; + + /* Non-strings always have zero width */ + if (xmv->type != NULL && 0 != xmlStrcmp (xmv->type, _xml("string"))) + return 0; + + if ( fallback != -1) + return fallback; + + if ( xmv->value ) + width = ROUND_UP (xmlStrlen (xmv->value), + SPREADSHEET_DEFAULT_WIDTH); + else if ( xmv->text) + width = ROUND_UP (xmlStrlen (xmv->text), + SPREADSHEET_DEFAULT_WIDTH); + + return width; +} + 
+/* + Sets the VAR of case C, to the value corresponding to the xml data + */ +static void +convert_xml_to_value (struct ccase *c, const struct variable *var, + const struct xml_value *xmv) +{ + union value *v = case_data_rw (c, var); + + if (xmv->value == NULL && xmv->text == NULL) + value_set_missing (v, var_get_width (var)); + else if ( var_is_alpha (var)) + /* Use the text field, because it seems that there is no + value field for strings */ + value_copy_str_rpad (v, var_get_width (var), xmv->text, ' '); + else + { + const struct fmt_spec *fmt = var_get_write_format (var); + enum fmt_category fc = fmt_get_category (fmt->type); + + assert ( fc != FMT_CAT_STRING); + + const char *text = xmv->value ? CHAR_CAST (const char *, xmv->value): + CHAR_CAST (const char *, xmv->text); + + data_in (ss_cstr (text), "UTF-8", + fmt->type, + v, + var_get_width (var), + "UTF-8"); + } +} + + +struct casereader * +ods_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dict) +{ + int ret = 0; + xmlChar *type = NULL; + unsigned long int vstart = 0; + casenumber n_cases = CASENUMBER_MAX; + int i; + struct var_spec *var_spec = NULL; + int n_var_specs = 0; + + struct ods_reader *r = xzalloc (sizeof *r); + + r->read_names = gri->read_names; + ds_init_empty (&r->ods_errs); + + struct zip_reader *zreader = zip_reader_create (gri->file_name, &r->ods_errs); + struct zip_member *content = NULL; + + if ( NULL == zreader) + { + msg (ME, _("Error opening `%s' for reading as a OpenDocument spreadsheet file: %s."), + gri->file_name, ds_cstr (&r->ods_errs)); + + goto error; + } + + content = zip_member_open (zreader, "content.xml"); + if ( NULL == content) + { + msg (ME, _("Could not extract OpenDocument spreadsheet from file `%s': %s."), + gri->file_name, ds_cstr (&r->ods_errs)); + + goto error; + } + + zip_member_ref (content); + + r->xtr = xmlReaderForIO ((xmlInputReadCallback) zip_member_read, + (xmlInputCloseCallback) zip_member_finish, + content, NULL, NULL, 
XML_PARSE_RECOVER); + + if ( r->xtr == NULL) + { + goto error; + } + + if ( gri->cell_range ) + { + if ( ! convert_cell_ref (gri->cell_range, + &r->start_col, &r->start_row, + &r->stop_col, &r->stop_row)) + { + msg (SE, _("Invalid cell range `%s'"), + gri->cell_range); + goto error; + } + } + else + { + r->start_col = 0; + r->start_row = 0; + r->stop_col = -1; + r->stop_row = -1; + } + + r->state = STATE_INIT; + r->target_sheet = BAD_CAST gri->sheet_name; + r->target_sheet_index = gri->sheet_index; + r->row = r->col = -1; + r->sheet_index = 0; + + + /* If CELLRANGE was given, then we know how many variables should be read */ + if ( r->stop_col != -1 ) + { + n_var_specs = r->stop_col - r->start_col + 1; + var_spec = xzalloc (sizeof (*var_spec) * n_var_specs); /* zeroed: name and firstval are read and freed for every column, even ones that never receive a value */ + } + + + /* Advance to the start of the cells for the target sheet */ + while ( (r->row < r->start_row )) + { + if (1 != (ret = xmlTextReaderRead (r->xtr))) + break; + + process_node (r); + } + + if (ret < 1) + { + msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."), + gri->file_name); + goto error; + } + + if ( gri->read_names) + { + while (1 == (ret = xmlTextReaderRead (r->xtr))) + { + int idx; + process_node (r); + if ( r->row > r->start_row) + break; + + if (r->col == -1 && r->row == r->start_row) + break; + + if ( r->col < r->start_col) + continue; + + idx = r->col - r->start_col; + + if (r->state == STATE_CELL_CONTENT + && + XML_READER_TYPE_TEXT == r->node_type) + { + xmlChar *value = xmlTextReaderValue (r->xtr); + if ( idx >= n_var_specs) + { + + var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + 1)); + + /* xrealloc does not zero the elements it adds, so clear entries n_var_specs..idx here */ + memset (var_spec + n_var_specs, + 0, + (idx + 1 - n_var_specs) * sizeof (*var_spec)); + n_var_specs = idx + 1; + } + var_spec[idx].firstval.text = 0; + var_spec[idx].firstval.value = 0; + var_spec[idx].firstval.type = 0; + + var_spec [idx].name = strdup (CHAR_CAST (const char *,
value)); + free (value); + value = NULL; + } + } + } + + xmlChar *val_string = NULL; + /* Read in the first row of data */ + while (1 == xmlTextReaderRead (r->xtr)) + { + int idx; + process_node (r); + if ( r->row >= r->start_row + 1 + gri->read_names) + break; + + if ( r->col < r->start_col) + continue; + + if ( r->col - r->start_col + 1 > n_var_specs) + continue; + + idx = r->col - r->start_col; + + if ( r->state == STATE_CELL && + XML_READER_TYPE_ELEMENT == r->node_type) + { + type = xmlTextReaderGetAttribute (r->xtr, _xml ("office:value-type")); + val_string = xmlTextReaderGetAttribute (r->xtr, _xml ("office:value")); + } + + if ( r->state == STATE_CELL_CONTENT && + XML_READER_TYPE_TEXT == r->node_type) + { + var_spec [idx].firstval.type = type; + var_spec [idx].firstval.text = xmlTextReaderValue (r->xtr); + var_spec [idx].firstval.value = val_string; + val_string = NULL; + type = NULL; + } + } + + /* Create the dictionary and populate it */ + *dict = r->dict = dict_create ( + CHAR_CAST (const char *, xmlTextReaderConstEncoding (r->xtr))); + + for (i = 0 ; i < n_var_specs ; ++i ) + { + struct fmt_spec fmt; + struct variable *var = NULL; + char *name = dict_make_unique_var_name (r->dict, var_spec[i].name, &vstart); + int width = xmv_to_width (&var_spec[i].firstval, gri->asw); + dict_create_var (r->dict, name, width); + free (name); + + var = dict_get_var (r->dict, i); + + if ( 0 == xmlStrcmp (var_spec[i].firstval.type, _xml("date"))) + { + fmt.type = FMT_DATE; + fmt.d = 0; + fmt.w = 20; + } + else + fmt = fmt_default_for_width (width); + + var_set_both_formats (var, &fmt); + } + + /* Create the first case, and cache it */ + r->used_first_case = false; + + if ( n_var_specs == 0 ) + { + msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."), + gri->file_name); + goto error; + } + + r->proto = caseproto_ref (dict_get_proto (r->dict)); + r->first_case = case_create (r->proto); + case_set_missing (r->first_case); + + for ( i = 0 ; i < n_var_specs ; ++i ) 
+ { + const struct variable *var = dict_get_var (r->dict, i); + + convert_xml_to_value (r->first_case, var, &var_spec[i].firstval); + } + + zip_reader_destroy (zreader); + + for ( i = 0 ; i < n_var_specs ; ++i ) + { + free (var_spec[i].firstval.type); + free (var_spec[i].firstval.value); + free (var_spec[i].firstval.text); + free (var_spec[i].name); + } + + free (var_spec); + + return casereader_create_sequential + (NULL, + r->proto, + n_cases, + &ods_file_casereader_class, r); + + error: + + zip_reader_destroy (zreader); + + for ( i = 0 ; i < n_var_specs ; ++i ) + { + free (var_spec[i].firstval.type); + free (var_spec[i].firstval.value); + free (var_spec[i].firstval.text); + free (var_spec[i].name); + } + + free (var_spec); + + return NULL; +} + + +/* Reads and returns one case from READER's file. Returns a null + pointer on failure. */ +static struct ccase * +ods_file_casereader_read (struct casereader *reader UNUSED, void *r_) +{ + struct ccase *c = NULL; + xmlChar *val_string = NULL; + struct ods_reader *r = r_; + int current_row = r->row; + + if ( r->row == -1) + return NULL; + + if ( !r->used_first_case ) + { + r->used_first_case = true; + return r->first_case; + } + + + if ( r->state > STATE_INIT) + { + c = case_create (r->proto); + case_set_missing (c); + } + + while (1 == xmlTextReaderRead (r->xtr)) + { + process_node (r); + if ( r->row > current_row) + { + break; + } + if ( r->col < r->start_col || (r->stop_col != -1 && r->col > r->stop_col)) + { + continue; + } + if ( r->col - r->start_col >= caseproto_get_n_widths (r->proto)) + { + continue; + } + if ( r->stop_row != -1 && r->row > r->stop_row) + { + continue; + } + if ( r->state == STATE_CELL && + r->node_type == XML_READER_TYPE_ELEMENT ) + { + val_string = xmlTextReaderGetAttribute (r->xtr, _xml ("office:value")); + } + + if ( r->state == STATE_CELL_CONTENT && r->node_type == XML_READER_TYPE_TEXT ) + { + int col; + struct xml_value *xmv = xzalloc (sizeof *xmv); + xmv->text = xmlTextReaderValue 
(r->xtr); + xmv->value = val_string; + val_string = NULL; + + for (col = 0; col < r->span ; ++col) + { + const int idx = r->col + col - r->start_col; + if (idx >= caseproto_get_n_widths (r->proto)) break; /* a repeated cell may extend past the last variable; dict_get_var must not be called with an out-of-range index */ + const struct variable *var = dict_get_var (r->dict, idx); + + convert_xml_to_value (c, var, xmv); + } + free (xmv->text); + free (xmv->value); + free (xmv); + } + + if ( r->state < STATE_TABLE) + break; + } + + if (NULL == c || (r->stop_row != -1 && r->row > r->stop_row + 1)) + { + case_unref (c); + return NULL; + } + else + { + return c; + } +} +#endif diff --git a/src/data/ods-reader.h b/src/data/ods-reader.h new file mode 100644 index 0000000000..79b7169833 --- /dev/null +++ b/src/data/ods-reader.h @@ -0,0 +1,27 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2011 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef ODS_READ_H +#define ODS_READ_H 1 + +struct casereader; +struct dictionary; +struct spreadsheet_read_info; + +struct casereader * ods_open_reader (struct spreadsheet_read_info *, struct dictionary **); + + +#endif diff --git a/src/data/spreadsheet-reader.c b/src/data/spreadsheet-reader.c new file mode 100644 index 0000000000..11e8cf593a --- /dev/null +++ b/src/data/spreadsheet-reader.c @@ -0,0 +1,89 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "spreadsheet-reader.h" + +#include +#include +#include + +/* Convert a string, which is an integer encoded in base26 + IE, A=0, B=1, ... Z=25 to the integer it represents. + ... except that in this scheme, digits with an exponent + greater than 1 are implicitly incremented by 1, so + AA = 0 + 1*26, AB = 1 + 1*26, + ABC = 2 + 2*26 + 1*26^2 .... +*/ +int +pseudo_base26 (const char *str) +{ + int i; + int multiplier = 1; + int result = 0; + int len = strlen (str); + + for ( i = len - 1 ; i >= 0; --i) + { + int mantissa = (str[i] - 'A'); + + if ( mantissa < 0 || mantissa > 25 ) + return -1; + + if ( i != len - 1) + mantissa++; + + result += mantissa * multiplier; + + multiplier *= 26; + } + + return result; +} + + +/* Convert a cell reference in the form "A1:B2", to + integers. A1 means column zero, row zero. + B1 means column 1 row 0. AA1 means column 26, row 0. 
+*/ +bool +convert_cell_ref (const char *ref, + int *col0, int *row0, + int *coli, int *rowi) +{ + char startcol[5]; + char stopcol [5]; + + int startrow; + int stoprow; + + int n = sscanf (ref, "%4[a-zA-Z]%d:%4[a-zA-Z]%d", + startcol, &startrow, + stopcol, &stoprow); + if ( n != 4) + return false; + + str_uppercase (startcol); + *col0 = pseudo_base26 (startcol); + str_uppercase (stopcol); + *coli = pseudo_base26 (stopcol); + *row0 = startrow - 1; + *rowi = stoprow - 1 ; + + return true; +} + diff --git a/src/data/spreadsheet-reader.h b/src/data/spreadsheet-reader.h new file mode 100644 index 0000000000..6edd705067 --- /dev/null +++ b/src/data/spreadsheet-reader.h @@ -0,0 +1,47 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2007, 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#ifndef SPREADSHEET_READ_H +#define SPREADSHEET_READ_H 1 + +#include + +/* Default width of string variables. */ +#define SPREADSHEET_DEFAULT_WIDTH 8 + +struct spreadsheet_read_info +{ + char *sheet_name ; /* In UTF-8. */ + char *file_name ; /* In filename encoding. */ + char *cell_range ; /* In UTF-8. 
*/ + int sheet_index ; + bool read_names ; + int asw ; +}; + +int pseudo_base26 (const char *str); + +bool convert_cell_ref (const char *ref, + int *col0, int *row0, + int *coli, int *rowi); + + +#define _xml(X) (CHAR_CAST (const xmlChar *, X)) + +#define _xmlchar_to_int(X) (atoi(CHAR_CAST (const char *, X))) + + +#endif diff --git a/src/language/data-io/get-data.c b/src/language/data-io/get-data.c index 47b65b445a..9b878c553a 100644 --- a/src/language/data-io/get-data.c +++ b/src/language/data-io/get-data.c @@ -18,10 +18,14 @@ #include +#include + #include "data/dataset.h" #include "data/dictionary.h" #include "data/format.h" #include "data/gnumeric-reader.h" +#include "data/ods-reader.h" +#include "data/spreadsheet-reader.h" #include "data/psql-reader.h" #include "data/settings.h" #include "language/command.h" @@ -40,13 +44,16 @@ #define _(msgid) gettext (msgid) #define N_(msgid) (msgid) -static int parse_get_gnm (struct lexer *lexer, struct dataset *); +static struct spreadsheet_read_info *parse_spreadsheet (struct lexer *lexer); +static void destroy_spreadsheet_read_info (struct spreadsheet_read_info *); + static int parse_get_txt (struct lexer *lexer, struct dataset *); static int parse_get_psql (struct lexer *lexer, struct dataset *); int cmd_get_data (struct lexer *lexer, struct dataset *ds) { + char *tok = NULL; lex_force_match (lexer, T_SLASH); if (!lex_force_match_id (lexer, "TYPE")) @@ -54,14 +61,46 @@ cmd_get_data (struct lexer *lexer, struct dataset *ds) lex_force_match (lexer, T_EQUALS); - if (lex_match_id (lexer, "GNM")) - return parse_get_gnm (lexer, ds); - else if (lex_match_id (lexer, "TXT")) - return parse_get_txt (lexer, ds); + tok = xstrdup (lex_tokcstr (lexer)); /* private copy of the TYPE keyword; it is compared again in the spreadsheet branch and printed in the error message */ + if (lex_match_id (lexer, "TXT")) + { + free (tok); /* not used by the text parser; release before the early return */ + return parse_get_txt (lexer, ds); + } else if (lex_match_id (lexer, "PSQL")) - return parse_get_psql (lexer, ds); + { + free (tok); /* not used by the PSQL parser; release before the early return */ + return parse_get_psql (lexer, ds); + } + else if (lex_match_id (lexer, "GNM") || + lex_match_id (lexer, "ODS")) + { + struct
casereader *reader = NULL; + struct dictionary *dict = NULL; + struct spreadsheet_read_info *sri = parse_spreadsheet (lexer); + if (NULL == sri) + goto error; + + if ( 0 == strncasecmp (tok, "GNM", 3)) + reader = gnumeric_open_reader (sri, &dict); + else if (0 == strncasecmp (tok, "ODS", 3)) + reader = ods_open_reader (sri, &dict); + + if (reader) + { + dataset_set_dict (ds, dict); + dataset_set_source (ds, reader); + destroy_spreadsheet_read_info (sri); + free (tok); + return CMD_SUCCESS; + } + destroy_spreadsheet_read_info (sri); + } + else + msg (SE, _("Unsupported TYPE %s."), tok); - msg (SE, _("Unsupported TYPE %s."), lex_tokcstr (lexer)); + error: + free (tok); return CMD_FAILURE; } @@ -141,10 +178,13 @@ parse_get_psql (struct lexer *lexer, struct dataset *ds) return CMD_FAILURE; } -static int -parse_get_gnm (struct lexer *lexer, struct dataset *ds) +static struct spreadsheet_read_info * +parse_spreadsheet (struct lexer *lexer) { - struct gnumeric_read_info gri = {NULL, NULL, NULL, 1, true, -1}; + struct spreadsheet_read_info *sri = xzalloc (sizeof *sri); + sri->sheet_index = 1; + sri->read_names = true; + sri->asw = -1; lex_force_match (lexer, T_SLASH); @@ -156,7 +196,7 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds) if (!lex_force_string (lexer)) goto error; - gri.file_name = utf8_to_filename (lex_tokcstr (lexer)); + sri->file_name = utf8_to_filename (lex_tokcstr (lexer)); lex_get (lexer); @@ -165,7 +205,7 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds) if ( lex_match_id (lexer, "ASSUMEDSTRWIDTH")) { lex_match (lexer, T_EQUALS); - gri.asw = lex_integer (lexer); + sri->asw = lex_integer (lexer); lex_get (lexer); } else if (lex_match_id (lexer, "SHEET")) @@ -176,14 +216,14 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds) if ( ! 
lex_force_string (lexer) ) goto error; - gri.sheet_name = ss_xstrdup (lex_tokss (lexer)); - gri.sheet_index = -1; + sri->sheet_name = ss_xstrdup (lex_tokss (lexer)); + sri->sheet_index = -1; lex_get (lexer); } else if (lex_match_id (lexer, "INDEX")) { - gri.sheet_index = lex_integer (lexer); + sri->sheet_index = lex_integer (lexer); lex_get (lexer); } else @@ -195,14 +235,14 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds) if (lex_match_id (lexer, "FULL")) { - gri.cell_range = NULL; + sri->cell_range = NULL; } else if (lex_match_id (lexer, "RANGE")) { if ( ! lex_force_string (lexer) ) goto error; - gri.cell_range = ss_xstrdup (lex_tokss (lexer)); + sri->cell_range = ss_xstrdup (lex_tokss (lexer)); lex_get (lexer); } else @@ -214,11 +254,11 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds) if ( lex_match_id (lexer, "ON")) { - gri.read_names = true; + sri->read_names = true; } else if (lex_match_id (lexer, "OFF")) { - gri.read_names = false; + sri->read_names = false; } else goto error; @@ -230,30 +270,14 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds) } } - { - struct dictionary *dict = NULL; - struct casereader *reader = gnumeric_open_reader (&gri, &dict); - - if ( reader ) - { - dataset_set_dict (ds, dict); - dataset_set_source (ds, reader); - } - } - - free (gri.file_name); - free (gri.sheet_name); - free (gri.cell_range); - return CMD_SUCCESS; + return sri; error: - - free (gri.file_name); - free (gri.sheet_name); - free (gri.cell_range); - return CMD_FAILURE; + destroy_spreadsheet_read_info (sri); + return NULL; } + static bool set_type (struct data_parser *parser, const char *subcommand, enum data_parser_type type, bool *has_type) @@ -596,3 +620,16 @@ parse_get_txt (struct lexer *lexer, struct dataset *ds) free (name); return CMD_CASCADING_FAILURE; } + + +static void +destroy_spreadsheet_read_info (struct spreadsheet_read_info *sri) +{ + if ( NULL == sri) + return; + + free (sri->sheet_name); + free (sri->cell_range); + free 
(sri->file_name); + free (sri); +} diff --git a/src/output/automake.mk b/src/output/automake.mk index 6f9b149e82..78173e3ef2 100644 --- a/src/output/automake.mk +++ b/src/output/automake.mk @@ -68,7 +68,7 @@ src_output_liboutput_la_SOURCES += \ src/output/charts/roc-chart-cairo.c \ src/output/charts/scree-cairo.c endif -if ODT_SUPPORT +if ODF_WRITE_SUPPORT src_output_liboutput_la_SOURCES += src/output/odt.c endif diff --git a/tests/atlocal.in b/tests/atlocal.in index 9b54a705b4..cfe019a238 100644 --- a/tests/atlocal.in +++ b/tests/atlocal.in @@ -3,6 +3,7 @@ # Variables used internally by the testsuite. EXEEXT='@EXEEXT@' GNM_SUPPORT='@GNM_SUPPORT@' +ODF_READ_SUPPORT='@ODF_READ_SUPPORT@' PERL='@PERL@' WITH_PERL_MODULE='@WITH_PERL_MODULE@' host='@host@' diff --git a/tests/automake.mk b/tests/automake.mk index 7d4afef707..840f56b047 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -244,7 +244,8 @@ EXTRA_DIST += \ tests/data/num-out.expected.cmp.gz \ tests/data/v13.sav \ tests/data/v14.sav \ - tests/language/data-io/Book1.gnm.unzipped + tests/language/data-io/Book1.gnm.unzipped \ + tests/language/data-io/test.ods CLEANFILES += *.save pspp.* foo* @@ -279,7 +280,7 @@ TESTSUITE_AT = \ tests/language/data-io/data-reader.at \ tests/language/data-io/dataset.at \ tests/language/data-io/file-handle.at \ - tests/language/data-io/get-data-gnm.at \ + tests/language/data-io/get-data-spreadsheet.at \ tests/language/data-io/get-data-psql.at \ tests/language/data-io/get-data-txt.at \ tests/language/data-io/get.at \ diff --git a/tests/language/data-io/get-data-gnm.at b/tests/language/data-io/get-data-spreadsheet.at similarity index 59% rename from tests/language/data-io/get-data-gnm.at rename to tests/language/data-io/get-data-spreadsheet.at index 92815dc0d3..19b8964dca 100644 --- a/tests/language/data-io/get-data-gnm.at +++ b/tests/language/data-io/get-data-spreadsheet.at @@ -1,10 +1,23 @@ -AT_BANNER([GET DATA /TYPE=GNM]) -AT_SETUP([GET DATA /TYPE=GNM with CELLRANGE]) 
-AT_SKIP_IF([test "$GNM_SUPPORT" = no]) -AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric]) +m4_define([SPREADSHEET_TEST_PREP],[dnl + m4_if($1,[GNM],[dnl + AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric])dnl + m4_define([testsheet],[Book1.gnumeric])dnl + AT_SKIP_IF([test n$GNM_SUPPORT != nyes])dnl + ]) dnl + m4_if($1,[ODS],[dnl + AT_CHECK([cp $top_srcdir/tests/language/data-io/test.ods test.ods])dnl + m4_define([testsheet],[test.ods])dnl + AT_SKIP_IF([test n$ODF_READ_SUPPORT != nyes])dnl + ])dnl +]) + +m4_define([CHECK_SPREADSHEET_READER], + [dnl +AT_SETUP([GET DATA /TYPE=$1 with CELLRANGE]) +SPREADSHEET_TEST_PREP($1) AT_DATA([get-data.sps], [dnl -GET DATA /TYPE=gnm /FILE='Book1.gnumeric' /READNAMES=off /SHEET=name 'This' /CELLRANGE=range 'g9:i13' . +GET DATA /TYPE=$1 /FILE='testsheet' /READNAMES=off /SHEET=name 'This' /CELLRANGE=range 'g9:i13' . DISPLAY VARIABLES. LIST. ]) @@ -34,11 +47,10 @@ VAR001,VAR002,VAR003 ]) AT_CLEANUP -AT_SETUP([GET DATA /TYPE=GNM with CELLRANGE and READNAMES]) -AT_SKIP_IF([test "$GNM_SUPPORT" = no]) -AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric]) +AT_SETUP([GET DATA /TYPE=$1 with CELLRANGE and READNAMES]) +SPREADSHEET_TEST_PREP($1) AT_DATA([get-data.sps], [dnl -GET DATA /TYPE=gnm /FILE='Book1.gnumeric' /READNAMES=on /SHEET=name 'This' /CELLRANGE=range 'g8:i13' . +GET DATA /TYPE=$1 /FILE='testsheet' /READNAMES=on /SHEET=name 'This' /CELLRANGE=range 'g8:i13' . DISPLAY VARIABLES. LIST. ]) @@ -68,11 +80,10 @@ V1,V2,VAR001 ]) AT_CLEANUP -AT_SETUP([GET DATA /TYPE=GNM without CELLRANGE]) -AT_SKIP_IF([test "$GNM_SUPPORT" = no]) -AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric]) +AT_SETUP([GET DATA /TYPE=$1 without CELLRANGE]) +SPREADSHEET_TEST_PREP($1) AT_DATA([get-data.sps], [dnl -GET DATA /TYPE=gnm /FILE='Book1.gnumeric' /SHEET=index 3. 
+GET DATA /TYPE=$1 /FILE='testsheet' /SHEET=index 3. DISPLAY VARIABLES. LIST. ]) @@ -101,12 +112,11 @@ dick ,3.00,-34.09 ]) AT_CLEANUP -AT_SETUP([GET DATA /TYPE=GNM with missing data]) -AT_SKIP_IF([test "$GNM_SUPPORT" = no]) -AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric]) +AT_SETUP([GET DATA /TYPE=$1 with missing data]) +SPREADSHEET_TEST_PREP($1) AT_DATA([get-data.sps], [dnl * This sheet has no data in one of its variables -GET DATA /TYPE=gnm /FILE='Book1.gnumeric' /READNAMES=on /SHEET=index 5. +GET DATA /TYPE=$1 /FILE='testsheet' /READNAMES=on /SHEET=index 5. DISPLAY VARIABLES. LIST. ]) @@ -137,26 +147,35 @@ vone,vtwo,vthree,v4 ]) AT_CLEANUP -AT_SETUP([GET DATA /TYPE=GNM with empty sheet]) -AT_SKIP_IF([test "$GNM_SUPPORT" = no]) -AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric]) +AT_SETUP([GET DATA /TYPE=$1 with empty sheet]) +SPREADSHEET_TEST_PREP($1) AT_DATA([get-data.sps], [dnl * This sheet is empty -GET DATA /TYPE=gnm /FILE='Book1.gnumeric' /SHEET=name 'Empty'. +GET DATA /TYPE=$1 /FILE='testsheet' /SHEET=name 'Empty'. ]) AT_CHECK([pspp -o pspp.csv get-data.sps], [0], [dnl -warning: Selected sheet or range of spreadsheet `Book1.gnumeric' is empty. +warning: Selected sheet or range of spreadsheet `testsheet' is empty. ]) AT_CLEANUP -AT_SETUP([GET DATA /TYPE=GNM with nonexistent sheet]) -AT_SKIP_IF([test "$GNM_SUPPORT" = no]) -AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric]) +AT_SETUP([GET DATA /TYPE=$1 with nonexistent sheet]) +SPREADSHEET_TEST_PREP($1) AT_DATA([get-data.sps], [dnl * This sheet doesnt exist. -GET DATA /TYPE=gnm /FILE='Book1.gnumeric' /SHEET=name 'foobarxx'. +GET DATA /TYPE=$1 /FILE='testsheet' /SHEET=name 'foobarxx'. ]) AT_CHECK([pspp -o pspp.csv get-data.sps], [0], [dnl -warning: Selected sheet or range of spreadsheet `Book1.gnumeric' is empty. +warning: Selected sheet or range of spreadsheet `testsheet' is empty. 
]) AT_CLEANUP +]) + + +AT_BANNER([GET DATA Spreadsheet /TYPE=GNM]) + +CHECK_SPREADSHEET_READER([GNM]) + +AT_BANNER([GET DATA Spreadsheet /TYPE=ODS]) + +CHECK_SPREADSHEET_READER([ODS]) + diff --git a/tests/language/data-io/test.ods b/tests/language/data-io/test.ods new file mode 100644 index 0000000000000000000000000000000000000000..c079454bd0e57a311d53a409c68315b0769e3c83 GIT binary patch literal 9231 zcma)C1z1$e+h0&=X%M8BMque~k(6#wYN>^#b_pq^1w^_*>2B!;0VxTk1f&&^R7#p} zLAiSG=e^H=&hB~U%)B*m=DhQpS5@)abwU6D4FJ%WtP%@=3Wl%)0Dy}>q6lCMu{C#c zb1*k?aDYOU%%kKsw%O6ISZqYbrLVIW#`8Lc%aVUdCzLVj|GuMb0n+oX~v!f|)21dVoB+wc> zh#d8HEAagB!Gs(C$(J*n8dj{}a7(0}3KGWm-}Hd<=Saqkrg4KaH@RQFCBc_|i%$sV zj6Ms^7GN&7VA#FC(<|%$V0dQb_J@wRNA$$XIeVI{PHfio{mah-* zmzy$t?6F#iE8~pIEA_+}BV_JbkbUcsj3Qt8c{B!$M4eWG@%FL*{S+Bbwxy;_8_q3` z3M!fz;Q<~8Zp|FYbJau?2D?zRk@;EIO~!-~@l>%f$Iiv(MnY)2rWhl{o)>OKkjkkY zT!6c6HooA#_$@YBoNTD?#hjut-4aS|3&BU(ky6%y1-?wAj~zGAP(hmAI`UaqG(xx{ z{HZ10_lD$;##N}qkMY9PZr*RcxgH@c7c&w!-;4et$*_SuH?*&p!{mGcb7q@m0!TW% z`99r3aYzuyS6da=a#f1dht7rgI_UgEUU9?X{F_k;tvUSreXRM$n9g5WI#ZaGaJ78q zI*>zA+S=YH`b-ymG9}ESP?U};}4TNsmtAbOW$US`)Y!= zYo%}}h$hJ=&Gex08boFR+iUa=aFQCP95MEL5OxDN+OKfFCy} z6XL365}`MUxqW>n+L2|CR9DS7E*31)*2nCC>?je)xmLS!%dNvI- z(MWovCH;?q3G{X5kr?z%+HXZrVnOmz<@b4tD_5?Ka_HVNGv`GeQ;30TFWi>VEAj8y zh*ybYU|oKhQEvq?h$DD%ZW;Y@z{j)%g3+!Z45%Ekwv_y~N@l8~TI{P+fj2?elyoGU z{B;ysb3j2oDK4N)8-w~un;d>09oxXP5ay+b!h;k-SU7B(s?T@oHO95F35s1;?_rN! z-_U$j*sl3Xk*#*bSCQ?}=c+WDr;me5a;kXicJbqn9P_E{vmVRsl0_^CK*gh0Jr-h? 
zzY^=z>~xeTJThym7t2-_l<^7Ti2Gh6cd zn2Nn#3n{dqF_+`Bq^`79VywP@q-fgjK_v&Ad3D^VuQeE_L>2dzxbXZn{`a_P{$i~c z`f#I;@~RV$L8E|c!V_-E&sH%vz6BVcC2b(R2-12yj#_!rJfB!J5IYfD%U#yNKlrslNNz$O*y8RL>fIcJ2i6X8VUv z(D*9Lvi%~h#TtrAm&|>_29vrDCr9to5i#o|cYTdr{Jf_-b0*(Uk7IQFb~@N?28}oL zgg44as-8aY!O`?Wf(4@a}zG1R3< z9EE3=k-`DvtkJpRw?yT-YcSc=W9Sasl%A0m=z+O+^`1Nn@+g*RcIOA64k*|4xq8=E zef-`yMGnEWeJ~Hg6(1!!a}FmvDc(Em)KQDR<=^Ez`}L8ks;NWeoPcE!tr7ixVN`Ty z$0R$h`id}Nu6W6r^)uR|ozYzo@o0rh?6#62Dptv2>M_N6=$kh(JD!y*P2+-+F(T&& z3kSkY@Q^6(SHp0FoTqY}q85FXnKYA+j~C2$t8pZ-S!We1Y}ck0o`Hm-L`%^;BHoH? zh@0%WAKqJmlsJDE(xymcl>X`@vmfZ?w_d?nHksczG`IL}?|Wdy{FhLylCv@bdXKTt zXdY36?16ihW?kL~MuWwSAI3^Q2JKsH-Y=C|OxE{3Ib?d?{!}1#jldb3Vp-9HvQ<=P zr$sI6ENhyRclH}s+$p=&T6^?eS4do#8u8m1S9DCs7X48`<>8y}3+pb0UE;%!T$JpE z0b6~$^%m}4b`0W~i;;JeKCT?^+a4t;${xCzF1EMBIL>*>Jtt3+tG}P4UifU+AfOQz zgy%-6bLF%B$rZ#vxEOFJH>f%M!rPnHo3@+cBW#}4K*Lb2Y&BC*em`cn{-B6VD2+L! zWJ;7)jY~&?P_8%3qi%UZV?*9WE+phJMb-3Bv0bF*Cs%Co$ID;8b~n{G_a)XaN~s${ zPpt5cD49)P4gseQTAI(9A8i`hw=^t36u_^PA3gY-rRt+8E0cx#Cg8zT&vT<-)Bf@- z*u%M$?y8XSM!EoQ{hD_+6)_Z?14~I{4paJ(!o1_UVBzWmm$P1!kJZZ8#oqFc=?+!e zzTB>R;~I5?8OQWxyzV>7XTmbiaLAtIYSziDI0&#}FJrQE5f*FTF|#0h?dW&sP8$|W zFrJAth(WC~D58BydcN+2r&=jLZ`%N9q3B0MDP2Vvs#Ime+UeLcnIZ$=tBxJ`fhopQS3E^!EJHPvH$?H#5qBBp`fS&l7V?jOlxWBogN>VidrN)6kfM9^~dN{ z@t<5z44jj+F}a_5W(jyGjFDN}xtn&EzK*bA2O*2LD& z>`XU4rKl1f5eVO~`ue($I$eEUs)l8L!L%yW$`KShu$P{nE`&D}Q%cJI3d52k8J8P( zr$7~%r1|9CRs-6c$Xo`frA0A$OvcOq#>^@DH zg)MnHM|YYSpjVu(g^QDgi!x3{2CLw`P3?t8SBBoLaf!>J@jOb#dc%L%|HbBWkK+V+ zI52%*dkU-_*`-2hzS@~1FY+L+NLzS?jv*;5wu)S4C0i7F=9GAM_{~<3i0v9L(JMD+ z%=)#La@ZP{tNc6#9GT@imaSTmxJX+h9VT>^Jc-`qRv89*517S#@-*JDTAyw3lVkf^ z2wUk;>&h1@i>4J3Ssv=GItB1j=!k#JUDysWKD~W9cs+;ncHK;+KW;)18eOS>^NhOt za?0Ihg)(A-0b``??1UceryTCKzMsA;^FPK4912h|j8%#o3NLfnt<*fzT=LZ8u(_8x z7&Q_A4$JbLs_6zOgyghfJf%}BwaKS)hOOBNVApTl&$l-T3~y9_Avzu7_9@&uFl)CD z7fRjjhU{|x`s%(IgYbvxQ~F6#MZu?E_mFe?wo9<3YIRH4Qzg007|Obh!y+{Yolx&R z-wK<7W$*TB*P`hRf}kn^IeMc`%_tfxNU{(Y=5$F`iw=jw=bfs9)3C^uQb*&jCkxDM 
zA{tXo6+@m_SslTr1-KmZ72Tu@2Y$^H&pL>9D+ETK+>X>oTOs4(l1pq0Jf{-en~mL= zCzp*TDtg&n4t>yl9N2?pt&*#sw4YDH>=}2q-|H!9FxK)p4ENcLp^=eM`kAq)`o8oE zy?0o4!|Qy1ls$pk>&1NitM+?_tEfs-eDJC6;_=&LstzSW8L3&`C_-X%8x2Q&c?7-e zsJ-=e)WhRR47_PXfI+X`cJnEow;n^zjvpRjuI8{DNH!K?Tt9)G6je13S z9#6hIc!Femj&kwUAZSNFlc6f8Xh|`bx!TT>}6v{+D2exs%DoH$y}maiexI zhrtnWhA5DeodZa1ZU?qEgVYx4iTZeN2 zsV@ZxIl)Zq;1>2U+eIAYkv*3c6S+FA> zfMQo;bk;|rm5pdK6Ud4tm#axVwgOihugO`nyUz3wdh=WInn_mR<2Racmu#gkP z32H8;X<~}-F+@Or6XK%Np@NJL%F#b#0fE**vx*yTwgVxgA?g#!Qrh@1tmCNaR*;m}`Ty*y#cFvEe z|6XB)fSat%!A>AW{O?fU1s0}h zyi6gTX%vY(hfSv}nr6K|xN zac!RE-wFnOH25Czg$4jf!wh(IVB<~zD8q43RlMtVf~gE>MH0_@Y9Ilt^L&qrFTwXk zuejz*;py50&6~nbo*GY(DvQl071g9jqe?F_B`BV7QBw=fp+IkY-?rr&C0d*y#~o&h z%>7eU+9j2hCsNR4qm1z$^w<5FfsQBaQ3Wsy{sRt~(rNi7iQBA}B;Liv;nr#cVI|k9 zq2U(yLiBsSeEGtkpyOx^V%45e5|&wJX_t_{BXva2%EfN?`8ieQ>qMvE#%KxPW36cU za$|6xfq_RuvhN##!ZcTtg#kwEq=bE1T)7vvDmpB&IOHk#=cifuV`F0(85t=l~a)LIvy-zsn7 zv$U{?s}$i?qnf86At52C*gZkcSTQXVzpXT*_Qv~;F^4W<$=Uoi+X;*q@|U3Q{=ALJ zZ$pu1%}%_cAWsTTFV(64s$RY5DDVJT2N?M_&UGPj{U#)vz~qy~?)J#BVqRX}xP3Z@ zE-~+MUzHJUJ^YMaBpKC7OCw*X-Go|2We@?-&CSgb;kA@!J_IHB*LHQe2Zg1vA4SB; z7}hL>KvZ^iii3NF^DKkkt8sT-j|k{Ic{)Os1$OrI^@SLz>14xvXX?N@h1VX7_oUOJ zd@%-BNB(SYbkj&U&Bnib(KB=a~S1F*K#`dNfraOq=eG@mr}r|Ms7=n zdisVrdDRX$R4hk|w!#*(qaUnTlc@>_z0O2(@zo|zRGGd`zne5I9BR9@uZ>T1cTsMf z^=m!>WlK%fZJ#c9mYsfN$RSdiVz2Ox`)=4PCrlzD58zGtU|^M;3y5GDKp*W! 
zamQ`4SxpU2THm$1w`Z;&hp(l40ROy{;~mfF{lbAkfzO3@-|IH1UNXevI_Gy;AVub} zl6!&gz}(cmn+YO5&nbuQ?b|ZI(!KjWztSz*+}zC1HxSk0qAATTcZ65d5~Nh7M0k?= zNSMWGw2gr}aSJOxdo1SN!C#PzWeqL>|_PaBrBoxK#UQ z?kLTJXQOkmXi!@gT~Cb4yjunzOr0pj(u%3>fRR*HYEk<{SvSNl+${7-83G-In-zk3 z*t)-`l{s+oX=$fKk|zc@^LnwAi=W_oS!qte=@Ce>jIdqGm~K(jptUJIFm zu-O1K)LFk6%zwPaQ>{FMsnkWfzEIY1()MLcVt+UbBZG(U`NsT6Rdc42yCO#6mKr?d zNOT>3m;MDO2?n)B=!=J$`d#&ru+LcfxtrlY75rxZI7=*mUErVZ9$A9qB;;w}j(ES5yUOShq9XO+l-BCyB)2q*zwkABd?syn zfrFi%jL;$b4BcY3^sDQMq?F3tj8&xFm$b#mX-2vd2A9`)qIZrKWw{n$R}t(jR_*=c zIwL@8qRU{An35$Y9{O<+0UnMux>e~qTiF^4#WjxJPYor@x z#(;dyu$!ZLyB9BREF4(1Cg0IjD};*g%>g#UM`r3Ly|#L58=OJ%nL|&O+YE+$h+acX zOQ5{K92Tq@(elnYwCMhUl->6Zk_rx#{4H0=x8g6=j6U1j0YnGs+tFskL-euPo8xqb z>nF|m(LFd@c_~G8R2ZLj?}!+&l}+PndHP=0^6D~hp~jks}&OgD>8SdGkn$+4B!6{m`0#rGDR)IG! zRyOHTDN(woDaj_UECaGNv4dEc!!Hkbt70YX+Np8x zeHRSqoXP1gc(ovHo&AzpwDo#81szrpX~w7h&d8`*TTR~B37>%3<51DENpe&Ta*ha> zYP`YGXr>{SaqB7m1@N(9b5Or~`R&$K?BKP&G`TIkdSx;^zha7roXOYTniT~vWT%9P zP%9rid1w@#vzJlpBWzoCAG1Bbwx?`Vb-M>l+v<@?8U7Gcqa-OVx8Rx#aWVkt=$eNvkz}yW@F{b zUE9{5j6HEQC=o}x7z;_wAD40K|a&maakrR2Bcu^&1J`;-vgn z(nMtcdD{N_;r|agzkgP)PVg^||9_=WME0L@{v{aTzl^)sNc>8Ri0nUw{t5;7Uz`-7xkp#%S`l(s)e`4vj=?@}&;34W#T ze~|M3LJNKfyP5^Rp9x$97yL>*h{*x|6>4xb(pQYX!$KEF2ES4~;_=r3!WG|D0L4E+ zqhEaexQKWd1io+qAzg7@Y5Uzf`g{M!Bi^Tf+phjo%I^!~g(vkZ@gUx$-~Fn8a{m4r fUo4!z(i6&mSw>YAZ=hU|;UKO}1VV;Meev{v3WzjX literal 0 HcmV?d00001 -- 2.30.2