Added support to read OpenDocument spreadsheet files

author John Darrington <john@darrington.wattle.id.au>

Fri, 1 Jul 2011 15:27:26 +0000 (17:27 +0200)

committer John Darrington <john@darrington.wattle.id.au>

Fri, 1 Jul 2011 15:27:26 +0000 (17:27 +0200)
author John Darrington <john@darrington.wattle.id.au>
Fri, 1 Jul 2011 15:27:26 +0000 (17:27 +0200)
committer John Darrington <john@darrington.wattle.id.au>
Fri, 1 Jul 2011 15:27:26 +0000 (17:27 +0200)
diff --git a/configure.ac b/configure.ac

index eb10e560a2d2a4362a6f17ab269855b9ade0ecee..719f4c7411d68517adaeb31186257da92bceffb5 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -196,24 +196,30 @@ AC_DEFINE(
    [crc32], [gl_crc32],
    [Avoid making zlib call gnulib's crc32() instead of its own.])
  
-dnl Gnumeric support requires libxml2 and zlib.
+dnl Gnumeric and OpenDocument (read) support requires libxml2 and zlib.
  if test $HAVE_LIBXML2 = yes && test $HAVE_ZLIB = yes; then
    GNM_SUPPORT=yes
+  ODF_READ_SUPPORT=yes
    AC_DEFINE(
      [GNM_SUPPORT], [1],
      [Define to 1 if building in support for reading Gnumeric files.])
+  AC_DEFINE(
+    [ODF_READ_SUPPORT], [1],
+    [Define to 1 if building in support for reading OpenDocument files.])
  else
    GNM_SUPPORT=no
+  ODF_READ_SUPPORT=no
  fi
  AC_SUBST([GNM_SUPPORT])
+AC_SUBST([ODF_READ_SUPPORT])
  
-dnl ODT support requires libxml2.
+dnl ODF support requires libxml2 (zlib is optional).
  if test $HAVE_LIBXML2 = yes; then
    AC_DEFINE(
-    [ODT_SUPPORT], [1],
-    [Define to 1 if building in support for writing ODT files.])
+    [ODF_WRITE_SUPPORT], [1],
+    [Define to 1 if building in support for writing OpenDocument files.])
  fi
-AM_CONDITIONAL([ODT_SUPPORT], [test $HAVE_LIBXML2 = yes])
+AM_CONDITIONAL([ODF_WRITE_SUPPORT], [test $HAVE_LIBXML2 = yes])
  
  AC_ARG_WITH(
    gui_tools,
diff --git a/doc/files.texi b/doc/files.texi

index 89d043f8769f67c6c898dcf9a7729d0fbcd3356d..cdce0a3c4689875560ef57dad998253c5f51b5e2 100644 (file)
--- a/doc/files.texi
+++ b/doc/files.texi
@@ -182,7 +182,7 @@ Use of @cmd{GET} to read a portable file is a PSPP extension.
  
  @display
  GET DATA
-        /TYPE=@{GNM,PSQL,TXT@}
+        /TYPE=@{GNM,ODS,PSQL,TXT@}
          @dots{}additional subcommands depending on TYPE@dots{}
  @end display
  
@@ -199,6 +199,9 @@ PSPP currently supports the following file types:
  @item GNM
  Spreadsheet files created by Gnumeric (@url{http://gnumeric.org}).
  
+@item ODS
+Spreadsheet files in OpenDocument format.
+
  @item PSQL
  Relations from PostgreSQL databases (@url{http://postgresql.org}).
  
@@ -210,16 +213,16 @@ Each supported file type has additional subcommands, explained in
  separate sections below.
  
  @menu
-* GET DATA /TYPE=GNM::
-* GET DATA /TYPE=PSQL::
-* GET DATA /TYPE=TXT::
+* GET DATA /TYPE=GNM/ODS::     Spreadsheets
+* GET DATA /TYPE=PSQL::        Databases
+* GET DATA /TYPE=TXT::         Delimited Text Files
  @end menu
  
-@node GET DATA /TYPE=GNM
-@subsection Gnumeric Spreadsheet Files
+@node GET DATA /TYPE=GNM/ODS
+@subsection Spreadsheet Files
  
  @display
-GET DATA /TYPE=GNM
+GET DATA /TYPE=@{GNM, ODS@}
          /FILE=@{'file-name'@}
          /SHEET=@{NAME 'sheet-name', INDEX n@}
          /CELLRANGE=@{RANGE 'range', FULL@}
@@ -228,11 +231,19 @@ GET DATA /TYPE=GNM
  @end display
  
  @cindex Gnumeric
+@cindex OpenDocument
  @cindex spreadsheet files
-To use GET DATA to read a spreadsheet file created by Gnumeric
-(@url{http://gnumeric.org}), specify TYPE=GNM to indicate the file's
-format and use FILE to indicate the Gnumeric file to be read.  All
-other subcommands are optional.
+
+Gnumeric spreadsheets (@url{http://gnumeric.org}), and spreadsheets
+in OpenDocument format
+(@url{http://libreplanet.org/wiki/Group:OpenDocument/Software})
+can be read using the GET DATA command.
+Use the TYPE subcommand to indicate the file's format.  
+/TYPE=GNM indicates Gnumeric files,
+/TYPE=ODS indicates OpenDocument.
+The FILE subcommand is mandatory.
+Use it to specify the name of the file to be read.
+All other subcommands are optional.
  
  The format of each variable is determined by the format of the spreadsheet 
  cell containing the first datum for the variable.
@@ -240,10 +251,6 @@ If this cell is of string (text) format, then the width of the variable is
  determined from the length of the string it contains, unless the 
  ASSUMEDVARWIDTH subcommand is given.
  
-
-The FILE subcommand is mandatory. Specify the name of the file
-to be read.
-
  The SHEET subcommand specifies the sheet within the spreadsheet file to read.
  There are two forms of the SHEET subcommand.
  In the first form,
@@ -266,8 +273,8 @@ If no CELLRANGE subcommand is given, then the entire sheet is read.
  
  If @samp{/READNAMES=ON} is specified, then the contents of cells of
  the first row are used as the names of the variables in which to store
-the data from subsequent rows. 
-If the READNAMES command is omitted, or if @samp{/READNAMES=OFF} is
+the data from subsequent rows.  This is the default.
+If @samp{/READNAMES=OFF} is
  used, then the variables  receive automatically assigned names.
  
  The ASSUMEDVARWIDTH subcommand specifies the maximum width of string
diff --git a/src/data/automake.mk b/src/data/automake.mk

index 81a9d9c5259bf87838cfe68260eb68eb28fa44d0..4385fd6d63055ead3ddba1836ba57f8aedbef006 100644 (file)
--- a/src/data/automake.mk
+++ b/src/data/automake.mk
@@ -82,6 +82,8 @@ src_data_libdata_la_SOURCES = \
         src/data/make-file.h \
         src/data/mrset.c \
         src/data/mrset.h \
+       src/data/ods-reader.c \
+       src/data/ods-reader.h \
         src/data/por-file-reader.c \
         src/data/por-file-reader.h \
         src/data/por-file-writer.c \
@@ -94,6 +96,8 @@ src_data_libdata_la_SOURCES = \
         src/data/settings.h \
         src/data/short-names.c \
         src/data/short-names.h \
+       src/data/spreadsheet-reader.c \
+       src/data/spreadsheet-reader.h \
         src/data/subcase.c \
         src/data/subcase.h \
         src/data/sys-file-encoding.c \
diff --git a/src/data/gnumeric-reader.c b/src/data/gnumeric-reader.c

index 61fbab899b8bd44eff63633bdb0f90a57a2553d4..56ebc3062aed3090ccf26748b8d96a105bac85dc 100644 (file)
--- a/src/data/gnumeric-reader.c
+++ b/src/data/gnumeric-reader.c
@@ -14,8 +14,6 @@
     You should have received a copy of the GNU General Public License
     along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  
-
-
  #include <config.h>
  
  #include "libpspp/message.h"
@@ -27,13 +25,14 @@
  #define _(msgid) gettext (msgid)
  #define N_(msgid) (msgid)
  
+#include "spreadsheet-reader.h"
  
  #if !GNM_SUPPORT
  
  struct casereader *
-gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict)
+gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dict)
  {
-  msg (ME, _("Support for Gnumeric files was not compiled into this installation of PSPP"));
+  msg (ME, _("Support for %s files was not compiled into this installation of PSPP"), "Gnumeric");
  
    return NULL;
  }
@@ -59,9 +58,6 @@ gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict)
  
  #include "gl/xalloc.h"
  
-/* Default width of string variables. */
-#define GNUMERIC_DEFAULT_WIDTH 8
-
  static void gnm_file_casereader_destroy (struct casereader *, void *);
  
  static struct ccase *gnm_file_casereader_read (struct casereader *, void *);
@@ -74,73 +70,6 @@ static const struct casereader_class gnm_file_casereader_class =
      NULL,
    };
  
-/* Convert a string, which is an integer encoded in base26
-   IE, A=0, B=1, ... Z=25 to the integer it represents.
-   ... except that in this scheme, digits with an exponent
-   greater than 1 are implicitly incremented by 1, so
-   AA  = 0 + 1*26, AB = 1 + 1*26,
-   ABC = 2 + 2*26 + 1*26^2 ....
-*/
-static int
-pseudo_base26 (const char *str)
-{
-  int i;
-  int multiplier = 1;
-  int result = 0;
-  int len = strlen (str);
-
-  for ( i = len - 1 ; i >= 0; --i)
-    {
-      int mantissa = (str[i] - 'A');
-
-      if ( mantissa < 0 || mantissa > 25 )
-       return -1;
-
-      if ( i != len - 1)
-       mantissa++;
-
-      result += mantissa * multiplier;
-
-      multiplier *= 26;
-    }
-
-  return result;
-}
-
-
-
-/* Convert a cell reference in the form "A1:B2", to
-   integers.  A1 means column zero, row zero.
-   B1 means column 1 row 0. AA1 means column 26, row 0.
-*/
-static bool
-convert_cell_ref (const char *ref,
-                 int *col0, int *row0,
-                 int *coli, int *rowi)
-{
-  char startcol[5];
-  char stopcol [5];
-
-  int startrow;
-  int stoprow;
-
-  int n = sscanf (ref, "%4[a-zA-Z]%d:%4[a-zA-Z]%d",
-             startcol, &startrow,
-             stopcol, &stoprow);
-  if ( n != 4)
-    return false;
-
-  str_uppercase (startcol);
-  *col0 = pseudo_base26 (startcol);
-  str_uppercase (stopcol);
-  *coli = pseudo_base26 (stopcol);
-  *row0 = startrow - 1;
-  *rowi = stoprow - 1 ;
-
-  return true;
-}
-
-
  enum reader_state
    {
      STATE_INIT = 0,        /* Initial state */
@@ -180,9 +109,6 @@ struct gnumeric_reader
  
  static void process_node (struct gnumeric_reader *r);
  
-#define _xml(X) (CHAR_CAST (const xmlChar *, X))
-
-#define _xmlchar_to_int(X) (atoi(CHAR_CAST (const char *, X)))
  
  static void
  gnm_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
@@ -345,7 +271,7 @@ struct var_spec
  };
  
  struct casereader *
-gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict)
+gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dict)
  {
    unsigned long int vstart = 0;
    int ret;
@@ -473,7 +399,7 @@ gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict)
  
               if (-1 ==  var_spec [idx].width )
                 var_spec [idx].width = (gri->asw == -1) ?
-                 ROUND_UP (strlen(text), GNUMERIC_DEFAULT_WIDTH) : gri->asw;
+                 ROUND_UP (strlen(text), SPREADSHEET_DEFAULT_WIDTH) : gri->asw;
             }
  
           free (value);
@@ -506,7 +432,7 @@ gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict)
        /* Probably no data exists for this variable, so allocate a
          default width */
        if ( var_spec[i].width == -1 )
-       var_spec[i].width = GNUMERIC_DEFAULT_WIDTH;
+       var_spec[i].width = SPREADSHEET_DEFAULT_WIDTH;
  
        name = dict_make_unique_var_name (r->dict, var_spec[i].name, &vstart);
        dict_create_var (r->dict, name, var_spec[i].width);
diff --git a/src/data/gnumeric-reader.h b/src/data/gnumeric-reader.h

index b313fc78768cf446975156c6d68647f277a72d4b..fcd338567543e3e17d0ad1c591cef59be23ad805 100644 (file)
--- a/src/data/gnumeric-reader.h
+++ b/src/data/gnumeric-reader.h
@@ -20,21 +20,10 @@
  #include <stdbool.h>
  
  struct casereader;
-
-
-struct gnumeric_read_info
-{
-  char *sheet_name ;            /* In UTF-8. */
-  char *file_name ;             /* In filename encoding. */
-  char *cell_range ;            /* In UTF-8. */
-  int sheet_index ;
-  bool read_names ;
-  int asw ;
-};
-
  struct dictionary;
+struct spreadsheet_read_info;
  
-struct casereader * gnumeric_open_reader (struct gnumeric_read_info *, struct dictionary **);
+struct casereader * gnumeric_open_reader (struct spreadsheet_read_info *, struct dictionary **);
  
  
  #endif
diff --git a/src/data/ods-reader.c b/src/data/ods-reader.c

new file mode 100644 (file)

index 0000000..51ee5ac
--- /dev/null
+++ b/src/data/ods-reader.c
@@ -0,0 +1,681 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "libpspp/message.h"
+#include "libpspp/misc.h"
+
+#include "data/data-in.h"
+
+#include "gl/minmax.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+#define N_(msgid) (msgid)
+
+#include "ods-reader.h"
+#include "spreadsheet-reader.h"
+
+#if !ODF_READ_SUPPORT
+
+/* Fallback used when ODF_READ_SUPPORT is not defined: emits an error
+   message saying that OpenDocument support was not compiled in and
+   returns NULL.  Neither GRI nor DICT is examined or modified. */
+struct casereader *
+ods_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dict)
+{
+  msg (ME, _("Support for %s files was not compiled into this installation of PSPP"), "OpenDocument");
+
+  return NULL;
+}
+
+#else
+
+#include "libpspp/zip-reader.h"
+
+
+#include <assert.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <libxml/xmlreader.h>
+#include <zlib.h>
+
+#include "data/format.h"
+#include "data/case.h"
+#include "data/casereader-provider.h"
+#include "data/dictionary.h"
+#include "data/identifier.h"
+#include "data/value.h"
+#include "data/variable.h"
+#include "libpspp/i18n.h"
+#include "libpspp/str.h"
+
+#include "gl/xalloc.h"
+
+/* Casereader callbacks implemented later in this file. */
+static void ods_file_casereader_destroy (struct casereader *, void *);
+
+static struct ccase *ods_file_casereader_read (struct casereader *, void *);
+
+/* Callback table handed to casereader_create_sequential() by
+   ods_open_reader().  Only the read and destroy callbacks are
+   provided; the two remaining slots are left NULL. */
+static const struct casereader_class ods_file_casereader_class =
+  {
+    ods_file_casereader_read,
+    ods_file_casereader_destroy,
+    NULL,
+    NULL,
+  };
+
+/* States of the XML parsing state machine driven by process_node().
+   The states are ordered from outermost to innermost nesting, and the
+   code relies on that order when it compares states with < and >. */
+enum reader_state
+  {
+    STATE_INIT = 0,        /* Initial state */
+    STATE_SPREADSHEET,     /* Inside the office:spreadsheet element */
+    STATE_TABLE,           /* Inside a table:table element being read */
+    STATE_ROW,             /* Inside a non-empty table:table-row element */
+    STATE_CELL,            /* Inside a non-empty table:table-cell element */
+    STATE_CELL_CONTENT     /* Found the text within a cell */
+  };
+
+/* State of an OpenDocument spreadsheet reader.  One of these is
+   allocated by ods_open_reader() and passed as the auxiliary data of
+   the casereader callbacks. */
+struct ods_reader
+{
+  xmlTextReaderPtr xtr;         /* Streaming XML reader over content.xml. */
+
+  enum reader_state state;      /* Current state of process_node(). */
+  bool sheet_found;             /* Set once the requested sheet was seen. */
+  int row;                      /* Zero-based row of the current node, -1 before any row. */
+  int col;                      /* Zero-based column of the current cell, -1 outside cells. */
+  int node_type;                /* libxml2 node type of the current node. */
+  int sheet_index;              /* Number of table:table elements seen so far. */
+
+  const xmlChar *target_sheet;  /* Requested sheet name, or NULL to select by index. */
+  int target_sheet_index;       /* Requested sheet index; compared against sheet_index. */
+
+  int start_row;                /* First row to read (zero-based). */
+  int start_col;                /* First column to read (zero-based). */
+  int stop_row;                 /* Last row to read, or -1 for no limit. */
+  int stop_col;                 /* Last column to read, or -1 for no limit. */
+
+  struct caseproto *proto;      /* Prototype of the cases that are produced. */
+  struct dictionary *dict;      /* Dictionary built from the first rows. */
+  struct ccase *first_case;     /* First data row, read ahead by ods_open_reader(). */
+  bool used_first_case;         /* True once first_case has been handed out. */
+  bool read_names;              /* Copy of the caller's read_names setting. */
+
+  struct string ods_errs;       /* Error text collected by the zip reader. */
+  int span;                     /* table:number-columns-repeated of the current cell. */
+};
+
+static void process_node (struct ods_reader *r);
+
+/* Casereader "destroy" callback.  Releases everything owned by the
+   ods_reader R_: the XML reader, the accumulated error string, the
+   cached first case (only if it was never handed to the caller) and
+   the case prototype.  Any error text still pending in R_->ods_errs is
+   reported before it is discarded.  A null R_ is ignored. */
+static void
+ods_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
+{
+  struct ods_reader *r = r_;
+  if ( r == NULL)
+    return ;
+
+  if (r->xtr)
+    xmlFreeTextReader (r->xtr);
+
+  /* The error text comes from the file being read, so it must never be
+     interpreted as a format string. */
+  if ( ! ds_is_empty (&r->ods_errs))
+    msg (ME, "%s", ds_cstr (&r->ods_errs));
+
+  ds_destroy (&r->ods_errs);
+
+  /* Once the first case has been returned by the read callback it
+     belongs to the caller. */
+  if ( ! r->used_first_case )
+    case_unref (r->first_case);
+
+  caseproto_unref (r->proto);
+
+  free (r);
+}
+
+/* Advances R's state machine according to the XML node on which
+   R->xtr is currently positioned.
+
+   Records the node type in R->node_type and updates R->state, R->row,
+   R->col, R->span, R->sheet_index and R->sheet_found.  The caller is
+   expected to have just called xmlTextReaderRead() successfully.
+
+   Column bookkeeping: when a table:table-cell element is entered,
+   R->col is advanced by the repeat count of the *previous* cell and
+   R->span is set to the repeat count of the new one, so the current
+   cell occupies columns R->col .. R->col + R->span - 1. */
+static void
+process_node (struct ods_reader *r)
+{
+  xmlChar *name = xmlTextReaderName (r->xtr);
+  if (name == NULL)
+    name = xmlStrdup (_xml ("--"));
+
+  r->node_type = xmlTextReaderNodeType (r->xtr);
+
+  switch ( r->state)
+    {
+    case STATE_INIT:
+      if (0 == xmlStrcasecmp (name, _xml("office:spreadsheet")) &&
+          XML_READER_TYPE_ELEMENT  == r->node_type)
+        {
+          r->state = STATE_SPREADSHEET;
+        }
+      break;
+    case STATE_SPREADSHEET:
+      if (0 == xmlStrcasecmp (name, _xml("table:table")))
+        {
+          if (XML_READER_TYPE_ELEMENT == r->node_type)
+            {
+              r->col = -1;
+              r->row = -1;
+              ++r->sheet_index;
+              if ( r->target_sheet != NULL)
+                {
+                  /* Select the sheet by name. */
+                  xmlChar *value = xmlTextReaderGetAttribute (r->xtr, _xml ("table:name"));
+                  if ( 0 == xmlStrcmp (value, r->target_sheet))
+                    {
+                      r->sheet_found = true;
+                      r->state = STATE_TABLE;
+                    }
+                  /* Strings returned by libxml2 are released with
+                     xmlFree, as is done for NAME below. */
+                  xmlFree (value);
+                }
+              else if (r->target_sheet_index == r->sheet_index)
+                {
+                  /* Select the sheet by index. */
+                  r->sheet_found = true;
+                  r->state = STATE_TABLE;
+                }
+              else if ( r->target_sheet_index == -1)
+                r->state = STATE_TABLE;
+            }
+        }
+      else if (XML_READER_TYPE_END_ELEMENT  == r->node_type
+                   && r->sheet_found)
+        {
+          /* The wanted sheet has already been read and the spreadsheet
+             element is closing: nothing more to do. */
+          r->state = STATE_INIT;
+        }
+        break;
+    case STATE_TABLE:
+      if (0 == xmlStrcasecmp (name, _xml("table:table-row")) )
+        {
+          if ( XML_READER_TYPE_ELEMENT  == r->node_type)
+            {
+              /* An empty row element still counts as a row, but there
+                 is nothing inside it to descend into. */
+              if (! xmlTextReaderIsEmptyElement (r->xtr))
+                {
+                  r->state = STATE_ROW;
+                }
+              r->row++;
+              r->span = 1;
+            }
+        }
+      else if (XML_READER_TYPE_END_ELEMENT  == r->node_type)
+        {
+          r->state = STATE_SPREADSHEET;
+        }
+      break;
+    case STATE_ROW:
+      if (0 == xmlStrcasecmp (name, _xml ("table:table-cell")))
+        {
+          if ( XML_READER_TYPE_ELEMENT  == r->node_type)
+            {
+              xmlChar *value =
+                xmlTextReaderGetAttribute (r->xtr,
+                                           _xml ("table:number-columns-repeated"));
+              /* Step over the columns covered by the previous cell,
+                 then remember how many this one covers. */
+              r->col += r->span;
+              r->span = value ? _xmlchar_to_int (value) : 1;
+              xmlFree (value);
+              if (! xmlTextReaderIsEmptyElement (r->xtr))
+                {
+                  r->state = STATE_CELL;
+                }
+            }
+        }
+      else if (XML_READER_TYPE_END_ELEMENT  == r->node_type)
+        {
+          r->state = STATE_TABLE;
+          r->col = -1;
+          /* Set the span back to the default */
+          r->span = 1;
+        }
+      break;
+    case STATE_CELL:
+      if (0 == xmlStrcasecmp (name, _xml("text:p")))
+        {
+          if ( XML_READER_TYPE_ELEMENT  == r->node_type)
+            {
+              r->state = STATE_CELL_CONTENT;
+            }
+        }
+      else if (XML_READER_TYPE_END_ELEMENT  == r->node_type)
+        {
+          r->state = STATE_ROW;
+        }
+      break;
+    case STATE_CELL_CONTENT:
+      /* Stay here only while positioned on text nodes. */
+      if (XML_READER_TYPE_TEXT != r->node_type)
+        r->state = STATE_CELL;
+      break;
+    default:
+      break;
+    };
+
+  xmlFree (name);
+}
+
+/*
+   A struct containing the parameters of a cell's value
+   parsed from the xml.  Any of the members may be null.
+*/
+struct xml_value
+{
+  xmlChar *type;    /* The cell's office:value-type attribute. */
+  xmlChar *value;   /* The cell's office:value attribute. */
+  xmlChar *text;    /* The text node found inside the cell. */
+};
+
+/* What ods_open_reader() learns about one column before the
+   dictionary is created. */
+struct var_spec
+{
+  char *name;                   /* Name taken from the header row, if any. */
+  struct xml_value firstval;    /* Contents of the column's first data cell. */
+};
+
+
+/* Returns the variable width suggested by the cell contents XMV.
+
+   A cell whose type is known and is not "string" yields 0 (numeric).
+   Otherwise FALLBACK is returned unless it is -1, in which case the
+   width is the length of the cell's value (or, failing that, of its
+   text) rounded up to a multiple of SPREADSHEET_DEFAULT_WIDTH, or
+   SPREADSHEET_DEFAULT_WIDTH itself if the cell has neither. */
+static int
+xmv_to_width (const struct xml_value *xmv, int fallback)
+{
+  const bool is_string = (xmv->type == NULL
+                          || 0 == xmlStrcmp (xmv->type, _xml("string")));
+  const xmlChar *sample;
+
+  if (!is_string)
+    return 0;
+
+  if (fallback != -1)
+    return fallback;
+
+  /* Prefer the value attribute; fall back on the cell text. */
+  sample = xmv->value ? xmv->value : xmv->text;
+  if (sample == NULL)
+    return SPREADSHEET_DEFAULT_WIDTH;
+
+  return ROUND_UP (xmlStrlen (sample), SPREADSHEET_DEFAULT_WIDTH);
+}
+
+/* Stores into variable VAR of case C the datum described by XMV.
+
+   A cell with neither value nor text becomes a missing value.  String
+   variables receive the cell text, padded on the right with spaces.
+   Numeric variables are parsed by data_in() using VAR's write format,
+   from the value attribute when there is one and from the cell text
+   otherwise. */
+static void
+convert_xml_to_value (struct ccase *c, const struct variable *var,
+                      const struct xml_value *xmv)
+{
+  union value *dest = case_data_rw (c, var);
+
+  if (xmv->value == NULL && xmv->text == NULL)
+    {
+      value_set_missing (dest, var_get_width (var));
+      return;
+    }
+
+  if (var_is_alpha (var))
+    {
+      /* Strings appear to carry no value attribute, so the text node
+         is the only source. */
+      value_copy_str_rpad (dest, var_get_width (var), xmv->text, ' ');
+      return;
+    }
+
+  {
+    const struct fmt_spec *fmt = var_get_write_format (var);
+    enum fmt_category category = fmt_get_category (fmt->type);
+    const char *text;
+
+    assert (category != FMT_CAT_STRING);
+
+    if (xmv->value)
+      text = CHAR_CAST (const char *, xmv->value);
+    else
+      text = CHAR_CAST (const char *, xmv->text);
+
+    data_in (ss_cstr (text), "UTF-8",
+             fmt->type,
+             dest,
+             var_get_width (var),
+             "UTF-8");
+  }
+}
+
+
+/* Frees the N entries of VAR_SPEC, and then VAR_SPEC itself.
+   VAR_SPEC may be null if N is 0. */
+static void
+free_var_specs (struct var_spec *var_spec, int n)
+{
+  int i;
+
+  for ( i = 0 ; i < n ; ++i )
+    {
+      xmlFree (var_spec[i].firstval.type);
+      xmlFree (var_spec[i].firstval.value);
+      xmlFree (var_spec[i].firstval.text);
+      free (var_spec[i].name);
+    }
+
+  free (var_spec);
+}
+
+/* Opens the OpenDocument spreadsheet described by GRI and returns a
+   casereader for its data.
+
+   The sheet is selected by GRI->sheet_name or GRI->sheet_index and the
+   cells by GRI->cell_range (the whole sheet if null).  If
+   GRI->read_names is set, the first row supplies the variable names.
+   The first data row is read immediately: it decides each variable's
+   width and format, and it is cached as the reader's first case.
+
+   On success a new dictionary is stored in *DICT.  On failure an
+   error or warning is issued, NULL is returned and *DICT is left
+   untouched. */
+struct casereader *
+ods_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dict)
+{
+  int ret = 0;
+  xmlChar *type = NULL;
+  xmlChar *val_string = NULL;
+  unsigned long int vstart = 0;
+  casenumber n_cases = CASENUMBER_MAX;
+  int i;
+  struct var_spec *var_spec = NULL;
+  int n_var_specs = 0;
+
+  struct ods_reader *r = xzalloc (sizeof *r);
+
+  r->read_names = gri->read_names;
+  ds_init_empty (&r->ods_errs);
+
+  struct zip_reader *zreader = zip_reader_create (gri->file_name, &r->ods_errs);
+  struct zip_member *content = NULL;
+
+  if ( NULL == zreader)
+    {
+      msg (ME, _("Error opening `%s' for reading as a OpenDocument spreadsheet file: %s."),
+           gri->file_name, ds_cstr (&r->ods_errs));
+
+      goto error;
+    }
+
+  content = zip_member_open (zreader, "content.xml");
+  if ( NULL == content)
+    {
+      msg (ME, _("Could not extract OpenDocument spreadsheet from file `%s': %s."),
+           gri->file_name, ds_cstr (&r->ods_errs));
+
+      goto error;
+    }
+
+  zip_member_ref (content);
+
+  r->xtr = xmlReaderForIO ((xmlInputReadCallback) zip_member_read,
+                           (xmlInputCloseCallback) zip_member_finish,
+                           content,   NULL, NULL, XML_PARSE_RECOVER);
+
+  if ( r->xtr == NULL)
+    {
+      goto error;
+    }
+
+  if ( gri->cell_range )
+    {
+      /* A range that parses but runs backwards (or starts before the
+         first row) would yield a non-positive column count below, so
+         treat it as invalid too. */
+      if ( ! convert_cell_ref (gri->cell_range,
+                               &r->start_col, &r->start_row,
+                               &r->stop_col, &r->stop_row)
+           || r->start_col < 0 || r->start_row < 0
+           || r->stop_col < r->start_col || r->stop_row < r->start_row)
+        {
+          msg (SE, _("Invalid cell range `%s'"),
+               gri->cell_range);
+          goto error;
+        }
+    }
+  else
+    {
+      r->start_col = 0;
+      r->start_row = 0;
+      r->stop_col = -1;
+      r->stop_row = -1;
+    }
+
+  r->state = STATE_INIT;
+  r->target_sheet = BAD_CAST gri->sheet_name;
+  r->target_sheet_index = gri->sheet_index;
+  r->row = r->col = -1;
+  r->sheet_index = 0;
+
+
+  /* If CELLRANGE was given, then we know how many variables should be
+     read.  The entries must start out zeroed: columns without data
+     keep a null name and null first value. */
+  if ( r->stop_col != -1 )
+    {
+      n_var_specs =  r->stop_col - r->start_col + 1;
+      var_spec = xzalloc (sizeof (*var_spec) * n_var_specs);
+    }
+
+
+  /* Advance to the start of the cells for the target sheet */
+  while ( (r->row < r->start_row ))
+    {
+      if (1 != (ret = xmlTextReaderRead (r->xtr)))
+          break;
+
+      process_node (r);
+    }
+
+  if (ret < 1)
+    {
+      msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."),
+           gri->file_name);
+      goto error;
+    }
+
+  if ( gri->read_names)
+    {
+      /* Read the header row: each text cell names one variable. */
+      while (1 == (ret = xmlTextReaderRead (r->xtr)))
+        {
+          int idx;
+          process_node (r);
+          if ( r->row > r->start_row)
+            break;
+
+          /* End of the header row. */
+          if (r->col == -1 && r->row == r->start_row)
+            break;
+
+          if ( r->col < r->start_col)
+            continue;
+
+          idx = r->col - r->start_col;
+
+          if (r->state == STATE_CELL_CONTENT
+              &&
+              XML_READER_TYPE_TEXT  == r->node_type)
+            {
+              xmlChar *value = xmlTextReaderValue (r->xtr);
+              if ( idx >= n_var_specs)
+                {
+                  var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + 1));
+
+                  /* xrealloc does not initialise the memory it adds,
+                     so zero the new entries [n_var_specs, idx]. */
+                  memset (var_spec + n_var_specs,
+                          0,
+                          (idx + 1 - n_var_specs) * sizeof (*var_spec));
+                  n_var_specs = idx + 1;
+                }
+
+              /* Every entry is zero-initialised, so a name that is
+                 already set can only come from an earlier text node
+                 of the same cell; release it before replacing it. */
+              free (var_spec [idx].name);
+              var_spec [idx].name = strdup (CHAR_CAST (const char *, value));
+              xmlFree (value);
+              value = NULL;
+            }
+        }
+    }
+
+  /* Read in the first row of data.
+
+     NOTE(review): columns beyond n_var_specs are skipped here and
+     nothing in this loop grows var_spec, so with READNAMES off and no
+     CELLRANGE n_var_specs stays 0 and the sheet is reported as empty.
+     Confirm whether that is intended. */
+  while (1 == xmlTextReaderRead (r->xtr))
+    {
+      int idx;
+      process_node (r);
+      if ( r->row >= r->start_row + 1 + gri->read_names)
+        break;
+
+      if ( r->col < r->start_col)
+        continue;
+
+      if ( r->col - r->start_col + 1 > n_var_specs)
+        continue;
+
+      idx = r->col - r->start_col;
+
+      if ( r->state == STATE_CELL &&
+           XML_READER_TYPE_ELEMENT  == r->node_type)
+        {
+          /* Attributes of a previous cell that had no text are still
+             pending here; drop them before fetching the new ones. */
+          xmlFree (type);
+          xmlFree (val_string);
+          type = xmlTextReaderGetAttribute (r->xtr, _xml ("office:value-type"));
+          val_string = xmlTextReaderGetAttribute (r->xtr, _xml ("office:value"));
+        }
+
+      if ( r->state == STATE_CELL_CONTENT &&
+           XML_READER_TYPE_TEXT  == r->node_type)
+        {
+          struct xml_value *firstval = &var_spec [idx].firstval;
+
+          /* Release whatever an earlier text node of the same column
+             stored, then take ownership of the pending attributes. */
+          xmlFree (firstval->type);
+          xmlFree (firstval->value);
+          xmlFree (firstval->text);
+
+          firstval->type = type;
+          firstval->text = xmlTextReaderValue (r->xtr);
+          firstval->value = val_string;
+          val_string = NULL;
+          type = NULL;
+        }
+    }
+
+  /* Bail out before the dictionary exists, so that nothing is left
+     behind in *DICT on failure. */
+  if ( n_var_specs ==  0 )
+    {
+      msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."),
+           gri->file_name);
+      goto error;
+    }
+
+  /* Create the dictionary and populate it */
+  *dict = r->dict = dict_create (
+    CHAR_CAST (const char *, xmlTextReaderConstEncoding (r->xtr)));
+
+  for (i = 0 ; i < n_var_specs ; ++i )
+    {
+      struct fmt_spec fmt;
+      struct variable *var = NULL;
+      char *name = dict_make_unique_var_name (r->dict, var_spec[i].name, &vstart);
+      int width  = xmv_to_width (&var_spec[i].firstval, gri->asw);
+      dict_create_var (r->dict, name, width);
+      free (name);
+
+      var = dict_get_var (r->dict, i);
+
+      if ( 0 == xmlStrcmp (var_spec[i].firstval.type, _xml("date")))
+        {
+          fmt.type = FMT_DATE;
+          fmt.d = 0;
+          fmt.w = 20;
+        }
+      else
+        fmt = fmt_default_for_width (width);
+
+      var_set_both_formats (var, &fmt);
+    }
+
+  /* Create the first case, and cache it */
+  r->used_first_case = false;
+
+  r->proto = caseproto_ref (dict_get_proto (r->dict));
+  r->first_case = case_create (r->proto);
+  case_set_missing (r->first_case);
+
+  for ( i = 0 ; i < n_var_specs ; ++i )
+    {
+      const struct variable *var = dict_get_var (r->dict, i);
+
+      convert_xml_to_value (r->first_case, var,  &var_spec[i].firstval);
+    }
+
+  zip_reader_destroy (zreader);
+
+  free_var_specs (var_spec, n_var_specs);
+  xmlFree (type);
+  xmlFree (val_string);
+
+  return casereader_create_sequential
+    (NULL,
+     r->proto,
+     n_cases,
+     &ods_file_casereader_class, r);
+
+ error:
+
+  zip_reader_destroy (zreader);
+
+  free_var_specs (var_spec, n_var_specs);
+  xmlFree (type);
+  xmlFree (val_string);
+
+  /* No casereader was created, so nothing else will ever release the
+     reader state.  Every error above has already been reported, so
+     the pending error text is discarded rather than printed. */
+  if (r->xtr)
+    xmlFreeTextReader (r->xtr);
+  ds_destroy (&r->ods_errs);
+  free (r);
+
+  return NULL;
+}
+
+
+/* Reads and returns one case from READER's file.  Returns a null
+   pointer on failure or when there are no more rows to read.
+
+   The first call returns the case that ods_open_reader() cached.
+   Later calls consume XML nodes until the next row starts, filling in
+   the variables for every cell that lies inside the selected column
+   range.  A cell with table:number-columns-repeated greater than 1
+   sets that many consecutive variables to the same value. */
+static struct ccase *
+ods_file_casereader_read (struct casereader *reader UNUSED, void *r_)
+{
+  struct ccase *c = NULL;
+  xmlChar *val_string = NULL;
+  struct ods_reader *r = r_;
+  int current_row = r->row;
+
+  if ( r->row == -1)
+    return NULL;
+
+  if ( !r->used_first_case )
+    {
+      r->used_first_case = true;
+      return r->first_case;
+    }
+
+
+  if ( r->state > STATE_INIT)
+    {
+      c = case_create (r->proto);
+      case_set_missing (c);
+    }
+
+  while (1 == xmlTextReaderRead (r->xtr))
+    {
+      process_node (r);
+      if ( r->row > current_row)
+        {
+          break;
+        }
+      if ( r->col < r->start_col || (r->stop_col != -1 && r->col > r->stop_col))
+        {
+          continue;
+        }
+      if ( r->col - r->start_col >= caseproto_get_n_widths (r->proto))
+        {
+          continue;
+        }
+      if ( r->stop_row != -1 && r->row > r->stop_row)
+        {
+          continue;
+        }
+      if ( r->state == STATE_CELL &&
+           r->node_type == XML_READER_TYPE_ELEMENT )
+        {
+          /* A previous cell may have had a value attribute but no
+             text; release it before it is overwritten. */
+          xmlFree (val_string);
+          val_string = xmlTextReaderGetAttribute (r->xtr, _xml ("office:value"));
+        }
+
+      if ( r->state == STATE_CELL_CONTENT && r->node_type == XML_READER_TYPE_TEXT )
+        {
+          int col;
+          const int n_vars = caseproto_get_n_widths (r->proto);
+          struct xml_value xmv;
+
+          xmv.type = NULL;
+          xmv.text = xmlTextReaderValue (r->xtr);
+          xmv.value = val_string;
+          val_string = NULL;
+
+          for (col = 0; col < r->span ; ++col)
+            {
+              const int idx = r->col + col - r->start_col;
+
+              /* A repeated cell may extend past the last variable or
+                 past the end of the requested range; stop there
+                 rather than index outside the dictionary. */
+              if ( idx >= n_vars
+                   || (r->stop_col != -1 && r->col + col > r->stop_col))
+                break;
+
+              convert_xml_to_value (c, dict_get_var (r->dict, idx), &xmv);
+            }
+          xmlFree (xmv.text);
+          xmlFree (xmv.value);
+        }
+
+      /* The sheet being read has ended. */
+      if ( r->state < STATE_TABLE)
+        break;
+    }
+
+  xmlFree (val_string);
+
+  if (NULL == c || (r->stop_row != -1 && r->row > r->stop_row + 1))
+    {
+      case_unref (c);
+      return NULL;
+    }
+  else
+    {
+      return c;
+    }
+}
+#endif
diff --git a/src/data/ods-reader.h b/src/data/ods-reader.h

new file mode 100644 (file)

index 0000000..79b7169
--- /dev/null
+++ b/src/data/ods-reader.h
@@ -0,0 +1,27 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef ODS_READ_H
+#define ODS_READ_H 1
+
+struct casereader;
+struct dictionary;
+struct spreadsheet_read_info;
+
+/* Opens the OpenDocument spreadsheet described by the read info and
+   returns a casereader for it, storing the dictionary it creates
+   through the second argument.  Returns a null pointer on failure,
+   including when PSPP was built without OpenDocument read support. */
+struct casereader * ods_open_reader (struct spreadsheet_read_info *, struct dictionary **);
+
+
+#endif
diff --git a/src/data/spreadsheet-reader.c b/src/data/spreadsheet-reader.c

new file mode 100644 (file)

index 0000000..11e8cf5
--- /dev/null
+++ b/src/data/spreadsheet-reader.c
@@ -0,0 +1,89 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "spreadsheet-reader.h"
+
+#include <libpspp/str.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Decodes STR, a spreadsheet column name spelled in upper-case
+   letters, into a zero-based column number: A = 0, B = 1, ...
+   Z = 25, AA = 26, AB = 27, and so on.  This is base 26 with a
+   twist: every letter except the rightmost stands for one more
+   than its position in the alphabet, so
+   ABC = 2 + (1 + 1)*26 + (0 + 1)*26^2.
+   Returns -1 if STR holds any character outside 'A'...'Z'.  */
+int
+pseudo_base26 (const char *str)
+{
+  const int n_letters = strlen (str);
+  int place_value = 1;
+  int column = 0;
+  int pos = n_letters;
+
+  /* Work leftwards from the rightmost, least significant letter.  */
+  while (pos-- > 0)
+    {
+      int digit = str[pos] - 'A';
+
+      if (digit < 0 || digit > 25)
+        return -1;
+
+      /* Only the rightmost letter is taken at face value.  */
+      if (pos < n_letters - 1)
+        ++digit;
+
+      column += digit * place_value;
+      place_value *= 26;
+    }
+  return column;
+}
+
+
+/* Parses REF, a cell range such as "A1:B2", into zero-based
+   coordinates: *COL0 and *ROW0 receive the corner named before
+   the colon, *COLI and *ROWI the one named after it.  "A1" is
+   column 0, row 0; "B1" is column 1, row 0; "AA1" is column 26,
+   row 0.  Column letters may be in either case.  Returns true on
+   success, or false, with nothing stored, if REF lacks this form.  */
+bool
+convert_cell_ref (const char *ref,
+                 int *col0, int *row0,
+                 int *coli, int *rowi)
+{
+  /* Room for the four letters that "%4[...]" admits plus a null.  */
+  char first_letters[5], last_letters[5];
+  int first_row, last_row;
+
+  if (4 != sscanf (ref, "%4[a-zA-Z]%d:%4[a-zA-Z]%d",
+                   first_letters, &first_row,
+                   last_letters, &last_row))
+    return false;
+
+  /* pseudo_base26 rejects anything but 'A'...'Z', so change case first.  */
+  str_uppercase (first_letters);
+  str_uppercase (last_letters);
+
+  *col0 = pseudo_base26 (first_letters);
+  *coli = pseudo_base26 (last_letters);
+  *row0 = first_row - 1;
+  *rowi = last_row - 1;
+  return true;
+}
+
diff --git a/src/data/spreadsheet-reader.h b/src/data/spreadsheet-reader.h

new file mode 100644 (file)

index 0000000..6edd705
--- /dev/null
+++ b/src/data/spreadsheet-reader.h
@@ -0,0 +1,47 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2007, 2010 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef SPREADSHEET_READ_H
+#define SPREADSHEET_READ_H 1
+
+#include <stdbool.h>
+
+/* Default width of string variables. */
+#define SPREADSHEET_DEFAULT_WIDTH 8
+
+struct spreadsheet_read_info   /* What to read from a spreadsheet file. */
+{
+  char *sheet_name ;            /* In UTF-8. */
+  char *file_name ;             /* In filename encoding. */
+  char *cell_range ;            /* In UTF-8. */
+  int sheet_index ;             /* GET DATA starts it at 1; -1 once a sheet name is given. */
+  bool read_names ;             /* GET DATA's READNAMES setting; starts out true. */
+  int asw ;                     /* GET DATA's ASSUMEDSTRWIDTH value; starts out -1. */
+};
+
+int pseudo_base26 (const char *str);
+
+bool convert_cell_ref (const char *ref,
+                      int *col0, int *row0,
+                      int *coli, int *rowi);
+
+/* Cast helpers for xmlChar strings.  The includer must supply CHAR_CAST, xmlChar and atoi. */
+#define _xml(X) (CHAR_CAST (const xmlChar *, X))
+
+#define _xmlchar_to_int(X) (atoi(CHAR_CAST (const char *, X)))
+
+
+#endif
diff --git a/src/language/data-io/get-data.c b/src/language/data-io/get-data.c

index 47b65b445adfc44ffa15e9b72c9738703fc687a0..9b878c553a92bf1a16e37f5985506e025db924c6 100644 (file)
--- a/src/language/data-io/get-data.c
+++ b/src/language/data-io/get-data.c
@@ -18,10 +18,14 @@
  
  #include <stdlib.h>
  
+#include <string.h>
+
  #include "data/dataset.h"
  #include "data/dictionary.h"
  #include "data/format.h"
  #include "data/gnumeric-reader.h"
+#include "data/ods-reader.h"
+#include "data/spreadsheet-reader.h"
  #include "data/psql-reader.h"
  #include "data/settings.h"
  #include "language/command.h"
@@ -40,13 +44,16 @@
  #define _(msgid) gettext (msgid)
  #define N_(msgid) (msgid)
  
-static int parse_get_gnm (struct lexer *lexer, struct dataset *);
+static struct spreadsheet_read_info *parse_spreadsheet (struct lexer *lexer);
+static void destroy_spreadsheet_read_info (struct spreadsheet_read_info *);
+
  static int parse_get_txt (struct lexer *lexer, struct dataset *);
  static int parse_get_psql (struct lexer *lexer, struct dataset *);
  
  int
  cmd_get_data (struct lexer *lexer, struct dataset *ds)
  {
+  char *tok = NULL;
    lex_force_match (lexer, T_SLASH);
  
    if (!lex_force_match_id (lexer, "TYPE"))
@@ -54,14 +61,44 @@ cmd_get_data (struct lexer *lexer, struct dataset *ds)
  
    lex_force_match (lexer, T_EQUALS);
  
-  if (lex_match_id (lexer, "GNM"))
-    return parse_get_gnm (lexer, ds);
-  else if (lex_match_id (lexer, "TXT"))
-    return parse_get_txt (lexer, ds);
+  tok = strdup (lex_tokcstr (lexer));
+  if (lex_match_id (lexer, "TXT"))
+    {
+      return parse_get_txt (lexer, ds);
+    }
    else if (lex_match_id (lexer, "PSQL"))
-    return parse_get_psql (lexer, ds);
+    {
+      return parse_get_psql (lexer, ds);
+    }
+  else if (lex_match_id (lexer, "GNM") || 
+      lex_match_id (lexer, "ODS"))
+    {
+      struct casereader *reader = NULL;
+      struct dictionary *dict = NULL;
+      struct spreadsheet_read_info *sri = parse_spreadsheet (lexer);
+      if (NULL == sri)
+       goto error;
+
+      if ( 0 == strncasecmp (tok, "GNM", 3))
+       reader = gnumeric_open_reader (sri, &dict);
+      else if (0 == strncasecmp (tok, "ODS", 3))
+       reader = ods_open_reader (sri, &dict);
+
+      if (reader)
+       {
+         dataset_set_dict (ds, dict);
+         dataset_set_source (ds, reader);
+         destroy_spreadsheet_read_info (sri);
+         free (tok);
+         return CMD_SUCCESS;
+       }
+      destroy_spreadsheet_read_info (sri);
+    }
+  else
+    msg (SE, _("Unsupported TYPE %s."), tok);
  
-  msg (SE, _("Unsupported TYPE %s."), lex_tokcstr (lexer));
+ error:
+  free (tok);
    return CMD_FAILURE;
  }
  
@@ -141,10 +178,13 @@ parse_get_psql (struct lexer *lexer, struct dataset *ds)
    return CMD_FAILURE;
  }
  
-static int
-parse_get_gnm (struct lexer *lexer, struct dataset *ds)
+static struct spreadsheet_read_info *
+parse_spreadsheet (struct lexer *lexer)
  {
-  struct gnumeric_read_info gri  = {NULL, NULL, NULL, 1, true, -1};
+  struct spreadsheet_read_info *sri = xzalloc (sizeof *sri);
+  sri->sheet_index = 1;
+  sri->read_names = true;
+  sri->asw = -1;
  
    lex_force_match (lexer, T_SLASH);
  
@@ -156,7 +196,7 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds)
    if (!lex_force_string (lexer))
      goto error;
  
-  gri.file_name = utf8_to_filename (lex_tokcstr (lexer));
+  sri->file_name = utf8_to_filename (lex_tokcstr (lexer));
  
    lex_get (lexer);
  
@@ -165,7 +205,7 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds)
        if ( lex_match_id (lexer, "ASSUMEDSTRWIDTH"))
         {
           lex_match (lexer, T_EQUALS);
-         gri.asw = lex_integer (lexer);
+         sri->asw = lex_integer (lexer);
           lex_get (lexer);
         }
        else if (lex_match_id (lexer, "SHEET"))
@@ -176,14 +216,14 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds)
               if ( ! lex_force_string (lexer) )
                 goto error;
  
-             gri.sheet_name = ss_xstrdup (lex_tokss (lexer));
-             gri.sheet_index = -1;
+             sri->sheet_name = ss_xstrdup (lex_tokss (lexer));
+             sri->sheet_index = -1;
  
               lex_get (lexer);
             }
           else if (lex_match_id (lexer, "INDEX"))
             {
-             gri.sheet_index = lex_integer (lexer);
+             sri->sheet_index = lex_integer (lexer);
               lex_get (lexer);
             }
           else
@@ -195,14 +235,14 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds)
  
           if (lex_match_id (lexer, "FULL"))
             {
-             gri.cell_range = NULL;
+             sri->cell_range = NULL;
             }
           else if (lex_match_id (lexer, "RANGE"))
             {
               if ( ! lex_force_string (lexer) )
                 goto error;
  
-             gri.cell_range = ss_xstrdup (lex_tokss (lexer));
+             sri->cell_range = ss_xstrdup (lex_tokss (lexer));
               lex_get (lexer);
             }
           else
@@ -214,11 +254,11 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds)
  
           if ( lex_match_id (lexer, "ON"))
             {
-             gri.read_names = true;
+             sri->read_names = true;
             }
           else if (lex_match_id (lexer, "OFF"))
             {
-             gri.read_names = false;
+             sri->read_names = false;
             }
           else
             goto error;
@@ -230,30 +270,14 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds)
         }
      }
  
-  {
-    struct dictionary *dict = NULL;
-    struct casereader *reader = gnumeric_open_reader (&gri, &dict);
-
-    if ( reader )
-      {
-        dataset_set_dict (ds, dict);
-        dataset_set_source (ds, reader);
-      }
-  }
-
-  free (gri.file_name);
-  free (gri.sheet_name);
-  free (gri.cell_range);
-  return CMD_SUCCESS;
+  return sri;
  
   error:
-
-  free (gri.file_name);
-  free (gri.sheet_name);
-  free (gri.cell_range);
-  return CMD_FAILURE;
+  destroy_spreadsheet_read_info (sri);
+  return NULL;
  }
  
+
  static bool
  set_type (struct data_parser *parser, const char *subcommand,
            enum data_parser_type type, bool *has_type)
@@ -596,3 +620,16 @@ parse_get_txt (struct lexer *lexer, struct dataset *ds)
    free (name);
    return CMD_CASCADING_FAILURE;
  }
+
+/* Frees SRI together with the three strings it holds.  A null SRI is a no-op.  */
+static void
+destroy_spreadsheet_read_info (struct spreadsheet_read_info *sri)
+{
+  if (sri != NULL)
+    {
+      free (sri->file_name);
+      free (sri->sheet_name);
+      free (sri->cell_range);
+      free (sri);
+    }
+}
diff --git a/src/output/automake.mk b/src/output/automake.mk

index 6f9b149e82a8d7fe4ec83b208dc9ac68d1d51df0..78173e3ef2a45da602c8c39bc4af0a9638622e54 100644 (file)
--- a/src/output/automake.mk
+++ b/src/output/automake.mk
@@ -68,7 +68,7 @@ src_output_liboutput_la_SOURCES += \
         src/output/charts/roc-chart-cairo.c \
         src/output/charts/scree-cairo.c
  endif
-if ODT_SUPPORT
+if ODF_WRITE_SUPPORT
  src_output_liboutput_la_SOURCES += src/output/odt.c
  endif
  
diff --git a/tests/atlocal.in b/tests/atlocal.in

index 9b54a705b4755cbda91b9f46ddce34b1e589a62b..cfe019a238d4e2d7ee400b28c9d3ef7a0b198a43 100644 (file)
--- a/tests/atlocal.in
+++ b/tests/atlocal.in
@@ -3,6 +3,7 @@
  # Variables used internally by the testsuite.
  EXEEXT='@EXEEXT@'
  GNM_SUPPORT='@GNM_SUPPORT@'
+ODF_READ_SUPPORT='@ODF_READ_SUPPORT@'
  PERL='@PERL@'
  WITH_PERL_MODULE='@WITH_PERL_MODULE@'
  host='@host@'
diff --git a/tests/automake.mk b/tests/automake.mk

index 7d4afef7071fa4fb6778292ad3e3b3f6fb10d3ea..840f56b04764429d467abef722d26100e7808e7b 100644 (file)
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -244,7 +244,8 @@ EXTRA_DIST += \
         tests/data/num-out.expected.cmp.gz \
         tests/data/v13.sav \
         tests/data/v14.sav \
-        tests/language/data-io/Book1.gnm.unzipped
+        tests/language/data-io/Book1.gnm.unzipped \
+        tests/language/data-io/test.ods
  
  CLEANFILES += *.save pspp.* foo*
  
@@ -279,7 +280,7 @@ TESTSUITE_AT = \
         tests/language/data-io/data-reader.at \
         tests/language/data-io/dataset.at \
         tests/language/data-io/file-handle.at \
-       tests/language/data-io/get-data-gnm.at \
+       tests/language/data-io/get-data-spreadsheet.at \
         tests/language/data-io/get-data-psql.at \
         tests/language/data-io/get-data-txt.at \
         tests/language/data-io/get.at \
diff --git a/tests/language/data-io/get-data-gnm.at b/tests/language/data-io/get-data-gnm.at

deleted file mode 100644 (file)

index 92815dc..0000000
--- a/tests/language/data-io/get-data-gnm.at
+++ /dev/null
@@ -1,162 +0,0 @@
-AT_BANNER([GET DATA /TYPE=GNM])
-
-AT_SETUP([GET DATA /TYPE=GNM with CELLRANGE])
-AT_SKIP_IF([test "$GNM_SUPPORT" = no])
-AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric])
-AT_DATA([get-data.sps], [dnl
-GET DATA /TYPE=gnm /FILE='Book1.gnumeric'  /READNAMES=off /SHEET=name 'This' /CELLRANGE=range 'g9:i13' .
-DISPLAY VARIABLES.
-LIST.
-])
-AT_CHECK([pspp -o pspp.csv get-data.sps])
-AT_CHECK([cat pspp.csv], [0], [dnl
-Variable,Description,,Position
-VAR001,Format: F8.2,,1
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-VAR002,Format: A8,,2
-,Measure: Nominal,,
-,Display Alignment: Left,,
-,Display Width: 8,,
-VAR003,Format: F8.2,,3
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-
-Table: Data List
-VAR001,VAR002,VAR003
-.00,fred    ,20.00
-1.00,11      ,21.00
-2.00,twelve  ,22.00
-3.00,13      ,23.00
-4.00,14      ,24.00
-])
-AT_CLEANUP
-
-AT_SETUP([GET DATA /TYPE=GNM with CELLRANGE and READNAMES])
-AT_SKIP_IF([test "$GNM_SUPPORT" = no])
-AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric])
-AT_DATA([get-data.sps], [dnl
-GET DATA /TYPE=gnm /FILE='Book1.gnumeric'  /READNAMES=on /SHEET=name 'This' /CELLRANGE=range 'g8:i13' .
-DISPLAY VARIABLES.
-LIST.
-])
-AT_CHECK([pspp -o pspp.csv get-data.sps])
-AT_CHECK([cat pspp.csv], [0], [dnl
-Variable,Description,,Position
-V1,Format: F8.2,,1
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-V2,Format: A8,,2
-,Measure: Nominal,,
-,Display Alignment: Left,,
-,Display Width: 8,,
-VAR001,Format: F8.2,,3
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-
-Table: Data List
-V1,V2,VAR001
-.00,fred    ,20.00
-1.00,11      ,21.00
-2.00,twelve  ,22.00
-3.00,13      ,23.00
-4.00,14      ,24.00
-])
-AT_CLEANUP
-
-AT_SETUP([GET DATA /TYPE=GNM without CELLRANGE])
-AT_SKIP_IF([test "$GNM_SUPPORT" = no])
-AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric])
-AT_DATA([get-data.sps], [dnl
-GET DATA /TYPE=gnm /FILE='Book1.gnumeric' /SHEET=index 3.
-DISPLAY VARIABLES.
-LIST.
-])
-AT_CHECK([pspp -o pspp.csv get-data.sps])
-AT_CHECK([cat pspp.csv], [0], [dnl
-Variable,Description,,Position
-name,Format: A8,,1
-,Measure: Nominal,,
-,Display Alignment: Left,,
-,Display Width: 8,,
-id,Format: F8.2,,2
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-height,Format: F8.2,,3
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-
-Table: Data List
-name,id,height
-fred    ,.00,23.40
-bert    ,1.00,.56
-charlie ,2.00,.  @&t@
-dick    ,3.00,-34.09
-])
-AT_CLEANUP
-
-AT_SETUP([GET DATA /TYPE=GNM with missing data])
-AT_SKIP_IF([test "$GNM_SUPPORT" = no])
-AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric])
-AT_DATA([get-data.sps], [dnl
-* This sheet has no data in one of its variables
-GET DATA /TYPE=gnm /FILE='Book1.gnumeric' /READNAMES=on /SHEET=index 5.
-DISPLAY VARIABLES.
-LIST.
-])
-AT_CHECK([pspp -o pspp.csv get-data.sps])
-AT_CHECK([cat pspp.csv], [0], [dnl
-Variable,Description,,Position
-vone,Format: F8.2,,1
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-vtwo,Format: F8.2,,2
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-vthree,Format: A8,,3
-,Measure: Nominal,,
-,Display Alignment: Left,,
-,Display Width: 8,,
-v4,Format: F8.2,,4
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-
-Table: Data List
-vone,vtwo,vthree,v4
-1.00,3.00,,5.00
-2.00,4.00,,6.00
-])
-AT_CLEANUP
-
-AT_SETUP([GET DATA /TYPE=GNM with empty sheet])
-AT_SKIP_IF([test "$GNM_SUPPORT" = no])
-AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric])
-AT_DATA([get-data.sps], [dnl
-* This sheet is empty
-GET DATA /TYPE=gnm /FILE='Book1.gnumeric' /SHEET=name 'Empty'.
-])
-AT_CHECK([pspp -o pspp.csv get-data.sps], [0], [dnl
-warning: Selected sheet or range of spreadsheet `Book1.gnumeric' is empty.
-])
-AT_CLEANUP
-
-AT_SETUP([GET DATA /TYPE=GNM with nonexistent sheet])
-AT_SKIP_IF([test "$GNM_SUPPORT" = no])
-AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric])
-AT_DATA([get-data.sps], [dnl
-* This sheet doesnt exist.
-GET DATA /TYPE=gnm /FILE='Book1.gnumeric' /SHEET=name 'foobarxx'.
-])
-AT_CHECK([pspp -o pspp.csv get-data.sps], [0], [dnl
-warning: Selected sheet or range of spreadsheet `Book1.gnumeric' is empty.
-])
-AT_CLEANUP
diff --git a/tests/language/data-io/get-data-spreadsheet.at b/tests/language/data-io/get-data-spreadsheet.at

new file mode 100644 (file)

index 0000000..19b8964
--- /dev/null
+++ b/tests/language/data-io/get-data-spreadsheet.at
@@ -0,0 +1,181 @@
+dnl SPREADSHEET_TEST_PREP(TYPE): stage the GNM or ODS test file, define testsheet as its name, skip if TYPE support is not yes.
+m4_define([SPREADSHEET_TEST_PREP],[dnl
+ m4_if($1,[GNM],[dnl
+    AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric])dnl
+    m4_define([testsheet],[Book1.gnumeric])dnl
+    AT_SKIP_IF([test n$GNM_SUPPORT != nyes])dnl
+    ]) dnl
+ m4_if($1,[ODS],[dnl
+    AT_CHECK([cp $top_srcdir/tests/language/data-io/test.ods test.ods])dnl
+    m4_define([testsheet],[test.ods])dnl
+    AT_SKIP_IF([test n$ODF_READ_SUPPORT != nyes])dnl
+    ])dnl
+])
+dnl CHECK_SPREADSHEET_READER(TYPE): emit six GET DATA /TYPE=TYPE test groups; the expected output is the same for every TYPE.
+m4_define([CHECK_SPREADSHEET_READER],
+ [dnl
+AT_SETUP([GET DATA /TYPE=$1 with CELLRANGE])
+SPREADSHEET_TEST_PREP($1)
+AT_DATA([get-data.sps], [dnl
+GET DATA /TYPE=$1 /FILE='testsheet'  /READNAMES=off /SHEET=name 'This' /CELLRANGE=range 'g9:i13' .
+DISPLAY VARIABLES.
+LIST.
+])
+AT_CHECK([pspp -o pspp.csv get-data.sps])
+AT_CHECK([cat pspp.csv], [0], [dnl
+Variable,Description,,Position
+VAR001,Format: F8.2,,1
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+VAR002,Format: A8,,2
+,Measure: Nominal,,
+,Display Alignment: Left,,
+,Display Width: 8,,
+VAR003,Format: F8.2,,3
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+
+Table: Data List
+VAR001,VAR002,VAR003
+.00,fred    ,20.00
+1.00,11      ,21.00
+2.00,twelve  ,22.00
+3.00,13      ,23.00
+4.00,14      ,24.00
+])
+AT_CLEANUP
+
+AT_SETUP([GET DATA /TYPE=$1 with CELLRANGE and READNAMES])
+SPREADSHEET_TEST_PREP($1)
+AT_DATA([get-data.sps], [dnl
+GET DATA /TYPE=$1 /FILE='testsheet'  /READNAMES=on /SHEET=name 'This' /CELLRANGE=range 'g8:i13' .
+DISPLAY VARIABLES.
+LIST.
+])
+AT_CHECK([pspp -o pspp.csv get-data.sps])
+AT_CHECK([cat pspp.csv], [0], [dnl
+Variable,Description,,Position
+V1,Format: F8.2,,1
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+V2,Format: A8,,2
+,Measure: Nominal,,
+,Display Alignment: Left,,
+,Display Width: 8,,
+VAR001,Format: F8.2,,3
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+
+Table: Data List
+V1,V2,VAR001
+.00,fred    ,20.00
+1.00,11      ,21.00
+2.00,twelve  ,22.00
+3.00,13      ,23.00
+4.00,14      ,24.00
+])
+AT_CLEANUP
+
+AT_SETUP([GET DATA /TYPE=$1 without CELLRANGE])
+SPREADSHEET_TEST_PREP($1)
+AT_DATA([get-data.sps], [dnl
+GET DATA /TYPE=$1 /FILE='testsheet' /SHEET=index 3.
+DISPLAY VARIABLES.
+LIST.
+])
+AT_CHECK([pspp -o pspp.csv get-data.sps])
+AT_CHECK([cat pspp.csv], [0], [dnl
+Variable,Description,,Position
+name,Format: A8,,1
+,Measure: Nominal,,
+,Display Alignment: Left,,
+,Display Width: 8,,
+id,Format: F8.2,,2
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+height,Format: F8.2,,3
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+
+Table: Data List
+name,id,height
+fred    ,.00,23.40
+bert    ,1.00,.56
+charlie ,2.00,.  @&t@
+dick    ,3.00,-34.09
+])
+AT_CLEANUP
+
+AT_SETUP([GET DATA /TYPE=$1 with missing data])
+SPREADSHEET_TEST_PREP($1)
+AT_DATA([get-data.sps], [dnl
+* This sheet has no data in one of its variables
+GET DATA /TYPE=$1 /FILE='testsheet' /READNAMES=on /SHEET=index 5.
+DISPLAY VARIABLES.
+LIST.
+])
+AT_CHECK([pspp -o pspp.csv get-data.sps])
+AT_CHECK([cat pspp.csv], [0], [dnl
+Variable,Description,,Position
+vone,Format: F8.2,,1
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+vtwo,Format: F8.2,,2
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+vthree,Format: A8,,3
+,Measure: Nominal,,
+,Display Alignment: Left,,
+,Display Width: 8,,
+v4,Format: F8.2,,4
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+
+Table: Data List
+vone,vtwo,vthree,v4
+1.00,3.00,,5.00
+2.00,4.00,,6.00
+])
+AT_CLEANUP
+
+AT_SETUP([GET DATA /TYPE=$1 with empty sheet])
+SPREADSHEET_TEST_PREP($1)
+AT_DATA([get-data.sps], [dnl
+* This sheet is empty
+GET DATA /TYPE=$1 /FILE='testsheet' /SHEET=name 'Empty'.
+])
+AT_CHECK([pspp -o pspp.csv get-data.sps], [0], [dnl
+warning: Selected sheet or range of spreadsheet `testsheet' is empty.
+])
+AT_CLEANUP
+
+AT_SETUP([GET DATA /TYPE=$1 with nonexistent sheet])
+SPREADSHEET_TEST_PREP($1)
+AT_DATA([get-data.sps], [dnl
+* This sheet doesnt exist.
+GET DATA /TYPE=$1 /FILE='testsheet' /SHEET=name 'foobarxx'.
+])
+AT_CHECK([pspp -o pspp.csv get-data.sps], [0], [dnl
+warning: Selected sheet or range of spreadsheet `testsheet' is empty.
+])
+AT_CLEANUP
+])
+
+
+AT_BANNER([GET DATA Spreadsheet /TYPE=GNM])
+
+CHECK_SPREADSHEET_READER([GNM])
+
+AT_BANNER([GET DATA Spreadsheet /TYPE=ODS])
+
+CHECK_SPREADSHEET_READER([ODS])
+
diff --git a/tests/language/data-io/test.ods b/tests/language/data-io/test.ods

new file mode 100644 (file)

index 0000000..c079454

Binary files /dev/null and b/tests/language/data-io/test.ods differ
author	John Darrington <john@darrington.wattle.id.au>
	Fri, 1 Jul 2011 15:27:26 +0000 (17:27 +0200)
committer	John Darrington <john@darrington.wattle.id.au>
	Fri, 1 Jul 2011 15:27:26 +0000 (17:27 +0200)
configure.ac		patch \| blob \| history
doc/files.texi		patch \| blob \| history
src/data/automake.mk		patch \| blob \| history
src/data/gnumeric-reader.c		patch \| blob \| history
src/data/gnumeric-reader.h		patch \| blob \| history
src/data/ods-reader.c	[new file with mode: 0644]	patch \| blob
src/data/ods-reader.h	[new file with mode: 0644]	patch \| blob
src/data/spreadsheet-reader.c	[new file with mode: 0644]	patch \| blob
src/data/spreadsheet-reader.h	[new file with mode: 0644]	patch \| blob
src/language/data-io/get-data.c		patch \| blob \| history
src/output/automake.mk		patch \| blob \| history
tests/atlocal.in		patch \| blob \| history
tests/automake.mk		patch \| blob \| history
tests/language/data-io/get-data-gnm.at	[deleted file]	patch \| blob \| history
tests/language/data-io/get-data-spreadsheet.at	[new file with mode: 0644]	patch \| blob
tests/language/data-io/test.ods	[new file with mode: 0644]	patch \| blob