Added support to read OpenDocument spreadsheet files

author John Darrington <john@darrington.wattle.id.au>

Fri, 1 Jul 2011 15:27:26 +0000 (17:27 +0200)

committer John Darrington <john@darrington.wattle.id.au>

Fri, 1 Jul 2011 15:27:26 +0000 (17:27 +0200)
author John Darrington <john@darrington.wattle.id.au>
Fri, 1 Jul 2011 15:27:26 +0000 (17:27 +0200)
committer John Darrington <john@darrington.wattle.id.au>
Fri, 1 Jul 2011 15:27:26 +0000 (17:27 +0200)
diff --git a/configure.ac b/configure.ac

index eb10e560a2d2a4362a6f17ab269855b9ade0ecee..719f4c7411d68517adaeb31186257da92bceffb5 100644 (file)
--- a/configure.ac
+++ b/configure.ac
@@ -196,24 +196,30 @@ AC_DEFINE(
    [crc32], [gl_crc32],
    [Avoid making zlib call gnulib's crc32() instead of its own.])
  
    [crc32], [gl_crc32],
    [Avoid making zlib call gnulib's crc32() instead of its own.])
  
-dnl Gnumeric support requires libxml2 and zlib.
+dnl Gnumeric and OpenDocument (read) support both require libxml2 and zlib.
  if test $HAVE_LIBXML2 = yes && test $HAVE_ZLIB = yes; then
    GNM_SUPPORT=yes
  if test $HAVE_LIBXML2 = yes && test $HAVE_ZLIB = yes; then
    GNM_SUPPORT=yes
+  ODF_READ_SUPPORT=yes
    AC_DEFINE(
      [GNM_SUPPORT], [1],
      [Define to 1 if building in support for reading Gnumeric files.])
    AC_DEFINE(
      [GNM_SUPPORT], [1],
      [Define to 1 if building in support for reading Gnumeric files.])
+  AC_DEFINE(
+    [ODF_READ_SUPPORT], [1],
+    [Define to 1 if building in support for reading OpenDocument files.])
  else
    GNM_SUPPORT=no
  else
    GNM_SUPPORT=no
+  ODF_READ_SUPPORT=no
  fi
  AC_SUBST([GNM_SUPPORT])
  fi
  AC_SUBST([GNM_SUPPORT])
+AC_SUBST([ODF_READ_SUPPORT])
  
  
-dnl ODT support requires libxml2.
+dnl ODF support requires libxml2 (zlib is optional).
  if test $HAVE_LIBXML2 = yes; then
    AC_DEFINE(
  if test $HAVE_LIBXML2 = yes; then
    AC_DEFINE(
-    [ODT_SUPPORT], [1],
-    [Define to 1 if building in support for writing ODT files.])
+    [ODF_WRITE_SUPPORT], [1],
+    [Define to 1 if building in support for writing OpenDocument files.])
  fi
  fi
-AM_CONDITIONAL([ODT_SUPPORT], [test $HAVE_LIBXML2 = yes])
+AM_CONDITIONAL([ODF_WRITE_SUPPORT], [test $HAVE_LIBXML2 = yes])
  
  AC_ARG_WITH(
    gui_tools,
  
  AC_ARG_WITH(
    gui_tools,
diff --git a/doc/files.texi b/doc/files.texi

index 89d043f8769f67c6c898dcf9a7729d0fbcd3356d..cdce0a3c4689875560ef57dad998253c5f51b5e2 100644 (file)
--- a/doc/files.texi
+++ b/doc/files.texi
@@ -182,7 +182,7 @@ Use of @cmd{GET} to read a portable file is a PSPP extension.
  
  @display
  GET DATA
  
  @display
  GET DATA
-        /TYPE=@{GNM,PSQL,TXT@}
+        /TYPE=@{GNM,ODS,PSQL,TXT@}
          @dots{}additional subcommands depending on TYPE@dots{}
  @end display
  
          @dots{}additional subcommands depending on TYPE@dots{}
  @end display
  
@@ -199,6 +199,9 @@ PSPP currently supports the following file types:
  @item GNM
  Spreadsheet files created by Gnumeric (@url{http://gnumeric.org}).
  
  @item GNM
  Spreadsheet files created by Gnumeric (@url{http://gnumeric.org}).
  
+@item ODS
+Spreadsheet files in OpenDocument format.
+
  @item PSQL
  Relations from PostgreSQL databases (@url{http://postgresql.org}).
  
  @item PSQL
  Relations from PostgreSQL databases (@url{http://postgresql.org}).
  
@@ -210,16 +213,16 @@ Each supported file type has additional subcommands, explained in
  separate sections below.
  
  @menu
  separate sections below.
  
  @menu
-* GET DATA /TYPE=GNM::
-* GET DATA /TYPE=PSQL::
-* GET DATA /TYPE=TXT::
+* GET DATA /TYPE=GNM/ODS::     Spreadsheets
+* GET DATA /TYPE=PSQL::        Databases
+* GET DATA /TYPE=TXT::         Delimited Text Files
  @end menu
  
  @end menu
  
-@node GET DATA /TYPE=GNM
-@subsection Gnumeric Spreadsheet Files
+@node GET DATA /TYPE=GNM/ODS
+@subsection Spreadsheet Files
  
  @display
  
  @display
-GET DATA /TYPE=GNM
+GET DATA /TYPE=@{GNM, ODS@}
          /FILE=@{'file-name'@}
          /SHEET=@{NAME 'sheet-name', INDEX n@}
          /CELLRANGE=@{RANGE 'range', FULL@}
          /FILE=@{'file-name'@}
          /SHEET=@{NAME 'sheet-name', INDEX n@}
          /CELLRANGE=@{RANGE 'range', FULL@}
@@ -228,11 +231,19 @@ GET DATA /TYPE=GNM
  @end display
  
  @cindex Gnumeric
  @end display
  
  @cindex Gnumeric
+@cindex OpenDocument
  @cindex spreadsheet files
  @cindex spreadsheet files
-To use GET DATA to read a spreadsheet file created by Gnumeric
-(@url{http://gnumeric.org}), specify TYPE=GNM to indicate the file's
-format and use FILE to indicate the Gnumeric file to be read.  All
-other subcommands are optional.
+
+Gnumeric spreadsheets (@url{http://gnumeric.org}), and spreadsheets
+in OpenDocument format
+(@url{http://libreplanet.org/wiki/Group:OpenDocument/Software})
+can be read using the GET DATA command.
+Use the TYPE subcommand to indicate the file's format.  
+/TYPE=GNM indicates Gnumeric files,
+/TYPE=ODS indicates OpenDocument.
+The FILE subcommand is mandatory.
+Use it to specify the name of the file to be read.
+All other subcommands are optional.
  
  The format of each variable is determined by the format of the spreadsheet 
  cell containing the first datum for the variable.
  
  The format of each variable is determined by the format of the spreadsheet 
  cell containing the first datum for the variable.
@@ -240,10 +251,6 @@ If this cell is of string (text) format, then the width of the variable is
  determined from the length of the string it contains, unless the 
  ASSUMEDVARWIDTH subcommand is given.
  
  determined from the length of the string it contains, unless the 
  ASSUMEDVARWIDTH subcommand is given.
  
-
-The FILE subcommand is mandatory. Specify the name of the file
-to be read.
-
  The SHEET subcommand specifies the sheet within the spreadsheet file to read.
  There are two forms of the SHEET subcommand.
  In the first form,
  The SHEET subcommand specifies the sheet within the spreadsheet file to read.
  There are two forms of the SHEET subcommand.
  In the first form,
@@ -266,8 +273,8 @@ If no CELLRANGE subcommand is given, then the entire sheet is read.
  
  If @samp{/READNAMES=ON} is specified, then the contents of cells of
  the first row are used as the names of the variables in which to store
  
  If @samp{/READNAMES=ON} is specified, then the contents of cells of
  the first row are used as the names of the variables in which to store
-the data from subsequent rows. 
-If the READNAMES command is omitted, or if @samp{/READNAMES=OFF} is
+the data from subsequent rows.  This is the default.
+If @samp{/READNAMES=OFF} is
  used, then the variables  receive automatically assigned names.
  
  The ASSUMEDVARWIDTH subcommand specifies the maximum width of string
  used, then the variables  receive automatically assigned names.
  
  The ASSUMEDVARWIDTH subcommand specifies the maximum width of string
diff --git a/src/data/automake.mk b/src/data/automake.mk

index 81a9d9c5259bf87838cfe68260eb68eb28fa44d0..4385fd6d63055ead3ddba1836ba57f8aedbef006 100644 (file)
--- a/src/data/automake.mk
+++ b/src/data/automake.mk
@@ -82,6 +82,8 @@ src_data_libdata_la_SOURCES = \
         src/data/make-file.h \
         src/data/mrset.c \
         src/data/mrset.h \
         src/data/make-file.h \
         src/data/mrset.c \
         src/data/mrset.h \
+       src/data/ods-reader.c \
+       src/data/ods-reader.h \
         src/data/por-file-reader.c \
         src/data/por-file-reader.h \
         src/data/por-file-writer.c \
         src/data/por-file-reader.c \
         src/data/por-file-reader.h \
         src/data/por-file-writer.c \
@@ -94,6 +96,8 @@ src_data_libdata_la_SOURCES = \
         src/data/settings.h \
         src/data/short-names.c \
         src/data/short-names.h \
         src/data/settings.h \
         src/data/short-names.c \
         src/data/short-names.h \
+       src/data/spreadsheet-reader.c \
+       src/data/spreadsheet-reader.h \
         src/data/subcase.c \
         src/data/subcase.h \
         src/data/sys-file-encoding.c \
         src/data/subcase.c \
         src/data/subcase.h \
         src/data/sys-file-encoding.c \
diff --git a/src/data/gnumeric-reader.c b/src/data/gnumeric-reader.c

index 61fbab899b8bd44eff63633bdb0f90a57a2553d4..56ebc3062aed3090ccf26748b8d96a105bac85dc 100644 (file)
--- a/src/data/gnumeric-reader.c
+++ b/src/data/gnumeric-reader.c
@@ -14,8 +14,6 @@
     You should have received a copy of the GNU General Public License
     along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  
     You should have received a copy of the GNU General Public License
     along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  
-
-
  #include <config.h>
  
  #include "libpspp/message.h"
  #include <config.h>
  
  #include "libpspp/message.h"
@@ -27,13 +25,14 @@
  #define _(msgid) gettext (msgid)
  #define N_(msgid) (msgid)
  
  #define _(msgid) gettext (msgid)
  #define N_(msgid) (msgid)
  
+#include "spreadsheet-reader.h"
  
  #if !GNM_SUPPORT
  
  struct casereader *
  
  #if !GNM_SUPPORT
  
  struct casereader *
-gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict)
+gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dict)
  {
  {
-  msg (ME, _("Support for Gnumeric files was not compiled into this installation of PSPP"));
+  msg (ME, _("Support for %s files was not compiled into this installation of PSPP"), "Gnumeric");
  
    return NULL;
  }
  
    return NULL;
  }
@@ -59,9 +58,6 @@ gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict)
  
  #include "gl/xalloc.h"
  
  
  #include "gl/xalloc.h"
  
-/* Default width of string variables. */
-#define GNUMERIC_DEFAULT_WIDTH 8
-
  static void gnm_file_casereader_destroy (struct casereader *, void *);
  
  static struct ccase *gnm_file_casereader_read (struct casereader *, void *);
  static void gnm_file_casereader_destroy (struct casereader *, void *);
  
  static struct ccase *gnm_file_casereader_read (struct casereader *, void *);
@@ -74,73 +70,6 @@ static const struct casereader_class gnm_file_casereader_class =
      NULL,
    };
  
      NULL,
    };
  
-/* Convert a string, which is an integer encoded in base26
-   IE, A=0, B=1, ... Z=25 to the integer it represents.
-   ... except that in this scheme, digits with an exponent
-   greater than 1 are implicitly incremented by 1, so
-   AA  = 0 + 1*26, AB = 1 + 1*26,
-   ABC = 2 + 2*26 + 1*26^2 ....
-*/
-static int
-pseudo_base26 (const char *str)
-{
-  int i;
-  int multiplier = 1;
-  int result = 0;
-  int len = strlen (str);
-
-  for ( i = len - 1 ; i >= 0; --i)
-    {
-      int mantissa = (str[i] - 'A');
-
-      if ( mantissa < 0 || mantissa > 25 )
-       return -1;
-
-      if ( i != len - 1)
-       mantissa++;
-
-      result += mantissa * multiplier;
-
-      multiplier *= 26;
-    }
-
-  return result;
-}
-
-
-
-/* Convert a cell reference in the form "A1:B2", to
-   integers.  A1 means column zero, row zero.
-   B1 means column 1 row 0. AA1 means column 26, row 0.
-*/
-static bool
-convert_cell_ref (const char *ref,
-                 int *col0, int *row0,
-                 int *coli, int *rowi)
-{
-  char startcol[5];
-  char stopcol [5];
-
-  int startrow;
-  int stoprow;
-
-  int n = sscanf (ref, "%4[a-zA-Z]%d:%4[a-zA-Z]%d",
-             startcol, &startrow,
-             stopcol, &stoprow);
-  if ( n != 4)
-    return false;
-
-  str_uppercase (startcol);
-  *col0 = pseudo_base26 (startcol);
-  str_uppercase (stopcol);
-  *coli = pseudo_base26 (stopcol);
-  *row0 = startrow - 1;
-  *rowi = stoprow - 1 ;
-
-  return true;
-}
-
-
  enum reader_state
    {
      STATE_INIT = 0,        /* Initial state */
  enum reader_state
    {
      STATE_INIT = 0,        /* Initial state */
@@ -180,9 +109,6 @@ struct gnumeric_reader
  
  static void process_node (struct gnumeric_reader *r);
  
  
  static void process_node (struct gnumeric_reader *r);
  
-#define _xml(X) (CHAR_CAST (const xmlChar *, X))
-
-#define _xmlchar_to_int(X) (atoi(CHAR_CAST (const char *, X)))
  
  static void
  gnm_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
  
  static void
  gnm_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
@@ -345,7 +271,7 @@ struct var_spec
  };
  
  struct casereader *
  };
  
  struct casereader *
-gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict)
+gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dict)
  {
    unsigned long int vstart = 0;
    int ret;
  {
    unsigned long int vstart = 0;
    int ret;
@@ -473,7 +399,7 @@ gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict)
  
               if (-1 ==  var_spec [idx].width )
                 var_spec [idx].width = (gri->asw == -1) ?
  
               if (-1 ==  var_spec [idx].width )
                 var_spec [idx].width = (gri->asw == -1) ?
-                 ROUND_UP (strlen(text), GNUMERIC_DEFAULT_WIDTH) : gri->asw;
+                 ROUND_UP (strlen(text), SPREADSHEET_DEFAULT_WIDTH) : gri->asw;
             }
  
           free (value);
             }
  
           free (value);
@@ -506,7 +432,7 @@ gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict)
        /* Probably no data exists for this variable, so allocate a
          default width */
        if ( var_spec[i].width == -1 )
        /* Probably no data exists for this variable, so allocate a
          default width */
        if ( var_spec[i].width == -1 )
-       var_spec[i].width = GNUMERIC_DEFAULT_WIDTH;
+       var_spec[i].width = SPREADSHEET_DEFAULT_WIDTH;
  
        name = dict_make_unique_var_name (r->dict, var_spec[i].name, &vstart);
        dict_create_var (r->dict, name, var_spec[i].width);
  
        name = dict_make_unique_var_name (r->dict, var_spec[i].name, &vstart);
        dict_create_var (r->dict, name, var_spec[i].width);
diff --git a/src/data/gnumeric-reader.h b/src/data/gnumeric-reader.h

index b313fc78768cf446975156c6d68647f277a72d4b..fcd338567543e3e17d0ad1c591cef59be23ad805 100644 (file)
--- a/src/data/gnumeric-reader.h
+++ b/src/data/gnumeric-reader.h
@@ -20,21 +20,10 @@
  #include <stdbool.h>
  
  struct casereader;
  #include <stdbool.h>
  
  struct casereader;
-
-
-struct gnumeric_read_info
-{
-  char *sheet_name ;            /* In UTF-8. */
-  char *file_name ;             /* In filename encoding. */
-  char *cell_range ;            /* In UTF-8. */
-  int sheet_index ;
-  bool read_names ;
-  int asw ;
-};
-
  struct dictionary;
  struct dictionary;
+struct spreadsheet_read_info;
  
  
-struct casereader * gnumeric_open_reader (struct gnumeric_read_info *, struct dictionary **);
+struct casereader * gnumeric_open_reader (struct spreadsheet_read_info *, struct dictionary **);
  
  
  #endif
  
  
  #endif
diff --git a/src/data/ods-reader.c b/src/data/ods-reader.c

new file mode 100644 (file)

index 0000000..51ee5ac
--- /dev/null
+++ b/src/data/ods-reader.c
@@ -0,0 +1,681 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "libpspp/message.h"
+#include "libpspp/misc.h"
+
+#include "data/data-in.h"
+
+#include "gl/minmax.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+#define N_(msgid) (msgid)
+
+#include "ods-reader.h"
+#include "spreadsheet-reader.h"
+
+#if !ODF_READ_SUPPORT
+
+/* Stand-in used when ODF_READ_SUPPORT is not defined to a nonzero
+   value: it reports that OpenDocument support is missing and returns
+   a null pointer.  GRI and DICT are not examined. */
+struct casereader *
+ods_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dict)
+{
+  msg (ME, _("Support for %s files was not compiled into this installation of PSPP"), "OpenDocument");
+
+  return NULL;
+}
+
+#else
+
+#include "libpspp/zip-reader.h"
+
+
+#include <assert.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <libxml/xmlreader.h>
+#include <zlib.h>
+
+#include "data/format.h"
+#include "data/case.h"
+#include "data/casereader-provider.h"
+#include "data/dictionary.h"
+#include "data/identifier.h"
+#include "data/value.h"
+#include "data/variable.h"
+#include "libpspp/i18n.h"
+#include "libpspp/str.h"
+
+#include "gl/xalloc.h"
+
+static void ods_file_casereader_destroy (struct casereader *, void *);
+
+static struct ccase *ods_file_casereader_read (struct casereader *, void *);
+
+/* Callback table handed to casereader_create_sequential() by
+   ods_open_reader().  Only the "read" and "destroy" callbacks are
+   supplied; the remaining two members are left null. */
+static const struct casereader_class ods_file_casereader_class =
+  {
+    ods_file_casereader_read,
+    ods_file_casereader_destroy,
+    NULL,
+    NULL,
+  };
+
+/* States of the parser driven by process_node().  The enumerators are
+   ordered from outermost to innermost XML nesting level; other code
+   relies on that order when it compares states with `<' and `>'. */
+enum reader_state
+  {
+    STATE_INIT = 0,        /* Initial state */
+    STATE_SPREADSHEET,     /* Found the start of the spreadsheet doc */
+    STATE_TABLE,           /* Found the sheet that we actually want */
+    STATE_ROW,             /* Found the start of the cell array */
+    STATE_CELL,            /* Found a cell */
+    STATE_CELL_CONTENT     /* Found the text within a cell */
+  };
+
+/* State of one OpenDocument spreadsheet reader.  It is allocated by
+   ods_open_reader() and released by ods_file_casereader_destroy(). */
+struct ods_reader
+{
+  xmlTextReaderPtr xtr;         /* libxml2 reader over "content.xml". */
+
+  enum reader_state state;      /* Current state; updated by process_node(). */
+  bool sheet_found;             /* Set once the requested sheet was matched. */
+  int row;                      /* Row of the current table; -1 before any row. */
+  int col;                      /* Column of the current cell; -1 outside a row. */
+  int node_type;                /* xmlTextReaderNodeType() of the latest node. */
+  int sheet_index;              /* Number of table:table elements seen so far. */
+
+  const xmlChar *target_sheet;  /* Name of the wanted sheet, or null. */
+  int target_sheet_index;       /* Index of the wanted sheet; used only when
+                                   target_sheet is null. */
+
+  /* Cell range to read.  Without a CELLRANGE the start values are 0
+     and the stop values are -1. */
+  int start_row;
+  int start_col;
+  int stop_row;
+  int stop_col;
+
+  struct caseproto *proto;      /* Prototype of the cases that are produced. */
+  struct dictionary *dict;      /* Dictionary created by ods_open_reader(). */
+  struct ccase *first_case;     /* First data row, cached by ods_open_reader(). */
+  bool used_first_case;         /* True once first_case has been returned. */
+  bool read_names;              /* Copy of the caller's read_names setting. */
+
+  struct string ods_errs;       /* Error text filled in by the zip reader. */
+  int span;                     /* table:number-columns-repeated of the current
+                                   cell; 1 if the attribute is absent. */
+};
+
+static void process_node (struct ods_reader *r);
+
+/* Casereader "destroy" callback for OpenDocument spreadsheet readers.
+
+   Releases everything owned by the ods_reader R_: the libxml2 text
+   reader, the accumulated error text (which is first reported, if there
+   is any), the cached first case (only when it was never handed out by
+   ods_file_casereader_read) and the case prototype.
+
+   Does nothing if R_ is a null pointer.  READER is not used. */
+static void
+ods_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
+{
+  struct ods_reader *r = r_;
+  if ( r == NULL)
+    return ;
+
+  if (r->xtr)
+    xmlFreeTextReader (r->xtr);
+
+  /* The error text is filled in by the zip reader and may contain
+     arbitrary bytes taken from the file, so it must never be used as
+     the format string itself: a stray `%' would be interpreted as a
+     conversion specification. */
+  if ( ! ds_is_empty (&r->ods_errs))
+    msg (ME, "%s", ds_cstr (&r->ods_errs));
+
+  ds_destroy (&r->ods_errs);
+
+  /* Once the first case has been returned, the caller owns it. */
+  if ( ! r->used_first_case )
+    case_unref (r->first_case);
+
+  caseproto_unref (r->proto);
+
+  free (r);
+}
+
+/* Examines the node on which R's XML reader is currently positioned
+   and advances R's state machine accordingly.
+
+   As side effects it records the node type in R->node_type and keeps
+   R->sheet_index, R->row, R->col and R->span up to date:
+
+   - R->sheet_index is incremented for every table:table element.
+   - R->row is incremented for every table:table-row element of the
+     selected table.
+   - R->col is advanced by the span of the previous cell for every
+     table:table-cell element, and reset to -1 at the end of a row.
+   - R->span is taken from the table:number-columns-repeated attribute
+     of the current cell, defaulting to 1.
+
+   Strings returned by libxml2 are released with xmlFree, as libxml2
+   requires, rather than with free. */
+static void
+process_node (struct ods_reader *r)
+{
+  xmlChar *name = xmlTextReaderName (r->xtr);
+  if (name == NULL)
+    name = xmlStrdup (_xml ("--"));
+
+  r->node_type = xmlTextReaderNodeType (r->xtr);
+
+  switch ( r->state)
+    {
+    case STATE_INIT:
+      if (0 == xmlStrcasecmp (name, _xml("office:spreadsheet")) &&
+          XML_READER_TYPE_ELEMENT  == r->node_type)
+        {
+          r->state = STATE_SPREADSHEET;
+        }
+      break;
+    case STATE_SPREADSHEET:
+      if (0 == xmlStrcasecmp (name, _xml("table:table")))
+        {
+          if (XML_READER_TYPE_ELEMENT == r->node_type)
+            {
+              r->col = -1;
+              r->row = -1;
+              ++r->sheet_index;
+              if ( r->target_sheet != NULL)
+                {
+                  /* Select the sheet by name. */
+                  xmlChar *value = xmlTextReaderGetAttribute (r->xtr, _xml ("table:name"));
+                  if ( 0 == xmlStrcmp (value, r->target_sheet))
+                    {
+                      r->sheet_found = true;
+                      r->state = STATE_TABLE;
+                    }
+                  xmlFree (value);
+                }
+              else if (r->target_sheet_index == r->sheet_index)
+                {
+                  /* Select the sheet by index. */
+                  r->sheet_found = true;
+                  r->state = STATE_TABLE;
+                }
+              else if ( r->target_sheet_index == -1)
+                r->state = STATE_TABLE;
+            }
+        }
+      else if (XML_READER_TYPE_END_ELEMENT  == r->node_type
+               && r->sheet_found)
+        {
+          /* The wanted sheet has been passed, so nothing that follows
+             is of interest. */
+          r->state = STATE_INIT;
+        }
+      break;
+    case STATE_TABLE:
+      if (0 == xmlStrcasecmp (name, _xml("table:table-row")) )
+        {
+          if ( XML_READER_TYPE_ELEMENT  == r->node_type)
+            {
+              /* An empty row element has no end element, so the state
+                 must not change for it. */
+              if (! xmlTextReaderIsEmptyElement (r->xtr))
+                {
+                  r->state = STATE_ROW;
+                }
+              r->row++;
+              r->span = 1;
+            }
+        }
+      else if (XML_READER_TYPE_END_ELEMENT  == r->node_type)
+        {
+          r->state = STATE_SPREADSHEET;
+        }
+      break;
+    case STATE_ROW:
+      if (0 == xmlStrcasecmp (name, _xml ("table:table-cell")))
+        {
+          if ( XML_READER_TYPE_ELEMENT  == r->node_type)
+            {
+              xmlChar *value =
+                xmlTextReaderGetAttribute (r->xtr,
+                                           _xml ("table:number-columns-repeated"));
+              /* Step over the columns covered by the previous cell
+                 before recording how many this one covers. */
+              r->col += r->span;
+              r->span = value ? _xmlchar_to_int (value) : 1;
+              xmlFree (value);
+              if (! xmlTextReaderIsEmptyElement (r->xtr))
+                {
+                  r->state = STATE_CELL;
+                }
+            }
+        }
+      else if (XML_READER_TYPE_END_ELEMENT  == r->node_type)
+        {
+          r->state = STATE_TABLE;
+          r->col = -1;
+          /* Set the span back to the default */
+          r->span = 1;
+        }
+      break;
+    case STATE_CELL:
+      if (0 == xmlStrcasecmp (name, _xml("text:p")))
+        {
+          if ( XML_READER_TYPE_ELEMENT  == r->node_type)
+            {
+              r->state = STATE_CELL_CONTENT;
+            }
+        }
+      else if (XML_READER_TYPE_END_ELEMENT  == r->node_type)
+        {
+          r->state = STATE_ROW;
+        }
+      break;
+    case STATE_CELL_CONTENT:
+      if (XML_READER_TYPE_TEXT != r->node_type)
+        r->state = STATE_CELL;
+      break;
+    default:
+      break;
+    };
+
+  xmlFree (name);
+}
+
+/*
+   A struct containing the parameters of a cell's value
+   parsed from the xml.  Any member may be null.
+*/
+struct xml_value
+{
+  xmlChar *type;    /* The cell's office:value-type attribute. */
+  xmlChar *value;   /* The cell's office:value attribute. */
+  xmlChar *text;    /* The text node found inside the cell. */
+};
+
+/* What ods_open_reader() collects about one prospective variable. */
+struct var_spec
+{
+  char *name;                   /* Text of the header cell, if names are read. */
+  struct xml_value firstval;    /* Cell in the first row of data. */
+};
+
+
+/* Returns the variable width suggested by the cell value XMV.
+
+   A cell whose type is known and is not "string" yields 0.  Otherwise
+   FALLBACK is returned unless it is -1, in which case the width is the
+   length of the cell's value attribute (or, failing that, of its text)
+   rounded up to a multiple of SPREADSHEET_DEFAULT_WIDTH.  When the cell
+   has neither, the result is SPREADSHEET_DEFAULT_WIDTH itself. */
+static int
+xmv_to_width (const struct xml_value *xmv, int fallback)
+{
+  const xmlChar *sample;
+
+  /* Non-strings always have zero width */
+  if (xmv->type != NULL && 0 != xmlStrcmp (xmv->type, _xml("string")))
+    return 0;
+
+  if (fallback != -1)
+    return fallback;
+
+  /* The value attribute takes precedence over the text. */
+  sample = xmv->value ? xmv->value : xmv->text;
+  if (sample == NULL)
+    return SPREADSHEET_DEFAULT_WIDTH;
+
+  return ROUND_UP (xmlStrlen (sample), SPREADSHEET_DEFAULT_WIDTH);
+}
+
+/*
+   Sets the VAR of case C, to the value corresponding to the xml data
+   in XMV.
+
+   If XMV has neither a value attribute nor text, the variable is set
+   to missing.  String variables are filled from the text, padded on
+   the right with spaces.  All other variables are parsed with
+   data_in, from the value attribute if there is one and from the
+   text otherwise, using the variable's write format.
+ */
+static void
+convert_xml_to_value (struct ccase *c, const struct variable *var,
+                     const struct xml_value *xmv)
+{
+  union value *v = case_data_rw (c, var);
+
+  if (xmv->value == NULL && xmv->text == NULL)
+    value_set_missing (v, var_get_width (var));
+  else if ( var_is_alpha (var))
+    {
+      /* Use the text field, because it seems that there is no
+         value field for strings.  Fall back on the value field all
+         the same rather than hand a null pointer to the copy routine
+         if the text happens to be absent. */
+      const xmlChar *str = xmv->text ? xmv->text : xmv->value;
+
+      value_copy_str_rpad (v, var_get_width (var), str, ' ');
+    }
+  else
+    {
+      const struct fmt_spec *fmt = var_get_write_format (var);
+      enum fmt_category fc  = fmt_get_category (fmt->type);
+      const char *text = xmv->value ? CHAR_CAST (const char *, xmv->value):
+        CHAR_CAST (const char *, xmv->text);
+
+      assert ( fc != FMT_CAT_STRING);
+
+      /* data_in hands back an allocated error message when the text
+         cannot be parsed.  The message is not reported here, but it
+         still has to be released. */
+      free (data_in (ss_cstr (text), "UTF-8",
+                     fmt->type,
+                     v,
+                     var_get_width (var),
+                     "UTF-8"));
+    }
+}
+
+
+/* Frees the N elements of VAR_SPEC together with the strings that
+   they own, and then VAR_SPEC itself.  VAR_SPEC may be null. */
+static void
+free_var_specs (struct var_spec *var_spec, int n)
+{
+  int i;
+
+  for (i = 0 ; i < n ; ++i)
+    {
+      xmlFree (var_spec[i].firstval.type);
+      xmlFree (var_spec[i].firstval.value);
+      xmlFree (var_spec[i].firstval.text);
+      free (var_spec[i].name);
+    }
+
+  free (var_spec);
+}
+
+/* Opens the OpenDocument spreadsheet described by GRI for reading.
+
+   The sheet is selected by GRI->sheet_name or, if that is null, by
+   GRI->sheet_index; GRI->cell_range optionally restricts the cells
+   that are read.  If GRI->read_names is set, the first row supplies
+   the variable names.  The first row of data determines the type and
+   width of each variable (GRI->asw, unless -1, overrides the width of
+   string variables) and is cached as the first case.
+
+   On success, stores the newly created dictionary in *DICT and
+   returns a casereader for the data.  On failure, reports an error,
+   releases everything that was allocated, leaves *DICT untouched and
+   returns a null pointer. */
+struct casereader *
+ods_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dict)
+{
+  int ret = 0;
+  xmlChar *type = NULL;
+  xmlChar *val_string = NULL;
+  unsigned long int vstart = 0;
+  casenumber n_cases = CASENUMBER_MAX;
+  int i;
+  struct var_spec *var_spec = NULL;
+  int n_var_specs = 0;
+
+  struct ods_reader *r = xzalloc (sizeof *r);
+
+  r->read_names = gri->read_names;
+  ds_init_empty (&r->ods_errs);
+
+  struct zip_reader *zreader = zip_reader_create (gri->file_name, &r->ods_errs);
+  struct zip_member *content = NULL;
+
+  if ( NULL == zreader)
+    {
+      msg (ME, _("Error opening `%s' for reading as a OpenDocument spreadsheet file: %s."),
+           gri->file_name, ds_cstr (&r->ods_errs));
+
+      goto error;
+    }
+
+  content = zip_member_open (zreader, "content.xml");
+  if ( NULL == content)
+    {
+      msg (ME, _("Could not extract OpenDocument spreadsheet from file `%s': %s."),
+           gri->file_name, ds_cstr (&r->ods_errs));
+
+      goto error;
+    }
+
+  zip_member_ref (content);
+
+  r->xtr = xmlReaderForIO ((xmlInputReadCallback) zip_member_read,
+                           (xmlInputCloseCallback) zip_member_finish,
+                           content,   NULL, NULL, XML_PARSE_RECOVER);
+
+  if ( r->xtr == NULL)
+    {
+      goto error;
+    }
+
+  if ( gri->cell_range )
+    {
+      if ( ! convert_cell_ref (gri->cell_range,
+                               &r->start_col, &r->start_row,
+                               &r->stop_col, &r->stop_row))
+        {
+          msg (SE, _("Invalid cell range `%s'"),
+               gri->cell_range);
+          goto error;
+        }
+    }
+  else
+    {
+      r->start_col = 0;
+      r->start_row = 0;
+      r->stop_col = -1;
+      r->stop_row = -1;
+    }
+
+  r->state = STATE_INIT;
+  r->target_sheet = BAD_CAST gri->sheet_name;
+  r->target_sheet_index = gri->sheet_index;
+  r->row = r->col = -1;
+  r->sheet_index = 0;
+
+
+  /* If CELLRANGE was given, then we know how many variables should be read */
+  if ( r->stop_col != -1 )
+    {
+      int n_cols = r->stop_col - r->start_col + 1;
+
+      /* A range whose last column precedes its first one would
+         otherwise turn into an absurd allocation request. */
+      if ( n_cols <= 0)
+        {
+          msg (SE, _("Invalid cell range `%s'"),
+               gri->cell_range);
+          goto error;
+        }
+
+      /* Zero-filled, because not every column necessarily receives a
+         name or a first value, yet every element is later examined
+         and freed. */
+      var_spec = xcalloc (n_cols, sizeof *var_spec);
+      n_var_specs = n_cols;
+    }
+
+
+  /* Advance to the start of the cells for the target sheet */
+  while ( (r->row < r->start_row ))
+    {
+      if (1 != (ret = xmlTextReaderRead (r->xtr)))
+        break;
+
+      process_node (r);
+    }
+
+  if (ret < 1)
+    {
+      msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."),
+           gri->file_name);
+      goto error;
+    }
+
+  if ( gri->read_names)
+    {
+      while (1 == (ret = xmlTextReaderRead (r->xtr)))
+        {
+          int idx;
+          process_node (r);
+          if ( r->row > r->start_row)
+            break;
+
+          if (r->col == -1 && r->row == r->start_row)
+            break;
+
+          if ( r->col < r->start_col)
+            continue;
+
+          /* Header cells to the right of an explicit CELLRANGE must
+             not create additional variables. */
+          if ( r->stop_col != -1 && r->col > r->stop_col)
+            continue;
+
+          idx = r->col - r->start_col;
+
+          if (r->state == STATE_CELL_CONTENT
+              &&
+              XML_READER_TYPE_TEXT  == r->node_type)
+            {
+              xmlChar *value = xmlTextReaderValue (r->xtr);
+              if ( idx >= n_var_specs)
+                {
+                  var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + 1));
+
+                  /* xrealloc (unlike xcalloc) doesn't zero the memory it
+                     adds.  VAR_SPEC is a typed pointer, so the offset
+                     is counted in elements, not in bytes. */
+                  memset (var_spec + n_var_specs,
+                          0,
+                          (idx + 1 - n_var_specs) * sizeof (*var_spec));
+                  n_var_specs = idx + 1;
+                }
+
+              /* A cell can hold several text nodes; the last one wins. */
+              free (var_spec [idx].name);
+              var_spec [idx].name = (value != NULL
+                                     ? xstrdup (CHAR_CAST (const char *, value))
+                                     : NULL);
+              xmlFree (value);
+            }
+        }
+    }
+
+  /* Read in the first row of data */
+  while (1 == xmlTextReaderRead (r->xtr))
+    {
+      int idx;
+      process_node (r);
+      if ( r->row >= r->start_row + 1 + gri->read_names)
+        break;
+
+      if ( r->col < r->start_col)
+        continue;
+
+      if ( r->col - r->start_col + 1 > n_var_specs)
+        continue;
+
+      idx = r->col - r->start_col;
+
+      if ( r->state == STATE_CELL &&
+           XML_READER_TYPE_ELEMENT  == r->node_type)
+        {
+          /* Discard the attributes of a preceding cell that turned out
+             to contain no text. */
+          xmlFree (type);
+          xmlFree (val_string);
+          type = xmlTextReaderGetAttribute (r->xtr, _xml ("office:value-type"));
+          val_string = xmlTextReaderGetAttribute (r->xtr, _xml ("office:value"));
+        }
+
+      if ( r->state == STATE_CELL_CONTENT &&
+           XML_READER_TYPE_TEXT  == r->node_type)
+        {
+          struct xml_value *first = &var_spec [idx].firstval;
+
+          /* Release whatever an earlier text node of this cell left. */
+          xmlFree (first->type);
+          xmlFree (first->value);
+          xmlFree (first->text);
+
+          first->type = type;
+          first->text = xmlTextReaderValue (r->xtr);
+          first->value = val_string;
+          val_string = NULL;
+          type = NULL;
+        }
+    }
+
+  xmlFree (type);
+  xmlFree (val_string);
+  type = NULL;
+  val_string = NULL;
+
+  /* Check for an empty selection before the dictionary exists, so that
+     the failure path has no dictionary to dispose of and *DICT never
+     refers to one that the caller was not told about. */
+  if ( n_var_specs ==  0 )
+    {
+      msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."),
+           gri->file_name);
+      goto error;
+    }
+
+  /* Create the dictionary and populate it */
+  *dict = r->dict = dict_create (
+    CHAR_CAST (const char *, xmlTextReaderConstEncoding (r->xtr)));
+
+  for (i = 0 ; i < n_var_specs ; ++i )
+    {
+      struct fmt_spec fmt;
+      struct variable *var = NULL;
+      char *name = dict_make_unique_var_name (r->dict, var_spec[i].name, &vstart);
+      int width  = xmv_to_width (&var_spec[i].firstval, gri->asw);
+      dict_create_var (r->dict, name, width);
+      free (name);
+
+      var = dict_get_var (r->dict, i);
+
+      if ( 0 == xmlStrcmp (var_spec[i].firstval.type, _xml("date")))
+        {
+          fmt.type = FMT_DATE;
+          fmt.d = 0;
+          fmt.w = 20;
+        }
+      else
+        fmt = fmt_default_for_width (width);
+
+      var_set_both_formats (var, &fmt);
+    }
+
+  /* Create the first case, and cache it */
+  r->used_first_case = false;
+
+  r->proto = caseproto_ref (dict_get_proto (r->dict));
+  r->first_case = case_create (r->proto);
+  case_set_missing (r->first_case);
+
+  for ( i = 0 ; i < n_var_specs ; ++i )
+    {
+      const struct variable *var = dict_get_var (r->dict, i);
+
+      convert_xml_to_value (r->first_case, var,  &var_spec[i].firstval);
+    }
+
+  zip_reader_destroy (zreader);
+
+  free_var_specs (var_spec, n_var_specs);
+
+  return casereader_create_sequential
+    (NULL,
+     r->proto,
+     n_cases,
+     &ods_file_casereader_class, r);
+
+ error:
+
+  zip_reader_destroy (zreader);
+
+  free_var_specs (var_spec, n_var_specs);
+
+  xmlFree (type);
+  xmlFree (val_string);
+
+  /* No dictionary, prototype or case exists yet on any path that leads
+     here, so only the XML reader, the error text and the reader
+     structure itself remain to be released.  The error text has
+     already been reported where it was relevant. */
+  if (r->xtr)
+    xmlFreeTextReader (r->xtr);
+  ds_destroy (&r->ods_errs);
+  free (r);
+
+  return NULL;
+}
+
+
+/* Reads and returns one case from READER's file.  Returns a null
+   pointer on failure.
+
+   The first call returns the case that ods_open_reader cached.  Every
+   later call consumes XML nodes until the row counter moves past the
+   row that was current on entry, storing each cell that lies within
+   the selected columns into a fresh case.  A cell that carries a
+   table:number-columns-repeated attribute is stored into that many
+   consecutive variables, but never beyond the last variable or the
+   last column of the cell range. */
+static struct ccase *
+ods_file_casereader_read (struct casereader *reader UNUSED, void *r_)
+{
+  struct ccase *c = NULL;
+  xmlChar *val_string = NULL;
+  struct ods_reader *r = r_;
+  int current_row = r->row;
+
+  if ( r->row == -1)
+    return NULL;
+
+  if ( !r->used_first_case )
+    {
+      r->used_first_case = true;
+      return r->first_case;
+    }
+
+
+  if ( r->state > STATE_INIT)
+    {
+      c = case_create (r->proto);
+      case_set_missing (c);
+    }
+
+  while (1 == xmlTextReaderRead (r->xtr))
+    {
+      process_node (r);
+      if ( r->row > current_row)
+        {
+          break;
+        }
+      if ( r->col < r->start_col || (r->stop_col != -1 && r->col > r->stop_col))
+        {
+          continue;
+        }
+      if ( r->col - r->start_col >= caseproto_get_n_widths (r->proto))
+        {
+          continue;
+        }
+      if ( r->stop_row != -1 && r->row > r->stop_row)
+        {
+          continue;
+        }
+      if ( r->state == STATE_CELL &&
+           r->node_type == XML_READER_TYPE_ELEMENT )
+        {
+          /* Drop the attribute of a preceding cell that had no text. */
+          xmlFree (val_string);
+          val_string = xmlTextReaderGetAttribute (r->xtr, _xml ("office:value"));
+        }
+
+      if ( r->state == STATE_CELL_CONTENT && r->node_type == XML_READER_TYPE_TEXT )
+        {
+          const size_t n_vars = caseproto_get_n_widths (r->proto);
+          struct xml_value xmv;
+          int col;
+
+          xmv.type = NULL;
+          xmv.text = xmlTextReaderValue (r->xtr);
+          xmv.value = val_string;
+          val_string = NULL;
+
+          /* C is null if no case could be created for this call; there
+             is then nowhere to store the value. */
+          for (col = 0; c != NULL && col < r->span ; ++col)
+            {
+              /* Not negative: r->col >= r->start_col was checked above. */
+              const int idx = r->col + col - r->start_col;
+
+              /* A repeated cell may extend past the last variable or
+                 past the end of the cell range. */
+              if ( (size_t) idx >= n_vars
+                   || (r->stop_col != -1 && r->col + col > r->stop_col))
+                break;
+
+              convert_xml_to_value (c, dict_get_var (r->dict, idx), &xmv);
+            }
+
+          xmlFree (xmv.text);
+          xmlFree (xmv.value);
+        }
+
+      if ( r->state < STATE_TABLE)
+        break;
+    }
+
+  xmlFree (val_string);
+
+  if (NULL == c || (r->stop_row != -1 && r->row > r->stop_row + 1))
+    {
+      case_unref (c);
+      return NULL;
+    }
+  else
+    {
+      return c;
+    }
+}
+#endif
diff --git a/src/data/ods-reader.h b/src/data/ods-reader.h

new file mode 100644 (file)

index 0000000..79b7169
--- /dev/null
+++ b/src/data/ods-reader.h
@@ -0,0 +1,27 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2011 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef ODS_READ_H
+#define ODS_READ_H 1
+
+struct casereader;
+struct dictionary;
+struct spreadsheet_read_info;
+
+/* Opens the spreadsheet described by the spreadsheet_read_info for
+   reading.  GET DATA /TYPE=ODS treats a null return as failure;
+   otherwise it installs the returned casereader as the dataset's
+   source and the dictionary stored through the second argument as
+   the dataset's dictionary. */
+struct casereader * ods_open_reader (struct spreadsheet_read_info *, struct dictionary **);
+
+
+#endif
diff --git a/src/data/spreadsheet-reader.c b/src/data/spreadsheet-reader.c

new file mode 100644 (file)

index 0000000..11e8cf5
--- /dev/null
+++ b/src/data/spreadsheet-reader.c
@@ -0,0 +1,89 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+#include "spreadsheet-reader.h"
+
+#include <libpspp/str.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Decode a spreadsheet column label such as "A", "Z", "AA" or "ABC"
+   into a zero-based column number.
+
+   The label is read as a base-26 number with A=0, B=1, ... Z=25,
+   except that every letter other than the rightmost one counts
+   for one more than its plain value, so that
+   A   = 0, Z = 25,
+   AA  = 0 + 1*26, AB = 1 + 1*26,
+   ABC = 2 + 2*26 + 1*26^2.
+
+   STR must consist solely of the upper case letters 'A'..'Z'; if
+   any other character is found, -1 is returned.
+*/
+int
+pseudo_base26 (const char *str)
+{
+  const char *p = str + strlen (str);
+  int place_value = 1;
+  int bias = 0;         /* 0 for the rightmost letter, 1 for all others. */
+  int total = 0;
+
+  /* Walk the label from its last letter back to its first. */
+  while (p > str)
+    {
+      int digit = *--p - 'A';
+
+      if ( digit < 0 || digit > 25 )
+       return -1;
+
+      total += (digit + bias) * place_value;
+
+      bias = 1;
+      place_value *= 26;
+    }
+
+  return total;
+}
+
+
+/* Convert a cell range reference in the form "A1:B2" to zero-based
+   integers.  A1 means column zero, row zero.
+   B1 means column 1 row 0. AA1 means column 26, row 0.
+
+   On success, stores the first column and row in *COL0 and *ROW0,
+   the last column and row in *COLI and *ROWI, and returns true.
+
+   Returns false, leaving the outputs untouched, if REF does not
+   begin with two column/row pairs separated by a colon, or if
+   either row number is less than 1.  Column labels may be given in
+   either case and are limited to four letters.
+*/
+bool
+convert_cell_ref (const char *ref,
+                 int *col0, int *row0,
+                 int *coli, int *rowi)
+{
+  char startcol[5];
+  char stopcol [5];
+
+  int startrow;
+  int stoprow;
+
+  /* The %4[...] conversions store at most four letters plus the
+     terminating null, which is what the five byte buffers hold. */
+  int n = sscanf (ref, "%4[a-zA-Z]%d:%4[a-zA-Z]%d",
+             startcol, &startrow,
+             stopcol, &stoprow);
+  if ( n != 4)
+    return false;
+
+  /* Rows are numbered from 1 in a reference.  Anything smaller would
+     produce a negative index below, and the ODS reader compares its
+     row and column limits against -1 to mean "unbounded", so a
+     reference such as "A1:B0" must be rejected rather than passed on. */
+  if ( startrow < 1 || stoprow < 1)
+    return false;
+
+  /* pseudo_base26 only understands upper case letters. */
+  str_uppercase (startcol);
+  *col0 = pseudo_base26 (startcol);
+  str_uppercase (stopcol);
+  *coli = pseudo_base26 (stopcol);
+  *row0 = startrow - 1;
+  *rowi = stoprow - 1 ;
+
+  return true;
+}
+
diff --git a/src/data/spreadsheet-reader.h b/src/data/spreadsheet-reader.h

new file mode 100644 (file)

index 0000000..6edd705
--- /dev/null
+++ b/src/data/spreadsheet-reader.h
@@ -0,0 +1,47 @@
+/* PSPP - a program for statistical analysis.
+   Copyright (C) 2007, 2010 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef SPREADSHEET_READ_H
+#define SPREADSHEET_READ_H 1
+
+#include <stdbool.h>
+
+/* Default width of string variables. */
+#define SPREADSHEET_DEFAULT_WIDTH 8
+
+/* Describes what to read from a spreadsheet file.  GET DATA fills
+   one of these in for /TYPE=GNM and /TYPE=ODS and hands it to the
+   matching reader. */
+struct spreadsheet_read_info
+{
+  char *sheet_name ;            /* In UTF-8.  Set by SHEET=NAME. */
+  char *file_name ;             /* In filename encoding. */
+  char *cell_range ;            /* In UTF-8.  Null for CELLRANGE=FULL. */
+  int sheet_index ;             /* Set by SHEET=INDEX.  GET DATA defaults
+                                   it to 1, and sets it to -1 when a
+                                   sheet name is given instead. */
+  bool read_names ;             /* Set by READNAMES=ON/OFF.  GET DATA
+                                   defaults it to true. */
+  int asw ;                     /* Set by ASSUMEDSTRWIDTH.  GET DATA
+                                   defaults it to -1. */
+};
+
+int pseudo_base26 (const char *str);
+
+bool convert_cell_ref (const char *ref,
+                      int *col0, int *row0,
+                      int *coli, int *rowi);
+
+
+/* Casts the C string X to const xmlChar *, for passing to functions
+   such as xmlTextReaderGetAttribute.
+   NOTE(review): CHAR_CAST and xmlChar are not declared by this
+   header; includers apparently have to provide them -- confirm. */
+#define _xml(X) (CHAR_CAST (const xmlChar *, X))
+
+/* Converts the xmlChar string X to an int by way of atoi.
+   NOTE(review): atoi's declaration must likewise come from the
+   includer -- confirm. */
+#define _xmlchar_to_int(X) (atoi(CHAR_CAST (const char *, X)))
+
+
+#endif
diff --git a/src/language/data-io/get-data.c b/src/language/data-io/get-data.c

index 47b65b445adfc44ffa15e9b72c9738703fc687a0..9b878c553a92bf1a16e37f5985506e025db924c6 100644 (file)
--- a/src/language/data-io/get-data.c
+++ b/src/language/data-io/get-data.c
@@ -18,10 +18,14 @@
  
  #include <stdlib.h>
  
  
  #include <stdlib.h>
  
+#include <string.h>
+
  #include "data/dataset.h"
  #include "data/dictionary.h"
  #include "data/format.h"
  #include "data/gnumeric-reader.h"
  #include "data/dataset.h"
  #include "data/dictionary.h"
  #include "data/format.h"
  #include "data/gnumeric-reader.h"
+#include "data/ods-reader.h"
+#include "data/spreadsheet-reader.h"
  #include "data/psql-reader.h"
  #include "data/settings.h"
  #include "language/command.h"
  #include "data/psql-reader.h"
  #include "data/settings.h"
  #include "language/command.h"
@@ -40,13 +44,16 @@
  #define _(msgid) gettext (msgid)
  #define N_(msgid) (msgid)
  
  #define _(msgid) gettext (msgid)
  #define N_(msgid) (msgid)
  
-static int parse_get_gnm (struct lexer *lexer, struct dataset *);
+static struct spreadsheet_read_info *parse_spreadsheet (struct lexer *lexer);
+static void destroy_spreadsheet_read_info (struct spreadsheet_read_info *);
+
  static int parse_get_txt (struct lexer *lexer, struct dataset *);
  static int parse_get_psql (struct lexer *lexer, struct dataset *);
  
  int
  cmd_get_data (struct lexer *lexer, struct dataset *ds)
  {
  static int parse_get_txt (struct lexer *lexer, struct dataset *);
  static int parse_get_psql (struct lexer *lexer, struct dataset *);
  
  int
  cmd_get_data (struct lexer *lexer, struct dataset *ds)
  {
+  char *tok = NULL;
    lex_force_match (lexer, T_SLASH);
  
    if (!lex_force_match_id (lexer, "TYPE"))
    lex_force_match (lexer, T_SLASH);
  
    if (!lex_force_match_id (lexer, "TYPE"))
@@ -54,14 +61,44 @@ cmd_get_data (struct lexer *lexer, struct dataset *ds)
  
    lex_force_match (lexer, T_EQUALS);
  
  
    lex_force_match (lexer, T_EQUALS);
  
-  if (lex_match_id (lexer, "GNM"))
-    return parse_get_gnm (lexer, ds);
-  else if (lex_match_id (lexer, "TXT"))
-    return parse_get_txt (lexer, ds);
+  /* Keep a private copy of the TYPE token: lex_match_id consumes it,
+     but the spreadsheet branch and the error message below still
+     need its text. */
+  tok = strdup (lex_tokcstr (lexer));
+  if (lex_match_id (lexer, "TXT"))
+    {
+      /* TOK is not used by this branch; release it before handing
+         off so that it is not leaked. */
+      free (tok);
+      return parse_get_txt (lexer, ds);
+    }
   else if (lex_match_id (lexer, "PSQL"))
   else if (lex_match_id (lexer, "PSQL"))
-    return parse_get_psql (lexer, ds);
+    {
+      /* As above: TOK is unused here and must not be leaked. */
+      free (tok);
+      return parse_get_psql (lexer, ds);
+    }
+  else if (lex_match_id (lexer, "GNM") || 
+      lex_match_id (lexer, "ODS"))
+    {
+      struct casereader *reader = NULL;
+      struct dictionary *dict = NULL;
+      struct spreadsheet_read_info *sri = parse_spreadsheet (lexer);
+      if (NULL == sri)
+       goto error;
+
+      if ( 0 == strncasecmp (tok, "GNM", 3))
+       reader = gnumeric_open_reader (sri, &dict);
+      else if (0 == strncasecmp (tok, "ODS", 3))
+       reader = ods_open_reader (sri, &dict);
+
+      if (reader)
+       {
+         dataset_set_dict (ds, dict);
+         dataset_set_source (ds, reader);
+         destroy_spreadsheet_read_info (sri);
+         free (tok);
+         return CMD_SUCCESS;
+       }
+      destroy_spreadsheet_read_info (sri);
+    }
+  else
+    msg (SE, _("Unsupported TYPE %s."), tok);
  
  
-  msg (SE, _("Unsupported TYPE %s."), lex_tokcstr (lexer));
+ error:
+  free (tok);
    return CMD_FAILURE;
  }
  
    return CMD_FAILURE;
  }
  
@@ -141,10 +178,13 @@ parse_get_psql (struct lexer *lexer, struct dataset *ds)
    return CMD_FAILURE;
  }
  
    return CMD_FAILURE;
  }
  
-static int
-parse_get_gnm (struct lexer *lexer, struct dataset *ds)
+static struct spreadsheet_read_info *
+parse_spreadsheet (struct lexer *lexer)
  {
  {
-  struct gnumeric_read_info gri  = {NULL, NULL, NULL, 1, true, -1};
+  struct spreadsheet_read_info *sri = xzalloc (sizeof *sri);
+  sri->sheet_index = 1;
+  sri->read_names = true;
+  sri->asw = -1;
  
    lex_force_match (lexer, T_SLASH);
  
  
    lex_force_match (lexer, T_SLASH);
  
@@ -156,7 +196,7 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds)
    if (!lex_force_string (lexer))
      goto error;
  
    if (!lex_force_string (lexer))
      goto error;
  
-  gri.file_name = utf8_to_filename (lex_tokcstr (lexer));
+  sri->file_name = utf8_to_filename (lex_tokcstr (lexer));
  
    lex_get (lexer);
  
  
    lex_get (lexer);
  
@@ -165,7 +205,7 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds)
        if ( lex_match_id (lexer, "ASSUMEDSTRWIDTH"))
         {
           lex_match (lexer, T_EQUALS);
        if ( lex_match_id (lexer, "ASSUMEDSTRWIDTH"))
         {
           lex_match (lexer, T_EQUALS);
-         gri.asw = lex_integer (lexer);
+         sri->asw = lex_integer (lexer);
           lex_get (lexer);
         }
        else if (lex_match_id (lexer, "SHEET"))
           lex_get (lexer);
         }
        else if (lex_match_id (lexer, "SHEET"))
@@ -176,14 +216,14 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds)
               if ( ! lex_force_string (lexer) )
                 goto error;
  
               if ( ! lex_force_string (lexer) )
                 goto error;
  
-             gri.sheet_name = ss_xstrdup (lex_tokss (lexer));
-             gri.sheet_index = -1;
+             sri->sheet_name = ss_xstrdup (lex_tokss (lexer));
+             sri->sheet_index = -1;
  
               lex_get (lexer);
             }
           else if (lex_match_id (lexer, "INDEX"))
             {
  
               lex_get (lexer);
             }
           else if (lex_match_id (lexer, "INDEX"))
             {
-             gri.sheet_index = lex_integer (lexer);
+             sri->sheet_index = lex_integer (lexer);
               lex_get (lexer);
             }
           else
               lex_get (lexer);
             }
           else
@@ -195,14 +235,14 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds)
  
           if (lex_match_id (lexer, "FULL"))
             {
  
           if (lex_match_id (lexer, "FULL"))
             {
-             gri.cell_range = NULL;
+             sri->cell_range = NULL;
             }
           else if (lex_match_id (lexer, "RANGE"))
             {
               if ( ! lex_force_string (lexer) )
                 goto error;
  
             }
           else if (lex_match_id (lexer, "RANGE"))
             {
               if ( ! lex_force_string (lexer) )
                 goto error;
  
-             gri.cell_range = ss_xstrdup (lex_tokss (lexer));
+             sri->cell_range = ss_xstrdup (lex_tokss (lexer));
               lex_get (lexer);
             }
           else
               lex_get (lexer);
             }
           else
@@ -214,11 +254,11 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds)
  
           if ( lex_match_id (lexer, "ON"))
             {
  
           if ( lex_match_id (lexer, "ON"))
             {
-             gri.read_names = true;
+             sri->read_names = true;
             }
           else if (lex_match_id (lexer, "OFF"))
             {
             }
           else if (lex_match_id (lexer, "OFF"))
             {
-             gri.read_names = false;
+             sri->read_names = false;
             }
           else
             goto error;
             }
           else
             goto error;
@@ -230,30 +270,14 @@ parse_get_gnm (struct lexer *lexer, struct dataset *ds)
         }
      }
  
         }
      }
  
-  {
-    struct dictionary *dict = NULL;
-    struct casereader *reader = gnumeric_open_reader (&gri, &dict);
-
-    if ( reader )
-      {
-        dataset_set_dict (ds, dict);
-        dataset_set_source (ds, reader);
-      }
-  }
-
-  free (gri.file_name);
-  free (gri.sheet_name);
-  free (gri.cell_range);
-  return CMD_SUCCESS;
+  return sri;
  
   error:
  
   error:
-
-  free (gri.file_name);
-  free (gri.sheet_name);
-  free (gri.cell_range);
-  return CMD_FAILURE;
+  destroy_spreadsheet_read_info (sri);
+  return NULL;
  }
  
  }
  
+
  static bool
  set_type (struct data_parser *parser, const char *subcommand,
            enum data_parser_type type, bool *has_type)
  static bool
  set_type (struct data_parser *parser, const char *subcommand,
            enum data_parser_type type, bool *has_type)
@@ -596,3 +620,16 @@ parse_get_txt (struct lexer *lexer, struct dataset *ds)
    free (name);
    return CMD_CASCADING_FAILURE;
  }
    free (name);
    return CMD_CASCADING_FAILURE;
  }
+
+
+/* Frees SRI together with the file name, cell range and sheet name
+   strings it owns.  A null SRI is ignored. */
+static void
+destroy_spreadsheet_read_info (struct spreadsheet_read_info *sri)
+{
+  if (sri != NULL)
+    {
+      free (sri->file_name);
+      free (sri->cell_range);
+      free (sri->sheet_name);
+      free (sri);
+    }
+}
diff --git a/src/output/automake.mk b/src/output/automake.mk

index 6f9b149e82a8d7fe4ec83b208dc9ac68d1d51df0..78173e3ef2a45da602c8c39bc4af0a9638622e54 100644 (file)
--- a/src/output/automake.mk
+++ b/src/output/automake.mk
@@ -68,7 +68,7 @@ src_output_liboutput_la_SOURCES += \
         src/output/charts/roc-chart-cairo.c \
         src/output/charts/scree-cairo.c
  endif
         src/output/charts/roc-chart-cairo.c \
         src/output/charts/scree-cairo.c
  endif
-if ODT_SUPPORT
+if ODF_WRITE_SUPPORT
  src_output_liboutput_la_SOURCES += src/output/odt.c
  endif
  
  src_output_liboutput_la_SOURCES += src/output/odt.c
  endif
  
diff --git a/tests/atlocal.in b/tests/atlocal.in

index 9b54a705b4755cbda91b9f46ddce34b1e589a62b..cfe019a238d4e2d7ee400b28c9d3ef7a0b198a43 100644 (file)
--- a/tests/atlocal.in
+++ b/tests/atlocal.in
@@ -3,6 +3,7 @@
  # Variables used internally by the testsuite.
  EXEEXT='@EXEEXT@'
  GNM_SUPPORT='@GNM_SUPPORT@'
  # Variables used internally by the testsuite.
  EXEEXT='@EXEEXT@'
  GNM_SUPPORT='@GNM_SUPPORT@'
+ODF_READ_SUPPORT='@ODF_READ_SUPPORT@'
  PERL='@PERL@'
  WITH_PERL_MODULE='@WITH_PERL_MODULE@'
  host='@host@'
  PERL='@PERL@'
  WITH_PERL_MODULE='@WITH_PERL_MODULE@'
  host='@host@'
diff --git a/tests/automake.mk b/tests/automake.mk

index 7d4afef7071fa4fb6778292ad3e3b3f6fb10d3ea..840f56b04764429d467abef722d26100e7808e7b 100644 (file)
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -244,7 +244,8 @@ EXTRA_DIST += \
         tests/data/num-out.expected.cmp.gz \
         tests/data/v13.sav \
         tests/data/v14.sav \
         tests/data/num-out.expected.cmp.gz \
         tests/data/v13.sav \
         tests/data/v14.sav \
-        tests/language/data-io/Book1.gnm.unzipped
+        tests/language/data-io/Book1.gnm.unzipped \
+        tests/language/data-io/test.ods
  
  CLEANFILES += *.save pspp.* foo*
  
  
  CLEANFILES += *.save pspp.* foo*
  
@@ -279,7 +280,7 @@ TESTSUITE_AT = \
         tests/language/data-io/data-reader.at \
         tests/language/data-io/dataset.at \
         tests/language/data-io/file-handle.at \
         tests/language/data-io/data-reader.at \
         tests/language/data-io/dataset.at \
         tests/language/data-io/file-handle.at \
-       tests/language/data-io/get-data-gnm.at \
+       tests/language/data-io/get-data-spreadsheet.at \
         tests/language/data-io/get-data-psql.at \
         tests/language/data-io/get-data-txt.at \
         tests/language/data-io/get.at \
         tests/language/data-io/get-data-psql.at \
         tests/language/data-io/get-data-txt.at \
         tests/language/data-io/get.at \
diff --git a/tests/language/data-io/get-data-gnm.at b/tests/language/data-io/get-data-gnm.at

deleted file mode 100644 (file)

index 92815dc..0000000
--- a/tests/language/data-io/get-data-gnm.at
+++ /dev/null
@@ -1,162 +0,0 @@
-AT_BANNER([GET DATA /TYPE=GNM])
-
-AT_SETUP([GET DATA /TYPE=GNM with CELLRANGE])
-AT_SKIP_IF([test "$GNM_SUPPORT" = no])
-AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric])
-AT_DATA([get-data.sps], [dnl
-GET DATA /TYPE=gnm /FILE='Book1.gnumeric'  /READNAMES=off /SHEET=name 'This' /CELLRANGE=range 'g9:i13' .
-DISPLAY VARIABLES.
-LIST.
-])
-AT_CHECK([pspp -o pspp.csv get-data.sps])
-AT_CHECK([cat pspp.csv], [0], [dnl
-Variable,Description,,Position
-VAR001,Format: F8.2,,1
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-VAR002,Format: A8,,2
-,Measure: Nominal,,
-,Display Alignment: Left,,
-,Display Width: 8,,
-VAR003,Format: F8.2,,3
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-
-Table: Data List
-VAR001,VAR002,VAR003
-.00,fred    ,20.00
-1.00,11      ,21.00
-2.00,twelve  ,22.00
-3.00,13      ,23.00
-4.00,14      ,24.00
-])
-AT_CLEANUP
-
-AT_SETUP([GET DATA /TYPE=GNM with CELLRANGE and READNAMES])
-AT_SKIP_IF([test "$GNM_SUPPORT" = no])
-AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric])
-AT_DATA([get-data.sps], [dnl
-GET DATA /TYPE=gnm /FILE='Book1.gnumeric'  /READNAMES=on /SHEET=name 'This' /CELLRANGE=range 'g8:i13' .
-DISPLAY VARIABLES.
-LIST.
-])
-AT_CHECK([pspp -o pspp.csv get-data.sps])
-AT_CHECK([cat pspp.csv], [0], [dnl
-Variable,Description,,Position
-V1,Format: F8.2,,1
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-V2,Format: A8,,2
-,Measure: Nominal,,
-,Display Alignment: Left,,
-,Display Width: 8,,
-VAR001,Format: F8.2,,3
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-
-Table: Data List
-V1,V2,VAR001
-.00,fred    ,20.00
-1.00,11      ,21.00
-2.00,twelve  ,22.00
-3.00,13      ,23.00
-4.00,14      ,24.00
-])
-AT_CLEANUP
-
-AT_SETUP([GET DATA /TYPE=GNM without CELLRANGE])
-AT_SKIP_IF([test "$GNM_SUPPORT" = no])
-AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric])
-AT_DATA([get-data.sps], [dnl
-GET DATA /TYPE=gnm /FILE='Book1.gnumeric' /SHEET=index 3.
-DISPLAY VARIABLES.
-LIST.
-])
-AT_CHECK([pspp -o pspp.csv get-data.sps])
-AT_CHECK([cat pspp.csv], [0], [dnl
-Variable,Description,,Position
-name,Format: A8,,1
-,Measure: Nominal,,
-,Display Alignment: Left,,
-,Display Width: 8,,
-id,Format: F8.2,,2
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-height,Format: F8.2,,3
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-
-Table: Data List
-name,id,height
-fred    ,.00,23.40
-bert    ,1.00,.56
-charlie ,2.00,.  @&t@
-dick    ,3.00,-34.09
-])
-AT_CLEANUP
-
-AT_SETUP([GET DATA /TYPE=GNM with missing data])
-AT_SKIP_IF([test "$GNM_SUPPORT" = no])
-AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric])
-AT_DATA([get-data.sps], [dnl
-* This sheet has no data in one of its variables
-GET DATA /TYPE=gnm /FILE='Book1.gnumeric' /READNAMES=on /SHEET=index 5.
-DISPLAY VARIABLES.
-LIST.
-])
-AT_CHECK([pspp -o pspp.csv get-data.sps])
-AT_CHECK([cat pspp.csv], [0], [dnl
-Variable,Description,,Position
-vone,Format: F8.2,,1
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-vtwo,Format: F8.2,,2
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-vthree,Format: A8,,3
-,Measure: Nominal,,
-,Display Alignment: Left,,
-,Display Width: 8,,
-v4,Format: F8.2,,4
-,Measure: Scale,,
-,Display Alignment: Right,,
-,Display Width: 8,,
-
-Table: Data List
-vone,vtwo,vthree,v4
-1.00,3.00,,5.00
-2.00,4.00,,6.00
-])
-AT_CLEANUP
-
-AT_SETUP([GET DATA /TYPE=GNM with empty sheet])
-AT_SKIP_IF([test "$GNM_SUPPORT" = no])
-AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric])
-AT_DATA([get-data.sps], [dnl
-* This sheet is empty
-GET DATA /TYPE=gnm /FILE='Book1.gnumeric' /SHEET=name 'Empty'.
-])
-AT_CHECK([pspp -o pspp.csv get-data.sps], [0], [dnl
-warning: Selected sheet or range of spreadsheet `Book1.gnumeric' is empty.
-])
-AT_CLEANUP
-
-AT_SETUP([GET DATA /TYPE=GNM with nonexistent sheet])
-AT_SKIP_IF([test "$GNM_SUPPORT" = no])
-AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric])
-AT_DATA([get-data.sps], [dnl
-* This sheet doesnt exist.
-GET DATA /TYPE=gnm /FILE='Book1.gnumeric' /SHEET=name 'foobarxx'.
-])
-AT_CHECK([pspp -o pspp.csv get-data.sps], [0], [dnl
-warning: Selected sheet or range of spreadsheet `Book1.gnumeric' is empty.
-])
-AT_CLEANUP
diff --git a/tests/language/data-io/get-data-spreadsheet.at b/tests/language/data-io/get-data-spreadsheet.at

new file mode 100644 (file)

index 0000000..19b8964
--- /dev/null
+++ b/tests/language/data-io/get-data-spreadsheet.at
@@ -0,0 +1,181 @@
+
+dnl SPREADSHEET_TEST_PREP(TYPE)
+dnl
+dnl Prepares the input file for one GET DATA /TYPE=TYPE test, where
+dnl TYPE is GNM or ODS.  For GNM the unzipped workbook is gzipped into
+dnl Book1.gnumeric; for ODS test.ods is copied into the test directory.
+dnl In both cases the m4 macro testsheet is defined as the resulting
+dnl file name, and the test is skipped unless the matching
+dnl GNM_SUPPORT or ODF_READ_SUPPORT variable is exactly yes.
+m4_define([SPREADSHEET_TEST_PREP],[dnl
+ m4_if($1,[GNM],[dnl
+    AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric])dnl
+    m4_define([testsheet],[Book1.gnumeric])dnl
+    AT_SKIP_IF([test n$GNM_SUPPORT != nyes])dnl
+    ]) dnl
+ m4_if($1,[ODS],[dnl
+    AT_CHECK([cp $top_srcdir/tests/language/data-io/test.ods test.ods])dnl
+    m4_define([testsheet],[test.ods])dnl
+    AT_SKIP_IF([test n$ODF_READ_SUPPORT != nyes])dnl
+    ])dnl
+])
+
+dnl CHECK_SPREADSHEET_READER(TYPE)
+dnl
+dnl Emits the full set of GET DATA spreadsheet tests for TYPE, so that
+dnl every reader is checked against the same expected output.  Each
+dnl test calls SPREADSHEET_TEST_PREP to obtain its input file and
+dnl refers to that file through the testsheet macro.  The cases are:
+dnl a cell range, a cell range with READNAMES, a whole sheet selected
+dnl by index, a sheet with an empty variable, an empty sheet, and a
+dnl sheet name that does not exist.
+m4_define([CHECK_SPREADSHEET_READER],
+ [dnl
+AT_SETUP([GET DATA /TYPE=$1 with CELLRANGE])
+SPREADSHEET_TEST_PREP($1)
+AT_DATA([get-data.sps], [dnl
+GET DATA /TYPE=$1 /FILE='testsheet'  /READNAMES=off /SHEET=name 'This' /CELLRANGE=range 'g9:i13' .
+DISPLAY VARIABLES.
+LIST.
+])
+AT_CHECK([pspp -o pspp.csv get-data.sps])
+AT_CHECK([cat pspp.csv], [0], [dnl
+Variable,Description,,Position
+VAR001,Format: F8.2,,1
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+VAR002,Format: A8,,2
+,Measure: Nominal,,
+,Display Alignment: Left,,
+,Display Width: 8,,
+VAR003,Format: F8.2,,3
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+
+Table: Data List
+VAR001,VAR002,VAR003
+.00,fred    ,20.00
+1.00,11      ,21.00
+2.00,twelve  ,22.00
+3.00,13      ,23.00
+4.00,14      ,24.00
+])
+AT_CLEANUP
+
+AT_SETUP([GET DATA /TYPE=$1 with CELLRANGE and READNAMES])
+SPREADSHEET_TEST_PREP($1)
+AT_DATA([get-data.sps], [dnl
+GET DATA /TYPE=$1 /FILE='testsheet'  /READNAMES=on /SHEET=name 'This' /CELLRANGE=range 'g8:i13' .
+DISPLAY VARIABLES.
+LIST.
+])
+AT_CHECK([pspp -o pspp.csv get-data.sps])
+AT_CHECK([cat pspp.csv], [0], [dnl
+Variable,Description,,Position
+V1,Format: F8.2,,1
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+V2,Format: A8,,2
+,Measure: Nominal,,
+,Display Alignment: Left,,
+,Display Width: 8,,
+VAR001,Format: F8.2,,3
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+
+Table: Data List
+V1,V2,VAR001
+.00,fred    ,20.00
+1.00,11      ,21.00
+2.00,twelve  ,22.00
+3.00,13      ,23.00
+4.00,14      ,24.00
+])
+AT_CLEANUP
+
+AT_SETUP([GET DATA /TYPE=$1 without CELLRANGE])
+SPREADSHEET_TEST_PREP($1)
+AT_DATA([get-data.sps], [dnl
+GET DATA /TYPE=$1 /FILE='testsheet' /SHEET=index 3.
+DISPLAY VARIABLES.
+LIST.
+])
+AT_CHECK([pspp -o pspp.csv get-data.sps])
+AT_CHECK([cat pspp.csv], [0], [dnl
+Variable,Description,,Position
+name,Format: A8,,1
+,Measure: Nominal,,
+,Display Alignment: Left,,
+,Display Width: 8,,
+id,Format: F8.2,,2
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+height,Format: F8.2,,3
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+
+Table: Data List
+name,id,height
+fred    ,.00,23.40
+bert    ,1.00,.56
+charlie ,2.00,.  @&t@
+dick    ,3.00,-34.09
+])
+AT_CLEANUP
+
+AT_SETUP([GET DATA /TYPE=$1 with missing data])
+SPREADSHEET_TEST_PREP($1)
+AT_DATA([get-data.sps], [dnl
+* This sheet has no data in one of its variables
+GET DATA /TYPE=$1 /FILE='testsheet' /READNAMES=on /SHEET=index 5.
+DISPLAY VARIABLES.
+LIST.
+])
+AT_CHECK([pspp -o pspp.csv get-data.sps])
+AT_CHECK([cat pspp.csv], [0], [dnl
+Variable,Description,,Position
+vone,Format: F8.2,,1
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+vtwo,Format: F8.2,,2
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+vthree,Format: A8,,3
+,Measure: Nominal,,
+,Display Alignment: Left,,
+,Display Width: 8,,
+v4,Format: F8.2,,4
+,Measure: Scale,,
+,Display Alignment: Right,,
+,Display Width: 8,,
+
+Table: Data List
+vone,vtwo,vthree,v4
+1.00,3.00,,5.00
+2.00,4.00,,6.00
+])
+AT_CLEANUP
+
+AT_SETUP([GET DATA /TYPE=$1 with empty sheet])
+SPREADSHEET_TEST_PREP($1)
+AT_DATA([get-data.sps], [dnl
+* This sheet is empty
+GET DATA /TYPE=$1 /FILE='testsheet' /SHEET=name 'Empty'.
+])
+AT_CHECK([pspp -o pspp.csv get-data.sps], [0], [dnl
+warning: Selected sheet or range of spreadsheet `testsheet' is empty.
+])
+AT_CLEANUP
+
+AT_SETUP([GET DATA /TYPE=$1 with nonexistent sheet])
+SPREADSHEET_TEST_PREP($1)
+AT_DATA([get-data.sps], [dnl
+* This sheet doesnt exist.
+GET DATA /TYPE=$1 /FILE='testsheet' /SHEET=name 'foobarxx'.
+])
+AT_CHECK([pspp -o pspp.csv get-data.sps], [0], [dnl
+warning: Selected sheet or range of spreadsheet `testsheet' is empty.
+])
+AT_CLEANUP
+])
+
+
+AT_BANNER([GET DATA Spreadsheet /TYPE=GNM])
+
+CHECK_SPREADSHEET_READER([GNM])
+
+AT_BANNER([GET DATA Spreadsheet /TYPE=ODS])
+
+CHECK_SPREADSHEET_READER([ODS])
+
diff --git a/tests/language/data-io/test.ods b/tests/language/data-io/test.ods

new file mode 100644 (file)

index 0000000..c079454

Binary files /dev/null and b/tests/language/data-io/test.ods differ
author	John Darrington <john@darrington.wattle.id.au>
	Fri, 1 Jul 2011 15:27:26 +0000 (17:27 +0200)
committer	John Darrington <john@darrington.wattle.id.au>
	Fri, 1 Jul 2011 15:27:26 +0000 (17:27 +0200)
configure.ac		patch \| blob \| history
doc/files.texi		patch \| blob \| history
src/data/automake.mk		patch \| blob \| history
src/data/gnumeric-reader.c		patch \| blob \| history
src/data/gnumeric-reader.h		patch \| blob \| history
src/data/ods-reader.c	[new file with mode: 0644]	patch \| blob
src/data/ods-reader.h	[new file with mode: 0644]	patch \| blob
src/data/spreadsheet-reader.c	[new file with mode: 0644]	patch \| blob
src/data/spreadsheet-reader.h	[new file with mode: 0644]	patch \| blob
src/language/data-io/get-data.c		patch \| blob \| history
src/output/automake.mk		patch \| blob \| history
tests/atlocal.in		patch \| blob \| history
tests/automake.mk		patch \| blob \| history
tests/language/data-io/get-data-gnm.at	[deleted file]	patch \| blob \| history
tests/language/data-io/get-data-spreadsheet.at	[new file with mode: 0644]	patch \| blob
tests/language/data-io/test.ods	[new file with mode: 0644]	patch \| blob