Correct URL in documentation.

[pspp] / src / data / gnumeric-reader.c
diff --git a/src/data/gnumeric-reader.c b/src/data/gnumeric-reader.c

index 3b82ffc1672a04de9a693b884fc66c0c8474585d..0ef8e4e14e431ce3b049d6a823b02d32ac0394f5 100644 (file)
--- a/src/data/gnumeric-reader.c
+++ b/src/data/gnumeric-reader.c
@@ -1,5 +1,5 @@
  /* PSPP - a program for statistical analysis.
-   Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013  Free Software Foundation, Inc.
+   Copyright (C) 2007, 2009, 2010, 2011, 2012, 2013, 2016  Free Software Foundation, Inc.
  
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -28,16 +28,42 @@
  
  #include "spreadsheet-reader.h"
  
-#if !GNM_SUPPORT
+#if !GNM_READ_SUPPORT
  
-struct casereader *
-gnumeric_open_reader (const struct spreadsheet_read_options *opts, struct dictionary **dict)
+struct spreadsheet *
+gnumeric_probe (const char *filename, bool report_errors)
+{
+  if (report_errors)
+    msg (ME, _("Support for %s files was not compiled into this installation of PSPP"), "Gnumeric");
+
+  return NULL;
+}
+
+const char *
+gnumeric_get_sheet_name (struct spreadsheet *s, int n)
+{
+  return NULL;
+}
+
+char *
+gnumeric_get_sheet_range (struct spreadsheet *s, int n)
  {
-  msg (ME, _("Support for %s files was not compiled into this installation of PSPP"), "Gnumeric");
+  return NULL;
+}
  
+struct casereader *
+gnumeric_make_reader (struct spreadsheet *spreadsheet,
+                     const struct spreadsheet_read_options *opts)
+{
    return NULL;
  }
  
+void
+gnumeric_unref (struct spreadsheet *r)
+{
+}
+
+
  #else
  
  #include "data/gnumeric-reader.h"
@@ -48,6 +74,8 @@ gnumeric_open_reader (const struct spreadsheet_read_options *opts, struct dictio
  #include <libxml/xmlreader.h>
  #include <zlib.h>
  
+#include "data/format.h"
+#include "data/data-in.h"
  #include "data/case.h"
  #include "data/casereader-provider.h"
  #include "data/dictionary.h"
@@ -59,6 +87,25 @@ gnumeric_open_reader (const struct spreadsheet_read_options *opts, struct dictio
  
  #include "gl/xalloc.h"
  
+
+/* Codes found in a Gnumeric cell's "ValueType" attribute.  Lifted from the Gnumeric sources:
+   https://git.gnome.org/browse/gnumeric/tree/src/value.h
+ */
+enum gnm_value_type
+{
+  VALUE_EMPTY   = 10,
+  VALUE_BOOLEAN = 20,
+  VALUE_INTEGER = 30, /* Note: Gnumeric removed this type in 2006, but files written by older
+                        versions may still be around.  Newer ones are supposed to use float. */
+  VALUE_FLOAT   = 40,
+  VALUE_ERROR   = 50,
+  VALUE_STRING  = 60,
+  VALUE_CELLRANGE  = 70,
+  VALUE_ARRAY   = 80
+};
+
+
+
  static void gnm_file_casereader_destroy (struct casereader *, void *);
  
  static struct ccase *gnm_file_casereader_read (struct casereader *, void *);
@@ -100,7 +147,7 @@ struct sheet_detail
    int maxrow;
  };
  
-struct state_data 
+struct state_data
  {
    /* The libxml reader for this instance */
    xmlTextReaderPtr xtr;
@@ -128,7 +175,6 @@ state_data_destroy (struct state_data *sd)
  struct gnumeric_reader
  {
    struct spreadsheet spreadsheet;
-  int ref_cnt;
  
    struct state_data rsd;
    struct state_data msd;
@@ -137,7 +183,7 @@ struct gnumeric_reader
    int stop_col;
    int start_row;
    int stop_row;
-  
+
    struct sheet_detail *sheets;
  
    const xmlChar *target_sheet;
@@ -147,15 +193,17 @@ struct gnumeric_reader
    struct dictionary *dict;
    struct ccase *first_case;
    bool used_first_case;
+
+  enum gnm_value_type vtype;
  };
  
  
  void
-gnumeric_destroy (struct spreadsheet *s)
+gnumeric_unref (struct spreadsheet *s)
  {
    struct gnumeric_reader *r = (struct gnumeric_reader *) s;
  
-  if (0 == --r->ref_cnt)
+  if (0 == --s->ref_cnt)
      {
        int i;
  
@@ -163,10 +211,15 @@ gnumeric_destroy (struct spreadsheet *s)
         {
           xmlFree (r->sheets[i].name);
         }
-    
+
+
        free (r->sheets);
        state_data_destroy (&r->msd);
  
+      dict_destroy (r->dict);
+
+      free (s->file_name);
+
        free (r);
      }
  }
@@ -178,7 +231,7 @@ gnumeric_get_sheet_name (struct spreadsheet *s, int n)
    struct gnumeric_reader *gr = (struct gnumeric_reader *) s;
    assert (n < s->n_sheets);
  
-  return gr->sheets[n].name; 
+  return gr->sheets[n].name;
  }
  
  
@@ -191,19 +244,19 @@ gnumeric_get_sheet_range (struct spreadsheet *s, int n)
  {
    int ret;
    struct gnumeric_reader *gr = (struct gnumeric_reader *) s;
-  
+
    assert (n < s->n_sheets);
  
-  while ( 
+  while (
          (gr->sheets[n].stop_col == -1)
-        && 
+        &&
          (1 == (ret = xmlTextReaderRead (gr->msd.xtr)))
           )
      {
        process_node (gr, &gr->msd);
      }
  
-  return create_cell_ref (
+  return create_cell_range (
                           gr->sheets[n].start_col,
                           gr->sheets[n].start_row,
                           gr->sheets[n].stop_col,
@@ -221,18 +274,13 @@ gnm_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
  
    state_data_destroy (&r->rsd);
  
-#if 0
-  if ( r->rsd.xtr)
-    xmlFreeTextReader (r->rsd.xtr);
-  r->rsd.xtr = NULL;
-
-  if ( ! r->used_first_case )
+  if (r->first_case &&  ! r->used_first_case )
      case_unref (r->first_case);
  
-  caseproto_unref (r->proto);
+  if (r->proto)
+    caseproto_unref (r->proto);
  
-#endif
-  gnumeric_destroy (&r->spreadsheet);
+  gnumeric_unref (&r->spreadsheet);
  }
  
  
@@ -434,7 +482,7 @@ process_node (struct gnumeric_reader *r, struct state_data *sd)
   */
  static void
  convert_xml_string_to_value (struct ccase *c, const struct variable *var,
-                            const xmlChar *xv)
+                            const xmlChar *xv, enum gnm_value_type type, int col, int row)
  {
    union value *v = case_data_rw (c, var);
  
@@ -442,7 +490,7 @@ convert_xml_string_to_value (struct ccase *c, const struct variable *var,
      value_set_missing (v, var_get_width (var));
    else if ( var_is_alpha (var))
      value_copy_str_rpad (v, var_get_width (var), xv, ' ');
-  else
+  else if (type == VALUE_FLOAT || type == VALUE_INTEGER)
      {
        const char *text = CHAR_CAST (const char *, xv);
        char *endptr;
@@ -452,6 +500,29 @@ convert_xml_string_to_value (struct ccase *c, const struct variable *var,
        if ( errno != 0 || endptr == text)
         v->f = SYSMIS;
      }
+  else
+    {
+      const char *text = CHAR_CAST (const char *, xv);
+
+      const struct fmt_spec *fmt = var_get_write_format (var);
+
+      char *m = data_in (ss_cstr (text), "UTF-8",
+                        fmt->type,
+                        v,
+                        var_get_width (var),
+                        "UTF-8");
+
+      if (m)
+       {
+         char buf [FMT_STRING_LEN_MAX + 1];
+         char *cell = create_cell_ref (col, row);
+
+         msg (MW, _("Cannot convert the value in the spreadsheet cell %s to format (%s): %s"),
+              cell, fmt_to_string (fmt, buf), m);
+         free (cell);
+       }
+      free (m);
+    }
  }
  
  struct var_spec
@@ -459,6 +530,7 @@ struct var_spec
    char *name;
    int width;
    xmlChar *first_value;
+  int first_type;
  };
  
  
@@ -467,7 +539,7 @@ gnumeric_error_handler (void *ctx, const char *mesg,
                         UNUSED xmlParserSeverities sev, xmlTextReaderLocatorPtr loc)
  {
    struct gnumeric_reader *r = ctx;
-       
+
    msg (MW, _("There was a problem whilst reading the %s file `%s' (near line %d): `%s'"),
         "Gnumeric",
         r->spreadsheet.file_name,
@@ -477,8 +549,8 @@ gnumeric_error_handler (void *ctx, const char *mesg,
  
  static struct gnumeric_reader *
  gnumeric_reopen (struct gnumeric_reader *r, const char *filename, bool show_errors)
-{  
-  int ret;
+{
+  int ret = -1;
    struct state_data *sd;
  
    xmlTextReaderPtr xtr;
@@ -514,15 +586,15 @@ gnumeric_reopen (struct gnumeric_reader *r, const char *filename, bool show_erro
      {
        r = xzalloc (sizeof *r);
        r->spreadsheet.n_sheets = -1;
-      r->spreadsheet.file_name = filename;
+      r->spreadsheet.file_name = strdup (filename);
        sd = &r->msd;
      }
    else
      {
        sd = &r->rsd;
      }
-  
-  if (show_errors) 
+
+  if (show_errors)
      xmlTextReaderSetErrorHandler (xtr, gnumeric_error_handler, r);
  
    r->target_sheet = NULL;
@@ -531,7 +603,8 @@ gnumeric_reopen (struct gnumeric_reader *r, const char *filename, bool show_erro
    sd->row = sd->col = -1;
    sd->state = STATE_PRE_INIT;
    sd->xtr = xtr;
-  r->ref_cnt++;
+  r->spreadsheet.ref_cnt++;
+
  
    /* Advance to the start of the workbook.
       This gives us some confidence that we are actually dealing with a gnumeric
@@ -547,8 +620,7 @@ gnumeric_reopen (struct gnumeric_reader *r, const char *filename, bool show_erro
    if ( ret != 1)
      {
        /* Does not seem to be a gnumeric file */
-      xmlFreeTextReader (sd->xtr);
-      free (r);
+      gnumeric_unref (&r->spreadsheet);
        return NULL;
      }
  
@@ -561,7 +633,7 @@ gnumeric_reopen (struct gnumeric_reader *r, const char *filename, bool show_erro
  
        if ( XML_CHAR_ENCODING_UTF8 != xce)
         {
-         /* I have been told that ALL gnumeric files are UTF8 encoded.  If that is correct, this 
+         /* I have been told that ALL gnumeric files are UTF8 encoded.  If that is correct, this
              can never happen. */
           msg (MW, _("The gnumeric file `%s' is encoded as %s instead of the usual UTF-8 encoding. "
                      "Any non-ascii characters will be incorrectly imported."),
@@ -587,6 +659,7 @@ struct casereader *
  gnumeric_make_reader (struct spreadsheet *spreadsheet,
                       const struct spreadsheet_read_options *opts)
  {
+  int type = 0;
    int x = 0;
    struct gnumeric_reader *r = NULL;
    unsigned long int vstart = 0;
@@ -598,8 +671,7 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet,
  
    r = (struct gnumeric_reader *) (spreadsheet);
  
-  if (r->rsd.row != -1)
-    r = gnumeric_reopen (r, NULL, true);
+  r = gnumeric_reopen (r, NULL, true);
  
    if ( opts->cell_range )
      {
@@ -624,6 +696,8 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet,
    r->target_sheet_index = opts->sheet_index;
    r->rsd.row = r->rsd.col = -1;
    r->rsd.current_sheet = -1;
+  r->first_case = NULL;
+  r->proto = NULL;
  
    /* Advance to the start of the cells for the target sheet */
    while ( (r->rsd.state != STATE_CELL || r->rsd.row < r->start_row )
@@ -653,6 +727,7 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet,
        n_cases --;
      }
  
+
    /* Read in the first row of cells,
       including the headers if read_names was set */
    while (
@@ -661,9 +736,29 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet,
          )
      {
        int idx;
+
+      if (r->rsd.state == STATE_CELL && r->rsd.node_type == XML_READER_TYPE_TEXT)
+       {
+         xmlChar *attr =
+           xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("ValueType"));
+
+         type  =  _xmlchar_to_int (attr);
+
+         xmlFree (attr);
+       }
+
        process_node (r, &r->rsd);
  
-      if ( r->rsd.row > r->start_row ) break;
+      if ( r->rsd.row > r->start_row )
+       {
+         xmlChar *attr =
+           xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("ValueType"));
+
+         r->vtype  =  _xmlchar_to_int (attr);
+
+         xmlFree (attr);
+         break;
+       }
  
        if ( r->rsd.col < r->start_col ||
            (r->stop_col != -1 && r->rsd.col > r->stop_col))
@@ -680,10 +775,13 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet,
             var_spec [i].name = NULL;
             var_spec [i].width = -1;
             var_spec [i].first_value = NULL;
+           var_spec [i].first_type = -1;
           }
           n_var_specs =  idx + 1 ;
         }
  
+      var_spec [idx].first_type = type;
+
        if ( r->rsd.node_type == XML_READER_TYPE_TEXT )
         {
           xmlChar *value = xmlTextReaderValue (r->rsd.xtr);
@@ -715,7 +813,7 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet,
               xmlChar *attr =
                 xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("ValueType"));
  
-             if ( NULL == attr || 60 !=  _xmlchar_to_int (attr))
+             if ( NULL == attr || VALUE_STRING !=  _xmlchar_to_int (attr))
                 var_spec [idx].width = 0;
  
               free (attr);
@@ -773,7 +871,10 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet,
        var = dict_get_var (r->dict, x++);
  
        convert_xml_string_to_value (r->first_case, var,
-                                  var_spec[i].first_value);
+                                  var_spec[i].first_value,
+                                  var_spec[i].first_type,
+                                  r->rsd.col + i - 1,
+                                  r->rsd.row - 1);
      }
  
    for ( i = 0 ; i < n_var_specs ; ++i )
@@ -783,7 +884,7 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet,
      }
  
    free (var_spec);
-  
+
  
    return casereader_create_sequential
      (NULL,
@@ -800,8 +901,6 @@ gnumeric_make_reader (struct spreadsheet *spreadsheet,
      }
  
    free (var_spec);
-  dict_destroy (spreadsheet->dict);
-  spreadsheet->dict = NULL;
  
    gnm_file_casereader_destroy (NULL, r);
  
@@ -832,11 +931,22 @@ gnm_file_casereader_read (struct casereader *reader UNUSED, void *r_)
    if (r->start_col == -1)
      r->start_col = r->rsd.min_col;
  
+
    while ((r->rsd.state == STATE_CELL || r->rsd.state == STATE_CELLS_START )
          && r->rsd.row == current_row && (ret = xmlTextReaderRead (r->rsd.xtr)))
      {
        process_node (r, &r->rsd);
  
+      if (r->rsd.state == STATE_CELL && r->rsd.node_type == XML_READER_TYPE_ELEMENT)
+       {
+         xmlChar *attr =
+           xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("ValueType"));
+
+         r->vtype  = _xmlchar_to_int (attr);
+
+         xmlFree (attr);
+       }
+
        if ( r->rsd.col < r->start_col || (r->stop_col != -1 &&
                                      r->rsd.col > r->stop_col))
         continue;
@@ -847,17 +957,17 @@ gnm_file_casereader_read (struct casereader *reader UNUSED, void *r_)
        if ( r->stop_row != -1 && r->rsd.row > r->stop_row)
         break;
  
+
        if ( r->rsd.node_type == XML_READER_TYPE_TEXT )
         {
           xmlChar *value = xmlTextReaderValue (r->rsd.xtr);
-
           const int idx = r->rsd.col - r->start_col;
-
           const struct variable *var = dict_get_var (r->dict, idx);
  
-         convert_xml_string_to_value (c, var, value);
+         convert_xml_string_to_value (c, var, value, r->vtype,
+                                      r->rsd.col, r->rsd.row);
  
-         free (value);
+         xmlFree (value);
         }
      }
  
@@ -871,4 +981,4 @@ gnm_file_casereader_read (struct casereader *reader UNUSED, void *r_)
  }
  
  
-#endif /* GNM_SUPPORT */
+#endif /* GNM_READ_SUPPORT */