Fixed a bug reading gnumeric files. 20130217030503/pspp
author John Darrington <john@darrington.wattle.id.au>
Sat, 16 Feb 2013 13:44:31 +0000 (14:44 +0100)
committer John Darrington <john@darrington.wattle.id.au>
Sat, 16 Feb 2013 13:44:31 +0000 (14:44 +0100)
Importing Gnumeric spreadsheets would fail an assertion if there were empty columns at the start of the sheet.

src/data/gnumeric-reader.c
tests/language/data-io/get-data-spreadsheet.at

index 29f7ae517f0a8d7543bff3612e8247ac877c43b4..459eeb7e8f5a6787420458be05a2c1faa6e5e747 100644 (file)
@@ -90,6 +90,7 @@ struct gnumeric_reader
   enum reader_state state;
   int row;
   int col;
+  int min_col;
   int node_type;
   int sheet_index;
 
@@ -181,6 +182,7 @@ process_node (struct gnumeric_reader *r)
       if (0 == xmlStrcasecmp (name, _xml("gnm:Cells"))  &&
          XML_READER_TYPE_ELEMENT  == r->node_type)
        {
+         r->min_col = INT_MAX;
          if (! xmlTextReaderIsEmptyElement (r->xtr))
            r->state = STATE_CELLS_START;
        }
@@ -212,6 +214,9 @@ process_node (struct gnumeric_reader *r)
          r->col =  _xmlchar_to_int (attr);
          free (attr);
 
+         if (r->col < r->min_col)
+           r->min_col = r->col;
+
          attr = xmlTextReaderGetAttribute (r->xtr, _xml ("Row"));
          r->row = _xmlchar_to_int (attr);
          free (attr);
@@ -312,7 +317,7 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic
     }
   else
     {
-      r->start_col = 0;
+      r->start_col = -1;
       r->start_row = 0;
       r->stop_col = -1;
       r->stop_row = -1;
@@ -373,11 +378,15 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic
 
       if ( idx  >= n_var_specs )
        {
+         int i;
+         var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + 1));
+         for (i = n_var_specs; i <= idx; ++i)
+         {
+           var_spec [i].name = NULL;
+           var_spec [i].width = -1;
+           var_spec [i].first_value = NULL;
+         }
          n_var_specs =  idx + 1 ;
-         var_spec = xrealloc (var_spec, sizeof (*var_spec) * n_var_specs);
-         var_spec [idx].name = NULL;
-         var_spec [idx].width = -1;
-         var_spec [idx].first_value = NULL;
        }
 
       if ( r->node_type == XML_READER_TYPE_TEXT )
@@ -431,6 +440,9 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic
     {
       char *name;
 
+      if ( (var_spec[i].name == NULL) && (var_spec[i].first_value == NULL))
+       continue;
+
       /* Probably no data exists for this variable, so allocate a
         default width */
       if ( var_spec[i].width == -1 )
@@ -455,9 +467,13 @@ gnumeric_open_reader (struct spreadsheet_read_info *gri, struct dictionary **dic
   r->first_case = case_create (r->proto);
   case_set_missing (r->first_case);
 
+  int x = 0;
   for ( i = 0 ; i < n_var_specs ; ++i )
     {
-      const struct variable *var = dict_get_var (r->dict, i);
+      if ( (var_spec[i].name == NULL) && (var_spec[i].first_value == NULL))
+       continue;
+
+      const struct variable *var = dict_get_var (r->dict, x++);
 
       convert_xml_string_to_value (r->first_case, var,
                                   var_spec[i].first_value);
@@ -515,6 +531,9 @@ gnm_file_casereader_read (struct casereader *reader UNUSED, void *r_)
   c = case_create (r->proto);
   case_set_missing (c);
 
+  if (r->start_col == -1)
+    r->start_col = r->min_col;
+
   while ((r->state == STATE_CELL || r->state == STATE_CELLS_START )
         && r->row == current_row && (ret = xmlTextReaderRead (r->xtr)))
     {
index c9060aef5f7a4bf29c0604dab0e2165bca149772..147458f378908ba78404a3a4c5394e77376f8c96 100644 (file)
@@ -322,6 +322,59 @@ VAR001,VAR002,VAR003
 
 AT_CLEANUP
 
+
+dnl Check for a bug where certain gnumeric files failed an assertion
+AT_SETUP([GET DATA /TYPE=GNM assert-fail])
+AT_DATA([read.sps],[dnl
+GET DATA 
+       /TYPE=GNM
+       /FILE='crash.gnumeric' 
+       .
+list.
+])
+
+
+AT_DATA([crash.gnumeric],[dnl
+<?xml version="1.0" encoding="UTF-8"?>
+<gnm:Workbook xmlns:gnm="http://www.gnumeric.org/v10.dtd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.gnumeric.org/v9.xsd">
+  <office:document-meta xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:ooo="http://openoffice.org/2004/office" office:version="1.1">
+  </office:document-meta>
+  <gnm:SheetNameIndex>
+    <gnm:SheetName gnm:Cols="256" gnm:Rows="65536">Sheet1</gnm:SheetName>
+  </gnm:SheetNameIndex>
+  <gnm:Sheets>
+    <gnm:Sheet DisplayFormulas="0" HideZero="0" HideGrid="0" HideColHeader="0" HideRowHeader="0" DisplayOutlines="1" OutlineSymbolsBelow="1" OutlineSymbolsRight="1" Visibility="GNM_SHEET_VISIBILITY_VISIBLE" GridColor="0:0:0">
+      <gnm:Name>Sheet1</gnm:Name>
+      <gnm:MaxCol>2</gnm:MaxCol>
+      <gnm:MaxRow>4</gnm:MaxRow>
+      <gnm:Styles>
+        <gnm:StyleRegion startCol="0" startRow="0" endCol="255" endRow="65535">
+          <gnm:Style HAlign="1" VAlign="2" WrapText="0" ShrinkToFit="0" Rotation="0" Shade="0" Indent="0" Locked="1" Hidden="0" Fore="0:0:0" Back="FFFF:FFFF:FFFF" PatternColor="0:0:0" Format="General">
+          </gnm:Style>
+        </gnm:StyleRegion>
+      </gnm:Styles>
+      <gnm:Cells>
+        <gnm:Cell Row="1" Col="1" ValueType="60">one</gnm:Cell>
+        <gnm:Cell Row="1" Col="2" ValueType="60">two</gnm:Cell>
+        <gnm:Cell Row="2" Col="1" ValueType="40">1</gnm:Cell>
+        <gnm:Cell Row="2" Col="2" ValueType="40">2</gnm:Cell>
+        <gnm:Cell Row="3" Col="1" ValueType="40">1</gnm:Cell>
+        <gnm:Cell Row="3" Col="2" ValueType="40">2</gnm:Cell>
+        <gnm:Cell Row="4" Col="1" ValueType="40">1</gnm:Cell>
+        <gnm:Cell Row="4" Col="2" ValueType="40">2</gnm:Cell>
+      </gnm:Cells>
+    </gnm:Sheet>
+  </gnm:Sheets>
+</gnm:Workbook>
+])
+
+AT_CHECK([pspp -O format=csv read.sps], [0], [ignore])
+
+
+AT_CLEANUP
+
+
+
 AT_BANNER([GET DATA Spreadsheet /TYPE=ODS])
 
 CHECK_SPREADSHEET_READER([ODS])