pintos-os.org Git - pspp/blob - src/data/ods-reader.c

   1 /* PSPP - a program for statistical analysis.
   2    Copyright (C) 2011, 2012, 2013, 2016 Free Software Foundation, Inc.
   3
   4    This program is free software: you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation, either version 3 of the License, or
   7    (at your option) any later version.
   8
   9    This program is distributed in the hope that it will be useful,
  10    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12    GNU General Public License for more details.
  13
  14    You should have received a copy of the GNU General Public License
  15    along with this program.  If not, see <http://www.gnu.org/licenses/>. */
  16
  17 #include <config.h>
  18
  19 #include "libpspp/message.h"
  20 #include "libpspp/misc.h"
  21 #include "libpspp/assertion.h"
  22
  23 #include "data/data-in.h"
  24
  25 #include "gl/c-strtod.h"
  26 #include "gl/minmax.h"
  27
  28 #include "gettext.h"
  29 #define _(msgid) gettext (msgid)
  30 #define N_(msgid) (msgid)
  31
  32 #include "ods-reader.h"
  33 #include "spreadsheet-reader.h"
  34
  35 #if !ODF_READ_SUPPORT
  36
  37 struct casereader *
  38 ods_open_reader (const struct spreadsheet_read_options *opts,
  39                  struct dictionary **dict)
  40 {
  41   msg (ME, _("Support for %s files was not compiled into this installation of PSPP"), "OpenDocument");
  42
  43   return NULL;
  44 }
  45
  46 struct casereader *
  47 ods_make_reader (struct spreadsheet *spreadsheet,
  48                  const struct spreadsheet_read_options *opts)
  49 {
  50   return NULL;
  51 }
  52
  53
  54 void
  55 ods_unref (struct spreadsheet *r)
  56 {
  57 }
  58
  59 #else
  60
  61 #include "libpspp/zip-reader.h"
  62
  63
  64 #include <assert.h>
  65 #include <stdbool.h>
  66 #include <errno.h>
  67 #include <libxml/xmlreader.h>
  68 #include <zlib.h>
  69
  70 #include "data/format.h"
  71 #include "data/case.h"
  72 #include "data/casereader-provider.h"
  73 #include "data/dictionary.h"
  74 #include "data/identifier.h"
  75 #include "data/value.h"
  76 #include "data/variable.h"
  77 #include "libpspp/i18n.h"
  78 #include "libpspp/str.h"
  79
  80 #include "gl/xalloc.h"
  81
/* Casereader callbacks for ODS files.  Both are defined later in this
   file; they are declared here so that the class table below can
   refer to them. */
static void ods_file_casereader_destroy (struct casereader *, void *);
static struct ccase *ods_file_casereader_read (struct casereader *, void *);


/* Casereader class for ODS spreadsheets.  Only the first two slots
   (read and destroy) are filled in; the remaining two are left null. */
static const struct casereader_class ods_file_casereader_class =
  {
    ods_file_casereader_read,
    ods_file_casereader_destroy,
    NULL,
    NULL,
  };
  93
/* The name and the occupied extent of one sheet.

   process_node() creates an entry the first time it meets a sheet,
   with all four bounds set to -1, and then updates the bounds
   whenever it processes cell content in that sheet.  The bounds are
   zero-based. */
struct sheet_detail
{
  /* The name of the sheet (utf8 encoding) */
  char *name;

  int start_col;   /* Lowest column in which content was seen, or -1. */
  int stop_col;    /* Highest column in which content was seen, or -1. */
  int start_row;   /* First row in which content was seen, or -1. */
  int stop_row;    /* Most recent row in which content was seen, or -1. */
};
 104
 105
/* States of the parser implemented by process_node().

   The enumerators are ordered from the outermost level of the
   document to the innermost, and code in this file relies on that
   order (it compares states with `<' and `<='), so do not reorder
   them. */
enum reader_state
  {
    STATE_INIT = 0,        /* Initial state: not yet inside office:spreadsheet */
    STATE_SPREADSHEET,     /* Inside office:spreadsheet, between sheets */
    STATE_TABLE,           /* Inside a table:table (any sheet), between rows */
    STATE_ROW,             /* Inside a non-empty table:table-row */
    STATE_CELL,            /* Inside a non-empty table:table-cell */
    STATE_CELL_CONTENT     /* Inside a non-empty text:p within a cell */
  };
 115
/* The parsing state associated with one pass over content.xml.
   process_node() updates it each time the text reader advances. */
struct state_data
{
  xmlTextReaderPtr xtr;         /* libxml2 text reader for this pass. */
  int node_type;                /* Type of the node most recently processed. */
  enum reader_state state;      /* Current position in the document. */
  int row;                      /* Rows counted so far in the current sheet,
                                   including repeated rows. */
  int col;                      /* Columns counted so far in the current row,
                                   including repeated columns. */
  int current_sheet;            /* Zero-based index of the current sheet
                                   (-1 once office:spreadsheet has been
                                   entered but before the first sheet). */
  xmlChar *current_sheet_name;  /* table:name of the current sheet. */

  int col_span;                 /* Repeat count of the most recent cell
                                   (1 if it had no repeat attribute). */
};
 128
 129 static void
 130 state_data_destroy (struct state_data *sd)
 131 {
 132   xmlFree (sd->current_sheet_name);
 133   sd->current_sheet_name = NULL;
 134
 135   xmlFreeTextReader (sd->xtr);
 136   sd->xtr = NULL;
 137 }
 138
/* An OpenDocument spreadsheet that is open for reading. */
struct ods_reader
{
  /* Generic spreadsheet data.  This must remain the first member:
     the code in this file casts between `struct spreadsheet *' and
     `struct ods_reader *'. */
  struct spreadsheet spreadsheet;

  /* Reader for the zip container holding the document. */
  struct zip_reader *zreader;

  /* The sheet to read.  A sheet is selected if its name equals
     TARGET_SHEET_NAME (when that is non-null) or if its one-based
     position equals TARGET_SHEET_INDEX. */
  int target_sheet_index;
  xmlChar *target_sheet_name;

  /* State data for the meta data */
  struct state_data msd;

  /* State data for the reader */
  struct state_data rsd;

  /* The range of cells to read.  A stop value of -1 means that no
     limit was given. */
  int start_row;
  int start_col;
  int stop_row;
  int stop_col;

  /* Details of the sheets discovered so far, and how many there are. */
  struct sheet_detail *sheets;
  int n_allocated_sheets;

  struct caseproto *proto;      /* Prototype of the cases produced. */
  struct dictionary *dict;      /* Dictionary built from the sheet. */
  struct ccase *first_case;     /* First data row, read ahead of time. */
  bool used_first_case;         /* Has FIRST_CASE been handed out yet? */
  bool read_names;              /* Copied from the read_names option. */

  /* Text reported as an error message, if non-empty, when the
     casereader is destroyed. */
  struct string ods_errs;

  /* Error messages from the zip reader. */
  struct string zip_errs;
};
 171
 172 void
 173 ods_unref (struct spreadsheet *s)
 174 {
 175   struct ods_reader *r = (struct ods_reader *) s;
 176
 177   if (--s->ref_cnt == 0)
 178     {
 179       int i;
 180
 181       state_data_destroy (&r->msd);
 182       for (i = 0; i < r->n_allocated_sheets; ++i)
 183         {
 184           xmlFree (r->sheets[i].name);
 185         }
 186
 187       dict_destroy (r->dict);
 188
 189       zip_reader_destroy (r->zreader);
 190       free (r->sheets);
 191       free (s->file_name);
 192       free (r);
 193     }
 194 }
 195
 196
 197
 198 static bool
 199 reading_target_sheet (const struct ods_reader *r, const struct state_data *msd)
 200 {
 201   if (r->target_sheet_name != NULL)
 202     {
 203       if ( 0 == xmlStrcmp (r->target_sheet_name, msd->current_sheet_name))
 204         return true;
 205     }
 206
 207   if (r->target_sheet_index == msd->current_sheet + 1)
 208     return true;
 209
 210   return false;
 211 }
 212
 213
/* Forward declaration; the definition appears later in this file. */
static void process_node (struct ods_reader *or, struct state_data *r);
 215
 216
 217 const char *
 218 ods_get_sheet_name (struct spreadsheet *s, int n)
 219 {
 220   struct ods_reader *r = (struct ods_reader *) s;
 221   struct state_data *or = &r->msd;
 222
 223   assert (n < s->n_sheets);
 224
 225   while (
 226           (r->n_allocated_sheets <= n)
 227           || or->state != STATE_SPREADSHEET
 228           )
 229     {
 230       int ret = xmlTextReaderRead (or->xtr);
 231       if ( ret != 1)
 232         break;
 233
 234       process_node (r, or);
 235     }
 236
 237   return r->sheets[n].name;
 238 }
 239
 240 char *
 241 ods_get_sheet_range (struct spreadsheet *s, int n)
 242 {
 243   struct ods_reader *r = (struct ods_reader *) s;
 244   struct state_data *or = &r->msd;
 245
 246   assert (n < s->n_sheets);
 247
 248   while (
 249           (r->n_allocated_sheets <= n)
 250           || (r->sheets[n].stop_row == -1)
 251           || or->state != STATE_SPREADSHEET
 252           )
 253     {
 254       int ret = xmlTextReaderRead (or->xtr);
 255       if ( ret != 1)
 256         break;
 257
 258       process_node (r, or);
 259     }
 260
 261   return create_cell_range (
 262                           r->sheets[n].start_col,
 263                           r->sheets[n].start_row,
 264                           r->sheets[n].stop_col,
 265                           r->sheets[n].stop_row);
 266 }
 267
 268
 269 static void
 270 ods_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
 271 {
 272   struct ods_reader *r = r_;
 273   if ( r == NULL)
 274     return ;
 275
 276   state_data_destroy (&r->rsd);
 277
 278   if ( ! ds_is_empty (&r->ods_errs))
 279     msg (ME, "%s", ds_cstr (&r->ods_errs));
 280
 281   ds_destroy (&r->ods_errs);
 282
 283   if ( r->first_case && ! r->used_first_case )
 284     case_unref (r->first_case);
 285
 286
 287   caseproto_unref (r->proto);
 288   r->proto = NULL;
 289
 290   xmlFree (r->target_sheet_name);
 291   r->target_sheet_name = NULL;
 292
 293
 294   ods_unref (&r->spreadsheet);
 295 }
 296
 297
 298
 299
 300
/* Advances the state machine in R by one step, according to the node
   on which R's text reader is currently positioned.  The caller is
   expected to have just called xmlTextReaderRead on R->xtr.

   Besides updating R (node type, state, row and column counters,
   current sheet), this records newly discovered sheets and the
   extent of their contents in OR->sheets.

   Element names are compared without regard to case. */
static void
process_node (struct ods_reader *or, struct state_data *r)
{
  xmlChar *name = xmlTextReaderName (r->xtr);
  /* Substitute a placeholder so that the comparisons below always have
     a string to work with. */
  if (name == NULL)
    name = xmlStrdup (_xml ("--"));


  r->node_type = xmlTextReaderNodeType (r->xtr);

  switch (r->state)
    {
    case STATE_INIT:
      /* Wait for the start of the spreadsheet body. */
      if (0 == xmlStrcasecmp (name, _xml("office:spreadsheet")) &&
          XML_READER_TYPE_ELEMENT  == r->node_type)
        {
          r->state = STATE_SPREADSHEET;
          r->current_sheet = -1;
          r->current_sheet_name = NULL;
        }
      break;
    case STATE_SPREADSHEET:
      if (0 == xmlStrcasecmp (name, _xml("table:table"))
          &&
          (XML_READER_TYPE_ELEMENT == r->node_type))
        {
          /* Start of a sheet: remember its name and position. */
          xmlFree (r->current_sheet_name);
          r->current_sheet_name = xmlTextReaderGetAttribute (r->xtr, _xml ("table:name"));

          ++r->current_sheet;

          /* First time this sheet has been seen by any pass: append an
             entry for it, with its extent marked as unknown (-1). */
          if (r->current_sheet >= or->n_allocated_sheets)
            {
              assert (r->current_sheet == or->n_allocated_sheets);
              or->sheets = xrealloc (or->sheets, sizeof (*or->sheets) * ++or->n_allocated_sheets);
              or->sheets[or->n_allocated_sheets - 1].start_col = -1;
              or->sheets[or->n_allocated_sheets - 1].stop_col = -1;
              or->sheets[or->n_allocated_sheets - 1].start_row = -1;
              or->sheets[or->n_allocated_sheets - 1].stop_row = -1;
              or->sheets[or->n_allocated_sheets - 1].name = CHAR_CAST (char *, xmlStrdup (r->current_sheet_name));
            }

          r->col = 0;
          r->row = 0;

          r->state = STATE_TABLE;
        }
      else if (0 == xmlStrcasecmp (name, _xml("office:spreadsheet")) &&
               XML_READER_TYPE_ELEMENT  == r->node_type)
        {
          /* NOTE(review): this tests for a start element, not an end
             element; confirm whether END_ELEMENT was intended. */
          r->state = STATE_INIT;
        }
      break;
    case STATE_TABLE:
      if (0 == xmlStrcasecmp (name, _xml("table:table-row")) &&
          (XML_READER_TYPE_ELEMENT  == r->node_type))
        {
          /* Start of a row.  A row element may stand for several
             identical rows, so advance by its repeat count. */
          xmlChar *value =
            xmlTextReaderGetAttribute (r->xtr,
                                       _xml ("table:number-rows-repeated"));

          int row_span = value ? _xmlchar_to_int (value) : 1;

          r->row += row_span;
          r->col = 0;

          /* An empty row element has no matching end element to wait
             for, so stay in STATE_TABLE. */
          if (! xmlTextReaderIsEmptyElement (r->xtr))
            r->state = STATE_ROW;

          xmlFree (value);
        }
      else if (0 == xmlStrcasecmp (name, _xml("table:table")) &&
               (XML_READER_TYPE_END_ELEMENT  == r->node_type))
        {
          /* End of the sheet. */
          r->state = STATE_SPREADSHEET;
        }
      break;
    case STATE_ROW:
      if ( (0 == xmlStrcasecmp (name, _xml ("table:table-cell")))
           &&
           (XML_READER_TYPE_ELEMENT  == r->node_type))
        {
          /* Start of a cell.  A cell element may stand for several
             identical cells, so advance by its repeat count; after
             this, r->col - 1 is the zero-based index of the last
             column that the element covers. */
          xmlChar *value =
            xmlTextReaderGetAttribute (r->xtr,
                                       _xml ("table:number-columns-repeated"));

          r->col_span = value ? _xmlchar_to_int (value) : 1;
          r->col += r->col_span;

          /* An empty cell element has no matching end element to wait
             for, so stay in STATE_ROW. */
          if (! xmlTextReaderIsEmptyElement (r->xtr))
            r->state = STATE_CELL;

          xmlFree (value);
        }
      else if ( (0 == xmlStrcasecmp (name, _xml ("table:table-row")))
                &&
                (XML_READER_TYPE_END_ELEMENT  == r->node_type))
        {
          /* End of the row. */
          r->state = STATE_TABLE;
        }
      break;
    case STATE_CELL:
      if ( (0 == xmlStrcasecmp (name, _xml("text:p")))
            &&
           ( XML_READER_TYPE_ELEMENT  == r->node_type))
        {
          /* Start of a paragraph of cell text. */
          if (! xmlTextReaderIsEmptyElement (r->xtr))
            r->state = STATE_CELL_CONTENT;
        }
      else if
        ( (0 == xmlStrcasecmp (name, _xml("table:table-cell")))
          &&
          (XML_READER_TYPE_END_ELEMENT  == r->node_type)
          )
        {
          /* End of the cell. */
          r->state = STATE_ROW;
        }
      break;
    case STATE_CELL_CONTENT:
      assert (r->current_sheet >= 0);
      assert (r->current_sheet < or->n_allocated_sheets);

      /* Every node processed in this state extends the recorded extent
         of the current sheet to include the current cell. */

      /* The first row with content is recorded once only. */
      if (or->sheets[r->current_sheet].start_row == -1)
        or->sheets[r->current_sheet].start_row = r->row - 1;

      /* Track the lowest column with content. */
      if (
          (or->sheets[r->current_sheet].start_col == -1)
          ||
          (or->sheets[r->current_sheet].start_col >= r->col - 1)
           )
        or->sheets[r->current_sheet].start_col = r->col - 1;

      /* The most recent row with content is the last one so far. */
      or->sheets[r->current_sheet].stop_row = r->row - 1;

      /* Track the highest column with content. */
      if ( or->sheets[r->current_sheet].stop_col <  r->col - 1)
        or->sheets[r->current_sheet].stop_col = r->col - 1;

      /* Any end element (its name is not checked) returns to the
         enclosing cell. */
      if (XML_READER_TYPE_END_ELEMENT  == r->node_type)
        r->state = STATE_CELL;
      break;
    default:
      NOT_REACHED ();
      break;
    };

  xmlFree (name);
}
 448
/*
   A struct containing the parameters of a cell's value
   parsed from the xml.

   Each member is either null or a string obtained from libxml2.
*/
struct xml_value
{
  xmlChar *type;    /* The cell's office:value-type attribute. */
  xmlChar *value;   /* The cell's office:value attribute. */
  xmlChar *text;    /* The text node found inside the cell. */
};
 459
/* What ods_make_reader learns about one column before it creates the
   corresponding variable. */
struct var_spec
{
  char *name;                  /* Text of the header cell, if names were
                                  read; otherwise null. */
  struct xml_value firstval;   /* The cell in the first row of data. */
};
 465
 466
 467 /* Determine the width that a xmv should probably have */
 468 static int
 469 xmv_to_width (const struct xml_value *xmv, int fallback)
 470 {
 471   int width = SPREADSHEET_DEFAULT_WIDTH;
 472
 473   /* Non-strings always have zero width */
 474   if (xmv->type != NULL && 0 != xmlStrcmp (xmv->type, _xml("string")))
 475     return 0;
 476
 477   if ( fallback != -1)
 478     return fallback;
 479
 480   if ( xmv->value )
 481     width = ROUND_UP (xmlStrlen (xmv->value),
 482                       SPREADSHEET_DEFAULT_WIDTH);
 483   else if ( xmv->text)
 484     width = ROUND_UP (xmlStrlen (xmv->text),
 485                       SPREADSHEET_DEFAULT_WIDTH);
 486
 487   return width;
 488 }
 489
 490 /*
 491    Sets the VAR of case C, to the value corresponding to the xml data
 492  */
 493 static void
 494 convert_xml_to_value (struct ccase *c, const struct variable *var,
 495                       const struct xml_value *xmv, int col, int row)
 496 {
 497   union value *v = case_data_rw (c, var);
 498
 499   if (xmv->value == NULL && xmv->text == NULL)
 500     value_set_missing (v, var_get_width (var));
 501   else if ( var_is_alpha (var))
 502     /* Use the text field, because it seems that there is no
 503        value field for strings */
 504     value_copy_str_rpad (v, var_get_width (var), xmv->text, ' ');
 505   else
 506     {
 507       const struct fmt_spec *fmt = var_get_write_format (var);
 508       enum fmt_category fc  = fmt_get_category (fmt->type);
 509
 510       assert ( fc != FMT_CAT_STRING);
 511
 512       if ( 0 == xmlStrcmp (xmv->type, _xml("float")))
 513         {
 514           v->f = c_strtod (CHAR_CAST (const char *, xmv->value), NULL);
 515         }
 516       else
 517         {
 518           const char *text = xmv->value ?
 519             CHAR_CAST (const char *, xmv->value) : CHAR_CAST (const char *, xmv->text);
 520
 521           char *m = data_in (ss_cstr (text), "UTF-8",
 522                          fmt->type,
 523                          v,
 524                          var_get_width (var),
 525                          "UTF-8");
 526
 527           if (m)
 528             {
 529               char buf [FMT_STRING_LEN_MAX + 1];
 530               char *cell = create_cell_ref (col, row);
 531
 532               msg (MW, _("Cannot convert the value in the spreadsheet cell %s to format (%s): %s"),
 533                    cell, fmt_to_string (fmt, buf), m);
 534               free (cell);
 535             }
 536           free (m);
 537         }
 538     }
 539 }
 540
 541
 542 /* Try to find out how many sheets there are in the "workbook" */
 543 static int
 544 get_sheet_count (struct zip_reader *zreader)
 545 {
 546   xmlTextReaderPtr mxtr;
 547   struct zip_member *meta = NULL;
 548   meta = zip_member_open (zreader, "meta.xml");
 549
 550   if ( meta == NULL)
 551     return -1;
 552
 553   mxtr = xmlReaderForIO ((xmlInputReadCallback) zip_member_read,
 554                          (xmlInputCloseCallback) NULL,
 555                          meta,   NULL, NULL, 0);
 556
 557   while (1 == xmlTextReaderRead (mxtr))
 558     {
 559       xmlChar *name = xmlTextReaderName (mxtr);
 560       if ( 0 == xmlStrcmp (name, _xml("meta:document-statistic")))
 561         {
 562           xmlChar *attr = xmlTextReaderGetAttribute (mxtr, _xml ("meta:table-count"));
 563
 564           if ( attr != NULL)
 565             {
 566               int s = _xmlchar_to_int (attr);
 567               xmlFreeTextReader (mxtr);
 568               xmlFree (name);
 569               xmlFree (attr);
 570               return s;
 571             }
 572           xmlFree (attr);
 573         }
 574       xmlFree (name);
 575     }
 576
 577   xmlFreeTextReader (mxtr);
 578   return -1;
 579 }
 580
 581 static void
 582 ods_error_handler (void *ctx, const char *mesg,
 583                         UNUSED xmlParserSeverities sev, xmlTextReaderLocatorPtr loc)
 584 {
 585   struct ods_reader *r = ctx;
 586
 587   msg (MW, _("There was a problem whilst reading the %s file `%s' (near line %d): `%s'"),
 588        "ODF",
 589        r->spreadsheet.file_name,
 590        xmlTextReaderLocatorLineNumber (loc),
 591        mesg);
 592 }
 593
 594
 595 static xmlTextReaderPtr
 596 init_reader (struct ods_reader *r, bool report_errors)
 597 {
 598   struct zip_member *content = zip_member_open (r->zreader, "content.xml");
 599   xmlTextReaderPtr xtr;
 600
 601   if ( content == NULL)
 602     return NULL;
 603
 604   xtr = xmlReaderForIO ((xmlInputReadCallback) zip_member_read,
 605                         (xmlInputCloseCallback) NULL,
 606                         content,   NULL, NULL,
 607                         report_errors ? 0 : (XML_PARSE_NOERROR | XML_PARSE_NOWARNING) );
 608
 609   if ( xtr == NULL)
 610     return false;
 611
 612
 613   r->spreadsheet.type = SPREADSHEET_ODS;
 614
 615   if (report_errors)
 616     xmlTextReaderSetErrorHandler (xtr, ods_error_handler, r);
 617
 618   return xtr;
 619 }
 620
 621
 622
 623 struct spreadsheet *
 624 ods_probe (const char *filename, bool report_errors)
 625 {
 626   int sheet_count;
 627   struct ods_reader *r = xzalloc (sizeof *r);
 628   xmlTextReaderPtr xtr;
 629   struct zip_reader *zr;
 630
 631   ds_init_empty (&r->zip_errs);
 632
 633   zr = zip_reader_create (filename, &r->zip_errs);
 634
 635   if (zr == NULL)
 636     {
 637       if (report_errors)
 638         {
 639           msg (ME, _("Cannot open %s as a OpenDocument file: %s"),
 640                filename, ds_cstr (&r->zip_errs));
 641         }
 642       ds_destroy (&r->zip_errs);
 643       free (r);
 644       return NULL;
 645     }
 646
 647   sheet_count = get_sheet_count (zr);
 648
 649   r->zreader = zr;
 650   r->spreadsheet.ref_cnt = 1;
 651
 652   xtr = init_reader (r, report_errors);
 653   if (xtr == NULL)
 654     {
 655       goto error;
 656     }
 657   r->msd.xtr = xtr;
 658   r->msd.row = 0;
 659   r->msd.col = 0;
 660   r->msd.current_sheet = 0;
 661   r->msd.state = STATE_INIT;
 662
 663
 664   r->spreadsheet.n_sheets = sheet_count;
 665   r->n_allocated_sheets = 0;
 666   r->sheets = NULL;
 667
 668   r->spreadsheet.file_name = strdup (filename);
 669   return &r->spreadsheet;
 670
 671  error:
 672   ds_destroy (&r->zip_errs);
 673   zip_reader_destroy (r->zreader);
 674   free (r);
 675   return NULL;
 676 }
 677
 678 struct casereader *
 679 ods_make_reader (struct spreadsheet *spreadsheet,
 680                  const struct spreadsheet_read_options *opts)
 681 {
 682   intf ret = 0;
 683   xmlChar *type = NULL;
 684   unsigned long int vstart = 0;
 685   casenumber n_cases = CASENUMBER_MAX;
 686   int i;
 687   struct var_spec *var_spec = NULL;
 688   int n_var_specs = 0;
 689   xmlTextReaderPtr xtr;
 690
 691   struct ods_reader *r = (struct ods_reader *) spreadsheet;
 692   xmlChar *val_string = NULL;
 693
 694   assert (r);
 695   r->read_names = opts->read_names;
 696   ds_init_empty (&r->ods_errs);
 697   ++r->spreadsheet.ref_cnt;
 698
 699   xtr = init_reader (r, true);
 700   if ( xtr == NULL)
 701     goto error;
 702
 703   r->rsd.xtr = xtr;
 704   r->rsd.row = 0;
 705   r->rsd.col = 0;
 706   r->rsd.current_sheet = 0;
 707   r->rsd.state = STATE_INIT;
 708
 709   r->used_first_case = false;
 710   r->first_case = NULL;
 711
 712   if (opts->cell_range)
 713     {
 714       if ( ! convert_cell_ref (opts->cell_range,
 715                                &r->start_col, &r->start_row,
 716                                &r->stop_col, &r->stop_row))
 717         {
 718           msg (SE, _("Invalid cell range `%s'"),
 719                opts->cell_range);
 720           goto error;
 721         }
 722     }
 723   else
 724     {
 725       r->start_col = 0;
 726       r->start_row = 0;
 727       r->stop_col = -1;
 728       r->stop_row = -1;
 729     }
 730
 731   r->target_sheet_name = xmlStrdup (BAD_CAST opts->sheet_name);
 732   r->target_sheet_index = opts->sheet_index;
 733
 734   /* Advance to the start of the cells for the target sheet */
 735   while ( ! reading_target_sheet (r, &r->rsd)
 736           || r->rsd.state != STATE_ROW || r->rsd.row <= r->start_row )
 737     {
 738       if (1 != (ret = xmlTextReaderRead (r->rsd.xtr)))
 739            break;
 740
 741       process_node (r, &r->rsd);
 742     }
 743
 744   if (ret < 1)
 745     {
 746       msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."),
 747            spreadsheet->file_name);
 748       goto error;
 749     }
 750
 751   if ( opts->read_names)
 752     {
 753       while (1 == xmlTextReaderRead (r->rsd.xtr))
 754         {
 755           int idx;
 756
 757           process_node (r, &r->rsd);
 758
 759           /* If the row is finished then stop for now */
 760           if (r->rsd.state == STATE_TABLE && r->rsd.row > r->start_row)
 761             break;
 762
 763           idx = r->rsd.col - r->start_col -1 ;
 764
 765           if ( idx < 0)
 766             continue;
 767
 768           if (r->stop_col != -1 && idx > r->stop_col - r->start_col)
 769             continue;
 770
 771           if (r->rsd.state == STATE_CELL_CONTENT
 772               &&
 773               XML_READER_TYPE_TEXT  == r->rsd.node_type)
 774             {
 775               xmlChar *value = xmlTextReaderValue (r->rsd.xtr);
 776
 777               if ( idx >= n_var_specs)
 778                 {
 779                   var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + 1));
 780
 781                   /* xrealloc (unlike realloc) doesn't initialise its memory to 0 */
 782                   memset (var_spec + n_var_specs,
 783                           0,
 784                           (idx - n_var_specs + 1) * sizeof (*var_spec));
 785                   n_var_specs = idx + 1;
 786                 }
 787               var_spec[idx].firstval.text = 0;
 788               var_spec[idx].firstval.value = 0;
 789               var_spec[idx].firstval.type = 0;
 790
 791               var_spec [idx].name = strdup (CHAR_CAST (const char *, value));
 792
 793               xmlFree (value);
 794             }
 795         }
 796     }
 797
 798   /* Read in the first row of data */
 799   while (1 == xmlTextReaderRead (r->rsd.xtr))
 800     {
 801       int idx;
 802       process_node (r, &r->rsd);
 803
 804       if ( ! reading_target_sheet (r, &r->rsd) )
 805         break;
 806
 807       /* If the row is finished then stop for now */
 808       if (r->rsd.state == STATE_TABLE &&
 809           r->rsd.row > r->start_row + (opts->read_names ? 1 : 0))
 810         break;
 811
 812       idx = r->rsd.col - r->start_col - 1;
 813       if (idx < 0)
 814         continue;
 815
 816       if (r->stop_col != -1 && idx > r->stop_col - r->start_col)
 817         continue;
 818
 819       if ( r->rsd.state == STATE_CELL &&
 820            XML_READER_TYPE_ELEMENT  == r->rsd.node_type)
 821         {
 822           type = xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("office:value-type"));
 823           val_string = xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("office:value"));
 824         }
 825
 826       if ( r->rsd.state == STATE_CELL_CONTENT &&
 827            XML_READER_TYPE_TEXT  == r->rsd.node_type)
 828         {
 829           if (idx >= n_var_specs)
 830             {
 831               var_spec = xrealloc (var_spec, sizeof (*var_spec) * (idx + 1));
 832               memset (var_spec + n_var_specs,
 833                       0,
 834                       (idx - n_var_specs + 1) * sizeof (*var_spec));
 835
 836               var_spec [idx].name = NULL;
 837               n_var_specs = idx + 1;
 838             }
 839
 840           var_spec [idx].firstval.type = type;
 841           var_spec [idx].firstval.text = xmlTextReaderValue (r->rsd.xtr);
 842           var_spec [idx].firstval.value = val_string;
 843
 844           val_string = NULL;
 845           type = NULL;
 846         }
 847     }
 848
 849
 850   /* Create the dictionary and populate it */
 851   r->spreadsheet.dict = r->dict = dict_create (
 852     CHAR_CAST (const char *, xmlTextReaderConstEncoding (r->rsd.xtr)));
 853
 854   for (i = 0; i < n_var_specs ; ++i )
 855     {
 856       struct fmt_spec fmt;
 857       struct variable *var = NULL;
 858       char *name = dict_make_unique_var_name (r->dict, var_spec[i].name, &vstart);
 859       int width  = xmv_to_width (&var_spec[i].firstval, opts->asw);
 860       dict_create_var (r->dict, name, width);
 861       free (name);
 862
 863       var = dict_get_var (r->dict, i);
 864
 865       if ( 0 == xmlStrcmp (var_spec[i].firstval.type, _xml("date")))
 866         {
 867           fmt.type = FMT_DATE;
 868           fmt.d = 0;
 869           fmt.w = 20;
 870         }
 871       else
 872         fmt = fmt_default_for_width (width);
 873
 874       var_set_both_formats (var, &fmt);
 875     }
 876
 877   if ( n_var_specs ==  0 )
 878     {
 879       msg (MW, _("Selected sheet or range of spreadsheet `%s' is empty."),
 880            spreadsheet->file_name);
 881       goto error;
 882     }
 883
 884   /* Create the first case, and cache it */
 885   r->proto = caseproto_ref (dict_get_proto (r->dict));
 886   r->first_case = case_create (r->proto);
 887   case_set_missing (r->first_case);
 888
 889   for (i = 0 ; i < n_var_specs; ++i)
 890     {
 891       const struct variable *var = dict_get_var (r->dict, i);
 892
 893       convert_xml_to_value (r->first_case, var,  &var_spec[i].firstval,
 894                             r->rsd.col - n_var_specs + i,
 895                             r->rsd.row - 1);
 896     }
 897
 898   /* Read in the first row of data */
 899   while (1 == xmlTextReaderRead (r->rsd.xtr))
 900     {
 901       process_node (r, &r->rsd);
 902
 903       if (r->rsd.state == STATE_ROW)
 904         break;
 905     }
 906
 907
 908   for ( i = 0 ; i < n_var_specs ; ++i )
 909     {
 910       free (var_spec[i].firstval.type);
 911       free (var_spec[i].firstval.value);
 912       free (var_spec[i].firstval.text);
 913       free (var_spec[i].name);
 914     }
 915
 916   free (var_spec);
 917
 918
 919   return casereader_create_sequential
 920     (NULL,
 921      r->proto,
 922      n_cases,
 923      &ods_file_casereader_class, r);
 924
 925  error:
 926
 927   for ( i = 0 ; i < n_var_specs ; ++i )
 928     {
 929       free (var_spec[i].firstval.type);
 930       free (var_spec[i].firstval.value);
 931       free (var_spec[i].firstval.text);
 932       free (var_spec[i].name);
 933     }
 934
 935   free (var_spec);
 936
 937   ods_file_casereader_destroy (NULL, r);
 938
 939   return NULL;
 940 }
 941
 942
 943 /* Reads and returns one case from READER's file.  Returns a null
 944    pointer on failure. */
 945 static struct ccase *
 946 ods_file_casereader_read (struct casereader *reader UNUSED, void *r_)
 947 {
 948   struct ccase *c = NULL;
 949   struct ods_reader *r = r_;
 950
 951   xmlChar *val_string = NULL;
 952   xmlChar *type = NULL;
 953
 954   if (!r->used_first_case)
 955     {
 956       r->used_first_case = true;
 957       return r->first_case;
 958     }
 959
 960
 961   /* Advance to the start of a row. (If there is one) */
 962   while (r->rsd.state != STATE_ROW
 963          && 1 == xmlTextReaderRead (r->rsd.xtr)
 964          )
 965     {
 966       process_node (r, &r->rsd);
 967     }
 968
 969
 970   if ( ! reading_target_sheet (r, &r->rsd)
 971        ||  r->rsd.state < STATE_TABLE
 972        ||  (r->stop_row != -1 && r->rsd.row > r->stop_row + 1)
 973        )
 974     {
 975       return NULL;
 976     }
 977
 978   c = case_create (r->proto);
 979   case_set_missing (c);
 980
 981   while (1 == xmlTextReaderRead (r->rsd.xtr))
 982     {
 983       process_node (r, &r->rsd);
 984
 985       if ( r->stop_row != -1 && r->rsd.row > r->stop_row + 1)
 986         break;
 987
 988       if (r->rsd.state == STATE_CELL &&
 989            r->rsd.node_type == XML_READER_TYPE_ELEMENT)
 990         {
 991           type = xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("office:value-type"));
 992           val_string = xmlTextReaderGetAttribute (r->rsd.xtr, _xml ("office:value"));
 993         }
 994
 995       if (r->rsd.state == STATE_CELL_CONTENT &&
 996            r->rsd.node_type == XML_READER_TYPE_TEXT)
 997         {
 998           int col;
 999           struct xml_value *xmv = xzalloc (sizeof *xmv);
1000           xmv->text = xmlTextReaderValue (r->rsd.xtr);
1001           xmv->value = val_string;
1002           val_string = NULL;
1003           xmv->type = type;
1004           type = NULL;
1005
1006           for (col = 0; col < r->rsd.col_span; ++col)
1007             {
1008               const struct variable *var;
1009               const int idx = r->rsd.col - col - r->start_col - 1;
1010               if (idx < 0)
1011                 continue;
1012               if (r->stop_col != -1 && idx > r->stop_col - r->start_col )
1013                 break;
1014               if (idx >= dict_get_var_cnt (r->dict))
1015                 break;
1016
1017               var = dict_get_var (r->dict, idx);
1018               convert_xml_to_value (c, var, xmv, idx + r->start_col, r->rsd.row - 1);
1019             }
1020
1021           xmlFree (xmv->text);
1022           xmlFree (xmv->value);
1023           xmlFree (xmv->type);
1024           free (xmv);
1025         }
1026       if ( r->rsd.state <= STATE_TABLE)
1027         break;
1028     }
1029
1030   xmlFree (type);
1031   xmlFree (val_string);
1032
1033   return c;
1034 }
1035 #endif