src/language/data-io/matrix-data.c

   1 /* PSPP - computes sample statistics.
   2    Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
   3    Written by Ben Pfaff <blp@gnu.org>.
   4
   5    This program is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful, but
  11    WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software
  17    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  18    02110-1301, USA. */
  19
  20 #include <config.h>
  21 #include "message.h"
  22 #include <stdlib.h>
  23 #include <ctype.h>
  24 #include <float.h>
  25 #include "array.h"
  26 #include "alloc.h"
  27 #include "case.h"
  28 #include "command.h"
  29 #include "compiler.h"
  30 #include "data-in.h"
  31 #include "data-reader.h"
  32 #include "dictionary.h"
  33 #include "message.h"
  34 #include "file-handle.h"
  35 #include "lexer.h"
  36 #include "misc.h"
  37 #include "pool.h"
  38 #include "str.h"
  39 #include "variable.h"
  40 #include "procedure.h"
  41
  42 #include "gettext.h"
  43 #define _(msgid) gettext (msgid)
  44
  45 #include "debug-print.h"
  46
  47 /* FIXME: /N subcommand not implemented.  It should be pretty simple,
  48    too. */
  49
  50 /* Different types of variables for MATRIX DATA procedure.  Order is
  51    important: these are used for sort keys. */
  52 enum
  53   {
  54     MXD_SPLIT,                  /* SPLIT FILE variables. */
  55     MXD_ROWTYPE,                /* ROWTYPE_. */
  56     MXD_FACTOR,                 /* Factor variables. */
  57     MXD_VARNAME,                /* VARNAME_. */
  58     MXD_CONTINUOUS,             /* Continuous variables. */
  59
  60     MXD_COUNT
  61   };
  62
  63 /* Format type enums. */
  64 enum format_type
  65   {
  66     LIST,
  67     FREE
  68   };
  69
  70 /* Matrix section enums. */
  71 enum matrix_section
  72   {
  73     LOWER,
  74     UPPER,
  75     FULL
  76   };
  77
  78 /* Diagonal inclusion enums. */
  79 enum include_diagonal
  80   {
  81     DIAGONAL,
  82     NODIAGONAL
  83   };
  84
  85 /* CONTENTS types. */
  86 enum content_type
  87   {
  88     N_VECTOR,
  89     N_SCALAR,
  90     N_MATRIX,
  91     MEAN,
  92     STDDEV,
  93     COUNT,
  94     MSE,
  95     DFE,
  96     MAT,
  97     COV,
  98     CORR,
  99     PROX,
 100
 101     LPAREN,
 102     RPAREN,
 103     EOC
 104   };
 105
 106 /* 0=vector, 1=matrix, 2=scalar. */
 107 static const int content_type[PROX + 1] =
 108   {
 109     0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,
 110   };
 111
 112 /* Name of each content type. */
 113 static const char *content_names[PROX + 1] =
 114   {
 115     "N", "N", "N_MATRIX", "MEAN", "STDDEV", "COUNT", "MSE",
 116     "DFE", "MAT", "COV", "CORR", "PROX",
 117   };
 118
 119 /* A MATRIX DATA input program. */
 120 struct matrix_data_pgm
 121   {
 122     struct pool *container;     /* Arena used for all allocations. */
 123     struct dfm_reader *reader;  /* Data file to read. */
 124
 125     /* Format. */
 126     enum format_type fmt;       /* LIST or FREE. */
 127     enum matrix_section section;/* LOWER or UPPER or FULL. */
 128     enum include_diagonal diag; /* DIAGONAL or NODIAGONAL. */
 129
 130     int explicit_rowtype;       /* ROWTYPE_ specified explicitly in data? */
 131     struct variable *rowtype_, *varname_; /* ROWTYPE_, VARNAME_ variables. */
 132
 133     struct variable *single_split; /* Single SPLIT FILE variable. */
 134
 135     /* Factor variables.  */
 136     size_t n_factors;           /* Number of factor variables. */
 137     struct variable **factors;  /* Factor variables. */
 138     int is_per_factor[PROX + 1]; /* Is there per-factor data? */
 139
 140     int cells;                  /* Number of cells, or -1 if none. */
 141
 142     int pop_n;                  /* Population N specified by user. */
 143
 144     /* CONTENTS subcommand. */
 145     int contents[EOC * 3 + 1];  /* Contents. */
 146     int n_contents;             /* Number of entries. */
 147
 148     /* Continuous variables. */
 149     int n_continuous;           /* Number of continuous variables. */
 150     int first_continuous;       /* Index into default_dict.var of
 151                                    first continuous variable. */
 152   };
 153
 154 /* Auxiliary data attached to MATRIX DATA variables. */
 155 struct mxd_var
 156   {
 157     int var_type;               /* Variable type. */
 158     int sub_type;               /* Subtype. */
 159   };
 160
 161 static const struct case_source_class matrix_data_with_rowtype_source_class;
 162 static const struct case_source_class matrix_data_without_rowtype_source_class;
 163
 164 static int compare_variables_by_mxd_var_type (const void *pa,
 165                                              const void *pb);
 166 static bool read_matrices_without_rowtype (struct matrix_data_pgm *);
 167 static bool read_matrices_with_rowtype (struct matrix_data_pgm *);
 168 static int string_to_content_type (char *, int *);
 169 static void attach_mxd_aux (struct variable *, int var_type, int sub_type);
 170
 171 int
 172 cmd_matrix_data (void)
 173 {
 174   struct pool *pool;
 175   struct matrix_data_pgm *mx;
 176   struct file_handle *fh = fh_inline_file ();
 177   bool ok;
 178
 179   unsigned seen = 0;
 180
 181   discard_variables ();
 182
 183   pool = pool_create ();
 184   mx = pool_alloc (pool, sizeof *mx);
 185   mx->container = pool;
 186   mx->reader = NULL;
 187   mx->fmt = LIST;
 188   mx->section = LOWER;
 189   mx->diag = DIAGONAL;
 190   mx->explicit_rowtype = 0;
 191   mx->rowtype_ = NULL;
 192   mx->varname_ = NULL;
 193   mx->single_split = NULL;
 194   mx->n_factors = 0;
 195   mx->factors = NULL;
 196   memset (mx->is_per_factor, 0, sizeof mx->is_per_factor);
 197   mx->cells = -1;
 198   mx->pop_n = -1;
 199   mx->n_contents = 0;
 200   mx->n_continuous = 0;
 201   mx->first_continuous = 0;
 202   while (token != '.')
 203     {
 204       lex_match ('/');
 205
 206       if (lex_match_id ("VARIABLES"))
 207         {
 208           char **v;
 209           size_t nv;
 210
 211           if (seen & 1)
 212             {
 213               msg (SE, _("VARIABLES subcommand multiply specified."));
 214               goto lossage;
 215             }
 216           seen |= 1;
 217
 218           lex_match ('=');
 219           if (!parse_DATA_LIST_vars (&v, &nv, PV_NO_DUPLICATE))
 220             goto lossage;
 221
 222           {
 223             size_t i;
 224
 225             for (i = 0; i < nv; i++)
 226               if (!strcasecmp (v[i], "VARNAME_"))
 227                 {
 228                   msg (SE, _("VARNAME_ cannot be explicitly specified on "
 229                              "VARIABLES."));
 230                   for (i = 0; i < nv; i++)
 231                     free (v[i]);
 232                   free (v);
 233                   goto lossage;
 234                 }
 235           }
 236
 237           {
 238             size_t i;
 239
 240             for (i = 0; i < nv; i++)
 241               {
 242                 struct variable *new_var;
 243
 244                 if (strcasecmp (v[i], "ROWTYPE_"))
 245                   {
 246                     new_var = dict_create_var_assert (default_dict, v[i], 0);
 247                     attach_mxd_aux (new_var, MXD_CONTINUOUS, i);
 248                   }
 249                 else
 250                   mx->explicit_rowtype = 1;
 251                 free (v[i]);
 252               }
 253             free (v);
 254           }
 255
 256           mx->rowtype_ = dict_create_var_assert (default_dict,
 257                                                  "ROWTYPE_", 8);
 258           attach_mxd_aux (mx->rowtype_, MXD_ROWTYPE, 0);
 259         }
 260       else if (lex_match_id ("FILE"))
 261         {
 262           lex_match ('=');
 263           fh = fh_parse (FH_REF_FILE | FH_REF_INLINE);
 264           if (fh == NULL)
 265             goto lossage;
 266         }
 267       else if (lex_match_id ("FORMAT"))
 268         {
 269           lex_match ('=');
 270
 271           while (token == T_ID)
 272             {
 273               if (lex_match_id ("LIST"))
 274                 mx->fmt = LIST;
 275               else if (lex_match_id ("FREE"))
 276                 mx->fmt = FREE;
 277               else if (lex_match_id ("LOWER"))
 278                 mx->section = LOWER;
 279               else if (lex_match_id ("UPPER"))
 280                 mx->section = UPPER;
 281               else if (lex_match_id ("FULL"))
 282                 mx->section = FULL;
 283               else if (lex_match_id ("DIAGONAL"))
 284                 mx->diag = DIAGONAL;
 285               else if (lex_match_id ("NODIAGONAL"))
 286                 mx->diag = NODIAGONAL;
 287               else
 288                 {
 289                   lex_error (_("in FORMAT subcommand"));
 290                   goto lossage;
 291                 }
 292             }
 293         }
 294       else if (lex_match_id ("SPLIT"))
 295         {
 296           lex_match ('=');
 297
 298           if (seen & 2)
 299             {
 300               msg (SE, _("SPLIT subcommand multiply specified."));
 301               goto lossage;
 302             }
 303           seen |= 2;
 304
 305           if (token != T_ID)
 306             {
 307               lex_error (_("in SPLIT subcommand"));
 308               goto lossage;
 309             }
 310
 311           if (dict_lookup_var (default_dict, tokid) == NULL
 312               && (lex_look_ahead () == '.' || lex_look_ahead () == '/'))
 313             {
 314               if (!strcasecmp (tokid, "ROWTYPE_")
 315                   || !strcasecmp (tokid, "VARNAME_"))
 316                 {
 317                   msg (SE, _("Split variable may not be named ROWTYPE_ "
 318                              "or VARNAME_."));
 319                   goto lossage;
 320                 }
 321
 322               mx->single_split = dict_create_var_assert (default_dict,
 323                                                          tokid, 0);
 324               attach_mxd_aux (mx->single_split, MXD_CONTINUOUS, 0);
 325               lex_get ();
 326
 327               dict_set_split_vars (default_dict, &mx->single_split, 1);
 328             }
 329           else
 330             {
 331               struct variable **split;
 332               size_t n;
 333
 334               if (!parse_variables (default_dict, &split, &n, PV_NO_DUPLICATE))
 335                 goto lossage;
 336
 337               dict_set_split_vars (default_dict, split, n);
 338             }
 339
 340           {
 341             struct variable *const *split = dict_get_split_vars (default_dict);
 342             size_t split_cnt = dict_get_split_cnt (default_dict);
 343             int i;
 344
 345             for (i = 0; i < split_cnt; i++)
 346               {
 347                 struct mxd_var *mv = split[i]->aux;
 348                 assert (mv != NULL);
 349                 if (mv->var_type != MXD_CONTINUOUS)
 350                   {
 351                     msg (SE, _("Split variable %s is already another type."),
 352                          tokid);
 353                     goto lossage;
 354                   }
 355                 var_clear_aux (split[i]);
 356                 attach_mxd_aux (split[i], MXD_SPLIT, i);
 357               }
 358           }
 359         }
 360       else if (lex_match_id ("FACTORS"))
 361         {
 362           lex_match ('=');
 363
 364           if (seen & 4)
 365             {
 366               msg (SE, _("FACTORS subcommand multiply specified."));
 367               goto lossage;
 368             }
 369           seen |= 4;
 370
 371           if (!parse_variables (default_dict, &mx->factors, &mx->n_factors,
 372                                 PV_NONE))
 373             goto lossage;
 374
 375           {
 376             size_t i;
 377
 378             for (i = 0; i < mx->n_factors; i++)
 379               {
 380                 struct variable *v = mx->factors[i];
 381                 struct mxd_var *mv = v->aux;
 382                 assert (mv != NULL);
 383                 if (mv->var_type != MXD_CONTINUOUS)
 384                   {
 385                     msg (SE, _("Factor variable %s is already another type."),
 386                          tokid);
 387                     goto lossage;
 388                   }
 389                 var_clear_aux (v);
 390                 attach_mxd_aux (v, MXD_FACTOR, i);
 391               }
 392           }
 393         }
 394       else if (lex_match_id ("CELLS"))
 395         {
 396           lex_match ('=');
 397
 398           if (mx->cells != -1)
 399             {
 400               msg (SE, _("CELLS subcommand multiply specified."));
 401               goto lossage;
 402             }
 403
 404           if (!lex_is_integer () || lex_integer () < 1)
 405             {
 406               lex_error (_("expecting positive integer"));
 407               goto lossage;
 408             }
 409
 410           mx->cells = lex_integer ();
 411           lex_get ();
 412         }
 413       else if (lex_match_id ("N"))
 414         {
 415           lex_match ('=');
 416
 417           if (mx->pop_n != -1)
 418             {
 419               msg (SE, _("N subcommand multiply specified."));
 420               goto lossage;
 421             }
 422
 423           if (!lex_is_integer () || lex_integer () < 1)
 424             {
 425               lex_error (_("expecting positive integer"));
 426               goto lossage;
 427             }
 428
 429           mx->pop_n = lex_integer ();
 430           lex_get ();
 431         }
 432       else if (lex_match_id ("CONTENTS"))
 433         {
 434           int inside_parens = 0;
 435           unsigned collide = 0;
 436           int item;
 437
 438           if (seen & 8)
 439             {
 440               msg (SE, _("CONTENTS subcommand multiply specified."));
 441               goto lossage;
 442             }
 443           seen |= 8;
 444
 445           lex_match ('=');
 446
 447           {
 448             int i;
 449
 450             for (i = 0; i <= PROX; i++)
 451               mx->is_per_factor[i] = 0;
 452           }
 453
 454           for (;;)
 455             {
 456               if (lex_match ('('))
 457                 {
 458                   if (inside_parens)
 459                     {
 460                       msg (SE, _("Nested parentheses not allowed."));
 461                       goto lossage;
 462                     }
 463                   inside_parens = 1;
 464                   item = LPAREN;
 465                 }
 466               else if (lex_match (')'))
 467                 {
 468                   if (!inside_parens)
 469                     {
 470                       msg (SE, _("Mismatched right parenthesis (`(')."));
 471                       goto lossage;
 472                     }
 473                   if (mx->contents[mx->n_contents - 1] == LPAREN)
 474                     {
 475                       msg (SE, _("Empty parentheses not allowed."));
 476                       goto lossage;
 477                     }
 478                   inside_parens = 0;
 479                   item = RPAREN;
 480                 }
 481               else
 482                 {
 483                   int content_type;
 484                   int collide_index;
 485
 486                   if (token != T_ID)
 487                     {
 488                       lex_error (_("in CONTENTS subcommand"));
 489                       goto lossage;
 490                     }
 491
 492                   content_type = string_to_content_type (tokid,
 493                                                          &collide_index);
 494                   if (content_type == -1)
 495                     {
 496                       lex_error (_("in CONTENTS subcommand"));
 497                       goto lossage;
 498                     }
 499                   lex_get ();
 500
 501                   if (collide & (1 << collide_index))
 502                     {
 503                       msg (SE, _("Content multiply specified for %s."),
 504                            content_names[content_type]);
 505                       goto lossage;
 506                     }
 507                   collide |= (1 << collide_index);
 508
 509                   item = content_type;
 510                   mx->is_per_factor[item] = inside_parens;
 511                 }
 512               mx->contents[mx->n_contents++] = item;
 513
 514               if (token == '/' || token == '.')
 515                 break;
 516             }
 517
 518           if (inside_parens)
 519             {
 520               msg (SE, _("Missing right parenthesis."));
 521               goto lossage;
 522             }
 523           mx->contents[mx->n_contents] = EOC;
 524         }
 525       else
 526         {
 527           lex_error (NULL);
 528           goto lossage;
 529         }
 530     }
 531
 532   if (token != '.')
 533     {
 534       lex_error (_("expecting end of command"));
 535       goto lossage;
 536     }
 537
 538   if (!(seen & 1))
 539     {
 540       msg (SE, _("Missing VARIABLES subcommand."));
 541       goto lossage;
 542     }
 543
 544   if (!mx->n_contents && !mx->explicit_rowtype)
 545     {
 546       msg (SW, _("CONTENTS subcommand not specified: assuming file "
 547                  "contains only CORR matrix."));
 548
 549       mx->contents[0] = CORR;
 550       mx->contents[1] = EOC;
 551       mx->n_contents = 0;
 552     }
 553
 554   if (mx->n_factors && !mx->explicit_rowtype && mx->cells == -1)
 555     {
 556       msg (SE, _("Missing CELLS subcommand.  CELLS is required "
 557                  "when ROWTYPE_ is not given in the data and "
 558                  "factors are present."));
 559       goto lossage;
 560     }
 561
 562   if (mx->explicit_rowtype && mx->single_split)
 563     {
 564       msg (SE, _("Split file values must be present in the data when "
 565                  "ROWTYPE_ is present."));
 566       goto lossage;
 567     }
 568
 569   /* Create VARNAME_. */
 570   mx->varname_ = dict_create_var_assert (default_dict, "VARNAME_", 8);
 571   attach_mxd_aux (mx->varname_, MXD_VARNAME, 0);
 572
 573   /* Sort the dictionary variables into the desired order for the
 574      system file output. */
 575   {
 576     struct variable **v;
 577     size_t nv;
 578
 579     dict_get_vars (default_dict, &v, &nv, 0);
 580     qsort (v, nv, sizeof *v, compare_variables_by_mxd_var_type);
 581     dict_reorder_vars (default_dict, v, nv);
 582     free (v);
 583   }
 584
 585   /* Set formats. */
 586   {
 587     static const struct fmt_spec fmt_tab[MXD_COUNT] =
 588       {
 589         {FMT_F, 4, 0},
 590         {FMT_A, 8, 0},
 591         {FMT_F, 4, 0},
 592         {FMT_A, 8, 0},
 593         {FMT_F, 10, 4},
 594       };
 595
 596     int i;
 597
 598     mx->first_continuous = -1;
 599     for (i = 0; i < dict_get_var_cnt (default_dict); i++)
 600       {
 601         struct variable *v = dict_get_var (default_dict, i);
 602         struct mxd_var *mv = v->aux;
 603         int type = mv->var_type;
 604
 605         assert (type >= 0 && type < MXD_COUNT);
 606         v->print = v->write = fmt_tab[type];
 607
 608         if (type == MXD_CONTINUOUS)
 609           mx->n_continuous++;
 610         if (mx->first_continuous == -1 && type == MXD_CONTINUOUS)
 611           mx->first_continuous = i;
 612       }
 613   }
 614
 615   if (mx->n_continuous == 0)
 616     {
 617       msg (SE, _("No continuous variables specified."));
 618       goto lossage;
 619     }
 620
 621   mx->reader = dfm_open_reader (fh);
 622   if (mx->reader == NULL)
 623     goto lossage;
 624
 625   if (mx->explicit_rowtype)
 626     ok = read_matrices_with_rowtype (mx);
 627   else
 628     ok = read_matrices_without_rowtype (mx);
 629
 630   dfm_close_reader (mx->reader);
 631
 632   pool_destroy (mx->container);
 633
 634   return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
 635
 636 lossage:
 637   discard_variables ();
 638   free (mx->factors);
 639   pool_destroy (mx->container);
 640   return CMD_CASCADING_FAILURE;
 641 }
 642
 643 /* Look up string S as a content-type name and return the
 644    corresponding enumerated value, or -1 if there is no match.  If
 645    COLLIDE is non-NULL then *COLLIDE returns a value (suitable for use
 646    as a bit-index) which can be used for determining whether a related
 647    statistic has already been used. */
 648 static int
 649 string_to_content_type (char *s, int *collide)
 650 {
 651   static const struct
 652     {
 653       int value;
 654       int collide;
 655       const char *string;
 656     }
 657   *tp,
 658   tab[] =
 659     {
 660       {N_VECTOR, 0, "N_VECTOR"},
 661       {N_VECTOR, 0, "N"},
 662       {N_SCALAR, 0, "N_SCALAR"},
 663       {N_MATRIX, 1, "N_MATRIX"},
 664       {MEAN, 2, "MEAN"},
 665       {STDDEV, 3, "STDDEV"},
 666       {STDDEV, 3, "SD"},
 667       {COUNT, 4, "COUNT"},
 668       {MSE, 5, "MSE"},
 669       {DFE, 6, "DFE"},
 670       {MAT, 7, "MAT"},
 671       {COV, 8, "COV"},
 672       {CORR, 9, "CORR"},
 673       {PROX, 10, "PROX"},
 674       {-1, -1, NULL},
 675     };
 676
 677   for (tp = tab; tp->value != -1; tp++)
 678     if (!strcasecmp (s, tp->string))
 679       {
 680         if (collide)
 681           *collide = tp->collide;
 682
 683         return tp->value;
 684       }
 685   return -1;
 686 }
 687
 688 /* Compare two variables using p.mxd.var_type and p.mxd.sub_type
 689    fields. */
 690 static int
 691 compare_variables_by_mxd_var_type (const void *a_, const void *b_)
 692 {
 693   struct variable *const *pa = a_;
 694   struct variable *const *pb = b_;
 695   const struct mxd_var *a = (*pa)->aux;
 696   const struct mxd_var *b = (*pb)->aux;
 697
 698   if (a->var_type != b->var_type)
 699     return a->var_type > b->var_type ? 1 : -1;
 700   else
 701     return a->sub_type < b->sub_type ? -1 : a->sub_type > b->sub_type;
 702 }
 703
 704 /* Attaches a struct mxd_var with the specific member values to
 705    V. */
 706 static void
 707 attach_mxd_aux (struct variable *v, int var_type, int sub_type)
 708 {
 709   struct mxd_var *mv;
 710
 711   assert (v->aux == NULL);
 712   mv = xmalloc (sizeof *mv);
 713   mv->var_type = var_type;
 714   mv->sub_type = sub_type;
 715   var_attach_aux (v, mv, var_dtor_free);
 716 }
 717 \f
 718 /* Matrix tokenizer. */
 719
 720 /* Matrix token types. */
 721 enum matrix_token_type
 722   {
 723     MNUM,               /* Number. */
 724     MSTR                /* String. */
 725   };
 726
 727 /* A MATRIX DATA parsing token. */
 728 struct matrix_token
 729   {
 730     enum matrix_token_type type;
 731     double number;       /* MNUM: token value. */
 732     char *string;        /* MSTR: token string; not null-terminated. */
 733     int length;          /* MSTR: tokstr length. */
 734   };
 735
 736 static int mget_token (struct matrix_token *, struct dfm_reader *);
 737
 738 #if DEBUGGING
 739 #define mget_token(TOKEN, READER) mget_token_dump(TOKEN, READER)
 740
 741 static void
 742 mdump_token (const struct matrix_token *token)
 743 {
 744   switch (token->type)
 745     {
 746     case MNUM:
 747       printf (" #%g", token->number);
 748       break;
 749     case MSTR:
 750       printf (" '%.*s'", token->length, token->string);
 751       break;
 752     default:
 753       assert (0);
 754     }
 755   fflush (stdout);
 756 }
 757
 758 static int
 759 mget_token_dump (struct matrix_token *token, struct dfm_reader *reader)
 760 {
 761   int result = (mget_token) (token, reader);
 762   mdump_token (token);
 763   return result;
 764 }
 765 #endif
 766
 767 /* Return the current position in READER. */
 768 static const char *
 769 context (struct dfm_reader *reader)
 770 {
 771   static char buf[32];
 772
 773   if (dfm_eof (reader))
 774     strcpy (buf, "at end of file");
 775   else
 776     {
 777       struct fixed_string line;
 778       const char *sp;
 779
 780       dfm_get_record (reader, &line);
 781       sp = ls_c_str (&line);
 782       while (sp < ls_end (&line) && isspace ((unsigned char) *sp))
 783         sp++;
 784       if (sp >= ls_end (&line))
 785         strcpy (buf, "at end of line");
 786       else
 787         {
 788           char *dp;
 789           size_t copy_cnt = 0;
 790
 791           dp = stpcpy (buf, "before `");
 792           while (sp < ls_end (&line) && !isspace ((unsigned char) *sp)
 793                  && copy_cnt < 10)
 794             {
 795               *dp++ = *sp++;
 796               copy_cnt++;
 797             }
 798           strcpy (dp, "'");
 799         }
 800     }
 801
 802   return buf;
 803 }
 804
 805 /* Is there at least one token left in the data file? */
 806 static int
 807 another_token (struct dfm_reader *reader)
 808 {
 809   for (;;)
 810     {
 811       struct fixed_string line;
 812       const char *cp;
 813
 814       if (dfm_eof (reader))
 815         return 0;
 816       dfm_get_record (reader, &line);
 817
 818       cp = ls_c_str (&line);
 819       while (isspace ((unsigned char) *cp) && cp < ls_end (&line))
 820         cp++;
 821
 822       if (cp < ls_end (&line))
 823         {
 824           dfm_forward_columns (reader, cp - ls_c_str (&line));
 825           return 1;
 826         }
 827
 828       dfm_forward_record (reader);
 829     }
 830 }
 831
 832 /* Parse a MATRIX DATA token from READER into TOKEN. */
 833 static int
 834 (mget_token) (struct matrix_token *token, struct dfm_reader *reader)
 835 {
 836   struct fixed_string line;
 837   int first_column;
 838   char *cp;
 839
 840   if (!another_token (reader))
 841     return 0;
 842
 843   dfm_get_record (reader, &line);
 844   first_column = dfm_column_start (reader);
 845
 846   /* Three types of fields: quoted with ', quoted with ", unquoted. */
 847   cp = ls_c_str (&line);
 848   if (*cp == '\'' || *cp == '"')
 849     {
 850       int quote = *cp;
 851
 852       token->type = MSTR;
 853       token->string = ++cp;
 854       while (cp < ls_end (&line) && *cp != quote)
 855         cp++;
 856       token->length = cp - token->string;
 857       if (cp < ls_end (&line))
 858         cp++;
 859       else
 860         msg (SW, _("Scope of string exceeds line."));
 861     }
 862   else
 863     {
 864       int is_num = isdigit ((unsigned char) *cp) || *cp == '.';
 865
 866       token->string = cp++;
 867       while (cp < ls_end (&line)
 868              && !isspace ((unsigned char) *cp) && *cp != ','
 869              && *cp != '-' && *cp != '+')
 870         {
 871           if (isdigit ((unsigned char) *cp))
 872             is_num = 1;
 873
 874           if ((tolower ((unsigned char) *cp) == 'd'
 875                || tolower ((unsigned char) *cp) == 'e')
 876               && (cp[1] == '+' || cp[1] == '-'))
 877             cp += 2;
 878           else
 879             cp++;
 880         }
 881
 882       token->length = cp - token->string;
 883       assert (token->length);
 884
 885       if (is_num)
 886         {
 887           struct data_in di;
 888
 889           di.s = token->string;
 890           di.e = token->string + token->length;
 891           di.v = (union value *) &token->number;
 892           di.f1 = first_column;
 893           di.format = make_output_format (FMT_F, token->length, 0);
 894
 895           if (!data_in (&di))
 896             return 0;
 897         }
 898       else
 899         token->type = MSTR;
 900     }
 901
 902   dfm_forward_columns (reader, cp - ls_c_str (&line));
 903
 904   return 1;
 905 }
 906
 907 /* Forcibly skip the end of a line for content type CONTENT in
 908    READER. */
 909 static int
 910 force_eol (struct dfm_reader *reader, const char *content)
 911 {
 912   struct fixed_string line;
 913   const char *cp;
 914
 915   if (dfm_eof (reader))
 916     return 0;
 917   dfm_get_record (reader, &line);
 918
 919   cp = ls_c_str (&line);
 920   while (isspace ((unsigned char) *cp) && cp < ls_end (&line))
 921     cp++;
 922
 923   if (cp < ls_end (&line))
 924     {
 925       msg (SE, _("End of line expected %s while reading %s."),
 926            context (reader), content);
 927       return 0;
 928     }
 929
 930   dfm_forward_record (reader);
 931   return 1;
 932 }
 933 \f
 934 /* Back end, omitting ROWTYPE_. */
 935
 936 struct nr_aux_data
 937   {
 938     struct matrix_data_pgm *mx; /* MATRIX DATA program. */
 939     double ***data;             /* MATRIX DATA data. */
 940     double *factor_values;      /* Factor values. */
 941     int max_cell_idx;           /* Max-numbered cell that we have
 942                                    read so far, plus one. */
 943     double *split_values;       /* SPLIT FILE variable values. */
 944   };
 945
 946 static int nr_read_splits (struct nr_aux_data *, int compare);
 947 static int nr_read_factors (struct nr_aux_data *, int cell);
 948 static bool nr_output_data (struct nr_aux_data *, struct ccase *,
 949                             write_case_func *, write_case_data);
 950 static bool matrix_data_read_without_rowtype (struct case_source *source,
 951                                               struct ccase *,
 952                                               write_case_func *,
 953                                               write_case_data);
 954
 955 /* Read from the data file and write it to the active file.
 956    Returns true if successful, false if an I/O error occurred. */
 957 static bool
 958 read_matrices_without_rowtype (struct matrix_data_pgm *mx)
 959 {
 960   struct nr_aux_data nr;
 961   bool ok;
 962
 963   if (mx->cells == -1)
 964     mx->cells = 1;
 965
 966   nr.mx = mx;
 967   nr.data = NULL;
 968   nr.factor_values = xnmalloc (mx->n_factors * mx->cells,
 969                                sizeof *nr.factor_values);
 970   nr.max_cell_idx = 0;
 971   nr.split_values = xnmalloc (dict_get_split_cnt (default_dict),
 972                               sizeof *nr.split_values);
 973
 974   vfm_source = create_case_source (&matrix_data_without_rowtype_source_class, &nr);
 975
 976   ok = procedure (NULL, NULL);
 977
 978   free (nr.split_values);
 979   free (nr.factor_values);
 980
 981   return ok;
 982 }
 983
 984 /* Mirror data across the diagonal of matrix CP which contains
 985    CONTENT type data. */
 986 static void
 987 fill_matrix (struct matrix_data_pgm *mx, int content, double *cp)
 988 {
 989   int type = content_type[content];
 990
 991   if (type == 1 && mx->section != FULL)
 992     {
 993       if (mx->diag == NODIAGONAL)
 994         {
 995           const double fill = content == CORR ? 1.0 : SYSMIS;
 996           int i;
 997
 998           for (i = 0; i < mx->n_continuous; i++)
 999             cp[i * (1 + mx->n_continuous)] = fill;
1000         }
1001
1002       {
1003         int c, r;
1004
1005         if (mx->section == LOWER)
1006           {
1007             int n_lines = mx->n_continuous;
1008             if (mx->section != FULL && mx->diag == NODIAGONAL)
1009               n_lines--;
1010
1011             for (r = 1; r < n_lines; r++)
1012               for (c = 0; c < r; c++)
1013                 cp[r + c * mx->n_continuous] = cp[c + r * mx->n_continuous];
1014           }
1015         else
1016           {
1017             assert (mx->section == UPPER);
1018             for (r = 1; r < mx->n_continuous; r++)
1019               for (c = 0; c < r; c++)
1020                 cp[c + r * mx->n_continuous] = cp[r + c * mx->n_continuous];
1021           }
1022       }
1023     }
1024   else if (type == 2)
1025     {
1026       int c;
1027
1028       for (c = 1; c < mx->n_continuous; c++)
1029         cp[c] = cp[0];
1030     }
1031 }
1032
1033 /* Read data lines for content type CONTENT from the data file.
1034    If PER_FACTOR is nonzero, then factor information is read from
1035    the data file.  Data is for cell number CELL. */
1036 static int
1037 nr_read_data_lines (struct nr_aux_data *nr,
1038                     int per_factor, int cell, int content, int compare)
1039 {
1040   struct matrix_data_pgm *mx = nr->mx;
1041   const int type = content_type[content];               /* Content type. */
1042   int n_lines; /* Number of lines to parse from data file for this type. */
1043   double *cp;                   /* Current position in vector or matrix. */
1044   int i;
1045
1046   if (type != 1)
1047     n_lines = 1;
1048   else
1049     {
1050       n_lines = mx->n_continuous;
1051       if (mx->section != FULL && mx->diag == NODIAGONAL)
1052         n_lines--;
1053     }
1054
1055   cp = nr->data[content][cell];
1056   if (type == 1 && mx->section == LOWER && mx->diag == NODIAGONAL)
1057     cp += mx->n_continuous;
1058
1059   for (i = 0; i < n_lines; i++)
1060     {
1061       int n_cols;
1062
1063       if (!nr_read_splits (nr, 1))
1064         return 0;
1065       if (per_factor && !nr_read_factors (nr, cell))
1066         return 0;
1067       compare = 1;
1068
1069       switch (type)
1070         {
1071         case 0:
1072           n_cols = mx->n_continuous;
1073           break;
1074         case 1:
1075           switch (mx->section)
1076             {
1077             case LOWER:
1078               n_cols = i + 1;
1079               break;
1080             case UPPER:
1081               cp += i;
1082               n_cols = mx->n_continuous - i;
1083               if (mx->diag == NODIAGONAL)
1084                 {
1085                   n_cols--;
1086                   cp++;
1087                 }
1088               break;
1089             case FULL:
1090               n_cols = mx->n_continuous;
1091               break;
1092             default:
1093               assert (0);
1094               abort ();
1095             }
1096           break;
1097         case 2:
1098           n_cols = 1;
1099           break;
1100         default:
1101           assert (0);
1102           abort ();
1103         }
1104
1105       {
1106         int j;
1107
1108         for (j = 0; j < n_cols; j++)
1109           {
1110             struct matrix_token token;
1111             if (!mget_token (&token, mx->reader))
1112               return 0;
1113             if (token.type != MNUM)
1114               {
1115                 msg (SE, _("expecting value for %s %s"),
1116                      dict_get_var (default_dict, j)->name,
1117                      context (mx->reader));
1118                 return 0;
1119               }
1120
1121             *cp++ = token.number;
1122           }
1123         if (mx->fmt != FREE
1124             && !force_eol (mx->reader, content_names[content]))
1125           return 0;
1126         debug_printf (("\n"));
1127       }
1128
1129       if (mx->section == LOWER)
1130         cp += mx->n_continuous - n_cols;
1131     }
1132
1133   fill_matrix (mx, content, nr->data[content][cell]);
1134
1135   return 1;
1136 }
1137
1138 /* When ROWTYPE_ does not appear in the data, reads the matrices and
1139    writes them to the output file.
1140    Returns true if successful, false if an I/O error occurred. */
1141 static bool
1142 matrix_data_read_without_rowtype (struct case_source *source,
1143                                   struct ccase *c,
1144                                   write_case_func *write_case,
1145                                   write_case_data wc_data)
1146 {
1147   struct nr_aux_data *nr = source->aux;
1148   struct matrix_data_pgm *mx = nr->mx;
1149
1150   {
1151     int *cp;
1152
1153     nr->data = pool_nalloc (mx->container, PROX + 1, sizeof *nr->data);
1154
1155     {
1156       int i;
1157
1158       for (i = 0; i <= PROX; i++)
1159         nr->data[i] = NULL;
1160     }
1161
1162     for (cp = mx->contents; *cp != EOC; cp++)
1163       if (*cp != LPAREN && *cp != RPAREN)
1164         {
1165           int per_factor = mx->is_per_factor[*cp];
1166           int n_entries;
1167
1168           n_entries = mx->n_continuous;
1169           if (content_type[*cp] == 1)
1170             n_entries *= mx->n_continuous;
1171
1172           {
1173             int n_vectors = per_factor ? mx->cells : 1;
1174             int i;
1175
1176             nr->data[*cp] = pool_nalloc (mx->container,
1177                                          n_vectors, sizeof **nr->data);
1178
1179             for (i = 0; i < n_vectors; i++)
1180               nr->data[*cp][i] = pool_nalloc (mx->container,
1181                                               n_entries, sizeof ***nr->data);
1182           }
1183         }
1184   }
1185
1186   for (;;)
1187     {
1188       int *bp, *ep, *np;
1189
1190       if (!nr_read_splits (nr, 0))
1191         return true;
1192
1193       for (bp = mx->contents; *bp != EOC; bp = np)
1194         {
1195           int per_factor;
1196
1197           /* Trap the CONTENTS that we should parse in this pass
1198              between bp and ep.  Set np to the starting bp for next
1199              iteration. */
1200           if (*bp == LPAREN)
1201             {
1202               ep = ++bp;
1203               while (*ep != RPAREN)
1204                 ep++;
1205               np = &ep[1];
1206               per_factor = 1;
1207             }
1208           else
1209             {
1210               ep = &bp[1];
1211               while (*ep != EOC && *ep != LPAREN)
1212                 ep++;
1213               np = ep;
1214               per_factor = 0;
1215             }
1216
1217           {
1218             int i;
1219
1220             for (i = 0; i < (per_factor ? mx->cells : 1); i++)
1221               {
1222                 int *cp;
1223
1224                 for (cp = bp; cp < ep; cp++)
1225                   if (!nr_read_data_lines (nr, per_factor, i, *cp, cp != bp))
1226                     return true;
1227               }
1228           }
1229         }
1230
1231       if (!nr_output_data (nr, c, write_case, wc_data))
1232         return false;
1233
1234       if (dict_get_split_cnt (default_dict) == 0
1235           || !another_token (mx->reader))
1236         return true;
1237     }
1238 }
1239
1240 /* Read the split file variables.  If COMPARE is 1, compares the
1241    values read to the last values read and returns 1 if they're equal,
1242    0 otherwise. */
1243 static int
1244 nr_read_splits (struct nr_aux_data *nr, int compare)
1245 {
1246   struct matrix_data_pgm *mx = nr->mx;
1247   static int just_read = 0; /* FIXME: WTF? */
1248   size_t split_cnt;
1249   size_t i;
1250
1251   if (compare && just_read)
1252     {
1253       just_read = 0;
1254       return 1;
1255     }
1256
1257   if (dict_get_split_vars (default_dict) == NULL)
1258     return 1;
1259
1260   if (mx->single_split)
1261     {
1262       if (!compare)
1263         {
1264           struct mxd_var *mv = dict_get_split_vars (default_dict)[0]->aux;
1265           nr->split_values[0] = ++mv->sub_type;
1266         }
1267       return 1;
1268     }
1269
1270   if (!compare)
1271     just_read = 1;
1272
1273   split_cnt = dict_get_split_cnt (default_dict);
1274   for (i = 0; i < split_cnt; i++)
1275     {
1276       struct matrix_token token;
1277       if (!mget_token (&token, mx->reader))
1278         return 0;
1279       if (token.type != MNUM)
1280         {
1281           msg (SE, _("Syntax error expecting SPLIT FILE value %s."),
1282                context (mx->reader));
1283           return 0;
1284         }
1285
1286       if (!compare)
1287         nr->split_values[i] = token.number;
1288       else if (nr->split_values[i] != token.number)
1289         {
1290           msg (SE, _("Expecting value %g for %s."),
1291                nr->split_values[i],
1292                dict_get_split_vars (default_dict)[i]->name);
1293           return 0;
1294         }
1295     }
1296
1297   return 1;
1298 }
1299
1300 /* Read the factors for cell CELL.  If COMPARE is 1, compares the
1301    values read to the last values read and returns 1 if they're equal,
1302    0 otherwise. */
1303 static int
1304 nr_read_factors (struct nr_aux_data *nr, int cell)
1305 {
1306   struct matrix_data_pgm *mx = nr->mx;
1307   int compare;
1308
1309   if (mx->n_factors == 0)
1310     return 1;
1311
1312   assert (nr->max_cell_idx >= cell);
1313   if (cell != nr->max_cell_idx)
1314     compare = 1;
1315   else
1316     {
1317       compare = 0;
1318       nr->max_cell_idx++;
1319     }
1320
1321   {
1322     size_t i;
1323
1324     for (i = 0; i < mx->n_factors; i++)
1325       {
1326         struct matrix_token token;
1327         if (!mget_token (&token, mx->reader))
1328           return 0;
1329         if (token.type != MNUM)
1330           {
1331             msg (SE, _("Syntax error expecting factor value %s."),
1332                  context (mx->reader));
1333             return 0;
1334           }
1335
1336         if (!compare)
1337           nr->factor_values[i + mx->n_factors * cell] = token.number;
1338         else if (nr->factor_values[i + mx->n_factors * cell] != token.number)
1339           {
1340             msg (SE, _("Syntax error expecting value %g for %s %s."),
1341                  nr->factor_values[i + mx->n_factors * cell],
1342                  mx->factors[i]->name, context (mx->reader));
1343             return 0;
1344           }
1345       }
1346   }
1347
1348   return 1;
1349 }
1350
1351 /* Write the contents of a cell having content type CONTENT and data
1352    CP to the active file.
1353    Returns true if successful, false if an I/O error occurred. */
1354 static bool
1355 dump_cell_content (struct matrix_data_pgm *mx, int content, double *cp,
1356                    struct ccase *c,
1357                    write_case_func *write_case, write_case_data wc_data)
1358 {
1359   int type = content_type[content];
1360
1361   {
1362     buf_copy_str_rpad (case_data_rw (c, mx->rowtype_->fv)->s, 8,
1363                        content_names[content]);
1364
1365     if (type != 1)
1366       memset (case_data_rw (c, mx->varname_->fv)->s, ' ', 8);
1367   }
1368
1369   {
1370     int n_lines = (type == 1) ? mx->n_continuous : 1;
1371     int i;
1372
1373     for (i = 0; i < n_lines; i++)
1374       {
1375         int j;
1376
1377         for (j = 0; j < mx->n_continuous; j++)
1378           {
1379             int fv = dict_get_var (default_dict, mx->first_continuous + j)->fv;
1380             case_data_rw (c, fv)->f = *cp;
1381             cp++;
1382           }
1383         if (type == 1)
1384           buf_copy_str_rpad (case_data_rw (c, mx->varname_->fv)->s, 8,
1385                              dict_get_var (default_dict,
1386                                            mx->first_continuous + i)->name);
1387         if (!write_case (wc_data))
1388           return false;
1389       }
1390   }
1391   return true;
1392 }
1393
1394 /* Finally dump out everything from nr_data[] to the output file. */
1395 static bool
1396 nr_output_data (struct nr_aux_data *nr, struct ccase *c,
1397                 write_case_func *write_case, write_case_data wc_data)
1398 {
1399   struct matrix_data_pgm *mx = nr->mx;
1400
1401   {
1402     struct variable *const *split;
1403     size_t split_cnt;
1404     size_t i;
1405
1406     split_cnt = dict_get_split_cnt (default_dict);
1407     split = dict_get_split_vars (default_dict);
1408     for (i = 0; i < split_cnt; i++)
1409       case_data_rw (c, split[i]->fv)->f = nr->split_values[i];
1410   }
1411
1412   if (mx->n_factors)
1413     {
1414       int cell;
1415
1416       for (cell = 0; cell < mx->cells; cell++)
1417         {
1418           {
1419             size_t factor;
1420
1421             for (factor = 0; factor < mx->n_factors; factor++)
1422               {
1423                 case_data_rw (c, mx->factors[factor]->fv)->f
1424                   = nr->factor_values[factor + cell * mx->n_factors];
1425                 debug_printf (("f:%s ", mx->factors[factor]->name));
1426               }
1427           }
1428
1429           {
1430             int content;
1431
1432             for (content = 0; content <= PROX; content++)
1433               if (mx->is_per_factor[content])
1434                 {
1435                   assert (nr->data[content] != NULL
1436                           && nr->data[content][cell] != NULL);
1437
1438                   if (!dump_cell_content (mx, content, nr->data[content][cell],
1439                                           c, write_case, wc_data))
1440                     return false;
1441                 }
1442           }
1443         }
1444     }
1445
1446   {
1447     int content;
1448
1449     {
1450       size_t factor;
1451
1452       for (factor = 0; factor < mx->n_factors; factor++)
1453         case_data_rw (c, mx->factors[factor]->fv)->f = SYSMIS;
1454     }
1455
1456     for (content = 0; content <= PROX; content++)
1457       if (!mx->is_per_factor[content] && nr->data[content] != NULL)
1458         {
1459           if (!dump_cell_content (mx, content, nr->data[content][0],
1460                                   c, write_case, wc_data))
1461             return false;
1462         }
1463   }
1464
1465   return true;
1466 }
1467 \f
1468 /* Back end, with ROWTYPE_. */
1469
1470 /* All the data for one set of factor values. */
1471 struct factor_data
1472   {
1473     double *factors;
1474     int n_rows[PROX + 1];
1475     double *data[PROX + 1];
1476     struct factor_data *next;
1477   };
1478
1479 /* With ROWTYPE_ auxiliary data. */
1480 struct wr_aux_data
1481   {
1482     struct matrix_data_pgm *mx;         /* MATRIX DATA program. */
1483     int content;                        /* Type of current row. */
1484     double *split_values;               /* SPLIT FILE variable values. */
1485     struct factor_data *data;           /* All the data. */
1486     struct factor_data *current;        /* Current factor. */
1487   };
1488
1489 static int wr_read_splits (struct wr_aux_data *, struct ccase *,
1490                            write_case_func *, write_case_data);
1491 static bool wr_output_data (struct wr_aux_data *, struct ccase *,
1492                            write_case_func *, write_case_data);
1493 static int wr_read_rowtype (struct wr_aux_data *,
1494                             const struct matrix_token *, struct dfm_reader *);
1495 static int wr_read_factors (struct wr_aux_data *);
1496 static int wr_read_indeps (struct wr_aux_data *);
1497 static bool matrix_data_read_with_rowtype (struct case_source *,
1498                                            struct ccase *,
1499                                            write_case_func *,
1500                                            write_case_data);
1501
1502 /* When ROWTYPE_ appears in the data, reads the matrices and writes
1503    them to the output file.
1504    Returns true if successful, false if an I/O error occurred. */
1505 static bool
1506 read_matrices_with_rowtype (struct matrix_data_pgm *mx)
1507 {
1508   struct wr_aux_data wr;
1509   bool ok;
1510
1511   wr.mx = mx;
1512   wr.content = -1;
1513   wr.split_values = NULL;
1514   wr.data = NULL;
1515   wr.current = NULL;
1516   mx->cells = 0;
1517
1518   vfm_source = create_case_source (&matrix_data_with_rowtype_source_class,
1519                                    &wr);
1520   ok = procedure (NULL, NULL);
1521
1522   free (wr.split_values);
1523   return ok;
1524 }
1525
1526 /* Read from the data file and write it to the active file.
1527    Returns true if successful, false if an I/O error occurred. */
1528 static bool
1529 matrix_data_read_with_rowtype (struct case_source *source,
1530                                struct ccase *c,
1531                                write_case_func *write_case,
1532                                write_case_data wc_data)
1533 {
1534   struct wr_aux_data *wr = source->aux;
1535   struct matrix_data_pgm *mx = wr->mx;
1536
1537   do
1538     {
1539       if (!wr_read_splits (wr, c, write_case, wc_data))
1540         return true;
1541
1542       if (!wr_read_factors (wr))
1543         return true;
1544
1545       if (!wr_read_indeps (wr))
1546         return true;
1547     }
1548   while (another_token (mx->reader));
1549
1550   return wr_output_data (wr, c, write_case, wc_data);
1551 }
1552
1553 /* Read the split file variables.  If they differ from the previous
1554    set of split variables then output the data.  Returns success. */
1555 static int
1556 wr_read_splits (struct wr_aux_data *wr,
1557                 struct ccase *c,
1558                 write_case_func *write_case, write_case_data wc_data)
1559 {
1560   struct matrix_data_pgm *mx = wr->mx;
1561   int compare;
1562   size_t split_cnt;
1563
1564   split_cnt = dict_get_split_cnt (default_dict);
1565   if (split_cnt == 0)
1566     return 1;
1567
1568   if (wr->split_values)
1569     compare = 1;
1570   else
1571     {
1572       compare = 0;
1573       wr->split_values = xnmalloc (split_cnt, sizeof *wr->split_values);
1574     }
1575
1576   {
1577     int different = 0;
1578     int i;
1579
1580     for (i = 0; i < split_cnt; i++)
1581       {
1582         struct matrix_token token;
1583         if (!mget_token (&token, mx->reader))
1584           return 0;
1585         if (token.type != MNUM)
1586           {
1587             msg (SE, _("Syntax error %s expecting SPLIT FILE value."),
1588                  context (mx->reader));
1589             return 0;
1590           }
1591
1592         if (compare && wr->split_values[i] != token.number && !different)
1593           {
1594             if (!wr_output_data (wr, c, write_case, wc_data))
1595               return 0;
1596             different = 1;
1597             mx->cells = 0;
1598           }
1599         wr->split_values[i] = token.number;
1600       }
1601   }
1602
1603   return 1;
1604 }
1605
1606 /* Compares doubles A and B, treating SYSMIS as greatest. */
1607 static int
1608 compare_doubles (const void *a_, const void *b_, void *aux UNUSED)
1609 {
1610   const double *a = a_;
1611   const double *b = b_;
1612
1613   if (*a == *b)
1614     return 0;
1615   else if (*a == SYSMIS)
1616     return 1;
1617   else if (*b == SYSMIS)
1618     return -1;
1619   else if (*a > *b)
1620     return 1;
1621   else
1622     return -1;
1623 }
1624
1625 /* Return strcmp()-type comparison of the MX->n_factors factors at _A and
1626    _B.  Sort missing values toward the end. */
1627 static int
1628 compare_factors (const void *a_, const void *b_, void *mx_)
1629 {
1630   struct matrix_data_pgm *mx = mx_;
1631   struct factor_data *const *pa = a_;
1632   struct factor_data *const *pb = b_;
1633   const double *a = (*pa)->factors;
1634   const double *b = (*pb)->factors;
1635
1636   return lexicographical_compare_3way (a, mx->n_factors,
1637                                        b, mx->n_factors,
1638                                        sizeof *a,
1639                                        compare_doubles, NULL);
1640 }
1641
1642 /* Write out the data for the current split file to the active
1643    file.
1644    Returns true if successful, false if an I/O error occurred. */
1645 static bool
1646 wr_output_data (struct wr_aux_data *wr,
1647                 struct ccase *c,
1648                 write_case_func *write_case, write_case_data wc_data)
1649 {
1650   struct matrix_data_pgm *mx = wr->mx;
1651   bool ok = true;
1652
1653   {
1654     struct variable *const *split;
1655     size_t split_cnt;
1656     size_t i;
1657
1658     split_cnt = dict_get_split_cnt (default_dict);
1659     split = dict_get_split_vars (default_dict);
1660     for (i = 0; i < split_cnt; i++)
1661       case_data_rw (c, split[i]->fv)->f = wr->split_values[i];
1662   }
1663
1664   /* Sort the wr->data list. */
1665   {
1666     struct factor_data **factors;
1667     struct factor_data *iter;
1668     int i;
1669
1670     factors = xnmalloc (mx->cells, sizeof *factors);
1671
1672     for (i = 0, iter = wr->data; iter; iter = iter->next, i++)
1673       factors[i] = iter;
1674
1675     sort (factors, mx->cells, sizeof *factors, compare_factors, mx);
1676
1677     wr->data = factors[0];
1678     for (i = 0; i < mx->cells - 1; i++)
1679       factors[i]->next = factors[i + 1];
1680     factors[mx->cells - 1]->next = NULL;
1681
1682     free (factors);
1683   }
1684
1685   /* Write out records for every set of factor values. */
1686   {
1687     struct factor_data *iter;
1688
1689     for (iter = wr->data; iter; iter = iter->next)
1690       {
1691         {
1692           size_t factor;
1693
1694           for (factor = 0; factor < mx->n_factors; factor++)
1695             case_data_rw (c, mx->factors[factor]->fv)->f
1696               = iter->factors[factor];
1697         }
1698
1699         {
1700           int content;
1701
1702           for (content = 0; content <= PROX; content++)
1703             {
1704               if (!iter->n_rows[content])
1705                 continue;
1706
1707               {
1708                 int type = content_type[content];
1709                 int n_lines = (type == 1
1710                                ? (mx->n_continuous
1711                                   - (mx->section != FULL && mx->diag == NODIAGONAL))
1712                                : 1);
1713
1714                 if (n_lines != iter->n_rows[content])
1715                   {
1716                     msg (SE, _("Expected %d lines of data for %s content; "
1717                                "actually saw %d lines.  No data will be "
1718                                "output for this content."),
1719                          n_lines, content_names[content],
1720                          iter->n_rows[content]);
1721                     continue;
1722                   }
1723               }
1724
1725               fill_matrix (mx, content, iter->data[content]);
1726
1727               ok = dump_cell_content (mx, content, iter->data[content],
1728                                       c, write_case, wc_data);
1729               if (!ok)
1730                 break;
1731             }
1732         }
1733       }
1734   }
1735
1736   pool_destroy (mx->container);
1737   mx->container = pool_create ();
1738
1739   wr->data = wr->current = NULL;
1740
1741   return ok;
1742 }
1743
1744 /* Sets ROWTYPE_ based on the given TOKEN read from READER.
1745    Return success. */
1746 static int
1747 wr_read_rowtype (struct wr_aux_data *wr,
1748                  const struct matrix_token *token,
1749                  struct dfm_reader *reader)
1750 {
1751   if (wr->content != -1)
1752     {
1753       msg (SE, _("Multiply specified ROWTYPE_ %s."), context (reader));
1754       return 0;
1755     }
1756   if (token->type != MSTR)
1757     {
1758       msg (SE, _("Syntax error %s expecting ROWTYPE_ string."),
1759            context (reader));
1760       return 0;
1761     }
1762
1763   {
1764     char s[16];
1765     char *cp;
1766
1767     memcpy (s, token->string, min (15, token->length));
1768     s[min (15, token->length)] = 0;
1769
1770     for (cp = s; *cp; cp++)
1771       *cp = toupper ((unsigned char) *cp);
1772
1773     wr->content = string_to_content_type (s, NULL);
1774   }
1775
1776   if (wr->content == -1)
1777     {
1778       msg (SE, _("Syntax error %s."), context (reader));
1779       return 0;
1780     }
1781
1782   return 1;
1783 }
1784
1785 /* Read the factors for the current row.  Select a set of factors and
1786    point wr_current to it. */
1787 static int
1788 wr_read_factors (struct wr_aux_data *wr)
1789 {
1790   struct matrix_data_pgm *mx = wr->mx;
1791   double *factor_values = local_alloc (sizeof *factor_values * mx->n_factors);
1792
1793   wr->content = -1;
1794   {
1795     size_t i;
1796
1797     for (i = 0; i < mx->n_factors; i++)
1798       {
1799         struct matrix_token token;
1800         if (!mget_token (&token, mx->reader))
1801           goto lossage;
1802         if (token.type == MSTR)
1803           {
1804             if (!wr_read_rowtype (wr, &token, mx->reader))
1805               goto lossage;
1806             if (!mget_token (&token, mx->reader))
1807               goto lossage;
1808           }
1809         if (token.type != MNUM)
1810           {
1811             msg (SE, _("Syntax error expecting factor value %s."),
1812                  context (mx->reader));
1813             goto lossage;
1814           }
1815
1816         factor_values[i] = token.number;
1817       }
1818   }
1819   if (wr->content == -1)
1820     {
1821       struct matrix_token token;
1822       if (!mget_token (&token, mx->reader))
1823         goto lossage;
1824       if (!wr_read_rowtype (wr, &token, mx->reader))
1825         goto lossage;
1826     }
1827
1828   /* Try the most recent factor first as a simple caching
1829      mechanism. */
1830   if (wr->current)
1831     {
1832       size_t i;
1833
1834       for (i = 0; i < mx->n_factors; i++)
1835         if (factor_values[i] != wr->current->factors[i])
1836           goto cache_miss;
1837       goto winnage;
1838     }
1839
1840   /* Linear search through the list. */
1841 cache_miss:
1842   {
1843     struct factor_data *iter;
1844
1845     for (iter = wr->data; iter; iter = iter->next)
1846       {
1847         size_t i;
1848
1849         for (i = 0; i < mx->n_factors; i++)
1850           if (factor_values[i] != iter->factors[i])
1851             goto next_item;
1852
1853         wr->current = iter;
1854         goto winnage;
1855
1856       next_item: ;
1857       }
1858   }
1859
1860   /* Not found.  Make a new item. */
1861   {
1862     struct factor_data *new = pool_alloc (mx->container, sizeof *new);
1863
1864     new->factors = pool_nalloc (mx->container,
1865                                 mx->n_factors, sizeof *new->factors);
1866
1867     {
1868       size_t i;
1869
1870       for (i = 0; i < mx->n_factors; i++)
1871         new->factors[i] = factor_values[i];
1872     }
1873
1874     {
1875       int i;
1876
1877       for (i = 0; i <= PROX; i++)
1878         {
1879           new->n_rows[i] = 0;
1880           new->data[i] = NULL;
1881         }
1882     }
1883
1884     new->next = wr->data;
1885     wr->data = wr->current = new;
1886     mx->cells++;
1887   }
1888
1889 winnage:
1890   local_free (factor_values);
1891   return 1;
1892
1893 lossage:
1894   local_free (factor_values);
1895   return 0;
1896 }
1897
1898 /* Read the independent variables into wr->current. */
1899 static int
1900 wr_read_indeps (struct wr_aux_data *wr)
1901 {
1902   struct matrix_data_pgm *mx = wr->mx;
1903   struct factor_data *c = wr->current;
1904   const int type = content_type[wr->content];
1905   const int n_rows = c->n_rows[wr->content];
1906   double *cp;
1907   int n_cols;
1908
1909   /* Allocate room for data if necessary. */
1910   if (c->data[wr->content] == NULL)
1911     {
1912       int n_items = mx->n_continuous;
1913       if (type == 1)
1914         n_items *= mx->n_continuous;
1915
1916       c->data[wr->content] = pool_nalloc (mx->container,
1917                                           n_items, sizeof **c->data);
1918     }
1919
1920   cp = &c->data[wr->content][n_rows * mx->n_continuous];
1921
1922   /* Figure out how much to read from this line. */
1923   switch (type)
1924     {
1925     case 0:
1926     case 2:
1927       if (n_rows > 0)
1928         {
1929           msg (SE, _("Duplicate specification for %s."),
1930                content_names[wr->content]);
1931           return 0;
1932         }
1933       if (type == 0)
1934         n_cols = mx->n_continuous;
1935       else
1936         n_cols = 1;
1937       break;
1938     case 1:
1939       if (n_rows >= mx->n_continuous - (mx->section != FULL && mx->diag == NODIAGONAL))
1940         {
1941           msg (SE, _("Too many rows of matrix data for %s."),
1942                content_names[wr->content]);
1943           return 0;
1944         }
1945
1946       switch (mx->section)
1947         {
1948         case LOWER:
1949           n_cols = n_rows + 1;
1950           if (mx->diag == NODIAGONAL)
1951             cp += mx->n_continuous;
1952           break;
1953         case UPPER:
1954           cp += n_rows;
1955           n_cols = mx->n_continuous - n_rows;
1956           if (mx->diag == NODIAGONAL)
1957             {
1958               n_cols--;
1959               cp++;
1960             }
1961           break;
1962         case FULL:
1963           n_cols = mx->n_continuous;
1964           break;
1965         default:
1966           assert (0);
1967           abort ();
1968         }
1969       break;
1970     default:
1971       assert (0);
1972       abort ();
1973     }
1974   c->n_rows[wr->content]++;
1975
1976   debug_printf ((" (c=%p,r=%d,n=%d)", c, n_rows + 1, n_cols));
1977
1978   /* Read N_COLS items at CP. */
1979   {
1980     int j;
1981
1982     for (j = 0; j < n_cols; j++)
1983       {
1984         struct matrix_token token;
1985         if (!mget_token (&token, mx->reader))
1986           return 0;
1987         if (token.type != MNUM)
1988           {
1989             msg (SE, _("Syntax error expecting value for %s %s."),
1990                  dict_get_var (default_dict, mx->first_continuous + j)->name,
1991                  context (mx->reader));
1992             return 0;
1993           }
1994
1995         *cp++ = token.number;
1996       }
1997     if (mx->fmt != FREE
1998         && !force_eol (mx->reader, content_names[wr->content]))
1999       return 0;
2000     debug_printf (("\n"));
2001   }
2002
2003   return 1;
2004 }
2005 \f
2006 /* Matrix source. */
2007
2008 static const struct case_source_class matrix_data_with_rowtype_source_class =
2009   {
2010     "MATRIX DATA",
2011     NULL,
2012     matrix_data_read_with_rowtype,
2013     NULL,
2014   };
2015
2016 static const struct case_source_class
2017 matrix_data_without_rowtype_source_class =
2018   {
2019     "MATRIX DATA",
2020     NULL,
2021     matrix_data_read_without_rowtype,
2022     NULL,
2023   };
2024