src/matrix-data.c

   1 /* PSPP - computes sample statistics.
   2    Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
   3    Written by Ben Pfaff <blp@gnu.org>.
   4
   5    This program is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful, but
  11    WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software
  17    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  18    02111-1307, USA. */
  19
  20 #include <config.h>
  21 #include <assert.h>
  22 #include <stdlib.h>
  23 #include <ctype.h>
  24 #include <float.h>
  25 #include "algorithm.h"
  26 #include "alloc.h"
  27 #include "command.h"
  28 #include "data-in.h"
  29 #include "dfm.h"
  30 #include "error.h"
  31 #include "file-handle.h"
  32 #include "lexer.h"
  33 #include "misc.h"
  34 #include "pool.h"
  35 #include "str.h"
  36 #include "var.h"
  37 #include "vfm.h"
  38
  39 #include "debug-print.h"
  40
  41 /* FIXME: /N subcommand not implemented.  It should be pretty simple,
  42    too. */
  43
  44 /* Format type enums. */
  45 enum
  46   {
  47     LIST,
  48     FREE
  49   };
  50
  51 /* Matrix section enums. */
  52 enum
  53   {
  54     LOWER,
  55     UPPER,
  56     FULL
  57   };
  58
  59 /* Diagonal inclusion enums. */
  60 enum
  61   {
  62     DIAGONAL,
  63     NODIAGONAL
  64   };
  65
  66 /* CONTENTS types. */
  67 enum
  68   {
  69     N_VECTOR,
  70     N_SCALAR,
  71     N_MATRIX,
  72     MEAN,
  73     STDDEV,
  74     COUNT,
  75     MSE,
  76     DFE,
  77     MAT,
  78     COV,
  79     CORR,
  80     PROX,
  81
  82     LPAREN,
  83     RPAREN,
  84     EOC
  85   };
  86
  87 /* 0=vector, 1=matrix, 2=scalar. */
  88 static int content_type[PROX + 1] =
  89   {
  90     0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,
  91   };
  92
  93 /* Name of each content type. */
  94 static const char *content_names[PROX + 1] =
  95   {
  96     "N", "N", "N_MATRIX", "MEAN", "STDDEV", "COUNT", "MSE",
  97     "DFE", "MAT", "COV", "CORR", "PROX",
  98   };
  99
 100 /* The data file to be read. */
 101 static struct file_handle *data_file;
 102
 103 /* Format type. */
 104 static int fmt;                 /* LIST or FREE. */
 105 static int section;             /* LOWER or UPPER or FULL. */
 106 static int diag;                /* DIAGONAL or NODIAGONAL. */
 107
 108 /* Arena used for all the MATRIX DATA allocations. */
 109 static struct pool *container;
 110
 111 /* ROWTYPE_ specified explicitly in data? */
 112 static int explicit_rowtype;
 113
 114 /* ROWTYPE_, VARNAME_ variables. */
 115 static struct variable *rowtype_, *varname_;
 116
 117 /* Is is per-factor data? */
 118 int is_per_factor[PROX + 1];
 119
 120 /* Single SPLIT FILE variable. */
 121 static struct variable *single_split;
 122
 123 /* Factor variables.  */
 124 static int n_factors;
 125 static struct variable **factors;
 126
 127 /* Number of cells, or -1 if none. */
 128 static int cells;
 129
 130 /* Population N specified by user. */
 131 static int pop_n;
 132
 133 /* CONTENTS subcommand. */
 134 static int contents[EOC * 3 + 1];
 135 static int n_contents;
 136
 137 /* Number of continuous variables. */
 138 static int n_continuous;
 139
 140 /* Index into default_dict.var of first continuous variables. */
 141 static int first_continuous;
 142
 143 static int compare_variables_by_mxd_vartype (const void *pa,
 144                                              const void *pb);
 145 static void read_matrices_without_rowtype (void);
 146 static void read_matrices_with_rowtype (void);
 147 static int string_to_content_type (char *, int *);
 148
 149 #if DEBUGGING
 150 static void debug_print (void);
 151 #endif
 152
 153 int
 154 cmd_matrix_data (void)
 155 {
 156   unsigned seen = 0;
 157
 158   lex_match_id ("MATRIX");
 159   lex_match_id ("DATA");
 160
 161   container = pool_create ();
 162
 163   discard_variables ();
 164
 165   data_file = inline_file;
 166   fmt = LIST;
 167   section = LOWER;
 168   diag = DIAGONAL;
 169   single_split = NULL;
 170   n_factors = 0;
 171   factors = NULL;
 172   cells = -1;
 173   pop_n = -1;
 174   n_contents = 0;
 175   while (token != '.')
 176     {
 177       lex_match ('/');
 178
 179       if (lex_match_id ("VARIABLES"))
 180         {
 181           char **v;
 182           int nv;
 183
 184           if (seen & 1)
 185             {
 186               msg (SE, _("VARIABLES subcommand multiply specified."));
 187               goto lossage;
 188             }
 189           seen |= 1;
 190
 191           lex_match ('=');
 192           if (!parse_DATA_LIST_vars (&v, &nv, PV_NO_DUPLICATE))
 193             goto lossage;
 194
 195           {
 196             int i;
 197
 198             for (i = 0; i < nv; i++)
 199               if (!strcmp (v[i], "VARNAME_"))
 200                 {
 201                   msg (SE, _("VARNAME_ cannot be explicitly specified on "
 202                              "VARIABLES."));
 203                   for (i = 0; i < nv; i++)
 204                     free (v[i]);
 205                   free (v);
 206                   goto lossage;
 207                 }
 208           }
 209
 210           {
 211             int i;
 212
 213             for (i = 0; i < nv; i++)
 214               {
 215                 struct variable *new_var;
 216
 217                 if (strcmp (v[i], "ROWTYPE_"))
 218                   {
 219                     new_var = dict_create_var (default_dict, v[i], 0);
 220                     assert (new_var != NULL);
 221                     new_var->p.mxd.vartype = MXD_CONTINUOUS;
 222                     new_var->p.mxd.subtype = i;
 223                   }
 224                 else
 225                   explicit_rowtype = 1;
 226                 free (v[i]);
 227               }
 228             free (v);
 229           }
 230
 231           {
 232             rowtype_ = dict_create_var (default_dict, "ROWTYPE_", 8);
 233             assert (rowtype_ != NULL);
 234             rowtype_->p.mxd.vartype = MXD_ROWTYPE;
 235             rowtype_->p.mxd.subtype = 0;
 236           }
 237         }
 238       else if (lex_match_id ("FILE"))
 239         {
 240           lex_match ('=');
 241           data_file = fh_parse_file_handle ();
 242           if (!data_file)
 243             goto lossage;
 244         }
 245       else if (lex_match_id ("FORMAT"))
 246         {
 247           lex_match ('=');
 248
 249           while (token == T_ID)
 250             {
 251               if (lex_match_id ("LIST"))
 252                 fmt = LIST;
 253               else if (lex_match_id ("FREE"))
 254                 fmt = FREE;
 255               else if (lex_match_id ("LOWER"))
 256                 section = LOWER;
 257               else if (lex_match_id ("UPPER"))
 258                 section = UPPER;
 259               else if (lex_match_id ("FULL"))
 260                 section = FULL;
 261               else if (lex_match_id ("DIAGONAL"))
 262                 diag = DIAGONAL;
 263               else if (lex_match_id ("NODIAGONAL"))
 264                 diag = NODIAGONAL;
 265               else
 266                 {
 267                   lex_error (_("in FORMAT subcommand"));
 268                   goto lossage;
 269                 }
 270             }
 271         }
 272       else if (lex_match_id ("SPLIT"))
 273         {
 274           lex_match ('=');
 275
 276           if (seen & 2)
 277             {
 278               msg (SE, _("SPLIT subcommand multiply specified."));
 279               goto lossage;
 280             }
 281           seen |= 2;
 282
 283           if (token != T_ID)
 284             {
 285               lex_error (_("in SPLIT subcommand"));
 286               goto lossage;
 287             }
 288
 289           if (dict_lookup_var (default_dict, tokid) == NULL
 290               && (lex_look_ahead () == '.' || lex_look_ahead () == '/'))
 291             {
 292               if (!strcmp (tokid, "ROWTYPE_") || !strcmp (tokid, "VARNAME_"))
 293                 {
 294                   msg (SE, _("Split variable may not be named ROWTYPE_ "
 295                              "or VARNAME_."));
 296                   goto lossage;
 297                 }
 298
 299               single_split = dict_create_var (default_dict, tokid, 0);
 300               assert (single_split != NULL);
 301               lex_get ();
 302
 303               single_split->p.mxd.vartype = MXD_CONTINUOUS;
 304
 305               dict_set_split_vars (default_dict, &single_split, 1);
 306             }
 307           else
 308             {
 309               struct variable **split;
 310               int n;
 311
 312               if (!parse_variables (default_dict, &split, &n, PV_NO_DUPLICATE))
 313                 goto lossage;
 314
 315               dict_set_split_vars (default_dict, split, n);
 316             }
 317
 318           {
 319             struct variable *const *split = dict_get_split_vars (default_dict);
 320             size_t split_cnt = dict_get_split_cnt (default_dict);
 321             int i;
 322
 323             for (i = 0; i < split_cnt; i++)
 324               {
 325                 if (split[i]->p.mxd.vartype != MXD_CONTINUOUS)
 326                   {
 327                     msg (SE, _("Split variable %s is already another type."),
 328                          tokid);
 329                     goto lossage;
 330                   }
 331                 split[i]->p.mxd.vartype = MXD_SPLIT;
 332                 split[i]->p.mxd.subtype = i;
 333               }
 334           }
 335         }
 336       else if (lex_match_id ("FACTORS"))
 337         {
 338           lex_match ('=');
 339
 340           if (seen & 4)
 341             {
 342               msg (SE, _("FACTORS subcommand multiply specified."));
 343               goto lossage;
 344             }
 345           seen |= 4;
 346
 347           if (!parse_variables (default_dict, &factors, &n_factors, PV_NONE))
 348             goto lossage;
 349
 350           {
 351             int i;
 352
 353             for (i = 0; i < n_factors; i++)
 354               {
 355                 if (factors[i]->p.mxd.vartype != MXD_CONTINUOUS)
 356                   {
 357                     msg (SE, _("Factor variable %s is already another type."),
 358                          tokid);
 359                     goto lossage;
 360                   }
 361                 factors[i]->p.mxd.vartype = MXD_FACTOR;
 362                 factors[i]->p.mxd.subtype = i;
 363               }
 364           }
 365         }
 366       else if (lex_match_id ("CELLS"))
 367         {
 368           lex_match ('=');
 369
 370           if (cells != -1)
 371             {
 372               msg (SE, _("CELLS subcommand multiply specified."));
 373               goto lossage;
 374             }
 375
 376           if (!lex_integer_p () || lex_integer () < 1)
 377             {
 378               lex_error (_("expecting positive integer"));
 379               goto lossage;
 380             }
 381
 382           cells = lex_integer ();
 383           lex_get ();
 384         }
 385       else if (lex_match_id ("N"))
 386         {
 387           lex_match ('=');
 388
 389           if (pop_n != -1)
 390             {
 391               msg (SE, _("N subcommand multiply specified."));
 392               goto lossage;
 393             }
 394
 395           if (!lex_integer_p () || lex_integer () < 1)
 396             {
 397               lex_error (_("expecting positive integer"));
 398               goto lossage;
 399             }
 400
 401           pop_n = lex_integer ();
 402           lex_get ();
 403         }
 404       else if (lex_match_id ("CONTENTS"))
 405         {
 406           int inside_parens = 0;
 407           unsigned collide = 0;
 408           int item;
 409
 410           if (seen & 8)
 411             {
 412               msg (SE, _("CONTENTS subcommand multiply specified."));
 413               goto lossage;
 414             }
 415           seen |= 8;
 416
 417           lex_match ('=');
 418
 419           {
 420             int i;
 421
 422             for (i = 0; i <= PROX; i++)
 423               is_per_factor[i] = 0;
 424           }
 425
 426           for (;;)
 427             {
 428               if (lex_match ('('))
 429                 {
 430                   if (inside_parens)
 431                     {
 432                       msg (SE, _("Nested parentheses not allowed."));
 433                       goto lossage;
 434                     }
 435                   inside_parens = 1;
 436                   item = LPAREN;
 437                 }
 438               else if (lex_match (')'))
 439                 {
 440                   if (!inside_parens)
 441                     {
 442                       msg (SE, _("Mismatched right parenthesis (`(')."));
 443                       goto lossage;
 444                     }
 445                   if (contents[n_contents - 1] == LPAREN)
 446                     {
 447                       msg (SE, _("Empty parentheses not allowed."));
 448                       goto lossage;
 449                     }
 450                   inside_parens = 0;
 451                   item = RPAREN;
 452                 }
 453               else
 454                 {
 455                   int content_type;
 456                   int collide_index;
 457
 458                   if (token != T_ID)
 459                     {
 460                       lex_error (_("in CONTENTS subcommand"));
 461                       goto lossage;
 462                     }
 463
 464                   content_type = string_to_content_type (tokid,
 465                                                          &collide_index);
 466                   if (content_type == -1)
 467                     {
 468                       lex_error (_("in CONTENTS subcommand"));
 469                       goto lossage;
 470                     }
 471                   lex_get ();
 472
 473                   if (collide & (1 << collide_index))
 474                     {
 475                       msg (SE, _("Content multiply specified for %s."),
 476                            content_names[content_type]);
 477                       goto lossage;
 478                     }
 479                   collide |= (1 << collide_index);
 480
 481                   item = content_type;
 482                   is_per_factor[item] = inside_parens;
 483                 }
 484               contents[n_contents++] = item;
 485
 486               if (token == '/' || token == '.')
 487                 break;
 488             }
 489
 490           if (inside_parens)
 491             {
 492               msg (SE, _("Missing right parenthesis."));
 493               goto lossage;
 494             }
 495           contents[n_contents] = EOC;
 496         }
 497       else
 498         {
 499           lex_error (NULL);
 500           goto lossage;
 501         }
 502     }
 503
 504   if (token != '.')
 505     {
 506       lex_error (_("expecting end of command"));
 507       goto lossage;
 508     }
 509
 510   if (!(seen & 1))
 511     {
 512       msg (SE, _("Missing VARIABLES subcommand."));
 513       goto lossage;
 514     }
 515
 516   if (!n_contents && !explicit_rowtype)
 517     {
 518       msg (SW, _("CONTENTS subcommand not specified: assuming file "
 519                  "contains only CORR matrix."));
 520
 521       contents[0] = CORR;
 522       contents[1] = EOC;
 523       n_contents = 0;
 524     }
 525
 526   if (n_factors && !explicit_rowtype && cells == -1)
 527     {
 528       msg (SE, _("Missing CELLS subcommand.  CELLS is required "
 529                  "when ROWTYPE_ is not given in the data and "
 530                  "factors are present."));
 531       goto lossage;
 532     }
 533
 534   if (explicit_rowtype && single_split)
 535     {
 536       msg (SE, _("Split file values must be present in the data when "
 537                  "ROWTYPE_ is present."));
 538       goto lossage;
 539     }
 540
 541   /* Create VARNAME_. */
 542   {
 543     varname_ = dict_create_var (default_dict, "VARNAME_", 8);
 544     assert (varname_ != NULL);
 545     varname_->p.mxd.vartype = MXD_VARNAME;
 546     varname_->p.mxd.subtype = 0;
 547   }
 548
 549   /* Sort the dictionary variables into the desired order for the
 550      system file output. */
 551   {
 552     struct variable **v;
 553     size_t nv;
 554
 555     dict_get_vars (default_dict, &v, &nv, 0);
 556     qsort (v, nv, sizeof *v, compare_variables_by_mxd_vartype);
 557     dict_reorder_vars (default_dict, v, nv);
 558     free (v);
 559   }
 560
 561   /* Set formats. */
 562   {
 563     static const struct fmt_spec fmt_tab[MXD_COUNT] =
 564       {
 565         {FMT_F, 4, 0},
 566         {FMT_A, 8, 0},
 567         {FMT_F, 4, 0},
 568         {FMT_A, 8, 0},
 569         {FMT_F, 10, 4},
 570       };
 571
 572     int i;
 573
 574     first_continuous = -1;
 575     for (i = 0; i < dict_get_var_cnt (default_dict); i++)
 576       {
 577         struct variable *v = dict_get_var (default_dict, i);
 578         int type = v->p.mxd.vartype;
 579
 580         assert (type >= 0 && type < MXD_COUNT);
 581         v->print = v->write = fmt_tab[type];
 582
 583         if (type == MXD_CONTINUOUS)
 584           n_continuous++;
 585         if (first_continuous == -1 && type == MXD_CONTINUOUS)
 586           first_continuous = i;
 587       }
 588   }
 589
 590   if (n_continuous == 0)
 591     {
 592       msg (SE, _("No continuous variables specified."));
 593       goto lossage;
 594     }
 595
 596 #if DEBUGGING
 597   debug_print ();
 598 #endif
 599
 600   if (explicit_rowtype)
 601     read_matrices_with_rowtype ();
 602   else
 603     read_matrices_without_rowtype ();
 604
 605   pool_destroy (container);
 606
 607   return CMD_SUCCESS;
 608
 609 lossage:
 610   discard_variables ();
 611   free (factors);
 612   pool_destroy (container);
 613   return CMD_FAILURE;
 614 }
 615
 616 /* Look up string S as a content-type name and return the
 617    corresponding enumerated value, or -1 if there is no match.  If
 618    COLLIDE is non-NULL then *COLLIDE returns a value (suitable for use
 619    as a bit-index) which can be used for determining whether a related
 620    statistic has already been used. */
 621 static int
 622 string_to_content_type (char *s, int *collide)
 623 {
 624   static const struct
 625     {
 626       int value;
 627       int collide;
 628       const char *string;
 629     }
 630   *tp,
 631   tab[] =
 632     {
 633       {N_VECTOR, 0, "N_VECTOR"},
 634       {N_VECTOR, 0, "N"},
 635       {N_SCALAR, 0, "N_SCALAR"},
 636       {N_MATRIX, 1, "N_MATRIX"},
 637       {MEAN, 2, "MEAN"},
 638       {STDDEV, 3, "STDDEV"},
 639       {STDDEV, 3, "SD"},
 640       {COUNT, 4, "COUNT"},
 641       {MSE, 5, "MSE"},
 642       {DFE, 6, "DFE"},
 643       {MAT, 7, "MAT"},
 644       {COV, 8, "COV"},
 645       {CORR, 9, "CORR"},
 646       {PROX, 10, "PROX"},
 647       {-1, -1, NULL},
 648     };
 649
 650   for (tp = tab; tp->value != -1; tp++)
 651     if (!strcmp (s, tp->string))
 652       {
 653         if (collide)
 654           *collide = tp->collide;
 655
 656         return tp->value;
 657       }
 658   return -1;
 659 }
 660
 661 /* Compare two variables using p.mxd.vartype and p.mxd.subtype
 662    fields. */
 663 static int
 664 compare_variables_by_mxd_vartype (const void *a_, const void *b_)
 665 {
 666   struct variable *const *pa = a_;
 667   struct variable *const *pb = b_;
 668   const struct matrix_data_proc *a = &(*pa)->p.mxd;
 669   const struct matrix_data_proc *b = &(*pb)->p.mxd;
 670
 671   if (a->vartype != b->vartype)
 672     return a->vartype > b->vartype ? 1 : -1;
 673   else
 674     return a->subtype < b->subtype ? -1 : a->subtype > b->subtype;
 675 }
 676
 677 #if DEBUGGING
 678 /* Print out the command as input. */
 679 static void
 680 debug_print (void)
 681 {
 682   printf ("MATRIX DATA\n\t/VARIABLES=");
 683
 684   {
 685     int i;
 686
 687     for (i = 0; i < default_dict.nvar; i++)
 688       printf ("%s ", default_dict.var[i]->name);
 689   }
 690   printf ("\n");
 691
 692   printf ("\t/FORMAT=");
 693   if (fmt == LIST)
 694     printf ("LIST");
 695   else if (fmt == FREE)
 696     printf ("FREE");
 697   else
 698     assert (0);
 699   if (section == LOWER)
 700     printf (" LOWER");
 701   else if (section == UPPER)
 702     printf (" UPPER");
 703   else if (section == FULL)
 704     printf (" FULL");
 705   else
 706     assert (0);
 707   if (diag == DIAGONAL)
 708     printf (" DIAGONAL\n");
 709   else if (diag == NODIAGONAL)
 710     printf (" NODIAGONAL\n");
 711   else
 712     assert (0);
 713
 714   if (dict_get_split_cnt (default_dict) != 0)
 715     {
 716       int i;
 717
 718       printf ("\t/SPLIT=");
 719       for (i = 0; i < dict_get_split_cnt (default_dict); i++)
 720         printf ("%s ", dict_get_split_vars (default_dict)[i]->name);
 721       if (single_split)
 722         printf ("\t/* single split");
 723       printf ("\n");
 724     }
 725
 726   if (n_factors)
 727     {
 728       int i;
 729
 730       printf ("\t/FACTORS=");
 731       for (i = 0; i < n_factors; i++)
 732         printf ("%s ", factors[i]->name);
 733       printf ("\n");
 734     }
 735
 736   if (cells != -1)
 737     printf ("\t/CELLS=%d\n", cells);
 738
 739   if (pop_n != -1)
 740     printf ("\t/N=%d\n", pop_n);
 741
 742   if (n_contents)
 743     {
 744       int i;
 745       int space = 0;
 746
 747       printf ("\t/CONTENTS=");
 748       for (i = 0; i < n_contents; i++)
 749         {
 750           if (contents[i] == LPAREN)
 751             {
 752               if (space)
 753                 printf (" ");
 754               printf ("(");
 755               space = 0;
 756             }
 757           else if (contents[i] == RPAREN)
 758             {
 759               printf (")");
 760               space = 1;
 761             }
 762           else
 763             {
 764
 765               assert (contents[i] >= 0 && contents[i] <= PROX);
 766               if (space)
 767                 printf (" ");
 768               printf ("%s", content_names[contents[i]]);
 769               space = 1;
 770             }
 771         }
 772       printf ("\n");
 773     }
 774 }
 775 #endif /* DEBUGGING */
 776 \f
 777 /* Matrix tokenizer. */
 778
 779 /* Matrix token types. */
 780 enum
 781   {
 782     MNULL,              /* No token. */
 783     MNUM,               /* Number. */
 784     MSTR,               /* String. */
 785     MSTOP               /* End of file. */
 786   };
 787
 788 /* Current matrix token. */
 789 static int mtoken;
 790
 791 /* Token string if applicable; not null-terminated. */
 792 static char *mtokstr;
 793
 794 /* Length of mtokstr in characters. */
 795 static int mtoklen;
 796
 797 /* Token value if applicable. */
 798 static double mtokval;
 799
 800 static int mget_token (void);
 801
 802 #if DEBUGGING
 803 #define mget_token() mget_token_dump()
 804
 805 static int
 806 mget_token_dump (void)
 807 {
 808   int result = (mget_token) ();
 809   mdump_token ();
 810   return result;
 811 }
 812
 813 static void
 814 mdump_token (void)
 815 {
 816   switch (mtoken)
 817     {
 818     case MNULL:
 819       printf (" <NULLTOK>");
 820       break;
 821     case MNUM:
 822       printf (" #%g", mtokval);
 823       break;
 824     case MSTR:
 825       printf (" #'%.*s'", mtoklen, mtokstr);
 826       break;
 827     case MSTOP:
 828       printf (" <STOP>");
 829       break;
 830     default:
 831       assert (0);
 832     }
 833   fflush (stdout);
 834 }
 835 #endif
 836
 837 /* Return the current position in the data file. */
 838 static const char *
 839 context (void)
 840 {
 841   static char buf[32];
 842   int len;
 843   char *p = dfm_get_record (data_file, &len);
 844
 845   if (!p || !len)
 846     strcpy (buf, "at end of line");
 847   else
 848     {
 849       char *cp = buf;
 850       int n_copy = min (10, len);
 851       cp = stpcpy (buf, "before `");
 852       while (n_copy && isspace ((unsigned char) *p))
 853         p++, n_copy++;
 854       while (n_copy && !isspace ((unsigned char) *p))
 855         *cp++ = *p++, n_copy--;
 856       *cp++ = '\'';
 857       *cp = 0;
 858     }
 859
 860   return buf;
 861 }
 862
 863 /* Is there at least one token left in the data file? */
 864 static int
 865 another_token (void)
 866 {
 867   char *cp, *ep;
 868   int len;
 869
 870   if (mtoken == MSTOP)
 871     return 0;
 872
 873   for (;;)
 874     {
 875       cp = dfm_get_record (data_file, &len);
 876       if (!cp)
 877         return 0;
 878
 879       ep = cp + len;
 880       while (isspace ((unsigned char) *cp) && cp < ep)
 881         cp++;
 882
 883       if (cp < ep)
 884         break;
 885
 886       dfm_fwd_record (data_file);
 887     }
 888
 889   dfm_set_record (data_file, cp);
 890
 891   return 1;
 892 }
 893
 894 /* Parse a MATRIX DATA token from data_file into mtok*. */
 895 static int
 896 (mget_token) (void)
 897 {
 898   char *cp, *ep;
 899   int len;
 900   int first_column;
 901
 902   for (;;)
 903     {
 904       cp = dfm_get_record (data_file, &len);
 905       if (!cp)
 906         {
 907           if (mtoken == MSTOP)
 908             return 0;
 909           mtoken = MSTOP;
 910           return 1;
 911         }
 912
 913       ep = cp + len;
 914       while (isspace ((unsigned char) *cp) && cp < ep)
 915         cp++;
 916
 917       if (cp < ep)
 918         break;
 919
 920       dfm_fwd_record (data_file);
 921     }
 922
 923   dfm_set_record (data_file, cp);
 924   first_column = dfm_get_cur_col (data_file) + 1;
 925
 926   /* Three types of fields: quoted with ', quoted with ", unquoted. */
 927   if (*cp == '\'' || *cp == '"')
 928     {
 929       int quote = *cp;
 930
 931       mtoken = MSTR;
 932       mtokstr = ++cp;
 933       while (cp < ep && *cp != quote)
 934         cp++;
 935       mtoklen = cp - mtokstr;
 936       if (cp < ep)
 937         cp++;
 938       else
 939         msg (SW, _("Scope of string exceeds line."));
 940     }
 941   else
 942     {
 943       int is_num = isdigit ((unsigned char) *cp) || *cp == '.';
 944
 945       mtokstr = cp++;
 946       while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ','
 947              && *cp != '-' && *cp != '+')
 948         {
 949           if (isdigit ((unsigned char) *cp))
 950             is_num = 1;
 951
 952           if ((tolower ((unsigned char) *cp) == 'd'
 953                || tolower ((unsigned char) *cp) == 'e')
 954               && (cp[1] == '+' || cp[1] == '-'))
 955             cp += 2;
 956           else
 957             cp++;
 958         }
 959
 960       mtoklen = cp - mtokstr;
 961       assert (mtoklen);
 962
 963       if (is_num)
 964         {
 965           struct data_in di;
 966
 967           di.s = mtokstr;
 968           di.e = mtokstr + mtoklen;
 969           di.v = (union value *) &mtokval;
 970           di.f1 = first_column;
 971           di.format.type = FMT_F;
 972           di.format.w = mtoklen;
 973           di.format.d = 0;
 974
 975           if (!data_in (&di))
 976             return 0;
 977         }
 978       else
 979         mtoken = MSTR;
 980     }
 981
 982   dfm_set_record (data_file, cp);
 983
 984   return 1;
 985 }
 986
 987 /* Forcibly skip the end of a line for content type CONTENT in
 988    data_file. */
 989 static int
 990 force_eol (const char *content)
 991 {
 992   char *cp;
 993   int len;
 994
 995   if (fmt == FREE)
 996     return 1;
 997
 998   cp = dfm_get_record (data_file, &len);
 999   if (!cp)
1000     return 0;
1001   while (len && isspace (*cp))
1002     cp++, len--;
1003
1004   if (len)
1005     {
1006       msg (SE, _("End of line expected %s while reading %s."),
1007            context (), content);
1008       return 0;
1009     }
1010
1011   dfm_fwd_record (data_file);
1012
1013   return 1;
1014 }
1015 \f
1016 /* Back end, omitting ROWTYPE_. */
1017
1018 /* MATRIX DATA data. */
1019 static double ***nr_data;
1020
1021 /* Factor values. */
1022 static double *nr_factor_values;
1023
1024 /* Largest-numbered cell that we have read in thus far, plus one. */
1025 static int max_cell_index;
1026
1027 /* SPLIT FILE variable values. */
1028 static double *split_values;
1029
1030 static int nr_read_splits (int compare);
1031 static int nr_read_factors (int cell);
1032 static void nr_output_data (write_case_func *, write_case_data);
1033 static void matrix_data_read_without_rowtype (write_case_func *,
1034                                               write_case_data);
1035
1036 /* Read from the data file and write it to the active file. */
1037 static void
1038 read_matrices_without_rowtype (void)
1039 {
1040   if (cells == -1)
1041     cells = 1;
1042
1043   mtoken = MNULL;
1044   split_values = xmalloc (sizeof *split_values
1045                           * dict_get_split_cnt (default_dict));
1046   nr_factor_values = xmalloc (sizeof *nr_factor_values * n_factors * cells);
1047   max_cell_index = 0;
1048
1049   matrix_data_source.read = matrix_data_read_without_rowtype;
1050   vfm_source = &matrix_data_source;
1051
1052   procedure (NULL, NULL, NULL, NULL);
1053
1054   free (split_values);
1055   free (nr_factor_values);
1056
1057   fh_close_handle (data_file);
1058 }
1059
1060 /* Mirror data across the diagonal of matrix CP which contains
1061    CONTENT type data. */
1062 static void
1063 fill_matrix (int content, double *cp)
1064 {
1065   int type = content_type[content];
1066
1067   if (type == 1 && section != FULL)
1068     {
1069       if (diag == NODIAGONAL)
1070         {
1071           const double fill = content == CORR ? 1.0 : SYSMIS;
1072           int i;
1073
1074           for (i = 0; i < n_continuous; i++)
1075             cp[i * (1 + n_continuous)] = fill;
1076         }
1077
1078       {
1079         int c, r;
1080
1081         if (section == LOWER)
1082           {
1083             int n_lines = n_continuous;
1084             if (section != FULL && diag == NODIAGONAL)
1085               n_lines--;
1086
1087             for (r = 1; r < n_lines; r++)
1088               for (c = 0; c < r; c++)
1089                 cp[r + c * n_continuous] = cp[c + r * n_continuous];
1090           }
1091         else
1092           {
1093             assert (section == UPPER);
1094             for (r = 1; r < n_continuous; r++)
1095               for (c = 0; c < r; c++)
1096                 cp[c + r * n_continuous] = cp[r + c * n_continuous];
1097           }
1098       }
1099     }
1100   else if (type == 2)
1101     {
1102       int c;
1103
1104       for (c = 1; c < n_continuous; c++)
1105         cp[c] = cp[0];
1106     }
1107 }
1108
1109 /* Read data lines for content type CONTENT from the data file.  If
1110    PER_FACTOR is nonzero, then factor information is read from the
1111    data file.  Data is for cell number CELL. */
1112 static int
1113 nr_read_data_lines (int per_factor, int cell, int content, int compare)
1114 {
1115   /* Content type. */
1116   const int type = content_type[content];
1117
1118   /* Number of lines that must be parsed from the data file for this
1119      content type. */
1120   int n_lines;
1121
1122   /* Current position in vector or matrix. */
1123   double *cp;
1124
1125   /* Counter. */
1126   int i;
1127
1128   if (type != 1)
1129     n_lines = 1;
1130   else
1131     {
1132       n_lines = n_continuous;
1133       if (section != FULL && diag == NODIAGONAL)
1134         n_lines--;
1135     }
1136
1137   cp = nr_data[content][cell];
1138   if (type == 1 && section == LOWER && diag == NODIAGONAL)
1139     cp += n_continuous;
1140
1141   for (i = 0; i < n_lines; i++)
1142     {
1143       int n_cols;
1144
1145       if (!nr_read_splits (1))
1146         return 0;
1147       if (per_factor && !nr_read_factors (cell))
1148         return 0;
1149       compare = 1;
1150
1151       switch (type)
1152         {
1153         case 0:
1154           n_cols = n_continuous;
1155           break;
1156         case 1:
1157           switch (section)
1158             {
1159             case LOWER:
1160               n_cols = i + 1;
1161               break;
1162             case UPPER:
1163               cp += i;
1164               n_cols = n_continuous - i;
1165               if (diag == NODIAGONAL)
1166                 {
1167                   n_cols--;
1168                   cp++;
1169                 }
1170               break;
1171             case FULL:
1172               n_cols = n_continuous;
1173               break;
1174             default:
1175               assert (0);
1176             }
1177           break;
1178         case 2:
1179           n_cols = 1;
1180           break;
1181         default:
1182           assert (0);
1183         }
1184
1185       {
1186         int j;
1187
1188         for (j = 0; j < n_cols; j++)
1189           {
1190             if (!mget_token ())
1191               return 0;
1192             if (mtoken != MNUM)
1193               {
1194                 msg (SE, _("expecting value for %s %s"),
1195                      dict_get_var (default_dict, j)->name, context ());
1196                 return 0;
1197               }
1198
1199             *cp++ = mtokval;
1200           }
1201         if (!force_eol (content_names[content]))
1202           return 0;
1203         debug_printf (("\n"));
1204       }
1205
1206       if (section == LOWER)
1207         cp += n_continuous - n_cols;
1208     }
1209
1210   fill_matrix (content, nr_data[content][cell]);
1211
1212   return 1;
1213 }
1214
1215 /* When ROWTYPE_ does not appear in the data, reads the matrices and
1216    writes them to the output file.  Returns success. */
1217 static void
1218 matrix_data_read_without_rowtype (write_case_func *write_case,
1219                                   write_case_data wc_data)
1220 {
1221   {
1222     int *cp;
1223
1224     nr_data = pool_alloc (container, (PROX + 1) * sizeof *nr_data);
1225
1226     {
1227       int i;
1228
1229       for (i = 0; i <= PROX; i++)
1230         nr_data[i] = NULL;
1231     }
1232
1233     for (cp = contents; *cp != EOC; cp++)
1234       if (*cp != LPAREN && *cp != RPAREN)
1235         {
1236           int per_factor = is_per_factor[*cp];
1237           int n_entries;
1238
1239           n_entries = n_continuous;
1240           if (content_type[*cp] == 1)
1241             n_entries *= n_continuous;
1242
1243           {
1244             int n_vectors = per_factor ? cells : 1;
1245             int i;
1246
1247             nr_data[*cp] = pool_alloc (container,
1248                                        n_vectors * sizeof **nr_data);
1249
1250             for (i = 0; i < n_vectors; i++)
1251               nr_data[*cp][i] = pool_alloc (container,
1252                                             n_entries * sizeof ***nr_data);
1253           }
1254         }
1255   }
1256
1257   for (;;)
1258     {
1259       int *bp, *ep, *np;
1260
1261       if (!nr_read_splits (0))
1262         return;
1263
1264       for (bp = contents; *bp != EOC; bp = np)
1265         {
1266           int per_factor;
1267
1268           /* Trap the CONTENTS that we should parse in this pass
1269              between bp and ep.  Set np to the starting bp for next
1270              iteration. */
1271           if (*bp == LPAREN)
1272             {
1273               ep = ++bp;
1274               while (*ep != RPAREN)
1275                 ep++;
1276               np = &ep[1];
1277               per_factor = 1;
1278             }
1279           else
1280             {
1281               ep = &bp[1];
1282               while (*ep != EOC && *ep != LPAREN)
1283                 ep++;
1284               np = ep;
1285               per_factor = 0;
1286             }
1287
1288           {
1289             int i;
1290
1291             for (i = 0; i < (per_factor ? cells : 1); i++)
1292               {
1293                 int *cp;
1294
1295                 for (cp = bp; cp < ep; cp++)
1296                   if (!nr_read_data_lines (per_factor, i, *cp, cp != bp))
1297                     return;
1298               }
1299           }
1300         }
1301
1302       nr_output_data (write_case, wc_data);
1303
1304       if (dict_get_split_cnt (default_dict) == 0 || !another_token ())
1305         return;
1306     }
1307 }
1308
1309 /* Read the split file variables.  If COMPARE is 1, compares the
1310    values read to the last values read and returns 1 if they're equal,
1311    0 otherwise. */
1312 static int
1313 nr_read_splits (int compare)
1314 {
1315   static int just_read = 0;
1316   size_t split_cnt;
1317   size_t i;
1318
1319   if (compare && just_read)
1320     {
1321       just_read = 0;
1322       return 1;
1323     }
1324
1325   if (dict_get_split_vars (default_dict) == NULL)
1326     return 1;
1327
1328   if (single_split)
1329     {
1330       if (!compare)
1331         split_values[0]
1332           = ++dict_get_split_vars (default_dict)[0]->p.mxd.subtype;
1333       return 1;
1334     }
1335
1336   if (!compare)
1337     just_read = 1;
1338
1339   split_cnt = dict_get_split_cnt (default_dict);
1340   for (i = 0; i < split_cnt; i++)
1341     {
1342       if (!mget_token ())
1343         return 0;
1344       if (mtoken != MNUM)
1345         {
1346           msg (SE, _("Syntax error expecting SPLIT FILE value %s."),
1347                context ());
1348           return 0;
1349         }
1350
1351       if (!compare)
1352         split_values[i] = mtokval;
1353       else if (split_values[i] != mtokval)
1354         {
1355           msg (SE, _("Expecting value %g for %s."),
1356                split_values[i], dict_get_split_vars (default_dict)[i]->name);
1357           return 0;
1358         }
1359     }
1360
1361   return 1;
1362 }
1363
1364 /* Read the factors for cell CELL.  If COMPARE is 1, compares the
1365    values read to the last values read and returns 1 if they're equal,
1366    0 otherwise. */
1367 static int
1368 nr_read_factors (int cell)
1369 {
1370   int compare;
1371
1372   if (n_factors == 0)
1373     return 1;
1374
1375   assert (max_cell_index >= cell);
1376   if (cell != max_cell_index)
1377     compare = 1;
1378   else
1379     {
1380       compare = 0;
1381       max_cell_index++;
1382     }
1383
1384   {
1385     int i;
1386
1387     for (i = 0; i < n_factors; i++)
1388       {
1389         if (!mget_token ())
1390           return 0;
1391         if (mtoken != MNUM)
1392           {
1393             msg (SE, _("Syntax error expecting factor value %s."),
1394                  context ());
1395             return 0;
1396           }
1397
1398         if (!compare)
1399           nr_factor_values[i + n_factors * cell] = mtokval;
1400         else if (nr_factor_values[i + n_factors * cell] != mtokval)
1401           {
1402             msg (SE, _("Syntax error expecting value %g for %s %s."),
1403                  nr_factor_values[i + n_factors * cell],
1404                  factors[i]->name, context ());
1405             return 0;
1406           }
1407       }
1408   }
1409
1410   return 1;
1411 }
1412
1413 /* Write the contents of a cell having content type CONTENT and data
1414    CP to the active file. */
1415 static void
1416 dump_cell_content (int content, double *cp,
1417                    write_case_func *write_case, write_case_data wc_data)
1418 {
1419   int type = content_type[content];
1420
1421   {
1422     st_bare_pad_copy (temp_case->data[rowtype_->fv].s,
1423                       content_names[content], 8);
1424
1425     if (type != 1)
1426       memset (&temp_case->data[varname_->fv].s, ' ', 8);
1427   }
1428
1429   {
1430     int n_lines = (type == 1) ? n_continuous : 1;
1431     int i;
1432
1433     for (i = 0; i < n_lines; i++)
1434       {
1435         int j;
1436
1437         for (j = 0; j < n_continuous; j++)
1438           {
1439             int fv = dict_get_var (default_dict, first_continuous + j)->fv;
1440             temp_case->data[fv].f = *cp;
1441             cp++;
1442           }
1443         if (type == 1)
1444           st_bare_pad_copy (temp_case->data[varname_->fv].s,
1445                             dict_get_var (default_dict,
1446                                           first_continuous + i)->name,
1447                             8);
1448         write_case (wc_data);
1449       }
1450   }
1451 }
1452
1453 /* Finally dump out everything from nr_data[] to the output file. */
1454 static void
1455 nr_output_data (write_case_func *write_case, write_case_data wc_data)
1456 {
1457   {
1458     struct variable *const *split;
1459     size_t split_cnt;
1460     size_t i;
1461
1462     split_cnt = dict_get_split_cnt (default_dict);
1463     for (i = 0; i < split_cnt; i++)
1464       temp_case->data[split[i]->fv].f = split_values[i];
1465   }
1466
1467   if (n_factors)
1468     {
1469       int cell;
1470
1471       for (cell = 0; cell < cells; cell++)
1472         {
1473           {
1474             int factor;
1475
1476             for (factor = 0; factor < n_factors; factor++)
1477               {
1478                 temp_case->data[factors[factor]->fv].f
1479                   = nr_factor_values[factor + cell * n_factors];
1480                 debug_printf (("f:%s ", factors[factor]->name));
1481               }
1482           }
1483
1484           {
1485             int content;
1486
1487             for (content = 0; content <= PROX; content++)
1488               if (is_per_factor[content])
1489                 {
1490                   assert (nr_data[content] != NULL
1491                           && nr_data[content][cell] != NULL);
1492
1493                   dump_cell_content (content, nr_data[content][cell],
1494                                      write_case, wc_data);
1495                 }
1496           }
1497         }
1498     }
1499
1500   {
1501     int content;
1502
1503     {
1504       int factor;
1505
1506       for (factor = 0; factor < n_factors; factor++)
1507         temp_case->data[factors[factor]->fv].f = SYSMIS;
1508     }
1509
1510     for (content = 0; content <= PROX; content++)
1511       if (!is_per_factor[content] && nr_data[content] != NULL)
1512         dump_cell_content (content, nr_data[content][0],
1513                            write_case, wc_data);
1514   }
1515 }
1516 \f
1517 /* Back end, with ROWTYPE_. */
1518
1519 /* Type of current row. */
1520 static int wr_content;
1521
1522 /* All the data for one set of factor values. */
1523 struct factor_data
1524   {
1525     double *factors;
1526     int n_rows[PROX + 1];
1527     double *data[PROX + 1];
1528     struct factor_data *next;
1529   };
1530
1531 /* All the data, period. */
1532 struct factor_data *wr_data;
1533
1534 /* Current factor. */
1535 struct factor_data *wr_current;
1536
1537 static int wr_read_splits (write_case_func *, write_case_data);
1538 static int wr_output_data (write_case_func *, write_case_data);
1539 static int wr_read_rowtype (void);
1540 static int wr_read_factors (void);
1541 static int wr_read_indeps (void);
1542 static void matrix_data_read_with_rowtype (write_case_func *,
1543                                            write_case_data);
1544
1545 /* When ROWTYPE_ appears in the data, reads the matrices and writes
1546    them to the output file. */
1547 static void
1548 read_matrices_with_rowtype (void)
1549 {
1550   mtoken = MNULL;
1551   wr_data = wr_current = NULL;
1552   split_values = NULL;
1553   cells = 0;
1554
1555   matrix_data_source.read = matrix_data_read_with_rowtype;
1556   vfm_source = &matrix_data_source;
1557
1558   procedure (NULL, NULL, NULL, NULL);
1559
1560   free (split_values);
1561   fh_close_handle (data_file);
1562 }
1563
1564 /* Read from the data file and write it to the active file. */
1565 static void
1566 matrix_data_read_with_rowtype (write_case_func *write_case,
1567                                write_case_data wc_data)
1568 {
1569   do
1570     {
1571       if (!wr_read_splits (write_case, wc_data))
1572         return;
1573
1574       if (!wr_read_factors ())
1575         return;
1576
1577       if (!wr_read_indeps ())
1578         return;
1579     }
1580   while (another_token ());
1581
1582   wr_output_data (write_case, wc_data);
1583 }
1584
1585 /* Read the split file variables.  If they differ from the previous
1586    set of split variables then output the data.  Returns success. */
1587 static int
1588 wr_read_splits (write_case_func *write_case, write_case_data wc_data)
1589 {
1590   int compare;
1591   size_t split_cnt;
1592
1593   split_cnt = dict_get_split_cnt (default_dict);
1594   if (split_cnt == 0)
1595     return 1;
1596
1597   if (split_values)
1598     compare = 1;
1599   else
1600     {
1601       compare = 0;
1602       split_values = xmalloc (split_cnt * sizeof *split_values);
1603     }
1604
1605   {
1606     int different = 0;
1607     size_t split_cnt;
1608     int i;
1609
1610     for (i = 0; i < split_cnt; i++)
1611       {
1612         if (!mget_token ())
1613           return 0;
1614         if (mtoken != MNUM)
1615           {
1616             msg (SE, _("Syntax error %s expecting SPLIT FILE value."),
1617                  context ());
1618             return 0;
1619           }
1620
1621         if (compare && split_values[i] != mtokval && !different)
1622           {
1623             if (!wr_output_data (write_case, wc_data))
1624               return 0;
1625             different = 1;
1626             cells = 0;
1627           }
1628         split_values[i] = mtokval;
1629       }
1630   }
1631
1632   return 1;
1633 }
1634
1635 /* Compares doubles A and B, treating SYSMIS as greatest. */
1636 static int
1637 compare_doubles (const void *a_, const void *b_, void *aux UNUSED)
1638 {
1639   const double *a = a_;
1640   const double *b = b_;
1641
1642   if (*a == *b)
1643     return 0;
1644   else if (*a == SYSMIS)
1645     return 1;
1646   else if (*b == SYSMIS)
1647     return -1;
1648   else if (*a > *b)
1649     return 1;
1650   else
1651     return -1;
1652 }
1653
1654 /* Return strcmp()-type comparison of the n_factors factors at _A and
1655    _B.  Sort missing values toward the end. */
1656 static int
1657 compare_factors (const void *a_, const void *b_)
1658 {
1659   struct factor_data *const *pa = a_;
1660   struct factor_data *const *pb = b_;
1661   const double *a = (*pa)->factors;
1662   const double *b = (*pb)->factors;
1663
1664   return lexicographical_compare (a, n_factors,
1665                                   b, n_factors,
1666                                   sizeof *a,
1667                                   compare_doubles, NULL);
1668 }
1669
1670 /* Write out the data for the current split file to the active
1671    file. */
1672 static int
1673 wr_output_data (write_case_func *write_case, write_case_data wc_data)
1674 {
1675   {
1676     struct variable *const *split;
1677     size_t split_cnt;
1678     size_t i;
1679
1680     split_cnt = dict_get_split_cnt (default_dict);
1681     for (i = 0; i < split_cnt; i++)
1682       temp_case->data[split[i]->fv].f = split_values[i];
1683   }
1684
1685   /* Sort the wr_data list. */
1686   {
1687     struct factor_data **factors;
1688     struct factor_data *iter;
1689     int i;
1690
1691     factors = xmalloc (sizeof *factors * cells);
1692
1693     for (i = 0, iter = wr_data; iter; iter = iter->next, i++)
1694       factors[i] = iter;
1695
1696     qsort (factors, cells, sizeof *factors, compare_factors);
1697
1698     wr_data = factors[0];
1699     for (i = 0; i < cells - 1; i++)
1700       factors[i]->next = factors[i + 1];
1701     factors[cells - 1]->next = NULL;
1702
1703     free (factors);
1704   }
1705
1706   /* Write out records for every set of factor values. */
1707   {
1708     struct factor_data *iter;
1709
1710     for (iter = wr_data; iter; iter = iter->next)
1711       {
1712         {
1713           int factor;
1714
1715           for (factor = 0; factor < n_factors; factor++)
1716             {
1717               temp_case->data[factors[factor]->fv].f
1718                 = iter->factors[factor];
1719               debug_printf (("f:%s ", factors[factor]->name));
1720             }
1721         }
1722
1723         {
1724           int content;
1725
1726           for (content = 0; content <= PROX; content++)
1727             {
1728               if (!iter->n_rows[content])
1729                 continue;
1730
1731               {
1732                 int type = content_type[content];
1733                 int n_lines = (type == 1
1734                                ? (n_continuous
1735                                   - (section != FULL && diag == NODIAGONAL))
1736                                : 1);
1737
1738                 if (n_lines != iter->n_rows[content])
1739                   {
1740                     msg (SE, _("Expected %d lines of data for %s content; "
1741                                "actually saw %d lines.  No data will be "
1742                                "output for this content."),
1743                          n_lines, content_names[content],
1744                          iter->n_rows[content]);
1745                     continue;
1746                   }
1747               }
1748
1749               fill_matrix (content, iter->data[content]);
1750
1751               dump_cell_content (content, iter->data[content],
1752                                  write_case, wc_data);
1753             }
1754         }
1755       }
1756   }
1757
1758   pool_destroy (container);
1759   container = pool_create ();
1760
1761   wr_data = wr_current = NULL;
1762
1763   return 1;
1764 }
1765
1766 /* Read ROWTYPE_ from the data file.  Return success. */
1767 static int
1768 wr_read_rowtype (void)
1769 {
1770   if (wr_content != -1)
1771     {
1772       msg (SE, _("Multiply specified ROWTYPE_ %s."), context ());
1773       return 0;
1774     }
1775   if (mtoken != MSTR)
1776     {
1777       msg (SE, _("Syntax error %s expecting ROWTYPE_ string."), context ());
1778       return 0;
1779     }
1780
1781   {
1782     char s[16];
1783     char *cp;
1784
1785     memcpy (s, mtokstr, min (15, mtoklen));
1786     s[min (15, mtoklen)] = 0;
1787
1788     for (cp = s; *cp; cp++)
1789       *cp = toupper ((unsigned char) *cp);
1790
1791     wr_content = string_to_content_type (s, NULL);
1792   }
1793
1794   if (wr_content == -1)
1795     {
1796       msg (SE, _("Syntax error %s."), context ());
1797       return 0;
1798     }
1799
1800   return 1;
1801 }
1802
1803 /* Read the factors for the current row.  Select a set of factors and
1804    point wr_current to it. */
1805 static int
1806 wr_read_factors (void)
1807 {
1808   double *factor_values = local_alloc (sizeof *factor_values * n_factors);
1809
1810   wr_content = -1;
1811   {
1812     int i;
1813
1814     for (i = 0; i < n_factors; i++)
1815       {
1816         if (!mget_token ())
1817           goto lossage;
1818         if (mtoken == MSTR)
1819           {
1820             if (!wr_read_rowtype ())
1821               goto lossage;
1822             if (!mget_token ())
1823               goto lossage;
1824           }
1825         if (mtoken != MNUM)
1826           {
1827             msg (SE, _("Syntax error expecting factor value %s."),
1828                  context ());
1829             goto lossage;
1830           }
1831
1832         factor_values[i] = mtokval;
1833       }
1834   }
1835   if (wr_content == -1)
1836     {
1837       if (!mget_token ())
1838         goto lossage;
1839       if (!wr_read_rowtype ())
1840         goto lossage;
1841     }
1842
1843   /* Try the most recent factor first as a simple caching
1844      mechanism. */
1845   if (wr_current)
1846     {
1847       int i;
1848
1849       for (i = 0; i < n_factors; i++)
1850         if (factor_values[i] != wr_current->factors[i])
1851           goto cache_miss;
1852       goto winnage;
1853     }
1854
1855   /* Linear search through the list. */
1856 cache_miss:
1857   {
1858     struct factor_data *iter;
1859
1860     for (iter = wr_data; iter; iter = iter->next)
1861       {
1862         int i;
1863
1864         for (i = 0; i < n_factors; i++)
1865           if (factor_values[i] != iter->factors[i])
1866             goto next_item;
1867
1868         wr_current = iter;
1869         goto winnage;
1870
1871       next_item: ;
1872       }
1873   }
1874
1875   /* Not found.  Make a new item. */
1876   {
1877     struct factor_data *new = pool_alloc (container, sizeof *new);
1878
1879     new->factors = pool_alloc (container, sizeof *new->factors * n_factors);
1880
1881     {
1882       int i;
1883
1884       for (i = 0; i < n_factors; i++)
1885         new->factors[i] = factor_values[i];
1886     }
1887
1888     {
1889       int i;
1890
1891       for (i = 0; i <= PROX; i++)
1892         {
1893           new->n_rows[i] = 0;
1894           new->data[i] = NULL;
1895         }
1896     }
1897
1898     new->next = wr_data;
1899     wr_data = wr_current = new;
1900     cells++;
1901   }
1902
1903 winnage:
1904   local_free (factor_values);
1905   return 1;
1906
1907 lossage:
1908   local_free (factor_values);
1909   return 0;
1910 }
1911
1912 /* Read the independent variables into wr_current. */
1913 static int
1914 wr_read_indeps (void)
1915 {
1916   struct factor_data *c = wr_current;
1917   const int type = content_type[wr_content];
1918   const int n_rows = c->n_rows[wr_content];
1919   double *cp;
1920   int n_cols;
1921
1922   /* Allocate room for data if necessary. */
1923   if (c->data[wr_content] == NULL)
1924     {
1925       int n_items = n_continuous;
1926       if (type == 1)
1927         n_items *= n_continuous;
1928
1929       c->data[wr_content] = pool_alloc (container,
1930                                         sizeof **c->data * n_items);
1931     }
1932
1933   cp = &c->data[wr_content][n_rows * n_continuous];
1934
1935   /* Figure out how much to read from this line. */
1936   switch (type)
1937     {
1938     case 0:
1939     case 2:
1940       if (n_rows > 0)
1941         {
1942           msg (SE, _("Duplicate specification for %s."),
1943                content_names[wr_content]);
1944           return 0;
1945         }
1946       if (type == 0)
1947         n_cols = n_continuous;
1948       else
1949         n_cols = 1;
1950       break;
1951     case 1:
1952       if (n_rows >= n_continuous - (section != FULL && diag == NODIAGONAL))
1953         {
1954           msg (SE, _("Too many rows of matrix data for %s."),
1955                content_names[wr_content]);
1956           return 0;
1957         }
1958
1959       switch (section)
1960         {
1961         case LOWER:
1962           n_cols = n_rows + 1;
1963           if (diag == NODIAGONAL)
1964             cp += n_continuous;
1965           break;
1966         case UPPER:
1967           cp += n_rows;
1968           n_cols = n_continuous - n_rows;
1969           if (diag == NODIAGONAL)
1970             {
1971               n_cols--;
1972               cp++;
1973             }
1974           break;
1975         case FULL:
1976           n_cols = n_continuous;
1977           break;
1978         default:
1979           assert (0);
1980         }
1981       break;
1982     default:
1983       assert (0);
1984     }
1985   c->n_rows[wr_content]++;
1986
1987   debug_printf ((" (c=%p,r=%d,n=%d)", c, n_rows + 1, n_cols));
1988
1989   /* Read N_COLS items at CP. */
1990   {
1991     int j;
1992
1993     for (j = 0; j < n_cols; j++)
1994       {
1995         if (!mget_token ())
1996           return 0;
1997         if (mtoken != MNUM)
1998           {
1999             msg (SE, _("Syntax error expecting value for %s %s."),
2000                  dict_get_var (default_dict, first_continuous + j)->name,
2001                  context ());
2002             return 0;
2003           }
2004
2005         *cp++ = mtokval;
2006       }
2007     if (!force_eol (content_names[wr_content]))
2008       return 0;
2009     debug_printf (("\n"));
2010   }
2011
2012   return 1;
2013 }
2014 \f
2015 /* Matrix source. */
2016
2017 struct case_stream matrix_data_source =
2018   {
2019     NULL,
2020     NULL,
2021     NULL,
2022     NULL,
2023     NULL,
2024     NULL,
2025     "MATRIX DATA",
2026   };
2027