src/matrix-data.c

   1 /* PSPP - computes sample statistics.
   2    Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
   3    Written by Ben Pfaff <blp@gnu.org>.
   4
   5    This program is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful, but
  11    WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software
  17    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  18    02111-1307, USA. */
  19
  20 #include <config.h>
  21 #include <assert.h>
  22 #include <stdlib.h>
  23 #include <ctype.h>
  24 #include <float.h>
  25 #include "algorithm.h"
  26 #include "alloc.h"
  27 #include "command.h"
  28 #include "data-in.h"
  29 #include "dfm.h"
  30 #include "error.h"
  31 #include "file-handle.h"
  32 #include "lexer.h"
  33 #include "misc.h"
  34 #include "pool.h"
  35 #include "str.h"
  36 #include "var.h"
  37 #include "vfm.h"
  38
  39 #include "debug-print.h"
  40
  41 /* FIXME: /N subcommand not implemented.  It should be pretty simple,
  42    too. */
  43
  44 /* Format type enums. */
  45 enum
  46   {
  47     LIST,
  48     FREE
  49   };
  50
  51 /* Matrix section enums. */
  52 enum
  53   {
  54     LOWER,
  55     UPPER,
  56     FULL
  57   };
  58
  59 /* Diagonal inclusion enums. */
  60 enum
  61   {
  62     DIAGONAL,
  63     NODIAGONAL
  64   };
  65
  66 /* CONTENTS types. */
  67 enum
  68   {
  69     N_VECTOR,
  70     N_SCALAR,
  71     N_MATRIX,
  72     MEAN,
  73     STDDEV,
  74     COUNT,
  75     MSE,
  76     DFE,
  77     MAT,
  78     COV,
  79     CORR,
  80     PROX,
  81
  82     LPAREN,
  83     RPAREN,
  84     EOC
  85   };
  86
  87 /* 0=vector, 1=matrix, 2=scalar. */
  88 static int content_type[PROX + 1] =
  89   {
  90     0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,
  91   };
  92
  93 /* Name of each content type. */
  94 static const char *content_names[PROX + 1] =
  95   {
  96     "N", "N", "N_MATRIX", "MEAN", "STDDEV", "COUNT", "MSE",
  97     "DFE", "MAT", "COV", "CORR", "PROX",
  98   };
  99
 100 /* The data file to be read. */
 101 static struct file_handle *data_file;
 102
 103 /* Format type. */
 104 static int fmt;                 /* LIST or FREE. */
 105 static int section;             /* LOWER or UPPER or FULL. */
 106 static int diag;                /* DIAGONAL or NODIAGONAL. */
 107
 108 /* Arena used for all the MATRIX DATA allocations. */
 109 static struct pool *container;
 110
 111 /* ROWTYPE_ specified explicitly in data? */
 112 static int explicit_rowtype;
 113
 114 /* ROWTYPE_, VARNAME_ variables. */
 115 static struct variable *rowtype_, *varname_;
 116
 117 /* Is is per-factor data? */
 118 int is_per_factor[PROX + 1];
 119
 120 /* Single SPLIT FILE variable. */
 121 static struct variable *single_split;
 122
 123 /* Factor variables.  */
 124 static int n_factors;
 125 static struct variable **factors;
 126
 127 /* Number of cells, or -1 if none. */
 128 static int cells;
 129
 130 /* Population N specified by user. */
 131 static int pop_n;
 132
 133 /* CONTENTS subcommand. */
 134 static int contents[EOC * 3 + 1];
 135 static int n_contents;
 136
 137 /* Number of continuous variables. */
 138 static int n_continuous;
 139
 140 /* Index into default_dict.var of first continuous variables. */
 141 static int first_continuous;
 142
 143 static int compare_variables_by_mxd_vartype (const void *pa,
 144                                              const void *pb);
 145 static void read_matrices_without_rowtype (void);
 146 static void read_matrices_with_rowtype (void);
 147 static int string_to_content_type (char *, int *);
 148
 149 #if DEBUGGING
 150 static void debug_print (void);
 151 #endif
 152
 153 int
 154 cmd_matrix_data (void)
 155 {
 156   unsigned seen = 0;
 157
 158   lex_match_id ("MATRIX");
 159   lex_match_id ("DATA");
 160
 161   container = pool_create ();
 162
 163   discard_variables ();
 164
 165   data_file = inline_file;
 166   fmt = LIST;
 167   section = LOWER;
 168   diag = DIAGONAL;
 169   single_split = NULL;
 170   n_factors = 0;
 171   factors = NULL;
 172   cells = -1;
 173   pop_n = -1;
 174   n_contents = 0;
 175   while (token != '.')
 176     {
 177       lex_match ('/');
 178
 179       if (lex_match_id ("VARIABLES"))
 180         {
 181           char **v;
 182           int nv;
 183
 184           if (seen & 1)
 185             {
 186               msg (SE, _("VARIABLES subcommand multiply specified."));
 187               goto lossage;
 188             }
 189           seen |= 1;
 190
 191           lex_match ('=');
 192           if (!parse_DATA_LIST_vars (&v, &nv, PV_NO_DUPLICATE))
 193             goto lossage;
 194
 195           {
 196             int i;
 197
 198             for (i = 0; i < nv; i++)
 199               if (!strcmp (v[i], "VARNAME_"))
 200                 {
 201                   msg (SE, _("VARNAME_ cannot be explicitly specified on "
 202                              "VARIABLES."));
 203                   for (i = 0; i < nv; i++)
 204                     free (v[i]);
 205                   free (v);
 206                   goto lossage;
 207                 }
 208           }
 209
 210           {
 211             int i;
 212
 213             for (i = 0; i < nv; i++)
 214               {
 215                 struct variable *new_var;
 216
 217                 if (strcmp (v[i], "ROWTYPE_"))
 218                   {
 219                     new_var = dict_create_var_assert (default_dict, v[i], 0);
 220                     new_var->p.mxd.vartype = MXD_CONTINUOUS;
 221                     new_var->p.mxd.subtype = i;
 222                   }
 223                 else
 224                   explicit_rowtype = 1;
 225                 free (v[i]);
 226               }
 227             free (v);
 228           }
 229
 230           {
 231             rowtype_ = dict_create_var_assert (default_dict, "ROWTYPE_", 8);
 232             rowtype_->p.mxd.vartype = MXD_ROWTYPE;
 233             rowtype_->p.mxd.subtype = 0;
 234           }
 235         }
 236       else if (lex_match_id ("FILE"))
 237         {
 238           lex_match ('=');
 239           data_file = fh_parse_file_handle ();
 240           if (!data_file)
 241             goto lossage;
 242         }
 243       else if (lex_match_id ("FORMAT"))
 244         {
 245           lex_match ('=');
 246
 247           while (token == T_ID)
 248             {
 249               if (lex_match_id ("LIST"))
 250                 fmt = LIST;
 251               else if (lex_match_id ("FREE"))
 252                 fmt = FREE;
 253               else if (lex_match_id ("LOWER"))
 254                 section = LOWER;
 255               else if (lex_match_id ("UPPER"))
 256                 section = UPPER;
 257               else if (lex_match_id ("FULL"))
 258                 section = FULL;
 259               else if (lex_match_id ("DIAGONAL"))
 260                 diag = DIAGONAL;
 261               else if (lex_match_id ("NODIAGONAL"))
 262                 diag = NODIAGONAL;
 263               else
 264                 {
 265                   lex_error (_("in FORMAT subcommand"));
 266                   goto lossage;
 267                 }
 268             }
 269         }
 270       else if (lex_match_id ("SPLIT"))
 271         {
 272           lex_match ('=');
 273
 274           if (seen & 2)
 275             {
 276               msg (SE, _("SPLIT subcommand multiply specified."));
 277               goto lossage;
 278             }
 279           seen |= 2;
 280
 281           if (token != T_ID)
 282             {
 283               lex_error (_("in SPLIT subcommand"));
 284               goto lossage;
 285             }
 286
 287           if (dict_lookup_var (default_dict, tokid) == NULL
 288               && (lex_look_ahead () == '.' || lex_look_ahead () == '/'))
 289             {
 290               if (!strcmp (tokid, "ROWTYPE_") || !strcmp (tokid, "VARNAME_"))
 291                 {
 292                   msg (SE, _("Split variable may not be named ROWTYPE_ "
 293                              "or VARNAME_."));
 294                   goto lossage;
 295                 }
 296
 297               single_split = dict_create_var_assert (default_dict, tokid, 0);
 298               lex_get ();
 299
 300               single_split->p.mxd.vartype = MXD_CONTINUOUS;
 301
 302               dict_set_split_vars (default_dict, &single_split, 1);
 303             }
 304           else
 305             {
 306               struct variable **split;
 307               int n;
 308
 309               if (!parse_variables (default_dict, &split, &n, PV_NO_DUPLICATE))
 310                 goto lossage;
 311
 312               dict_set_split_vars (default_dict, split, n);
 313             }
 314
 315           {
 316             struct variable *const *split = dict_get_split_vars (default_dict);
 317             size_t split_cnt = dict_get_split_cnt (default_dict);
 318             int i;
 319
 320             for (i = 0; i < split_cnt; i++)
 321               {
 322                 if (split[i]->p.mxd.vartype != MXD_CONTINUOUS)
 323                   {
 324                     msg (SE, _("Split variable %s is already another type."),
 325                          tokid);
 326                     goto lossage;
 327                   }
 328                 split[i]->p.mxd.vartype = MXD_SPLIT;
 329                 split[i]->p.mxd.subtype = i;
 330               }
 331           }
 332         }
 333       else if (lex_match_id ("FACTORS"))
 334         {
 335           lex_match ('=');
 336
 337           if (seen & 4)
 338             {
 339               msg (SE, _("FACTORS subcommand multiply specified."));
 340               goto lossage;
 341             }
 342           seen |= 4;
 343
 344           if (!parse_variables (default_dict, &factors, &n_factors, PV_NONE))
 345             goto lossage;
 346
 347           {
 348             int i;
 349
 350             for (i = 0; i < n_factors; i++)
 351               {
 352                 if (factors[i]->p.mxd.vartype != MXD_CONTINUOUS)
 353                   {
 354                     msg (SE, _("Factor variable %s is already another type."),
 355                          tokid);
 356                     goto lossage;
 357                   }
 358                 factors[i]->p.mxd.vartype = MXD_FACTOR;
 359                 factors[i]->p.mxd.subtype = i;
 360               }
 361           }
 362         }
 363       else if (lex_match_id ("CELLS"))
 364         {
 365           lex_match ('=');
 366
 367           if (cells != -1)
 368             {
 369               msg (SE, _("CELLS subcommand multiply specified."));
 370               goto lossage;
 371             }
 372
 373           if (!lex_integer_p () || lex_integer () < 1)
 374             {
 375               lex_error (_("expecting positive integer"));
 376               goto lossage;
 377             }
 378
 379           cells = lex_integer ();
 380           lex_get ();
 381         }
 382       else if (lex_match_id ("N"))
 383         {
 384           lex_match ('=');
 385
 386           if (pop_n != -1)
 387             {
 388               msg (SE, _("N subcommand multiply specified."));
 389               goto lossage;
 390             }
 391
 392           if (!lex_integer_p () || lex_integer () < 1)
 393             {
 394               lex_error (_("expecting positive integer"));
 395               goto lossage;
 396             }
 397
 398           pop_n = lex_integer ();
 399           lex_get ();
 400         }
 401       else if (lex_match_id ("CONTENTS"))
 402         {
 403           int inside_parens = 0;
 404           unsigned collide = 0;
 405           int item;
 406
 407           if (seen & 8)
 408             {
 409               msg (SE, _("CONTENTS subcommand multiply specified."));
 410               goto lossage;
 411             }
 412           seen |= 8;
 413
 414           lex_match ('=');
 415
 416           {
 417             int i;
 418
 419             for (i = 0; i <= PROX; i++)
 420               is_per_factor[i] = 0;
 421           }
 422
 423           for (;;)
 424             {
 425               if (lex_match ('('))
 426                 {
 427                   if (inside_parens)
 428                     {
 429                       msg (SE, _("Nested parentheses not allowed."));
 430                       goto lossage;
 431                     }
 432                   inside_parens = 1;
 433                   item = LPAREN;
 434                 }
 435               else if (lex_match (')'))
 436                 {
 437                   if (!inside_parens)
 438                     {
 439                       msg (SE, _("Mismatched right parenthesis (`(')."));
 440                       goto lossage;
 441                     }
 442                   if (contents[n_contents - 1] == LPAREN)
 443                     {
 444                       msg (SE, _("Empty parentheses not allowed."));
 445                       goto lossage;
 446                     }
 447                   inside_parens = 0;
 448                   item = RPAREN;
 449                 }
 450               else
 451                 {
 452                   int content_type;
 453                   int collide_index;
 454
 455                   if (token != T_ID)
 456                     {
 457                       lex_error (_("in CONTENTS subcommand"));
 458                       goto lossage;
 459                     }
 460
 461                   content_type = string_to_content_type (tokid,
 462                                                          &collide_index);
 463                   if (content_type == -1)
 464                     {
 465                       lex_error (_("in CONTENTS subcommand"));
 466                       goto lossage;
 467                     }
 468                   lex_get ();
 469
 470                   if (collide & (1 << collide_index))
 471                     {
 472                       msg (SE, _("Content multiply specified for %s."),
 473                            content_names[content_type]);
 474                       goto lossage;
 475                     }
 476                   collide |= (1 << collide_index);
 477
 478                   item = content_type;
 479                   is_per_factor[item] = inside_parens;
 480                 }
 481               contents[n_contents++] = item;
 482
 483               if (token == '/' || token == '.')
 484                 break;
 485             }
 486
 487           if (inside_parens)
 488             {
 489               msg (SE, _("Missing right parenthesis."));
 490               goto lossage;
 491             }
 492           contents[n_contents] = EOC;
 493         }
 494       else
 495         {
 496           lex_error (NULL);
 497           goto lossage;
 498         }
 499     }
 500
 501   if (token != '.')
 502     {
 503       lex_error (_("expecting end of command"));
 504       goto lossage;
 505     }
 506
 507   if (!(seen & 1))
 508     {
 509       msg (SE, _("Missing VARIABLES subcommand."));
 510       goto lossage;
 511     }
 512
 513   if (!n_contents && !explicit_rowtype)
 514     {
 515       msg (SW, _("CONTENTS subcommand not specified: assuming file "
 516                  "contains only CORR matrix."));
 517
 518       contents[0] = CORR;
 519       contents[1] = EOC;
 520       n_contents = 0;
 521     }
 522
 523   if (n_factors && !explicit_rowtype && cells == -1)
 524     {
 525       msg (SE, _("Missing CELLS subcommand.  CELLS is required "
 526                  "when ROWTYPE_ is not given in the data and "
 527                  "factors are present."));
 528       goto lossage;
 529     }
 530
 531   if (explicit_rowtype && single_split)
 532     {
 533       msg (SE, _("Split file values must be present in the data when "
 534                  "ROWTYPE_ is present."));
 535       goto lossage;
 536     }
 537
 538   /* Create VARNAME_. */
 539   {
 540     varname_ = dict_create_var_assert (default_dict, "VARNAME_", 8);
 541     varname_->p.mxd.vartype = MXD_VARNAME;
 542     varname_->p.mxd.subtype = 0;
 543   }
 544
 545   /* Sort the dictionary variables into the desired order for the
 546      system file output. */
 547   {
 548     struct variable **v;
 549     size_t nv;
 550
 551     dict_get_vars (default_dict, &v, &nv, 0);
 552     qsort (v, nv, sizeof *v, compare_variables_by_mxd_vartype);
 553     dict_reorder_vars (default_dict, v, nv);
 554     free (v);
 555   }
 556
 557   /* Set formats. */
 558   {
 559     static const struct fmt_spec fmt_tab[MXD_COUNT] =
 560       {
 561         {FMT_F, 4, 0},
 562         {FMT_A, 8, 0},
 563         {FMT_F, 4, 0},
 564         {FMT_A, 8, 0},
 565         {FMT_F, 10, 4},
 566       };
 567
 568     int i;
 569
 570     first_continuous = -1;
 571     for (i = 0; i < dict_get_var_cnt (default_dict); i++)
 572       {
 573         struct variable *v = dict_get_var (default_dict, i);
 574         int type = v->p.mxd.vartype;
 575
 576         assert (type >= 0 && type < MXD_COUNT);
 577         v->print = v->write = fmt_tab[type];
 578
 579         if (type == MXD_CONTINUOUS)
 580           n_continuous++;
 581         if (first_continuous == -1 && type == MXD_CONTINUOUS)
 582           first_continuous = i;
 583       }
 584   }
 585
 586   if (n_continuous == 0)
 587     {
 588       msg (SE, _("No continuous variables specified."));
 589       goto lossage;
 590     }
 591
 592 #if DEBUGGING
 593   debug_print ();
 594 #endif
 595
 596   if (explicit_rowtype)
 597     read_matrices_with_rowtype ();
 598   else
 599     read_matrices_without_rowtype ();
 600
 601   pool_destroy (container);
 602
 603   return CMD_SUCCESS;
 604
 605 lossage:
 606   discard_variables ();
 607   free (factors);
 608   pool_destroy (container);
 609   return CMD_FAILURE;
 610 }
 611
 612 /* Look up string S as a content-type name and return the
 613    corresponding enumerated value, or -1 if there is no match.  If
 614    COLLIDE is non-NULL then *COLLIDE returns a value (suitable for use
 615    as a bit-index) which can be used for determining whether a related
 616    statistic has already been used. */
 617 static int
 618 string_to_content_type (char *s, int *collide)
 619 {
 620   static const struct
 621     {
 622       int value;
 623       int collide;
 624       const char *string;
 625     }
 626   *tp,
 627   tab[] =
 628     {
 629       {N_VECTOR, 0, "N_VECTOR"},
 630       {N_VECTOR, 0, "N"},
 631       {N_SCALAR, 0, "N_SCALAR"},
 632       {N_MATRIX, 1, "N_MATRIX"},
 633       {MEAN, 2, "MEAN"},
 634       {STDDEV, 3, "STDDEV"},
 635       {STDDEV, 3, "SD"},
 636       {COUNT, 4, "COUNT"},
 637       {MSE, 5, "MSE"},
 638       {DFE, 6, "DFE"},
 639       {MAT, 7, "MAT"},
 640       {COV, 8, "COV"},
 641       {CORR, 9, "CORR"},
 642       {PROX, 10, "PROX"},
 643       {-1, -1, NULL},
 644     };
 645
 646   for (tp = tab; tp->value != -1; tp++)
 647     if (!strcmp (s, tp->string))
 648       {
 649         if (collide)
 650           *collide = tp->collide;
 651
 652         return tp->value;
 653       }
 654   return -1;
 655 }
 656
 657 /* Compare two variables using p.mxd.vartype and p.mxd.subtype
 658    fields. */
 659 static int
 660 compare_variables_by_mxd_vartype (const void *a_, const void *b_)
 661 {
 662   struct variable *const *pa = a_;
 663   struct variable *const *pb = b_;
 664   const struct matrix_data_proc *a = &(*pa)->p.mxd;
 665   const struct matrix_data_proc *b = &(*pb)->p.mxd;
 666
 667   if (a->vartype != b->vartype)
 668     return a->vartype > b->vartype ? 1 : -1;
 669   else
 670     return a->subtype < b->subtype ? -1 : a->subtype > b->subtype;
 671 }
 672
 673 #if DEBUGGING
 674 /* Print out the command as input. */
 675 static void
 676 debug_print (void)
 677 {
 678   printf ("MATRIX DATA\n\t/VARIABLES=");
 679
 680   {
 681     int i;
 682
 683     for (i = 0; i < default_dict.nvar; i++)
 684       printf ("%s ", default_dict.var[i]->name);
 685   }
 686   printf ("\n");
 687
 688   printf ("\t/FORMAT=");
 689   if (fmt == LIST)
 690     printf ("LIST");
 691   else if (fmt == FREE)
 692     printf ("FREE");
 693   else
 694     assert (0);
 695   if (section == LOWER)
 696     printf (" LOWER");
 697   else if (section == UPPER)
 698     printf (" UPPER");
 699   else if (section == FULL)
 700     printf (" FULL");
 701   else
 702     assert (0);
 703   if (diag == DIAGONAL)
 704     printf (" DIAGONAL\n");
 705   else if (diag == NODIAGONAL)
 706     printf (" NODIAGONAL\n");
 707   else
 708     assert (0);
 709
 710   if (dict_get_split_cnt (default_dict) != 0)
 711     {
 712       int i;
 713
 714       printf ("\t/SPLIT=");
 715       for (i = 0; i < dict_get_split_cnt (default_dict); i++)
 716         printf ("%s ", dict_get_split_vars (default_dict)[i]->name);
 717       if (single_split)
 718         printf ("\t/* single split");
 719       printf ("\n");
 720     }
 721
 722   if (n_factors)
 723     {
 724       int i;
 725
 726       printf ("\t/FACTORS=");
 727       for (i = 0; i < n_factors; i++)
 728         printf ("%s ", factors[i]->name);
 729       printf ("\n");
 730     }
 731
 732   if (cells != -1)
 733     printf ("\t/CELLS=%d\n", cells);
 734
 735   if (pop_n != -1)
 736     printf ("\t/N=%d\n", pop_n);
 737
 738   if (n_contents)
 739     {
 740       int i;
 741       int space = 0;
 742
 743       printf ("\t/CONTENTS=");
 744       for (i = 0; i < n_contents; i++)
 745         {
 746           if (contents[i] == LPAREN)
 747             {
 748               if (space)
 749                 printf (" ");
 750               printf ("(");
 751               space = 0;
 752             }
 753           else if (contents[i] == RPAREN)
 754             {
 755               printf (")");
 756               space = 1;
 757             }
 758           else
 759             {
 760
 761               assert (contents[i] >= 0 && contents[i] <= PROX);
 762               if (space)
 763                 printf (" ");
 764               printf ("%s", content_names[contents[i]]);
 765               space = 1;
 766             }
 767         }
 768       printf ("\n");
 769     }
 770 }
 771 #endif /* DEBUGGING */
 772 \f
 773 /* Matrix tokenizer. */
 774
 775 /* Matrix token types. */
 776 enum
 777   {
 778     MNULL,              /* No token. */
 779     MNUM,               /* Number. */
 780     MSTR,               /* String. */
 781     MSTOP               /* End of file. */
 782   };
 783
 784 /* Current matrix token. */
 785 static int mtoken;
 786
 787 /* Token string if applicable; not null-terminated. */
 788 static char *mtokstr;
 789
 790 /* Length of mtokstr in characters. */
 791 static int mtoklen;
 792
 793 /* Token value if applicable. */
 794 static double mtokval;
 795
 796 static int mget_token (void);
 797
 798 #if DEBUGGING
 799 #define mget_token() mget_token_dump()
 800
 801 static int
 802 mget_token_dump (void)
 803 {
 804   int result = (mget_token) ();
 805   mdump_token ();
 806   return result;
 807 }
 808
 809 static void
 810 mdump_token (void)
 811 {
 812   switch (mtoken)
 813     {
 814     case MNULL:
 815       printf (" <NULLTOK>");
 816       break;
 817     case MNUM:
 818       printf (" #%g", mtokval);
 819       break;
 820     case MSTR:
 821       printf (" #'%.*s'", mtoklen, mtokstr);
 822       break;
 823     case MSTOP:
 824       printf (" <STOP>");
 825       break;
 826     default:
 827       assert (0);
 828     }
 829   fflush (stdout);
 830 }
 831 #endif
 832
 833 /* Return the current position in the data file. */
 834 static const char *
 835 context (void)
 836 {
 837   static char buf[32];
 838   int len;
 839   char *p = dfm_get_record (data_file, &len);
 840
 841   if (!p || !len)
 842     strcpy (buf, "at end of line");
 843   else
 844     {
 845       char *cp = buf;
 846       int n_copy = min (10, len);
 847       cp = stpcpy (buf, "before `");
 848       while (n_copy && isspace ((unsigned char) *p))
 849         p++, n_copy++;
 850       while (n_copy && !isspace ((unsigned char) *p))
 851         *cp++ = *p++, n_copy--;
 852       *cp++ = '\'';
 853       *cp = 0;
 854     }
 855
 856   return buf;
 857 }
 858
 859 /* Is there at least one token left in the data file? */
 860 static int
 861 another_token (void)
 862 {
 863   char *cp, *ep;
 864   int len;
 865
 866   if (mtoken == MSTOP)
 867     return 0;
 868
 869   for (;;)
 870     {
 871       cp = dfm_get_record (data_file, &len);
 872       if (!cp)
 873         return 0;
 874
 875       ep = cp + len;
 876       while (isspace ((unsigned char) *cp) && cp < ep)
 877         cp++;
 878
 879       if (cp < ep)
 880         break;
 881
 882       dfm_fwd_record (data_file);
 883     }
 884
 885   dfm_set_record (data_file, cp);
 886
 887   return 1;
 888 }
 889
 890 /* Parse a MATRIX DATA token from data_file into mtok*. */
 891 static int
 892 (mget_token) (void)
 893 {
 894   char *cp, *ep;
 895   int len;
 896   int first_column;
 897
 898   for (;;)
 899     {
 900       cp = dfm_get_record (data_file, &len);
 901       if (!cp)
 902         {
 903           if (mtoken == MSTOP)
 904             return 0;
 905           mtoken = MSTOP;
 906           return 1;
 907         }
 908
 909       ep = cp + len;
 910       while (isspace ((unsigned char) *cp) && cp < ep)
 911         cp++;
 912
 913       if (cp < ep)
 914         break;
 915
 916       dfm_fwd_record (data_file);
 917     }
 918
 919   dfm_set_record (data_file, cp);
 920   first_column = dfm_get_cur_col (data_file) + 1;
 921
 922   /* Three types of fields: quoted with ', quoted with ", unquoted. */
 923   if (*cp == '\'' || *cp == '"')
 924     {
 925       int quote = *cp;
 926
 927       mtoken = MSTR;
 928       mtokstr = ++cp;
 929       while (cp < ep && *cp != quote)
 930         cp++;
 931       mtoklen = cp - mtokstr;
 932       if (cp < ep)
 933         cp++;
 934       else
 935         msg (SW, _("Scope of string exceeds line."));
 936     }
 937   else
 938     {
 939       int is_num = isdigit ((unsigned char) *cp) || *cp == '.';
 940
 941       mtokstr = cp++;
 942       while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ','
 943              && *cp != '-' && *cp != '+')
 944         {
 945           if (isdigit ((unsigned char) *cp))
 946             is_num = 1;
 947
 948           if ((tolower ((unsigned char) *cp) == 'd'
 949                || tolower ((unsigned char) *cp) == 'e')
 950               && (cp[1] == '+' || cp[1] == '-'))
 951             cp += 2;
 952           else
 953             cp++;
 954         }
 955
 956       mtoklen = cp - mtokstr;
 957       assert (mtoklen);
 958
 959       if (is_num)
 960         {
 961           struct data_in di;
 962
 963           di.s = mtokstr;
 964           di.e = mtokstr + mtoklen;
 965           di.v = (union value *) &mtokval;
 966           di.f1 = first_column;
 967           di.format.type = FMT_F;
 968           di.format.w = mtoklen;
 969           di.format.d = 0;
 970
 971           if (!data_in (&di))
 972             return 0;
 973         }
 974       else
 975         mtoken = MSTR;
 976     }
 977
 978   dfm_set_record (data_file, cp);
 979
 980   return 1;
 981 }
 982
 983 /* Forcibly skip the end of a line for content type CONTENT in
 984    data_file. */
 985 static int
 986 force_eol (const char *content)
 987 {
 988   char *cp;
 989   int len;
 990
 991   if (fmt == FREE)
 992     return 1;
 993
 994   cp = dfm_get_record (data_file, &len);
 995   if (!cp)
 996     return 0;
 997   while (len && isspace (*cp))
 998     cp++, len--;
 999
1000   if (len)
1001     {
1002       msg (SE, _("End of line expected %s while reading %s."),
1003            context (), content);
1004       return 0;
1005     }
1006
1007   dfm_fwd_record (data_file);
1008
1009   return 1;
1010 }
1011 \f
1012 /* Back end, omitting ROWTYPE_. */
1013
1014 /* MATRIX DATA data. */
1015 static double ***nr_data;
1016
1017 /* Factor values. */
1018 static double *nr_factor_values;
1019
1020 /* Largest-numbered cell that we have read in thus far, plus one. */
1021 static int max_cell_index;
1022
1023 /* SPLIT FILE variable values. */
1024 static double *split_values;
1025
1026 static int nr_read_splits (int compare);
1027 static int nr_read_factors (int cell);
1028 static void nr_output_data (write_case_func *, write_case_data);
1029 static void matrix_data_read_without_rowtype (write_case_func *,
1030                                               write_case_data);
1031
1032 /* Read from the data file and write it to the active file. */
1033 static void
1034 read_matrices_without_rowtype (void)
1035 {
1036   if (cells == -1)
1037     cells = 1;
1038
1039   mtoken = MNULL;
1040   split_values = xmalloc (sizeof *split_values
1041                           * dict_get_split_cnt (default_dict));
1042   nr_factor_values = xmalloc (sizeof *nr_factor_values * n_factors * cells);
1043   max_cell_index = 0;
1044
1045   matrix_data_source.read = matrix_data_read_without_rowtype;
1046   vfm_source = &matrix_data_source;
1047
1048   procedure (NULL, NULL, NULL, NULL);
1049
1050   free (split_values);
1051   free (nr_factor_values);
1052
1053   fh_close_handle (data_file);
1054 }
1055
1056 /* Mirror data across the diagonal of matrix CP which contains
1057    CONTENT type data. */
1058 static void
1059 fill_matrix (int content, double *cp)
1060 {
1061   int type = content_type[content];
1062
1063   if (type == 1 && section != FULL)
1064     {
1065       if (diag == NODIAGONAL)
1066         {
1067           const double fill = content == CORR ? 1.0 : SYSMIS;
1068           int i;
1069
1070           for (i = 0; i < n_continuous; i++)
1071             cp[i * (1 + n_continuous)] = fill;
1072         }
1073
1074       {
1075         int c, r;
1076
1077         if (section == LOWER)
1078           {
1079             int n_lines = n_continuous;
1080             if (section != FULL && diag == NODIAGONAL)
1081               n_lines--;
1082
1083             for (r = 1; r < n_lines; r++)
1084               for (c = 0; c < r; c++)
1085                 cp[r + c * n_continuous] = cp[c + r * n_continuous];
1086           }
1087         else
1088           {
1089             assert (section == UPPER);
1090             for (r = 1; r < n_continuous; r++)
1091               for (c = 0; c < r; c++)
1092                 cp[c + r * n_continuous] = cp[r + c * n_continuous];
1093           }
1094       }
1095     }
1096   else if (type == 2)
1097     {
1098       int c;
1099
1100       for (c = 1; c < n_continuous; c++)
1101         cp[c] = cp[0];
1102     }
1103 }
1104
1105 /* Read data lines for content type CONTENT from the data file.  If
1106    PER_FACTOR is nonzero, then factor information is read from the
1107    data file.  Data is for cell number CELL. */
1108 static int
1109 nr_read_data_lines (int per_factor, int cell, int content, int compare)
1110 {
1111   /* Content type. */
1112   const int type = content_type[content];
1113
1114   /* Number of lines that must be parsed from the data file for this
1115      content type. */
1116   int n_lines;
1117
1118   /* Current position in vector or matrix. */
1119   double *cp;
1120
1121   /* Counter. */
1122   int i;
1123
1124   if (type != 1)
1125     n_lines = 1;
1126   else
1127     {
1128       n_lines = n_continuous;
1129       if (section != FULL && diag == NODIAGONAL)
1130         n_lines--;
1131     }
1132
1133   cp = nr_data[content][cell];
1134   if (type == 1 && section == LOWER && diag == NODIAGONAL)
1135     cp += n_continuous;
1136
1137   for (i = 0; i < n_lines; i++)
1138     {
1139       int n_cols;
1140
1141       if (!nr_read_splits (1))
1142         return 0;
1143       if (per_factor && !nr_read_factors (cell))
1144         return 0;
1145       compare = 1;
1146
1147       switch (type)
1148         {
1149         case 0:
1150           n_cols = n_continuous;
1151           break;
1152         case 1:
1153           switch (section)
1154             {
1155             case LOWER:
1156               n_cols = i + 1;
1157               break;
1158             case UPPER:
1159               cp += i;
1160               n_cols = n_continuous - i;
1161               if (diag == NODIAGONAL)
1162                 {
1163                   n_cols--;
1164                   cp++;
1165                 }
1166               break;
1167             case FULL:
1168               n_cols = n_continuous;
1169               break;
1170             default:
1171               assert (0);
1172             }
1173           break;
1174         case 2:
1175           n_cols = 1;
1176           break;
1177         default:
1178           assert (0);
1179         }
1180
1181       {
1182         int j;
1183
1184         for (j = 0; j < n_cols; j++)
1185           {
1186             if (!mget_token ())
1187               return 0;
1188             if (mtoken != MNUM)
1189               {
1190                 msg (SE, _("expecting value for %s %s"),
1191                      dict_get_var (default_dict, j)->name, context ());
1192                 return 0;
1193               }
1194
1195             *cp++ = mtokval;
1196           }
1197         if (!force_eol (content_names[content]))
1198           return 0;
1199         debug_printf (("\n"));
1200       }
1201
1202       if (section == LOWER)
1203         cp += n_continuous - n_cols;
1204     }
1205
1206   fill_matrix (content, nr_data[content][cell]);
1207
1208   return 1;
1209 }
1210
1211 /* When ROWTYPE_ does not appear in the data, reads the matrices and
1212    writes them to the output file.  Returns success. */
1213 static void
1214 matrix_data_read_without_rowtype (write_case_func *write_case,
1215                                   write_case_data wc_data)
1216 {
1217   {
1218     int *cp;
1219
1220     nr_data = pool_alloc (container, (PROX + 1) * sizeof *nr_data);
1221
1222     {
1223       int i;
1224
1225       for (i = 0; i <= PROX; i++)
1226         nr_data[i] = NULL;
1227     }
1228
1229     for (cp = contents; *cp != EOC; cp++)
1230       if (*cp != LPAREN && *cp != RPAREN)
1231         {
1232           int per_factor = is_per_factor[*cp];
1233           int n_entries;
1234
1235           n_entries = n_continuous;
1236           if (content_type[*cp] == 1)
1237             n_entries *= n_continuous;
1238
1239           {
1240             int n_vectors = per_factor ? cells : 1;
1241             int i;
1242
1243             nr_data[*cp] = pool_alloc (container,
1244                                        n_vectors * sizeof **nr_data);
1245
1246             for (i = 0; i < n_vectors; i++)
1247               nr_data[*cp][i] = pool_alloc (container,
1248                                             n_entries * sizeof ***nr_data);
1249           }
1250         }
1251   }
1252
1253   for (;;)
1254     {
1255       int *bp, *ep, *np;
1256
1257       if (!nr_read_splits (0))
1258         return;
1259
1260       for (bp = contents; *bp != EOC; bp = np)
1261         {
1262           int per_factor;
1263
1264           /* Trap the CONTENTS that we should parse in this pass
1265              between bp and ep.  Set np to the starting bp for next
1266              iteration. */
1267           if (*bp == LPAREN)
1268             {
1269               ep = ++bp;
1270               while (*ep != RPAREN)
1271                 ep++;
1272               np = &ep[1];
1273               per_factor = 1;
1274             }
1275           else
1276             {
1277               ep = &bp[1];
1278               while (*ep != EOC && *ep != LPAREN)
1279                 ep++;
1280               np = ep;
1281               per_factor = 0;
1282             }
1283
1284           {
1285             int i;
1286
1287             for (i = 0; i < (per_factor ? cells : 1); i++)
1288               {
1289                 int *cp;
1290
1291                 for (cp = bp; cp < ep; cp++)
1292                   if (!nr_read_data_lines (per_factor, i, *cp, cp != bp))
1293                     return;
1294               }
1295           }
1296         }
1297
1298       nr_output_data (write_case, wc_data);
1299
1300       if (dict_get_split_cnt (default_dict) == 0 || !another_token ())
1301         return;
1302     }
1303 }
1304
1305 /* Read the split file variables.  If COMPARE is 1, compares the
1306    values read to the last values read and returns 1 if they're equal,
1307    0 otherwise. */
1308 static int
1309 nr_read_splits (int compare)
1310 {
1311   static int just_read = 0;
1312   size_t split_cnt;
1313   size_t i;
1314
1315   if (compare && just_read)
1316     {
1317       just_read = 0;
1318       return 1;
1319     }
1320
1321   if (dict_get_split_vars (default_dict) == NULL)
1322     return 1;
1323
1324   if (single_split)
1325     {
1326       if (!compare)
1327         split_values[0]
1328           = ++dict_get_split_vars (default_dict)[0]->p.mxd.subtype;
1329       return 1;
1330     }
1331
1332   if (!compare)
1333     just_read = 1;
1334
1335   split_cnt = dict_get_split_cnt (default_dict);
1336   for (i = 0; i < split_cnt; i++)
1337     {
1338       if (!mget_token ())
1339         return 0;
1340       if (mtoken != MNUM)
1341         {
1342           msg (SE, _("Syntax error expecting SPLIT FILE value %s."),
1343                context ());
1344           return 0;
1345         }
1346
1347       if (!compare)
1348         split_values[i] = mtokval;
1349       else if (split_values[i] != mtokval)
1350         {
1351           msg (SE, _("Expecting value %g for %s."),
1352                split_values[i], dict_get_split_vars (default_dict)[i]->name);
1353           return 0;
1354         }
1355     }
1356
1357   return 1;
1358 }
1359
1360 /* Read the factors for cell CELL.  If COMPARE is 1, compares the
1361    values read to the last values read and returns 1 if they're equal,
1362    0 otherwise. */
1363 static int
1364 nr_read_factors (int cell)
1365 {
1366   int compare;
1367
1368   if (n_factors == 0)
1369     return 1;
1370
1371   assert (max_cell_index >= cell);
1372   if (cell != max_cell_index)
1373     compare = 1;
1374   else
1375     {
1376       compare = 0;
1377       max_cell_index++;
1378     }
1379
1380   {
1381     int i;
1382
1383     for (i = 0; i < n_factors; i++)
1384       {
1385         if (!mget_token ())
1386           return 0;
1387         if (mtoken != MNUM)
1388           {
1389             msg (SE, _("Syntax error expecting factor value %s."),
1390                  context ());
1391             return 0;
1392           }
1393
1394         if (!compare)
1395           nr_factor_values[i + n_factors * cell] = mtokval;
1396         else if (nr_factor_values[i + n_factors * cell] != mtokval)
1397           {
1398             msg (SE, _("Syntax error expecting value %g for %s %s."),
1399                  nr_factor_values[i + n_factors * cell],
1400                  factors[i]->name, context ());
1401             return 0;
1402           }
1403       }
1404   }
1405
1406   return 1;
1407 }
1408
1409 /* Write the contents of a cell having content type CONTENT and data
1410    CP to the active file. */
1411 static void
1412 dump_cell_content (int content, double *cp,
1413                    write_case_func *write_case, write_case_data wc_data)
1414 {
1415   int type = content_type[content];
1416
1417   {
1418     st_bare_pad_copy (temp_case->data[rowtype_->fv].s,
1419                       content_names[content], 8);
1420
1421     if (type != 1)
1422       memset (&temp_case->data[varname_->fv].s, ' ', 8);
1423   }
1424
1425   {
1426     int n_lines = (type == 1) ? n_continuous : 1;
1427     int i;
1428
1429     for (i = 0; i < n_lines; i++)
1430       {
1431         int j;
1432
1433         for (j = 0; j < n_continuous; j++)
1434           {
1435             int fv = dict_get_var (default_dict, first_continuous + j)->fv;
1436             temp_case->data[fv].f = *cp;
1437             cp++;
1438           }
1439         if (type == 1)
1440           st_bare_pad_copy (temp_case->data[varname_->fv].s,
1441                             dict_get_var (default_dict,
1442                                           first_continuous + i)->name,
1443                             8);
1444         write_case (wc_data);
1445       }
1446   }
1447 }
1448
1449 /* Finally dump out everything from nr_data[] to the output file. */
1450 static void
1451 nr_output_data (write_case_func *write_case, write_case_data wc_data)
1452 {
1453   {
1454     struct variable *const *split;
1455     size_t split_cnt;
1456     size_t i;
1457
1458     split_cnt = dict_get_split_cnt (default_dict);
1459     for (i = 0; i < split_cnt; i++)
1460       temp_case->data[split[i]->fv].f = split_values[i];
1461   }
1462
1463   if (n_factors)
1464     {
1465       int cell;
1466
1467       for (cell = 0; cell < cells; cell++)
1468         {
1469           {
1470             int factor;
1471
1472             for (factor = 0; factor < n_factors; factor++)
1473               {
1474                 temp_case->data[factors[factor]->fv].f
1475                   = nr_factor_values[factor + cell * n_factors];
1476                 debug_printf (("f:%s ", factors[factor]->name));
1477               }
1478           }
1479
1480           {
1481             int content;
1482
1483             for (content = 0; content <= PROX; content++)
1484               if (is_per_factor[content])
1485                 {
1486                   assert (nr_data[content] != NULL
1487                           && nr_data[content][cell] != NULL);
1488
1489                   dump_cell_content (content, nr_data[content][cell],
1490                                      write_case, wc_data);
1491                 }
1492           }
1493         }
1494     }
1495
1496   {
1497     int content;
1498
1499     {
1500       int factor;
1501
1502       for (factor = 0; factor < n_factors; factor++)
1503         temp_case->data[factors[factor]->fv].f = SYSMIS;
1504     }
1505
1506     for (content = 0; content <= PROX; content++)
1507       if (!is_per_factor[content] && nr_data[content] != NULL)
1508         dump_cell_content (content, nr_data[content][0],
1509                            write_case, wc_data);
1510   }
1511 }
1512 \f
1513 /* Back end, with ROWTYPE_. */
1514
1515 /* Type of current row. */
1516 static int wr_content;
1517
1518 /* All the data for one set of factor values. */
1519 struct factor_data
1520   {
1521     double *factors;
1522     int n_rows[PROX + 1];
1523     double *data[PROX + 1];
1524     struct factor_data *next;
1525   };
1526
1527 /* All the data, period. */
1528 struct factor_data *wr_data;
1529
1530 /* Current factor. */
1531 struct factor_data *wr_current;
1532
1533 static int wr_read_splits (write_case_func *, write_case_data);
1534 static int wr_output_data (write_case_func *, write_case_data);
1535 static int wr_read_rowtype (void);
1536 static int wr_read_factors (void);
1537 static int wr_read_indeps (void);
1538 static void matrix_data_read_with_rowtype (write_case_func *,
1539                                            write_case_data);
1540
1541 /* When ROWTYPE_ appears in the data, reads the matrices and writes
1542    them to the output file. */
1543 static void
1544 read_matrices_with_rowtype (void)
1545 {
1546   mtoken = MNULL;
1547   wr_data = wr_current = NULL;
1548   split_values = NULL;
1549   cells = 0;
1550
1551   matrix_data_source.read = matrix_data_read_with_rowtype;
1552   vfm_source = &matrix_data_source;
1553
1554   procedure (NULL, NULL, NULL, NULL);
1555
1556   free (split_values);
1557   fh_close_handle (data_file);
1558 }
1559
1560 /* Read from the data file and write it to the active file. */
1561 static void
1562 matrix_data_read_with_rowtype (write_case_func *write_case,
1563                                write_case_data wc_data)
1564 {
1565   do
1566     {
1567       if (!wr_read_splits (write_case, wc_data))
1568         return;
1569
1570       if (!wr_read_factors ())
1571         return;
1572
1573       if (!wr_read_indeps ())
1574         return;
1575     }
1576   while (another_token ());
1577
1578   wr_output_data (write_case, wc_data);
1579 }
1580
1581 /* Read the split file variables.  If they differ from the previous
1582    set of split variables then output the data.  Returns success. */
1583 static int
1584 wr_read_splits (write_case_func *write_case, write_case_data wc_data)
1585 {
1586   int compare;
1587   size_t split_cnt;
1588
1589   split_cnt = dict_get_split_cnt (default_dict);
1590   if (split_cnt == 0)
1591     return 1;
1592
1593   if (split_values)
1594     compare = 1;
1595   else
1596     {
1597       compare = 0;
1598       split_values = xmalloc (split_cnt * sizeof *split_values);
1599     }
1600
1601   {
1602     int different = 0;
1603     size_t split_cnt;
1604     int i;
1605
1606     for (i = 0; i < split_cnt; i++)
1607       {
1608         if (!mget_token ())
1609           return 0;
1610         if (mtoken != MNUM)
1611           {
1612             msg (SE, _("Syntax error %s expecting SPLIT FILE value."),
1613                  context ());
1614             return 0;
1615           }
1616
1617         if (compare && split_values[i] != mtokval && !different)
1618           {
1619             if (!wr_output_data (write_case, wc_data))
1620               return 0;
1621             different = 1;
1622             cells = 0;
1623           }
1624         split_values[i] = mtokval;
1625       }
1626   }
1627
1628   return 1;
1629 }
1630
1631 /* Compares doubles A and B, treating SYSMIS as greatest. */
1632 static int
1633 compare_doubles (const void *a_, const void *b_, void *aux UNUSED)
1634 {
1635   const double *a = a_;
1636   const double *b = b_;
1637
1638   if (*a == *b)
1639     return 0;
1640   else if (*a == SYSMIS)
1641     return 1;
1642   else if (*b == SYSMIS)
1643     return -1;
1644   else if (*a > *b)
1645     return 1;
1646   else
1647     return -1;
1648 }
1649
1650 /* Return strcmp()-type comparison of the n_factors factors at _A and
1651    _B.  Sort missing values toward the end. */
1652 static int
1653 compare_factors (const void *a_, const void *b_)
1654 {
1655   struct factor_data *const *pa = a_;
1656   struct factor_data *const *pb = b_;
1657   const double *a = (*pa)->factors;
1658   const double *b = (*pb)->factors;
1659
1660   return lexicographical_compare (a, n_factors,
1661                                   b, n_factors,
1662                                   sizeof *a,
1663                                   compare_doubles, NULL);
1664 }
1665
1666 /* Write out the data for the current split file to the active
1667    file. */
1668 static int
1669 wr_output_data (write_case_func *write_case, write_case_data wc_data)
1670 {
1671   {
1672     struct variable *const *split;
1673     size_t split_cnt;
1674     size_t i;
1675
1676     split_cnt = dict_get_split_cnt (default_dict);
1677     for (i = 0; i < split_cnt; i++)
1678       temp_case->data[split[i]->fv].f = split_values[i];
1679   }
1680
1681   /* Sort the wr_data list. */
1682   {
1683     struct factor_data **factors;
1684     struct factor_data *iter;
1685     int i;
1686
1687     factors = xmalloc (sizeof *factors * cells);
1688
1689     for (i = 0, iter = wr_data; iter; iter = iter->next, i++)
1690       factors[i] = iter;
1691
1692     qsort (factors, cells, sizeof *factors, compare_factors);
1693
1694     wr_data = factors[0];
1695     for (i = 0; i < cells - 1; i++)
1696       factors[i]->next = factors[i + 1];
1697     factors[cells - 1]->next = NULL;
1698
1699     free (factors);
1700   }
1701
1702   /* Write out records for every set of factor values. */
1703   {
1704     struct factor_data *iter;
1705
1706     for (iter = wr_data; iter; iter = iter->next)
1707       {
1708         {
1709           int factor;
1710
1711           for (factor = 0; factor < n_factors; factor++)
1712             {
1713               temp_case->data[factors[factor]->fv].f
1714                 = iter->factors[factor];
1715               debug_printf (("f:%s ", factors[factor]->name));
1716             }
1717         }
1718
1719         {
1720           int content;
1721
1722           for (content = 0; content <= PROX; content++)
1723             {
1724               if (!iter->n_rows[content])
1725                 continue;
1726
1727               {
1728                 int type = content_type[content];
1729                 int n_lines = (type == 1
1730                                ? (n_continuous
1731                                   - (section != FULL && diag == NODIAGONAL))
1732                                : 1);
1733
1734                 if (n_lines != iter->n_rows[content])
1735                   {
1736                     msg (SE, _("Expected %d lines of data for %s content; "
1737                                "actually saw %d lines.  No data will be "
1738                                "output for this content."),
1739                          n_lines, content_names[content],
1740                          iter->n_rows[content]);
1741                     continue;
1742                   }
1743               }
1744
1745               fill_matrix (content, iter->data[content]);
1746
1747               dump_cell_content (content, iter->data[content],
1748                                  write_case, wc_data);
1749             }
1750         }
1751       }
1752   }
1753
1754   pool_destroy (container);
1755   container = pool_create ();
1756
1757   wr_data = wr_current = NULL;
1758
1759   return 1;
1760 }
1761
1762 /* Read ROWTYPE_ from the data file.  Return success. */
1763 static int
1764 wr_read_rowtype (void)
1765 {
1766   if (wr_content != -1)
1767     {
1768       msg (SE, _("Multiply specified ROWTYPE_ %s."), context ());
1769       return 0;
1770     }
1771   if (mtoken != MSTR)
1772     {
1773       msg (SE, _("Syntax error %s expecting ROWTYPE_ string."), context ());
1774       return 0;
1775     }
1776
1777   {
1778     char s[16];
1779     char *cp;
1780
1781     memcpy (s, mtokstr, min (15, mtoklen));
1782     s[min (15, mtoklen)] = 0;
1783
1784     for (cp = s; *cp; cp++)
1785       *cp = toupper ((unsigned char) *cp);
1786
1787     wr_content = string_to_content_type (s, NULL);
1788   }
1789
1790   if (wr_content == -1)
1791     {
1792       msg (SE, _("Syntax error %s."), context ());
1793       return 0;
1794     }
1795
1796   return 1;
1797 }
1798
1799 /* Read the factors for the current row.  Select a set of factors and
1800    point wr_current to it. */
1801 static int
1802 wr_read_factors (void)
1803 {
1804   double *factor_values = local_alloc (sizeof *factor_values * n_factors);
1805
1806   wr_content = -1;
1807   {
1808     int i;
1809
1810     for (i = 0; i < n_factors; i++)
1811       {
1812         if (!mget_token ())
1813           goto lossage;
1814         if (mtoken == MSTR)
1815           {
1816             if (!wr_read_rowtype ())
1817               goto lossage;
1818             if (!mget_token ())
1819               goto lossage;
1820           }
1821         if (mtoken != MNUM)
1822           {
1823             msg (SE, _("Syntax error expecting factor value %s."),
1824                  context ());
1825             goto lossage;
1826           }
1827
1828         factor_values[i] = mtokval;
1829       }
1830   }
1831   if (wr_content == -1)
1832     {
1833       if (!mget_token ())
1834         goto lossage;
1835       if (!wr_read_rowtype ())
1836         goto lossage;
1837     }
1838
1839   /* Try the most recent factor first as a simple caching
1840      mechanism. */
1841   if (wr_current)
1842     {
1843       int i;
1844
1845       for (i = 0; i < n_factors; i++)
1846         if (factor_values[i] != wr_current->factors[i])
1847           goto cache_miss;
1848       goto winnage;
1849     }
1850
1851   /* Linear search through the list. */
1852 cache_miss:
1853   {
1854     struct factor_data *iter;
1855
1856     for (iter = wr_data; iter; iter = iter->next)
1857       {
1858         int i;
1859
1860         for (i = 0; i < n_factors; i++)
1861           if (factor_values[i] != iter->factors[i])
1862             goto next_item;
1863
1864         wr_current = iter;
1865         goto winnage;
1866
1867       next_item: ;
1868       }
1869   }
1870
1871   /* Not found.  Make a new item. */
1872   {
1873     struct factor_data *new = pool_alloc (container, sizeof *new);
1874
1875     new->factors = pool_alloc (container, sizeof *new->factors * n_factors);
1876
1877     {
1878       int i;
1879
1880       for (i = 0; i < n_factors; i++)
1881         new->factors[i] = factor_values[i];
1882     }
1883
1884     {
1885       int i;
1886
1887       for (i = 0; i <= PROX; i++)
1888         {
1889           new->n_rows[i] = 0;
1890           new->data[i] = NULL;
1891         }
1892     }
1893
1894     new->next = wr_data;
1895     wr_data = wr_current = new;
1896     cells++;
1897   }
1898
1899 winnage:
1900   local_free (factor_values);
1901   return 1;
1902
1903 lossage:
1904   local_free (factor_values);
1905   return 0;
1906 }
1907
1908 /* Read the independent variables into wr_current. */
1909 static int
1910 wr_read_indeps (void)
1911 {
1912   struct factor_data *c = wr_current;
1913   const int type = content_type[wr_content];
1914   const int n_rows = c->n_rows[wr_content];
1915   double *cp;
1916   int n_cols;
1917
1918   /* Allocate room for data if necessary. */
1919   if (c->data[wr_content] == NULL)
1920     {
1921       int n_items = n_continuous;
1922       if (type == 1)
1923         n_items *= n_continuous;
1924
1925       c->data[wr_content] = pool_alloc (container,
1926                                         sizeof **c->data * n_items);
1927     }
1928
1929   cp = &c->data[wr_content][n_rows * n_continuous];
1930
1931   /* Figure out how much to read from this line. */
1932   switch (type)
1933     {
1934     case 0:
1935     case 2:
1936       if (n_rows > 0)
1937         {
1938           msg (SE, _("Duplicate specification for %s."),
1939                content_names[wr_content]);
1940           return 0;
1941         }
1942       if (type == 0)
1943         n_cols = n_continuous;
1944       else
1945         n_cols = 1;
1946       break;
1947     case 1:
1948       if (n_rows >= n_continuous - (section != FULL && diag == NODIAGONAL))
1949         {
1950           msg (SE, _("Too many rows of matrix data for %s."),
1951                content_names[wr_content]);
1952           return 0;
1953         }
1954
1955       switch (section)
1956         {
1957         case LOWER:
1958           n_cols = n_rows + 1;
1959           if (diag == NODIAGONAL)
1960             cp += n_continuous;
1961           break;
1962         case UPPER:
1963           cp += n_rows;
1964           n_cols = n_continuous - n_rows;
1965           if (diag == NODIAGONAL)
1966             {
1967               n_cols--;
1968               cp++;
1969             }
1970           break;
1971         case FULL:
1972           n_cols = n_continuous;
1973           break;
1974         default:
1975           assert (0);
1976         }
1977       break;
1978     default:
1979       assert (0);
1980     }
1981   c->n_rows[wr_content]++;
1982
1983   debug_printf ((" (c=%p,r=%d,n=%d)", c, n_rows + 1, n_cols));
1984
1985   /* Read N_COLS items at CP. */
1986   {
1987     int j;
1988
1989     for (j = 0; j < n_cols; j++)
1990       {
1991         if (!mget_token ())
1992           return 0;
1993         if (mtoken != MNUM)
1994           {
1995             msg (SE, _("Syntax error expecting value for %s %s."),
1996                  dict_get_var (default_dict, first_continuous + j)->name,
1997                  context ());
1998             return 0;
1999           }
2000
2001         *cp++ = mtokval;
2002       }
2003     if (!force_eol (content_names[wr_content]))
2004       return 0;
2005     debug_printf (("\n"));
2006   }
2007
2008   return 1;
2009 }
2010 \f
2011 /* Matrix source. */
2012
2013 struct case_stream matrix_data_source =
2014   {
2015     NULL,
2016     NULL,
2017     NULL,
2018     NULL,
2019     NULL,
2020     NULL,
2021     "MATRIX DATA",
2022   };
2023