src/matrix-data.c

   1 /* PSPP - computes sample statistics.
   2    Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
   3    Written by Ben Pfaff <blp@gnu.org>.
   4
   5    This program is free software; you can redistribute it and/or
   6    modify it under the terms of the GNU General Public License as
   7    published by the Free Software Foundation; either version 2 of the
   8    License, or (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful, but
  11    WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13    General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software
  17    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  18    02111-1307, USA. */
  19
  20 #include <config.h>
  21 #include "error.h"
  22 #include <stdlib.h>
  23 #include <ctype.h>
  24 #include <float.h>
  25 #include "algorithm.h"
  26 #include "alloc.h"
  27 #include "case.h"
  28 #include "command.h"
  29 #include "data-in.h"
  30 #include "dfm.h"
  31 #include "error.h"
  32 #include "file-handle.h"
  33 #include "lexer.h"
  34 #include "misc.h"
  35 #include "pool.h"
  36 #include "str.h"
  37 #include "var.h"
  38 #include "vfm.h"
  39
  40 #include "debug-print.h"
  41
  42 /* FIXME: /N subcommand not implemented.  It should be pretty simple,
  43    too. */
  44
  45 /* Format type enums. */
  46 enum format_type
  47   {
  48     LIST,
  49     FREE
  50   };
  51
  52 /* Matrix section enums. */
  53 enum matrix_section
  54   {
  55     LOWER,
  56     UPPER,
  57     FULL
  58   };
  59
  60 /* Diagonal inclusion enums. */
  61 enum include_diagonal
  62   {
  63     DIAGONAL,
  64     NODIAGONAL
  65   };
  66
  67 /* CONTENTS types. */
  68 enum content_type
  69   {
  70     N_VECTOR,
  71     N_SCALAR,
  72     N_MATRIX,
  73     MEAN,
  74     STDDEV,
  75     COUNT,
  76     MSE,
  77     DFE,
  78     MAT,
  79     COV,
  80     CORR,
  81     PROX,
  82
  83     LPAREN,
  84     RPAREN,
  85     EOC
  86   };
  87
  88 /* 0=vector, 1=matrix, 2=scalar. */
  89 static const int content_type[PROX + 1] =
  90   {
  91     0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,
  92   };
  93
  94 /* Name of each content type. */
  95 static const char *content_names[PROX + 1] =
  96   {
  97     "N", "N", "N_MATRIX", "MEAN", "STDDEV", "COUNT", "MSE",
  98     "DFE", "MAT", "COV", "CORR", "PROX",
  99   };
 100
 101 /* A MATRIX DATA input program. */
 102 struct matrix_data_pgm
 103   {
 104     struct pool *container;     /* Arena used for all allocations. */
 105     struct file_handle *data_file; /* The data file to be read. */
 106
 107     /* Format. */
 108     enum format_type fmt;       /* LIST or FREE. */
 109     enum matrix_section section;/* LOWER or UPPER or FULL. */
 110     enum include_diagonal diag; /* DIAGONAL or NODIAGONAL. */
 111
 112     int explicit_rowtype;       /* ROWTYPE_ specified explicitly in data? */
 113     struct variable *rowtype_, *varname_; /* ROWTYPE_, VARNAME_ variables. */
 114
 115     struct variable *single_split; /* Single SPLIT FILE variable. */
 116
 117     /* Factor variables.  */
 118     int n_factors;              /* Number of factor variables. */
 119     struct variable **factors;  /* Factor variables. */
 120     int is_per_factor[PROX + 1]; /* Is there per-factor data? */
 121
 122     int cells;                  /* Number of cells, or -1 if none. */
 123
 124     int pop_n;                  /* Population N specified by user. */
 125
 126     /* CONTENTS subcommand. */
 127     int contents[EOC * 3 + 1];  /* Contents. */
 128     int n_contents;             /* Number of entries. */
 129
 130     /* Continuous variables. */
 131     int n_continuous;           /* Number of continuous variables. */
 132     int first_continuous;       /* Index into default_dict.var of
 133                                    first continuous variable. */
 134   };
 135
 136 static const struct case_source_class matrix_data_with_rowtype_source_class;
 137 static const struct case_source_class matrix_data_without_rowtype_source_class;
 138
 139 static int compare_variables_by_mxd_vartype (const void *pa,
 140                                              const void *pb);
 141 static void read_matrices_without_rowtype (struct matrix_data_pgm *);
 142 static void read_matrices_with_rowtype (struct matrix_data_pgm *);
 143 static int string_to_content_type (char *, int *);
 144
 145 int
 146 cmd_matrix_data (void)
 147 {
 148   struct pool *pool;
 149   struct matrix_data_pgm *mx;
 150
 151   unsigned seen = 0;
 152
 153   discard_variables ();
 154
 155   pool = pool_create ();
 156   mx = pool_alloc (pool, sizeof *mx);
 157   mx->container = pool;
 158   mx->data_file = inline_file;
 159   mx->fmt = LIST;
 160   mx->section = LOWER;
 161   mx->diag = DIAGONAL;
 162   mx->explicit_rowtype = 0;
 163   mx->rowtype_ = NULL;
 164   mx->varname_ = NULL;
 165   mx->single_split = NULL;
 166   mx->n_factors = 0;
 167   mx->factors = NULL;
 168   memset (mx->is_per_factor, 0, sizeof mx->is_per_factor);
 169   mx->cells = -1;
 170   mx->pop_n = -1;
 171   mx->n_contents = 0;
 172   mx->n_continuous = 0;
 173   mx->first_continuous = 0;
 174   while (token != '.')
 175     {
 176       lex_match ('/');
 177
 178       if (lex_match_id ("VARIABLES"))
 179         {
 180           char **v;
 181           int nv;
 182
 183           if (seen & 1)
 184             {
 185               msg (SE, _("VARIABLES subcommand multiply specified."));
 186               goto lossage;
 187             }
 188           seen |= 1;
 189
 190           lex_match ('=');
 191           if (!parse_DATA_LIST_vars (&v, &nv, PV_NO_DUPLICATE))
 192             goto lossage;
 193
 194           {
 195             int i;
 196
 197             for (i = 0; i < nv; i++)
 198               if (!strcmp (v[i], "VARNAME_"))
 199                 {
 200                   msg (SE, _("VARNAME_ cannot be explicitly specified on "
 201                              "VARIABLES."));
 202                   for (i = 0; i < nv; i++)
 203                     free (v[i]);
 204                   free (v);
 205                   goto lossage;
 206                 }
 207           }
 208
 209           {
 210             int i;
 211
 212             for (i = 0; i < nv; i++)
 213               {
 214                 struct variable *new_var;
 215
 216                 if (strcmp (v[i], "ROWTYPE_"))
 217                   {
 218                     new_var = dict_create_var_assert (default_dict, v[i], 0);
 219                     new_var->p.mxd.vartype = MXD_CONTINUOUS;
 220                     new_var->p.mxd.subtype = i;
 221                   }
 222                 else
 223                   mx->explicit_rowtype = 1;
 224                 free (v[i]);
 225               }
 226             free (v);
 227           }
 228
 229           {
 230             mx->rowtype_ = dict_create_var_assert (default_dict,
 231                                                    "ROWTYPE_", 8);
 232             mx->rowtype_->p.mxd.vartype = MXD_ROWTYPE;
 233             mx->rowtype_->p.mxd.subtype = 0;
 234           }
 235         }
 236       else if (lex_match_id ("FILE"))
 237         {
 238           lex_match ('=');
 239           mx->data_file = fh_parse_file_handle ();
 240           if (mx->data_file == NULL)
 241             goto lossage;
 242         }
 243       else if (lex_match_id ("FORMAT"))
 244         {
 245           lex_match ('=');
 246
 247           while (token == T_ID)
 248             {
 249               if (lex_match_id ("LIST"))
 250                 mx->fmt = LIST;
 251               else if (lex_match_id ("FREE"))
 252                 mx->fmt = FREE;
 253               else if (lex_match_id ("LOWER"))
 254                 mx->section = LOWER;
 255               else if (lex_match_id ("UPPER"))
 256                 mx->section = UPPER;
 257               else if (lex_match_id ("FULL"))
 258                 mx->section = FULL;
 259               else if (lex_match_id ("DIAGONAL"))
 260                 mx->diag = DIAGONAL;
 261               else if (lex_match_id ("NODIAGONAL"))
 262                 mx->diag = NODIAGONAL;
 263               else
 264                 {
 265                   lex_error (_("in FORMAT subcommand"));
 266                   goto lossage;
 267                 }
 268             }
 269         }
 270       else if (lex_match_id ("SPLIT"))
 271         {
 272           lex_match ('=');
 273
 274           if (seen & 2)
 275             {
 276               msg (SE, _("SPLIT subcommand multiply specified."));
 277               goto lossage;
 278             }
 279           seen |= 2;
 280
 281           if (token != T_ID)
 282             {
 283               lex_error (_("in SPLIT subcommand"));
 284               goto lossage;
 285             }
 286
 287           if (dict_lookup_var (default_dict, tokid) == NULL
 288               && (lex_look_ahead () == '.' || lex_look_ahead () == '/'))
 289             {
 290               if (!strcmp (tokid, "ROWTYPE_") || !strcmp (tokid, "VARNAME_"))
 291                 {
 292                   msg (SE, _("Split variable may not be named ROWTYPE_ "
 293                              "or VARNAME_."));
 294                   goto lossage;
 295                 }
 296
 297               mx->single_split = dict_create_var_assert (default_dict,
 298                                                          tokid, 0);
 299               lex_get ();
 300
 301               mx->single_split->p.mxd.vartype = MXD_CONTINUOUS;
 302
 303               dict_set_split_vars (default_dict, &mx->single_split, 1);
 304             }
 305           else
 306             {
 307               struct variable **split;
 308               int n;
 309
 310               if (!parse_variables (default_dict, &split, &n, PV_NO_DUPLICATE))
 311                 goto lossage;
 312
 313               dict_set_split_vars (default_dict, split, n);
 314             }
 315
 316           {
 317             struct variable *const *split = dict_get_split_vars (default_dict);
 318             size_t split_cnt = dict_get_split_cnt (default_dict);
 319             int i;
 320
 321             for (i = 0; i < split_cnt; i++)
 322               {
 323                 if (split[i]->p.mxd.vartype != MXD_CONTINUOUS)
 324                   {
 325                     msg (SE, _("Split variable %s is already another type."),
 326                          tokid);
 327                     goto lossage;
 328                   }
 329                 split[i]->p.mxd.vartype = MXD_SPLIT;
 330                 split[i]->p.mxd.subtype = i;
 331               }
 332           }
 333         }
 334       else if (lex_match_id ("FACTORS"))
 335         {
 336           lex_match ('=');
 337
 338           if (seen & 4)
 339             {
 340               msg (SE, _("FACTORS subcommand multiply specified."));
 341               goto lossage;
 342             }
 343           seen |= 4;
 344
 345           if (!parse_variables (default_dict, &mx->factors, &mx->n_factors, PV_NONE))
 346             goto lossage;
 347
 348           {
 349             int i;
 350
 351             for (i = 0; i < mx->n_factors; i++)
 352               {
 353                 if (mx->factors[i]->p.mxd.vartype != MXD_CONTINUOUS)
 354                   {
 355                     msg (SE, _("Factor variable %s is already another type."),
 356                          tokid);
 357                     goto lossage;
 358                   }
 359                 mx->factors[i]->p.mxd.vartype = MXD_FACTOR;
 360                 mx->factors[i]->p.mxd.subtype = i;
 361               }
 362           }
 363         }
 364       else if (lex_match_id ("CELLS"))
 365         {
 366           lex_match ('=');
 367
 368           if (mx->cells != -1)
 369             {
 370               msg (SE, _("CELLS subcommand multiply specified."));
 371               goto lossage;
 372             }
 373
 374           if (!lex_integer_p () || lex_integer () < 1)
 375             {
 376               lex_error (_("expecting positive integer"));
 377               goto lossage;
 378             }
 379
 380           mx->cells = lex_integer ();
 381           lex_get ();
 382         }
 383       else if (lex_match_id ("N"))
 384         {
 385           lex_match ('=');
 386
 387           if (mx->pop_n != -1)
 388             {
 389               msg (SE, _("N subcommand multiply specified."));
 390               goto lossage;
 391             }
 392
 393           if (!lex_integer_p () || lex_integer () < 1)
 394             {
 395               lex_error (_("expecting positive integer"));
 396               goto lossage;
 397             }
 398
 399           mx->pop_n = lex_integer ();
 400           lex_get ();
 401         }
 402       else if (lex_match_id ("CONTENTS"))
 403         {
 404           int inside_parens = 0;
 405           unsigned collide = 0;
 406           int item;
 407
 408           if (seen & 8)
 409             {
 410               msg (SE, _("CONTENTS subcommand multiply specified."));
 411               goto lossage;
 412             }
 413           seen |= 8;
 414
 415           lex_match ('=');
 416
 417           {
 418             int i;
 419
 420             for (i = 0; i <= PROX; i++)
 421               mx->is_per_factor[i] = 0;
 422           }
 423
 424           for (;;)
 425             {
 426               if (lex_match ('('))
 427                 {
 428                   if (inside_parens)
 429                     {
 430                       msg (SE, _("Nested parentheses not allowed."));
 431                       goto lossage;
 432                     }
 433                   inside_parens = 1;
 434                   item = LPAREN;
 435                 }
 436               else if (lex_match (')'))
 437                 {
 438                   if (!inside_parens)
 439                     {
 440                       msg (SE, _("Mismatched right parenthesis (`(')."));
 441                       goto lossage;
 442                     }
 443                   if (mx->contents[mx->n_contents - 1] == LPAREN)
 444                     {
 445                       msg (SE, _("Empty parentheses not allowed."));
 446                       goto lossage;
 447                     }
 448                   inside_parens = 0;
 449                   item = RPAREN;
 450                 }
 451               else
 452                 {
 453                   int content_type;
 454                   int collide_index;
 455
 456                   if (token != T_ID)
 457                     {
 458                       lex_error (_("in CONTENTS subcommand"));
 459                       goto lossage;
 460                     }
 461
 462                   content_type = string_to_content_type (tokid,
 463                                                          &collide_index);
 464                   if (content_type == -1)
 465                     {
 466                       lex_error (_("in CONTENTS subcommand"));
 467                       goto lossage;
 468                     }
 469                   lex_get ();
 470
 471                   if (collide & (1 << collide_index))
 472                     {
 473                       msg (SE, _("Content multiply specified for %s."),
 474                            content_names[content_type]);
 475                       goto lossage;
 476                     }
 477                   collide |= (1 << collide_index);
 478
 479                   item = content_type;
 480                   mx->is_per_factor[item] = inside_parens;
 481                 }
 482               mx->contents[mx->n_contents++] = item;
 483
 484               if (token == '/' || token == '.')
 485                 break;
 486             }
 487
 488           if (inside_parens)
 489             {
 490               msg (SE, _("Missing right parenthesis."));
 491               goto lossage;
 492             }
 493           mx->contents[mx->n_contents] = EOC;
 494         }
 495       else
 496         {
 497           lex_error (NULL);
 498           goto lossage;
 499         }
 500     }
 501
 502   if (token != '.')
 503     {
 504       lex_error (_("expecting end of command"));
 505       goto lossage;
 506     }
 507
 508   if (!(seen & 1))
 509     {
 510       msg (SE, _("Missing VARIABLES subcommand."));
 511       goto lossage;
 512     }
 513
 514   if (!mx->n_contents && !mx->explicit_rowtype)
 515     {
 516       msg (SW, _("CONTENTS subcommand not specified: assuming file "
 517                  "contains only CORR matrix."));
 518
 519       mx->contents[0] = CORR;
 520       mx->contents[1] = EOC;
 521       mx->n_contents = 0;
 522     }
 523
 524   if (mx->n_factors && !mx->explicit_rowtype && mx->cells == -1)
 525     {
 526       msg (SE, _("Missing CELLS subcommand.  CELLS is required "
 527                  "when ROWTYPE_ is not given in the data and "
 528                  "factors are present."));
 529       goto lossage;
 530     }
 531
 532   if (mx->explicit_rowtype && mx->single_split)
 533     {
 534       msg (SE, _("Split file values must be present in the data when "
 535                  "ROWTYPE_ is present."));
 536       goto lossage;
 537     }
 538
 539   /* Create VARNAME_. */
 540   {
 541     mx->varname_ = dict_create_var_assert (default_dict, "VARNAME_", 8);
 542     mx->varname_->p.mxd.vartype = MXD_VARNAME;
 543     mx->varname_->p.mxd.subtype = 0;
 544   }
 545
 546   /* Sort the dictionary variables into the desired order for the
 547      system file output. */
 548   {
 549     struct variable **v;
 550     size_t nv;
 551
 552     dict_get_vars (default_dict, &v, &nv, 0);
 553     qsort (v, nv, sizeof *v, compare_variables_by_mxd_vartype);
 554     dict_reorder_vars (default_dict, v, nv);
 555     free (v);
 556   }
 557
 558   /* Set formats. */
 559   {
 560     static const struct fmt_spec fmt_tab[MXD_COUNT] =
 561       {
 562         {FMT_F, 4, 0},
 563         {FMT_A, 8, 0},
 564         {FMT_F, 4, 0},
 565         {FMT_A, 8, 0},
 566         {FMT_F, 10, 4},
 567       };
 568
 569     int i;
 570
 571     mx->first_continuous = -1;
 572     for (i = 0; i < dict_get_var_cnt (default_dict); i++)
 573       {
 574         struct variable *v = dict_get_var (default_dict, i);
 575         int type = v->p.mxd.vartype;
 576
 577         assert (type >= 0 && type < MXD_COUNT);
 578         v->print = v->write = fmt_tab[type];
 579
 580         if (type == MXD_CONTINUOUS)
 581           mx->n_continuous++;
 582         if (mx->first_continuous == -1 && type == MXD_CONTINUOUS)
 583           mx->first_continuous = i;
 584       }
 585   }
 586
 587   if (mx->n_continuous == 0)
 588     {
 589       msg (SE, _("No continuous variables specified."));
 590       goto lossage;
 591     }
 592
 593   if (!dfm_open_for_reading (mx->data_file))
 594     goto lossage;
 595
 596   if (mx->explicit_rowtype)
 597     read_matrices_with_rowtype (mx);
 598   else
 599     read_matrices_without_rowtype (mx);
 600
 601   pool_destroy (mx->container);
 602
 603   return CMD_SUCCESS;
 604
 605 lossage:
 606   discard_variables ();
 607   free (mx->factors);
 608   pool_destroy (mx->container);
 609   return CMD_FAILURE;
 610 }
 611
 612 /* Look up string S as a content-type name and return the
 613    corresponding enumerated value, or -1 if there is no match.  If
 614    COLLIDE is non-NULL then *COLLIDE returns a value (suitable for use
 615    as a bit-index) which can be used for determining whether a related
 616    statistic has already been used. */
 617 static int
 618 string_to_content_type (char *s, int *collide)
 619 {
 620   static const struct
 621     {
 622       int value;
 623       int collide;
 624       const char *string;
 625     }
 626   *tp,
 627   tab[] =
 628     {
 629       {N_VECTOR, 0, "N_VECTOR"},
 630       {N_VECTOR, 0, "N"},
 631       {N_SCALAR, 0, "N_SCALAR"},
 632       {N_MATRIX, 1, "N_MATRIX"},
 633       {MEAN, 2, "MEAN"},
 634       {STDDEV, 3, "STDDEV"},
 635       {STDDEV, 3, "SD"},
 636       {COUNT, 4, "COUNT"},
 637       {MSE, 5, "MSE"},
 638       {DFE, 6, "DFE"},
 639       {MAT, 7, "MAT"},
 640       {COV, 8, "COV"},
 641       {CORR, 9, "CORR"},
 642       {PROX, 10, "PROX"},
 643       {-1, -1, NULL},
 644     };
 645
 646   for (tp = tab; tp->value != -1; tp++)
 647     if (!strcmp (s, tp->string))
 648       {
 649         if (collide)
 650           *collide = tp->collide;
 651
 652         return tp->value;
 653       }
 654   return -1;
 655 }
 656
 657 /* Compare two variables using p.mxd.vartype and p.mxd.subtype
 658    fields. */
 659 static int
 660 compare_variables_by_mxd_vartype (const void *a_, const void *b_)
 661 {
 662   struct variable *const *pa = a_;
 663   struct variable *const *pb = b_;
 664   const struct matrix_data_proc *a = &(*pa)->p.mxd;
 665   const struct matrix_data_proc *b = &(*pb)->p.mxd;
 666
 667   if (a->vartype != b->vartype)
 668     return a->vartype > b->vartype ? 1 : -1;
 669   else
 670     return a->subtype < b->subtype ? -1 : a->subtype > b->subtype;
 671 }
 672 \f
 673 /* Matrix tokenizer. */
 674
 675 /* Matrix token types. */
 676 enum matrix_token_type
 677   {
 678     MNUM,               /* Number. */
 679     MSTR                /* String. */
 680   };
 681
 682 /* A MATRIX DATA parsing token. */
 683 struct matrix_token
 684   {
 685     enum matrix_token_type type;
 686     double number;       /* MNUM: token value. */
 687     char *string;        /* MSTR: token string; not null-terminated. */
 688     int length;          /* MSTR: tokstr length. */
 689   };
 690
 691 static int mget_token (struct matrix_token *, struct file_handle *);
 692
 693 #if DEBUGGING
 694 #define mget_token(TOKEN, HANDLE) mget_token_dump(TOKEN, HANDLE)
 695
 696 static void
 697 mdump_token (const struct matrix_token *token)
 698 {
 699   switch (token->type)
 700     {
 701     case MNUM:
 702       printf (" #%g", token->number);
 703       break;
 704     case MSTR:
 705       printf (" '%.*s'", token->length, token->string);
 706       break;
 707     default:
 708       assert (0);
 709     }
 710   fflush (stdout);
 711 }
 712
 713 static int
 714 mget_token_dump (struct matrix_token *token, struct file_handle *data_file)
 715 {
 716   int result = (mget_token) (token, data_file);
 717   mdump_token (token);
 718   return result;
 719 }
 720 #endif
 721
 722 /* Return the current position in DATA_FILE. */
 723 static const char *
 724 context (struct file_handle *data_file)
 725 {
 726   static char buf[32];
 727
 728   if (dfm_eof (data_file))
 729     strcpy (buf, "at end of file");
 730   else
 731     {
 732       struct len_string line;
 733       const char *sp;
 734
 735       dfm_get_record (data_file, &line);
 736       sp = ls_c_str (&line);
 737       while (sp < ls_end (&line) && isspace ((unsigned char) *sp))
 738         sp++;
 739       if (sp >= ls_end (&line))
 740         strcpy (buf, "at end of line");
 741       else
 742         {
 743           char *dp;
 744           size_t copy_cnt = 0;
 745
 746           dp = stpcpy (buf, "before `");
 747           while (sp < ls_end (&line) && !isspace ((unsigned char) *sp)
 748                  && copy_cnt < 10)
 749             {
 750               *dp++ = *sp++;
 751               copy_cnt++;
 752             }
 753           strcpy (dp, "'");
 754         }
 755     }
 756
 757   return buf;
 758 }
 759
 760 /* Is there at least one token left in the data file? */
 761 static int
 762 another_token (struct file_handle *data_file)
 763 {
 764   for (;;)
 765     {
 766       struct len_string line;
 767       const char *cp;
 768
 769       if (dfm_eof (data_file))
 770         return 0;
 771       dfm_get_record (data_file, &line);
 772
 773       cp = ls_c_str (&line);
 774       while (isspace ((unsigned char) *cp) && cp < ls_end (&line))
 775         cp++;
 776
 777       if (cp < ls_end (&line))
 778         {
 779           dfm_forward_columns (data_file, cp - ls_c_str (&line));
 780           return 1;
 781         }
 782
 783       dfm_forward_record (data_file);
 784     }
 785 }
 786
 787 /* Parse a MATRIX DATA token from mx->data_file into TOKEN. */
 788 static int
 789 (mget_token) (struct matrix_token *token, struct file_handle *data_file)
 790 {
 791   struct len_string line;
 792   int first_column;
 793   char *cp;
 794
 795   if (!another_token (data_file))
 796     return 0;
 797
 798   dfm_get_record (data_file, &line);
 799   first_column = dfm_column_start (data_file);
 800
 801   /* Three types of fields: quoted with ', quoted with ", unquoted. */
 802   cp = ls_c_str (&line);
 803   if (*cp == '\'' || *cp == '"')
 804     {
 805       int quote = *cp;
 806
 807       token->type = MSTR;
 808       token->string = ++cp;
 809       while (cp < ls_end (&line) && *cp != quote)
 810         cp++;
 811       token->length = cp - token->string;
 812       if (cp < ls_end (&line))
 813         cp++;
 814       else
 815         msg (SW, _("Scope of string exceeds line."));
 816     }
 817   else
 818     {
 819       int is_num = isdigit ((unsigned char) *cp) || *cp == '.';
 820
 821       token->string = cp++;
 822       while (cp < ls_end (&line)
 823              && !isspace ((unsigned char) *cp) && *cp != ','
 824              && *cp != '-' && *cp != '+')
 825         {
 826           if (isdigit ((unsigned char) *cp))
 827             is_num = 1;
 828
 829           if ((tolower ((unsigned char) *cp) == 'd'
 830                || tolower ((unsigned char) *cp) == 'e')
 831               && (cp[1] == '+' || cp[1] == '-'))
 832             cp += 2;
 833           else
 834             cp++;
 835         }
 836
 837       token->length = cp - token->string;
 838       assert (token->length);
 839
 840       if (is_num)
 841         {
 842           struct data_in di;
 843
 844           di.s = token->string;
 845           di.e = token->string + token->length;
 846           di.v = (union value *) &token->number;
 847           di.f1 = first_column;
 848           di.format.type = FMT_F;
 849           di.format.w = token->length;
 850           di.format.d = 0;
 851
 852           if (!data_in (&di))
 853             return 0;
 854         }
 855       else
 856         token->type = MSTR;
 857     }
 858
 859   dfm_forward_columns (data_file, cp - ls_c_str (&line));
 860
 861   return 1;
 862 }
 863
 864 /* Forcibly skip the end of a line for content type CONTENT in
 865    DATA_FILE. */
 866 static int
 867 force_eol (struct file_handle *data_file, const char *content)
 868 {
 869   struct len_string line;
 870   const char *cp;
 871
 872   if (dfm_eof (data_file))
 873     return 0;
 874   dfm_get_record (data_file, &line);
 875
 876   cp = ls_c_str (&line);
 877   while (isspace ((unsigned char) *cp) && cp < ls_end (&line))
 878     cp++;
 879
 880   if (cp < ls_end (&line))
 881     {
 882       msg (SE, _("End of line expected %s while reading %s."),
 883            context (data_file), content);
 884       return 0;
 885     }
 886
 887   dfm_forward_record (data_file);
 888   return 1;
 889 }
 890 \f
 891 /* Back end, omitting ROWTYPE_. */
 892
 893 struct nr_aux_data
 894   {
 895     struct matrix_data_pgm *mx; /* MATRIX DATA program. */
 896     double ***data;             /* MATRIX DATA data. */
 897     double *factor_values;      /* Factor values. */
 898     int max_cell_idx;           /* Max-numbered cell that we have
 899                                    read so far, plus one. */
 900     double *split_values;       /* SPLIT FILE variable values. */
 901   };
 902
 903 static int nr_read_splits (struct nr_aux_data *, int compare);
 904 static int nr_read_factors (struct nr_aux_data *, int cell);
 905 static void nr_output_data (struct nr_aux_data *, struct ccase *,
 906                             write_case_func *, write_case_data);
 907 static void matrix_data_read_without_rowtype (struct case_source *source,
 908                                               struct ccase *,
 909                                               write_case_func *,
 910                                               write_case_data);
 911
 912 /* Read from the data file and write it to the active file. */
 913 static void
 914 read_matrices_without_rowtype (struct matrix_data_pgm *mx)
 915 {
 916   struct nr_aux_data nr;
 917
 918   if (mx->cells == -1)
 919     mx->cells = 1;
 920
 921   nr.mx = mx;
 922   nr.data = NULL;
 923   nr.factor_values = xmalloc (sizeof *nr.factor_values * mx->n_factors * mx->cells);
 924   nr.max_cell_idx = 0;
 925   nr.split_values = xmalloc (sizeof *nr.split_values
 926                              * dict_get_split_cnt (default_dict));
 927
 928   vfm_source = create_case_source (&matrix_data_without_rowtype_source_class,
 929                                    default_dict, &nr);
 930
 931   procedure (NULL, NULL);
 932
 933   free (nr.split_values);
 934   free (nr.factor_values);
 935
 936   fh_close_handle (mx->data_file);
 937 }
 938
 939 /* Mirror data across the diagonal of matrix CP which contains
 940    CONTENT type data. */
 941 static void
 942 fill_matrix (struct matrix_data_pgm *mx, int content, double *cp)
 943 {
 944   int type = content_type[content];
 945
 946   if (type == 1 && mx->section != FULL)
 947     {
 948       if (mx->diag == NODIAGONAL)
 949         {
 950           const double fill = content == CORR ? 1.0 : SYSMIS;
 951           int i;
 952
 953           for (i = 0; i < mx->n_continuous; i++)
 954             cp[i * (1 + mx->n_continuous)] = fill;
 955         }
 956
 957       {
 958         int c, r;
 959
 960         if (mx->section == LOWER)
 961           {
 962             int n_lines = mx->n_continuous;
 963             if (mx->section != FULL && mx->diag == NODIAGONAL)
 964               n_lines--;
 965
 966             for (r = 1; r < n_lines; r++)
 967               for (c = 0; c < r; c++)
 968                 cp[r + c * mx->n_continuous] = cp[c + r * mx->n_continuous];
 969           }
 970         else
 971           {
 972             assert (mx->section == UPPER);
 973             for (r = 1; r < mx->n_continuous; r++)
 974               for (c = 0; c < r; c++)
 975                 cp[c + r * mx->n_continuous] = cp[r + c * mx->n_continuous];
 976           }
 977       }
 978     }
 979   else if (type == 2)
 980     {
 981       int c;
 982
 983       for (c = 1; c < mx->n_continuous; c++)
 984         cp[c] = cp[0];
 985     }
 986 }
 987
 988 /* Read data lines for content type CONTENT from the data file.
 989    If PER_FACTOR is nonzero, then factor information is read from
 990    the data file.  Data is for cell number CELL. */
 991 static int
 992 nr_read_data_lines (struct nr_aux_data *nr,
 993                     int per_factor, int cell, int content, int compare)
 994 {
 995   struct matrix_data_pgm *mx = nr->mx;
 996   const int type = content_type[content];               /* Content type. */
 997   int n_lines; /* Number of lines to parse from data file for this type. */
 998   double *cp;                   /* Current position in vector or matrix. */
 999   int i;
1000
1001   if (type != 1)
1002     n_lines = 1;
1003   else
1004     {
1005       n_lines = mx->n_continuous;
1006       if (mx->section != FULL && mx->diag == NODIAGONAL)
1007         n_lines--;
1008     }
1009
1010   cp = nr->data[content][cell];
1011   if (type == 1 && mx->section == LOWER && mx->diag == NODIAGONAL)
1012     cp += mx->n_continuous;
1013
1014   for (i = 0; i < n_lines; i++)
1015     {
1016       int n_cols;
1017
1018       if (!nr_read_splits (nr, 1))
1019         return 0;
1020       if (per_factor && !nr_read_factors (nr, cell))
1021         return 0;
1022       compare = 1;
1023
1024       switch (type)
1025         {
1026         case 0:
1027           n_cols = mx->n_continuous;
1028           break;
1029         case 1:
1030           switch (mx->section)
1031             {
1032             case LOWER:
1033               n_cols = i + 1;
1034               break;
1035             case UPPER:
1036               cp += i;
1037               n_cols = mx->n_continuous - i;
1038               if (mx->diag == NODIAGONAL)
1039                 {
1040                   n_cols--;
1041                   cp++;
1042                 }
1043               break;
1044             case FULL:
1045               n_cols = mx->n_continuous;
1046               break;
1047             default:
1048               assert (0);
1049               abort ();
1050             }
1051           break;
1052         case 2:
1053           n_cols = 1;
1054           break;
1055         default:
1056           assert (0);
1057           abort ();
1058         }
1059
1060       {
1061         int j;
1062
1063         for (j = 0; j < n_cols; j++)
1064           {
1065             struct matrix_token token;
1066             if (!mget_token (&token, mx->data_file))
1067               return 0;
1068             if (token.type != MNUM)
1069               {
1070                 msg (SE, _("expecting value for %s %s"),
1071                      dict_get_var (default_dict, j)->name,
1072                      context (mx->data_file));
1073                 return 0;
1074               }
1075
1076             *cp++ = token.number;
1077           }
1078         if (mx->fmt != FREE
1079             && !force_eol (mx->data_file, content_names[content]))
1080           return 0;
1081         debug_printf (("\n"));
1082       }
1083
1084       if (mx->section == LOWER)
1085         cp += mx->n_continuous - n_cols;
1086     }
1087
1088   fill_matrix (mx, content, nr->data[content][cell]);
1089
1090   return 1;
1091 }
1092
1093 /* When ROWTYPE_ does not appear in the data, reads the matrices and
1094    writes them to the output file.  Returns success. */
1095 static void
1096 matrix_data_read_without_rowtype (struct case_source *source,
1097                                   struct ccase *c,
1098                                   write_case_func *write_case,
1099                                   write_case_data wc_data)
1100 {
1101   struct nr_aux_data *nr = source->aux;
1102   struct matrix_data_pgm *mx = nr->mx;
1103
1104   {
1105     int *cp;
1106
1107     nr->data = pool_alloc (mx->container, (PROX + 1) * sizeof *nr->data);
1108
1109     {
1110       int i;
1111
1112       for (i = 0; i <= PROX; i++)
1113         nr->data[i] = NULL;
1114     }
1115
1116     for (cp = mx->contents; *cp != EOC; cp++)
1117       if (*cp != LPAREN && *cp != RPAREN)
1118         {
1119           int per_factor = mx->is_per_factor[*cp];
1120           int n_entries;
1121
1122           n_entries = mx->n_continuous;
1123           if (content_type[*cp] == 1)
1124             n_entries *= mx->n_continuous;
1125
1126           {
1127             int n_vectors = per_factor ? mx->cells : 1;
1128             int i;
1129
1130             nr->data[*cp] = pool_alloc (mx->container,
1131                                        n_vectors * sizeof **nr->data);
1132
1133             for (i = 0; i < n_vectors; i++)
1134               nr->data[*cp][i] = pool_alloc (mx->container,
1135                                             n_entries * sizeof ***nr->data);
1136           }
1137         }
1138   }
1139
1140   for (;;)
1141     {
1142       int *bp, *ep, *np;
1143
1144       if (!nr_read_splits (nr, 0))
1145         return;
1146
1147       for (bp = mx->contents; *bp != EOC; bp = np)
1148         {
1149           int per_factor;
1150
1151           /* Trap the CONTENTS that we should parse in this pass
1152              between bp and ep.  Set np to the starting bp for next
1153              iteration. */
1154           if (*bp == LPAREN)
1155             {
1156               ep = ++bp;
1157               while (*ep != RPAREN)
1158                 ep++;
1159               np = &ep[1];
1160               per_factor = 1;
1161             }
1162           else
1163             {
1164               ep = &bp[1];
1165               while (*ep != EOC && *ep != LPAREN)
1166                 ep++;
1167               np = ep;
1168               per_factor = 0;
1169             }
1170
1171           {
1172             int i;
1173
1174             for (i = 0; i < (per_factor ? mx->cells : 1); i++)
1175               {
1176                 int *cp;
1177
1178                 for (cp = bp; cp < ep; cp++)
1179                   if (!nr_read_data_lines (nr, per_factor, i, *cp, cp != bp))
1180                     return;
1181               }
1182           }
1183         }
1184
1185       nr_output_data (nr, c, write_case, wc_data);
1186
1187       if (dict_get_split_cnt (default_dict) == 0
1188           || !another_token (mx->data_file))
1189         return;
1190     }
1191 }
1192
1193 /* Read the split file variables.  If COMPARE is 1, compares the
1194    values read to the last values read and returns 1 if they're equal,
1195    0 otherwise. */
1196 static int
1197 nr_read_splits (struct nr_aux_data *nr, int compare)
1198 {
1199   struct matrix_data_pgm *mx = nr->mx;
1200   static int just_read = 0; /* FIXME: WTF? */
1201   size_t split_cnt;
1202   size_t i;
1203
1204   if (compare && just_read)
1205     {
1206       just_read = 0;
1207       return 1;
1208     }
1209
1210   if (dict_get_split_vars (default_dict) == NULL)
1211     return 1;
1212
1213   if (mx->single_split)
1214     {
1215       if (!compare)
1216         nr->split_values[0]
1217           = ++dict_get_split_vars (default_dict)[0]->p.mxd.subtype;
1218       return 1;
1219     }
1220
1221   if (!compare)
1222     just_read = 1;
1223
1224   split_cnt = dict_get_split_cnt (default_dict);
1225   for (i = 0; i < split_cnt; i++)
1226     {
1227       struct matrix_token token;
1228       if (!mget_token (&token, mx->data_file))
1229         return 0;
1230       if (token.type != MNUM)
1231         {
1232           msg (SE, _("Syntax error expecting SPLIT FILE value %s."),
1233                context (mx->data_file));
1234           return 0;
1235         }
1236
1237       if (!compare)
1238         nr->split_values[i] = token.number;
1239       else if (nr->split_values[i] != token.number)
1240         {
1241           msg (SE, _("Expecting value %g for %s."),
1242                nr->split_values[i],
1243                dict_get_split_vars (default_dict)[i]->name);
1244           return 0;
1245         }
1246     }
1247
1248   return 1;
1249 }
1250
1251 /* Read the factors for cell CELL.  If COMPARE is 1, compares the
1252    values read to the last values read and returns 1 if they're equal,
1253    0 otherwise. */
1254 static int
1255 nr_read_factors (struct nr_aux_data *nr, int cell)
1256 {
1257   struct matrix_data_pgm *mx = nr->mx;
1258   int compare;
1259
1260   if (mx->n_factors == 0)
1261     return 1;
1262
1263   assert (nr->max_cell_idx >= cell);
1264   if (cell != nr->max_cell_idx)
1265     compare = 1;
1266   else
1267     {
1268       compare = 0;
1269       nr->max_cell_idx++;
1270     }
1271
1272   {
1273     int i;
1274
1275     for (i = 0; i < mx->n_factors; i++)
1276       {
1277         struct matrix_token token;
1278         if (!mget_token (&token, mx->data_file))
1279           return 0;
1280         if (token.type != MNUM)
1281           {
1282             msg (SE, _("Syntax error expecting factor value %s."),
1283                  context (mx->data_file));
1284             return 0;
1285           }
1286
1287         if (!compare)
1288           nr->factor_values[i + mx->n_factors * cell] = token.number;
1289         else if (nr->factor_values[i + mx->n_factors * cell] != token.number)
1290           {
1291             msg (SE, _("Syntax error expecting value %g for %s %s."),
1292                  nr->factor_values[i + mx->n_factors * cell],
1293                  mx->factors[i]->name, context (mx->data_file));
1294             return 0;
1295           }
1296       }
1297   }
1298
1299   return 1;
1300 }
1301
1302 /* Write the contents of a cell having content type CONTENT and data
1303    CP to the active file. */
1304 static void
1305 dump_cell_content (struct matrix_data_pgm *mx, int content, double *cp,
1306                    struct ccase *c,
1307                    write_case_func *write_case, write_case_data wc_data)
1308 {
1309   int type = content_type[content];
1310
1311   {
1312     st_bare_pad_copy (case_data_rw (c, mx->rowtype_->fv)->s,
1313                       content_names[content], 8);
1314
1315     if (type != 1)
1316       memset (case_data_rw (c, mx->varname_->fv)->s, ' ', 8);
1317   }
1318
1319   {
1320     int n_lines = (type == 1) ? mx->n_continuous : 1;
1321     int i;
1322
1323     for (i = 0; i < n_lines; i++)
1324       {
1325         int j;
1326
1327         for (j = 0; j < mx->n_continuous; j++)
1328           {
1329             int fv = dict_get_var (default_dict, mx->first_continuous + j)->fv;
1330             case_data_rw (c, fv)->f = *cp;
1331             cp++;
1332           }
1333         if (type == 1)
1334           st_bare_pad_copy (case_data_rw (c, mx->varname_->fv)->s,
1335                             dict_get_var (default_dict,
1336                                           mx->first_continuous + i)->name,
1337                             8);
1338         write_case (wc_data);
1339       }
1340   }
1341 }
1342
1343 /* Finally dump out everything from nr_data[] to the output file. */
1344 static void
1345 nr_output_data (struct nr_aux_data *nr, struct ccase *c,
1346                 write_case_func *write_case, write_case_data wc_data)
1347 {
1348   struct matrix_data_pgm *mx = nr->mx;
1349
1350   {
1351     struct variable *const *split;
1352     size_t split_cnt;
1353     size_t i;
1354
1355     split_cnt = dict_get_split_cnt (default_dict);
1356     split = dict_get_split_vars (default_dict);
1357     for (i = 0; i < split_cnt; i++)
1358       case_data_rw (c, split[i]->fv)->f = nr->split_values[i];
1359   }
1360
1361   if (mx->n_factors)
1362     {
1363       int cell;
1364
1365       for (cell = 0; cell < mx->cells; cell++)
1366         {
1367           {
1368             int factor;
1369
1370             for (factor = 0; factor < mx->n_factors; factor++)
1371               {
1372                 case_data_rw (c, mx->factors[factor]->fv)->f
1373                   = nr->factor_values[factor + cell * mx->n_factors];
1374                 debug_printf (("f:%s ", mx->factors[factor]->name));
1375               }
1376           }
1377
1378           {
1379             int content;
1380
1381             for (content = 0; content <= PROX; content++)
1382               if (mx->is_per_factor[content])
1383                 {
1384                   assert (nr->data[content] != NULL
1385                           && nr->data[content][cell] != NULL);
1386
1387                   dump_cell_content (mx, content, nr->data[content][cell],
1388                                      c, write_case, wc_data);
1389                 }
1390           }
1391         }
1392     }
1393
1394   {
1395     int content;
1396
1397     {
1398       int factor;
1399
1400       for (factor = 0; factor < mx->n_factors; factor++)
1401         case_data_rw (c, mx->factors[factor]->fv)->f = SYSMIS;
1402     }
1403
1404     for (content = 0; content <= PROX; content++)
1405       if (!mx->is_per_factor[content] && nr->data[content] != NULL)
1406         dump_cell_content (mx, content, nr->data[content][0],
1407                            c, write_case, wc_data);
1408   }
1409 }
1410 \f
1411 /* Back end, with ROWTYPE_. */
1412
1413 /* All the data for one set of factor values. */
1414 struct factor_data
1415   {
1416     double *factors;
1417     int n_rows[PROX + 1];
1418     double *data[PROX + 1];
1419     struct factor_data *next;
1420   };
1421
1422 /* With ROWTYPE_ auxiliary data. */
1423 struct wr_aux_data
1424   {
1425     struct matrix_data_pgm *mx;         /* MATRIX DATA program. */
1426     int content;                        /* Type of current row. */
1427     double *split_values;               /* SPLIT FILE variable values. */
1428     struct factor_data *data;           /* All the data. */
1429     struct factor_data *current;        /* Current factor. */
1430   };
1431
1432 static int wr_read_splits (struct wr_aux_data *, struct ccase *,
1433                            write_case_func *, write_case_data);
1434 static int wr_output_data (struct wr_aux_data *, struct ccase *,
1435                            write_case_func *, write_case_data);
1436 static int wr_read_rowtype (struct wr_aux_data *,
1437                             const struct matrix_token *, struct file_handle *);
1438 static int wr_read_factors (struct wr_aux_data *);
1439 static int wr_read_indeps (struct wr_aux_data *);
1440 static void matrix_data_read_with_rowtype (struct case_source *,
1441                                            struct ccase *,
1442                                            write_case_func *,
1443                                            write_case_data);
1444
1445 /* When ROWTYPE_ appears in the data, reads the matrices and writes
1446    them to the output file. */
1447 static void
1448 read_matrices_with_rowtype (struct matrix_data_pgm *mx)
1449 {
1450   struct wr_aux_data wr;
1451
1452   wr.mx = mx;
1453   wr.content = -1;
1454   wr.split_values = NULL;
1455   wr.data = NULL;
1456   wr.current = NULL;
1457   mx->cells = 0;
1458
1459   vfm_source = create_case_source (&matrix_data_with_rowtype_source_class,
1460                                    default_dict, &wr);
1461   procedure (NULL, NULL);
1462
1463   free (wr.split_values);
1464   fh_close_handle (mx->data_file);
1465 }
1466
1467 /* Read from the data file and write it to the active file. */
1468 static void
1469 matrix_data_read_with_rowtype (struct case_source *source,
1470                                struct ccase *c,
1471                                write_case_func *write_case,
1472                                write_case_data wc_data)
1473 {
1474   struct wr_aux_data *wr = source->aux;
1475   struct matrix_data_pgm *mx = wr->mx;
1476
1477   do
1478     {
1479       if (!wr_read_splits (wr, c, write_case, wc_data))
1480         return;
1481
1482       if (!wr_read_factors (wr))
1483         return;
1484
1485       if (!wr_read_indeps (wr))
1486         return;
1487     }
1488   while (another_token (mx->data_file));
1489
1490   wr_output_data (wr, c, write_case, wc_data);
1491 }
1492
1493 /* Read the split file variables.  If they differ from the previous
1494    set of split variables then output the data.  Returns success. */
1495 static int
1496 wr_read_splits (struct wr_aux_data *wr,
1497                 struct ccase *c,
1498                 write_case_func *write_case, write_case_data wc_data)
1499 {
1500   struct matrix_data_pgm *mx = wr->mx;
1501   int compare;
1502   size_t split_cnt;
1503
1504   split_cnt = dict_get_split_cnt (default_dict);
1505   if (split_cnt == 0)
1506     return 1;
1507
1508   if (wr->split_values)
1509     compare = 1;
1510   else
1511     {
1512       compare = 0;
1513       wr->split_values = xmalloc (split_cnt * sizeof *wr->split_values);
1514     }
1515
1516   {
1517     int different = 0;
1518     int i;
1519
1520     for (i = 0; i < split_cnt; i++)
1521       {
1522         struct matrix_token token;
1523         if (!mget_token (&token, mx->data_file))
1524           return 0;
1525         if (token.type != MNUM)
1526           {
1527             msg (SE, _("Syntax error %s expecting SPLIT FILE value."),
1528                  context (mx->data_file));
1529             return 0;
1530           }
1531
1532         if (compare && wr->split_values[i] != token.number && !different)
1533           {
1534             if (!wr_output_data (wr, c, write_case, wc_data))
1535               return 0;
1536             different = 1;
1537             mx->cells = 0;
1538           }
1539         wr->split_values[i] = token.number;
1540       }
1541   }
1542
1543   return 1;
1544 }
1545
1546 /* Compares doubles A and B, treating SYSMIS as greatest. */
1547 static int
1548 compare_doubles (const void *a_, const void *b_, void *aux UNUSED)
1549 {
1550   const double *a = a_;
1551   const double *b = b_;
1552
1553   if (*a == *b)
1554     return 0;
1555   else if (*a == SYSMIS)
1556     return 1;
1557   else if (*b == SYSMIS)
1558     return -1;
1559   else if (*a > *b)
1560     return 1;
1561   else
1562     return -1;
1563 }
1564
1565 /* Return strcmp()-type comparison of the MX->n_factors factors at _A and
1566    _B.  Sort missing values toward the end. */
1567 static int
1568 compare_factors (const void *a_, const void *b_, void *mx_)
1569 {
1570   struct matrix_data_pgm *mx = mx_;
1571   struct factor_data *const *pa = a_;
1572   struct factor_data *const *pb = b_;
1573   const double *a = (*pa)->factors;
1574   const double *b = (*pb)->factors;
1575
1576   return lexicographical_compare_3way (a, mx->n_factors,
1577                                        b, mx->n_factors,
1578                                        sizeof *a,
1579                                        compare_doubles, NULL);
1580 }
1581
1582 /* Write out the data for the current split file to the active
1583    file. */
1584 static int
1585 wr_output_data (struct wr_aux_data *wr,
1586                 struct ccase *c,
1587                 write_case_func *write_case, write_case_data wc_data)
1588 {
1589   struct matrix_data_pgm *mx = wr->mx;
1590
1591   {
1592     struct variable *const *split;
1593     size_t split_cnt;
1594     size_t i;
1595
1596     split_cnt = dict_get_split_cnt (default_dict);
1597     split = dict_get_split_vars (default_dict);
1598     for (i = 0; i < split_cnt; i++)
1599       case_data_rw (c, split[i]->fv)->f = wr->split_values[i];
1600   }
1601
1602   /* Sort the wr->data list. */
1603   {
1604     struct factor_data **factors;
1605     struct factor_data *iter;
1606     int i;
1607
1608     factors = xmalloc (sizeof *factors * mx->cells);
1609
1610     for (i = 0, iter = wr->data; iter; iter = iter->next, i++)
1611       factors[i] = iter;
1612
1613     sort (factors, mx->cells, sizeof *factors, compare_factors, mx);
1614
1615     wr->data = factors[0];
1616     for (i = 0; i < mx->cells - 1; i++)
1617       factors[i]->next = factors[i + 1];
1618     factors[mx->cells - 1]->next = NULL;
1619
1620     free (factors);
1621   }
1622
1623   /* Write out records for every set of factor values. */
1624   {
1625     struct factor_data *iter;
1626
1627     for (iter = wr->data; iter; iter = iter->next)
1628       {
1629         {
1630           int factor;
1631
1632           for (factor = 0; factor < mx->n_factors; factor++)
1633             {
1634               case_data_rw (c, mx->factors[factor]->fv)->f
1635                 = iter->factors[factor];
1636               debug_printf (("f:%s ", factors[factor]->name));
1637             }
1638         }
1639
1640         {
1641           int content;
1642
1643           for (content = 0; content <= PROX; content++)
1644             {
1645               if (!iter->n_rows[content])
1646                 continue;
1647
1648               {
1649                 int type = content_type[content];
1650                 int n_lines = (type == 1
1651                                ? (mx->n_continuous
1652                                   - (mx->section != FULL && mx->diag == NODIAGONAL))
1653                                : 1);
1654
1655                 if (n_lines != iter->n_rows[content])
1656                   {
1657                     msg (SE, _("Expected %d lines of data for %s content; "
1658                                "actually saw %d lines.  No data will be "
1659                                "output for this content."),
1660                          n_lines, content_names[content],
1661                          iter->n_rows[content]);
1662                     continue;
1663                   }
1664               }
1665
1666               fill_matrix (mx, content, iter->data[content]);
1667
1668               dump_cell_content (mx, content, iter->data[content],
1669                                  c, write_case, wc_data);
1670             }
1671         }
1672       }
1673   }
1674
1675   pool_destroy (mx->container);
1676   mx->container = pool_create ();
1677
1678   wr->data = wr->current = NULL;
1679
1680   return 1;
1681 }
1682
1683 /* Sets ROWTYPE_ based on the given TOKEN read from DATA_FILE.
1684    Return success. */
1685 static int
1686 wr_read_rowtype (struct wr_aux_data *wr,
1687                  const struct matrix_token *token,
1688                  struct file_handle *data_file)
1689 {
1690   if (wr->content != -1)
1691     {
1692       msg (SE, _("Multiply specified ROWTYPE_ %s."), context (data_file));
1693       return 0;
1694     }
1695   if (token->type != MSTR)
1696     {
1697       msg (SE, _("Syntax error %s expecting ROWTYPE_ string."),
1698            context (data_file));
1699       return 0;
1700     }
1701
1702   {
1703     char s[16];
1704     char *cp;
1705
1706     memcpy (s, token->string, min (15, token->length));
1707     s[min (15, token->length)] = 0;
1708
1709     for (cp = s; *cp; cp++)
1710       *cp = toupper ((unsigned char) *cp);
1711
1712     wr->content = string_to_content_type (s, NULL);
1713   }
1714
1715   if (wr->content == -1)
1716     {
1717       msg (SE, _("Syntax error %s."), context (data_file));
1718       return 0;
1719     }
1720
1721   return 1;
1722 }
1723
1724 /* Read the factors for the current row.  Select a set of factors and
1725    point wr_current to it. */
1726 static int
1727 wr_read_factors (struct wr_aux_data *wr)
1728 {
1729   struct matrix_data_pgm *mx = wr->mx;
1730   double *factor_values = local_alloc (sizeof *factor_values * mx->n_factors);
1731
1732   wr->content = -1;
1733   {
1734     int i;
1735
1736     for (i = 0; i < mx->n_factors; i++)
1737       {
1738         struct matrix_token token;
1739         if (!mget_token (&token, mx->data_file))
1740           goto lossage;
1741         if (token.type == MSTR)
1742           {
1743             if (!wr_read_rowtype (wr, &token, mx->data_file))
1744               goto lossage;
1745             if (!mget_token (&token, mx->data_file))
1746               goto lossage;
1747           }
1748         if (token.type != MNUM)
1749           {
1750             msg (SE, _("Syntax error expecting factor value %s."),
1751                  context (mx->data_file));
1752             goto lossage;
1753           }
1754
1755         factor_values[i] = token.number;
1756       }
1757   }
1758   if (wr->content == -1)
1759     {
1760       struct matrix_token token;
1761       if (!mget_token (&token, mx->data_file))
1762         goto lossage;
1763       if (!wr_read_rowtype (wr, &token, mx->data_file))
1764         goto lossage;
1765     }
1766
1767   /* Try the most recent factor first as a simple caching
1768      mechanism. */
1769   if (wr->current)
1770     {
1771       int i;
1772
1773       for (i = 0; i < mx->n_factors; i++)
1774         if (factor_values[i] != wr->current->factors[i])
1775           goto cache_miss;
1776       goto winnage;
1777     }
1778
1779   /* Linear search through the list. */
1780 cache_miss:
1781   {
1782     struct factor_data *iter;
1783
1784     for (iter = wr->data; iter; iter = iter->next)
1785       {
1786         int i;
1787
1788         for (i = 0; i < mx->n_factors; i++)
1789           if (factor_values[i] != iter->factors[i])
1790             goto next_item;
1791
1792         wr->current = iter;
1793         goto winnage;
1794
1795       next_item: ;
1796       }
1797   }
1798
1799   /* Not found.  Make a new item. */
1800   {
1801     struct factor_data *new = pool_alloc (mx->container, sizeof *new);
1802
1803     new->factors = pool_alloc (mx->container, sizeof *new->factors * mx->n_factors);
1804
1805     {
1806       int i;
1807
1808       for (i = 0; i < mx->n_factors; i++)
1809         new->factors[i] = factor_values[i];
1810     }
1811
1812     {
1813       int i;
1814
1815       for (i = 0; i <= PROX; i++)
1816         {
1817           new->n_rows[i] = 0;
1818           new->data[i] = NULL;
1819         }
1820     }
1821
1822     new->next = wr->data;
1823     wr->data = wr->current = new;
1824     mx->cells++;
1825   }
1826
1827 winnage:
1828   local_free (factor_values);
1829   return 1;
1830
1831 lossage:
1832   local_free (factor_values);
1833   return 0;
1834 }
1835
1836 /* Read the independent variables into wr->current. */
1837 static int
1838 wr_read_indeps (struct wr_aux_data *wr)
1839 {
1840   struct matrix_data_pgm *mx = wr->mx;
1841   struct factor_data *c = wr->current;
1842   const int type = content_type[wr->content];
1843   const int n_rows = c->n_rows[wr->content];
1844   double *cp;
1845   int n_cols;
1846
1847   /* Allocate room for data if necessary. */
1848   if (c->data[wr->content] == NULL)
1849     {
1850       int n_items = mx->n_continuous;
1851       if (type == 1)
1852         n_items *= mx->n_continuous;
1853
1854       c->data[wr->content] = pool_alloc (mx->container,
1855                                         sizeof **c->data * n_items);
1856     }
1857
1858   cp = &c->data[wr->content][n_rows * mx->n_continuous];
1859
1860   /* Figure out how much to read from this line. */
1861   switch (type)
1862     {
1863     case 0:
1864     case 2:
1865       if (n_rows > 0)
1866         {
1867           msg (SE, _("Duplicate specification for %s."),
1868                content_names[wr->content]);
1869           return 0;
1870         }
1871       if (type == 0)
1872         n_cols = mx->n_continuous;
1873       else
1874         n_cols = 1;
1875       break;
1876     case 1:
1877       if (n_rows >= mx->n_continuous - (mx->section != FULL && mx->diag == NODIAGONAL))
1878         {
1879           msg (SE, _("Too many rows of matrix data for %s."),
1880                content_names[wr->content]);
1881           return 0;
1882         }
1883
1884       switch (mx->section)
1885         {
1886         case LOWER:
1887           n_cols = n_rows + 1;
1888           if (mx->diag == NODIAGONAL)
1889             cp += mx->n_continuous;
1890           break;
1891         case UPPER:
1892           cp += n_rows;
1893           n_cols = mx->n_continuous - n_rows;
1894           if (mx->diag == NODIAGONAL)
1895             {
1896               n_cols--;
1897               cp++;
1898             }
1899           break;
1900         case FULL:
1901           n_cols = mx->n_continuous;
1902           break;
1903         default:
1904           assert (0);
1905           abort ();
1906         }
1907       break;
1908     default:
1909       assert (0);
1910       abort ();
1911     }
1912   c->n_rows[wr->content]++;
1913
1914   debug_printf ((" (c=%p,r=%d,n=%d)", c, n_rows + 1, n_cols));
1915
1916   /* Read N_COLS items at CP. */
1917   {
1918     int j;
1919
1920     for (j = 0; j < n_cols; j++)
1921       {
1922         struct matrix_token token;
1923         if (!mget_token (&token, mx->data_file))
1924           return 0;
1925         if (token.type != MNUM)
1926           {
1927             msg (SE, _("Syntax error expecting value for %s %s."),
1928                  dict_get_var (default_dict, mx->first_continuous + j)->name,
1929                  context (mx->data_file));
1930             return 0;
1931           }
1932
1933         *cp++ = token.number;
1934       }
1935     if (mx->fmt != FREE
1936         && !force_eol (mx->data_file, content_names[wr->content]))
1937       return 0;
1938     debug_printf (("\n"));
1939   }
1940
1941   return 1;
1942 }
1943 \f
1944 /* Matrix source. */
1945
1946 static const struct case_source_class matrix_data_with_rowtype_source_class =
1947   {
1948     "MATRIX DATA",
1949     NULL,
1950     matrix_data_read_with_rowtype,
1951     NULL,
1952   };
1953
1954 static const struct case_source_class
1955 matrix_data_without_rowtype_source_class =
1956   {
1957     "MATRIX DATA",
1958     NULL,
1959     matrix_data_read_without_rowtype,
1960     NULL,
1961   };