1 /* PSPP - computes sample statistics.
2 Copyright (C) 1997-9, 2000 Free Software Foundation, Inc.
3 Written by Ben Pfaff <blp@gnu.org>.
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 /* AIX requires this to be the first thing in the file. */
23 #define alloca __builtin_alloca
31 #ifndef alloca /* predefined by HP cc +Olibcalls */
47 #include "file-handle.h"
55 #include "debug-print.h"
57 /* FIXME: /N subcommand not implemented. It should be pretty simple,
60 /* Format type enums. */
67 /* Matrix section enums. */
75 /* Diagonal inclusion enums. */
103 /* 0=vector, 1=matrix, 2=scalar. */
104 static int content_type[PROX + 1] =
106 0, 2, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1,
109 /* Name of each content type. */
110 static const char *content_names[PROX + 1] =
112 "N", "N", "N_MATRIX", "MEAN", "STDDEV", "COUNT", "MSE",
113 "DFE", "MAT", "COV", "CORR", "PROX",
116 /* The data file to be read. */
117 static struct file_handle *data_file;
120 static int fmt; /* LIST or FREE. */
121 static int section; /* LOWER or UPPER or FULL. */
122 static int diag; /* DIAGONAL or NODIAGONAL. */
124 /* Arena used for all the MATRIX DATA allocations. */
125 static struct pool *container;
127 /* ROWTYPE_ specified explicitly in data? */
128 static int explicit_rowtype;
130 /* ROWTYPE_, VARNAME_ variables. */
131 static struct variable *rowtype_, *varname_;
133 /* Is is per-factor data? */
134 int is_per_factor[PROX + 1];
136 /* Single SPLIT FILE variable. */
137 static struct variable *single_split;
139 /* Factor variables. */
140 static int n_factors;
141 static struct variable **factors;
143 /* Number of cells, or -1 if none. */
146 /* Population N specified by user. */
149 /* CONTENTS subcommand. */
150 static int contents[EOC * 3 + 1];
151 static int n_contents;
153 /* Number of continuous variables. */
154 static int n_continuous;
156 /* Index into default_dict.var of first continuous variables. */
157 static int first_continuous;
159 static int compare_variables_by_mxd_vartype (const void *pa,
161 static void read_matrices_without_rowtype (void);
162 static void read_matrices_with_rowtype (void);
163 static int string_to_content_type (char *, int *);
166 static void debug_print (void);
170 cmd_matrix_data (void)
174 lex_match_id ("MATRIX");
175 lex_match_id ("DATA");
177 container = pool_create ();
179 discard_variables ();
181 data_file = inline_file;
195 if (lex_match_id ("VARIABLES"))
202 msg (SE, _("VARIABLES subcommand multiply specified."));
208 if (!parse_DATA_LIST_vars (&v, &nv, PV_NO_DUPLICATE))
214 for (i = 0; i < nv; i++)
215 if (!strcmp (v[i], "VARNAME_"))
217 msg (SE, _("VARNAME_ cannot be explicitly specified on "
219 for (i = 0; i < nv; i++)
229 for (i = 0; i < nv; i++)
231 struct variable *new_var;
233 if (strcmp (v[i], "ROWTYPE_"))
235 new_var = force_create_variable (&default_dict, v[i],
237 new_var->p.mxd.vartype = MXD_CONTINUOUS;
238 new_var->p.mxd.subtype = i;
241 explicit_rowtype = 1;
248 rowtype_ = force_create_variable (&default_dict, "ROWTYPE_",
250 rowtype_->p.mxd.vartype = MXD_ROWTYPE;
251 rowtype_->p.mxd.subtype = 0;
254 else if (lex_match_id ("FILE"))
257 data_file = fh_parse_file_handle ();
261 else if (lex_match_id ("FORMAT"))
265 while (token == T_ID)
267 if (lex_match_id ("LIST"))
269 else if (lex_match_id ("FREE"))
271 else if (lex_match_id ("LOWER"))
273 else if (lex_match_id ("UPPER"))
275 else if (lex_match_id ("FULL"))
277 else if (lex_match_id ("DIAGONAL"))
279 else if (lex_match_id ("NODIAGONAL"))
283 lex_error (_("in FORMAT subcommand"));
288 else if (lex_match_id ("SPLIT"))
294 msg (SE, _("SPLIT subcommand multiply specified."));
301 lex_error (_("in SPLIT subcommand"));
305 if (!is_varname (tokid)
306 && (lex_look_ahead () == '.' || lex_look_ahead () == '/'))
308 if (!strcmp (tokid, "ROWTYPE_") || !strcmp (tokid, "VARNAME_"))
310 msg (SE, _("Split variable may not be named ROWTYPE_ "
315 single_split = force_create_variable (&default_dict, tokid,
319 single_split->p.mxd.vartype = MXD_CONTINUOUS;
321 default_dict.n_splits = 1;
322 default_dict.splits = xmalloc (2 * sizeof *default_dict.splits);
323 default_dict.splits[0] = single_split;
324 default_dict.splits[1] = NULL;
331 if (!parse_variables (NULL, &v, &n, PV_NO_DUPLICATE))
334 default_dict.n_splits = n;
335 default_dict.splits = v = xrealloc (v, sizeof *v * (n + 1));
342 for (i = 0; i < default_dict.n_splits; i++)
344 if (default_dict.splits[i]->p.mxd.vartype != MXD_CONTINUOUS)
346 msg (SE, _("Split variable %s is already another type."),
350 default_dict.splits[i]->p.mxd.vartype = MXD_SPLIT;
351 default_dict.splits[i]->p.mxd.subtype = i;
355 else if (lex_match_id ("FACTORS"))
361 msg (SE, _("FACTORS subcommand multiply specified."));
366 if (!parse_variables (NULL, &factors, &n_factors, PV_NONE))
372 for (i = 0; i < n_factors; i++)
374 if (factors[i]->p.mxd.vartype != MXD_CONTINUOUS)
376 msg (SE, _("Factor variable %s is already another type."),
380 factors[i]->p.mxd.vartype = MXD_FACTOR;
381 factors[i]->p.mxd.subtype = i;
385 else if (lex_match_id ("CELLS"))
391 msg (SE, _("CELLS subcommand multiply specified."));
395 if (!lex_integer_p () || lex_integer () < 1)
397 lex_error (_("expecting positive integer"));
401 cells = lex_integer ();
404 else if (lex_match_id ("N"))
410 msg (SE, _("N subcommand multiply specified."));
414 if (!lex_integer_p () || lex_integer () < 1)
416 lex_error (_("expecting positive integer"));
420 pop_n = lex_integer ();
423 else if (lex_match_id ("CONTENTS"))
425 int inside_parens = 0;
426 unsigned collide = 0;
431 msg (SE, _("CONTENTS subcommand multiply specified."));
441 for (i = 0; i <= PROX; i++)
442 is_per_factor[i] = 0;
451 msg (SE, _("Nested parentheses not allowed."));
457 else if (lex_match (')'))
461 msg (SE, _("Mismatched right parenthesis (`(')."));
464 if (contents[n_contents - 1] == LPAREN)
466 msg (SE, _("Empty parentheses not allowed."));
479 lex_error (_("in CONTENTS subcommand"));
483 content_type = string_to_content_type (tokid,
485 if (content_type == -1)
487 lex_error (_("in CONTENTS subcommand"));
492 if (collide & (1 << collide_index))
494 msg (SE, _("Content multiply specified for %s."),
495 content_names[content_type]);
498 collide |= (1 << collide_index);
501 is_per_factor[item] = inside_parens;
503 contents[n_contents++] = item;
505 if (token == '/' || token == '.')
511 msg (SE, _("Missing right parenthesis."));
514 contents[n_contents] = EOC;
525 lex_error (_("expecting end of command"));
531 msg (SE, _("Missing VARIABLES subcommand."));
535 if (!n_contents && !explicit_rowtype)
537 msg (SW, _("CONTENTS subcommand not specified: assuming file "
538 "contains only CORR matrix."));
545 if (n_factors && !explicit_rowtype && cells == -1)
547 msg (SE, _("Missing CELLS subcommand. CELLS is required "
548 "when ROWTYPE_ is not given in the data and "
549 "factors are present."));
553 if (explicit_rowtype && single_split)
555 msg (SE, _("Split file values must be present in the data when "
556 "ROWTYPE_ is present."));
560 /* Create VARNAME_. */
562 varname_ = force_create_variable (&default_dict, "VARNAME_",
564 varname_->p.mxd.vartype = MXD_VARNAME;
565 varname_->p.mxd.subtype = 0;
568 /* Sort the dictionary variables into the desired order for the
569 system file output. */
573 qsort (default_dict.var, default_dict.nvar, sizeof *default_dict.var,
574 compare_variables_by_mxd_vartype);
576 for (i = 0; i < default_dict.nvar; i++)
577 default_dict.var[i]->index = i;
582 static const struct fmt_spec fmt_tab[MXD_COUNT] =
593 first_continuous = -1;
594 for (i = 0; i < default_dict.nvar; i++)
596 struct variable *v = default_dict.var[i];
597 int type = v->p.mxd.vartype;
599 assert (type >= 0 && type < MXD_COUNT);
600 v->print = v->write = fmt_tab[type];
602 if (type == MXD_CONTINUOUS)
604 if (first_continuous == -1 && type == MXD_CONTINUOUS)
605 first_continuous = i;
609 if (n_continuous == 0)
611 msg (SE, _("No continuous variables specified."));
619 if (explicit_rowtype)
620 read_matrices_with_rowtype ();
622 read_matrices_without_rowtype ();
624 pool_destroy (container);
629 discard_variables ();
631 pool_destroy (container);
635 /* Look up string S as a content-type name and return the
636 corresponding enumerated value, or -1 if there is no match. If
637 COLLIDE is non-NULL then *COLLIDE returns a value (suitable for use
638 as a bit-index) which can be used for determining whether a related
639 statistic has already been used. */
641 string_to_content_type (char *s, int *collide)
652 {N_VECTOR, 0, "N_VECTOR"},
654 {N_SCALAR, 0, "N_SCALAR"},
655 {N_MATRIX, 1, "N_MATRIX"},
657 {STDDEV, 3, "STDDEV"},
669 for (tp = tab; tp->value != -1; tp++)
670 if (!strcmp (s, tp->string))
673 *collide = tp->collide;
680 /* Compare two variables using p.mxd.vartype and p.mxd.subtype
683 compare_variables_by_mxd_vartype (const void *a_, const void *b_)
685 struct variable *const *pa = a_;
686 struct variable *const *pb = b_;
687 const struct matrix_data_proc *a = &(*pa)->p.mxd;
688 const struct matrix_data_proc *b = &(*pb)->p.mxd;
690 if (a->vartype != b->vartype)
691 return a->vartype > b->vartype ? 1 : -1;
693 return a->subtype < b->subtype ? -1 : a->subtype > b->subtype;
697 /* Print out the command as input. */
701 printf ("MATRIX DATA\n\t/VARIABLES=");
706 for (i = 0; i < default_dict.nvar; i++)
707 printf ("%s ", default_dict.var[i]->name);
711 printf ("\t/FORMAT=");
714 else if (fmt == FREE)
718 if (section == LOWER)
720 else if (section == UPPER)
722 else if (section == FULL)
726 if (diag == DIAGONAL)
727 printf (" DIAGONAL\n");
728 else if (diag == NODIAGONAL)
729 printf (" NODIAGONAL\n");
733 if (default_dict.n_splits)
737 printf ("\t/SPLIT=");
738 for (i = 0; i < default_dict.n_splits; i++)
739 printf ("%s ", default_dict.splits[i]->name);
741 printf ("\t/* single split");
749 printf ("\t/FACTORS=");
750 for (i = 0; i < n_factors; i++)
751 printf ("%s ", factors[i]->name);
756 printf ("\t/CELLS=%d\n", cells);
759 printf ("\t/N=%d\n", pop_n);
766 printf ("\t/CONTENTS=");
767 for (i = 0; i < n_contents; i++)
769 if (contents[i] == LPAREN)
776 else if (contents[i] == RPAREN)
784 assert (contents[i] >= 0 && contents[i] <= PROX);
787 printf ("%s", content_names[contents[i]]);
794 #endif /* DEBUGGING */
796 /* Matrix tokenizer. */
798 /* Matrix token types. */
801 MNULL, /* No token. */
804 MSTOP /* End of file. */
807 /* Current matrix token. */
810 /* Token string if applicable; not null-terminated. */
811 static char *mtokstr;
813 /* Length of mtokstr in characters. */
816 /* Token value if applicable. */
817 static double mtokval;
819 static int mget_token (void);
822 #define mget_token() mget_token_dump()
825 mget_token_dump (void)
827 int result = (mget_token) ();
838 printf (" <NULLTOK>");
841 printf (" #%g", mtokval);
844 printf (" #'%.*s'", mtoklen, mtokstr);
856 /* Return the current position in the data file. */
862 char *p = dfm_get_record (data_file, &len);
865 strcpy (buf, "at end of line");
869 int n_copy = min (10, len);
870 cp = stpcpy (buf, "before `");
871 while (n_copy && isspace ((unsigned char) *p))
873 while (n_copy && !isspace ((unsigned char) *p))
874 *cp++ = *p++, n_copy--;
882 /* Is there at least one token left in the data file? */
894 cp = dfm_get_record (data_file, &len);
899 while (isspace ((unsigned char) *cp) && cp < ep)
905 dfm_fwd_record (data_file);
908 dfm_set_record (data_file, cp);
913 /* Parse a MATRIX DATA token from data_file into mtok*. */
923 cp = dfm_get_record (data_file, &len);
933 while (isspace ((unsigned char) *cp) && cp < ep)
939 dfm_fwd_record (data_file);
942 dfm_set_record (data_file, cp);
943 first_column = dfm_get_cur_col (data_file) + 1;
945 /* Three types of fields: quoted with ', quoted with ", unquoted. */
946 if (*cp == '\'' || *cp == '"')
952 while (cp < ep && *cp != quote)
954 mtoklen = cp - mtokstr;
958 msg (SW, _("Scope of string exceeds line."));
962 int is_num = isdigit ((unsigned char) *cp) || *cp == '.';
965 while (cp < ep && !isspace ((unsigned char) *cp) && *cp != ','
966 && *cp != '-' && *cp != '+')
968 if (isdigit ((unsigned char) *cp))
971 if ((tolower ((unsigned char) *cp) == 'd'
972 || tolower ((unsigned char) *cp) == 'e')
973 && (cp[1] == '+' || cp[1] == '-'))
979 mtoklen = cp - mtokstr;
987 di.e = mtokstr + mtoklen;
988 di.v = (union value *) &mtokval;
989 di.f1 = first_column;
990 di.format.type = FMT_F;
991 di.format.w = mtoklen;
1001 dfm_set_record (data_file, cp);
1006 /* Forcibly skip the end of a line for content type CONTENT in
1009 force_eol (const char *content)
1017 cp = dfm_get_record (data_file, &len);
1020 while (len && isspace (*cp))
1025 msg (SE, _("End of line expected %s while reading %s."),
1026 context (), content);
1030 dfm_fwd_record (data_file);
1035 /* Back end, omitting ROWTYPE_. */
1037 /* MATRIX DATA data. */
1038 static double ***nr_data;
1040 /* Factor values. */
1041 static double *nr_factor_values;
1043 /* Largest-numbered cell that we have read in thus far, plus one. */
1044 static int max_cell_index;
1046 /* SPLIT FILE variable values. */
1047 static double *split_values;
1049 static int nr_read_splits (int compare);
1050 static int nr_read_factors (int cell);
1051 static void nr_output_data (void);
1052 static int matrix_data_read_without_rowtype (void);
1054 /* Read from the data file and write it to the active file. */
1056 read_matrices_without_rowtype (void)
1062 split_values = xmalloc (sizeof *split_values * default_dict.n_splits);
1063 nr_factor_values = xmalloc (sizeof *nr_factor_values * n_factors * cells);
1066 matrix_data_source.read = (void (*)(void)) matrix_data_read_without_rowtype;
1067 vfm_source = &matrix_data_source;
1069 procedure (NULL, NULL, NULL);
1071 free (split_values);
1072 free (nr_factor_values);
1074 fh_close_handle (data_file);
1077 /* Mirror data across the diagonal of matrix CP which contains
1078 CONTENT type data. */
1080 fill_matrix (int content, double *cp)
1082 int type = content_type[content];
1084 if (type == 1 && section != FULL)
1086 if (diag == NODIAGONAL)
1088 const double fill = content == CORR ? 1.0 : SYSMIS;
1091 for (i = 0; i < n_continuous; i++)
1092 cp[i * (1 + n_continuous)] = fill;
1098 if (section == LOWER)
1100 int n_lines = n_continuous;
1101 if (section != FULL && diag == NODIAGONAL)
1104 for (r = 1; r < n_lines; r++)
1105 for (c = 0; c < r; c++)
1106 cp[r + c * n_continuous] = cp[c + r * n_continuous];
1110 assert (section == UPPER);
1111 for (r = 1; r < n_continuous; r++)
1112 for (c = 0; c < r; c++)
1113 cp[c + r * n_continuous] = cp[r + c * n_continuous];
1121 for (c = 1; c < n_continuous; c++)
1126 /* Read data lines for content type CONTENT from the data file. If
1127 PER_FACTOR is nonzero, then factor information is read from the
1128 data file. Data is for cell number CELL. */
1130 nr_read_data_lines (int per_factor, int cell, int content, int compare)
1133 const int type = content_type[content];
1135 /* Number of lines that must be parsed from the data file for this
1139 /* Current position in vector or matrix. */
1149 n_lines = n_continuous;
1150 if (section != FULL && diag == NODIAGONAL)
1154 cp = nr_data[content][cell];
1155 if (type == 1 && section == LOWER && diag == NODIAGONAL)
1158 for (i = 0; i < n_lines; i++)
1162 if (!nr_read_splits (1))
1164 if (per_factor && !nr_read_factors (cell))
1171 n_cols = n_continuous;
1181 n_cols = n_continuous - i;
1182 if (diag == NODIAGONAL)
1189 n_cols = n_continuous;
1205 for (j = 0; j < n_cols; j++)
1211 msg (SE, _("expecting value for %s %s"),
1212 default_dict.var[j]->name, context ());
1218 if (!force_eol (content_names[content]))
1220 debug_printf (("\n"));
1223 if (section == LOWER)
1224 cp += n_continuous - n_cols;
1227 fill_matrix (content, nr_data[content][cell]);
1232 /* When ROWTYPE_ does not appear in the data, reads the matrices and
1233 writes them to the output file. Returns success. */
1235 matrix_data_read_without_rowtype (void)
1240 nr_data = pool_alloc (container, (PROX + 1) * sizeof *nr_data);
1245 for (i = 0; i <= PROX; i++)
1249 for (cp = contents; *cp != EOC; cp++)
1250 if (*cp != LPAREN && *cp != RPAREN)
1252 int per_factor = is_per_factor[*cp];
1255 n_entries = n_continuous;
1256 if (content_type[*cp] == 1)
1257 n_entries *= n_continuous;
1260 int n_vectors = per_factor ? cells : 1;
1263 nr_data[*cp] = pool_alloc (container,
1264 n_vectors * sizeof **nr_data);
1266 for (i = 0; i < n_vectors; i++)
1267 nr_data[*cp][i] = pool_alloc (container,
1268 n_entries * sizeof ***nr_data);
1277 if (!nr_read_splits (0))
1280 for (bp = contents; *bp != EOC; bp = np)
1284 /* Trap the CONTENTS that we should parse in this pass
1285 between bp and ep. Set np to the starting bp for next
1290 while (*ep != RPAREN)
1298 while (*ep != EOC && *ep != LPAREN)
1307 for (i = 0; i < (per_factor ? cells : 1); i++)
1311 for (cp = bp; cp < ep; cp++)
1312 if (!nr_read_data_lines (per_factor, i, *cp, cp != bp))
1320 if (default_dict.n_splits == 0 || !another_token ())
1325 /* Read the split file variables. If COMPARE is 1, compares the
1326 values read to the last values read and returns 1 if they're equal,
1329 nr_read_splits (int compare)
1331 static int just_read = 0;
1333 if (compare && just_read)
1339 if (default_dict.n_splits == 0)
1345 split_values[0] = ++default_dict.splits[0]->p.mxd.subtype;
1355 for (i = 0; i < default_dict.n_splits; i++)
1361 msg (SE, _("Syntax error expecting SPLIT FILE value %s."),
1367 split_values[i] = mtokval;
1368 else if (split_values[i] != mtokval)
1370 msg (SE, _("Expecting value %g for %s."),
1371 split_values[i], default_dict.splits[i]->name);
1380 /* Read the factors for cell CELL. If COMPARE is 1, compares the
1381 values read to the last values read and returns 1 if they're equal,
1384 nr_read_factors (int cell)
1391 assert (max_cell_index >= cell);
1392 if (cell != max_cell_index)
1403 for (i = 0; i < n_factors; i++)
1409 msg (SE, _("Syntax error expecting factor value %s."),
1415 nr_factor_values[i + n_factors * cell] = mtokval;
1416 else if (nr_factor_values[i + n_factors * cell] != mtokval)
1418 msg (SE, _("Syntax error expecting value %g for %s %s."),
1419 nr_factor_values[i + n_factors * cell],
1420 factors[i]->name, context ());
1429 /* Write the contents of a cell having content type CONTENT and data
1430 CP to the active file. */
1432 dump_cell_content (int content, double *cp)
1434 int type = content_type[content];
1437 st_bare_pad_copy (temp_case->data[rowtype_->fv].s,
1438 content_names[content], 8);
1441 memset (&temp_case->data[varname_->fv].s, ' ', 8);
1445 int n_lines = (type == 1) ? n_continuous : 1;
1448 for (i = 0; i < n_lines; i++)
1452 for (j = 0; j < n_continuous; j++)
1454 temp_case->data[(default_dict.var
1455 [first_continuous + j]->fv)].f = *cp;
1456 debug_printf (("c:%s(%g) ",
1457 default_dict.var[first_continuous + j]->name,
1462 st_bare_pad_copy (temp_case->data[varname_->fv].s,
1463 default_dict.var[first_continuous + i]->name,
1465 debug_printf (("\n"));
1471 /* Finally dump out everything from nr_data[] to the output file. */
1473 nr_output_data (void)
1478 for (i = 0; i < default_dict.n_splits; i++)
1479 temp_case->data[default_dict.splits[i]->fv].f = split_values[i];
1486 for (cell = 0; cell < cells; cell++)
1491 for (factor = 0; factor < n_factors; factor++)
1493 temp_case->data[factors[factor]->fv].f
1494 = nr_factor_values[factor + cell * n_factors];
1495 debug_printf (("f:%s ", factors[factor]->name));
1502 for (content = 0; content <= PROX; content++)
1503 if (is_per_factor[content])
1505 assert (nr_data[content] != NULL
1506 && nr_data[content][cell] != NULL);
1508 dump_cell_content (content, nr_data[content][cell]);
1520 for (factor = 0; factor < n_factors; factor++)
1521 temp_case->data[factors[factor]->fv].f = SYSMIS;
1524 for (content = 0; content <= PROX; content++)
1525 if (!is_per_factor[content] && nr_data[content] != NULL)
1526 dump_cell_content (content, nr_data[content][0]);
1530 /* Back end, with ROWTYPE_. */
1532 /* Type of current row. */
1533 static int wr_content;
1535 /* All the data for one set of factor values. */
1539 int n_rows[PROX + 1];
1540 double *data[PROX + 1];
1541 struct factor_data *next;
1544 /* All the data, period. */
1545 struct factor_data *wr_data;
1547 /* Current factor. */
1548 struct factor_data *wr_current;
1550 static int wr_read_splits (void);
1551 static int wr_output_data (void);
1552 static int wr_read_rowtype (void);
1553 static int wr_read_factors (void);
1554 static int wr_read_indeps (void);
1555 static int matrix_data_read_with_rowtype (void);
1557 /* When ROWTYPE_ appears in the data, reads the matrices and writes
1558 them to the output file. */
1560 read_matrices_with_rowtype (void)
1563 wr_data = wr_current = NULL;
1564 split_values = NULL;
1567 matrix_data_source.read = (void (*)(void)) matrix_data_read_with_rowtype;
1568 vfm_source = &matrix_data_source;
1570 procedure (NULL, NULL, NULL);
1572 free (split_values);
1573 fh_close_handle (data_file);
1576 /* Read from the data file and write it to the active file. */
1578 matrix_data_read_with_rowtype (void)
1582 if (!wr_read_splits ())
1585 if (!wr_read_factors ())
1588 if (!wr_read_indeps ())
1591 while (another_token ());
1597 /* Read the split file variables. If they differ from the previous
1598 set of split variables then output the data. Returns success. */
1600 wr_read_splits (void)
1604 if (default_dict.n_splits == 0)
1612 split_values = xmalloc (sizeof *split_values * default_dict.n_splits);
1619 for (i = 0; i < default_dict.n_splits; i++)
1625 msg (SE, _("Syntax error %s expecting SPLIT FILE value."),
1630 if (compare && split_values[i] != mtokval && !different)
1632 if (!wr_output_data ())
1637 split_values[i] = mtokval;
1644 /* Return strcmp()-type comparison of the n_factors factors at _A and
1645 _B. Sort missing values toward the end. */
1647 compare_factors (const void *pa, const void *pb)
1649 const double *a = (*(struct factor_data **) pa)->factors;
1650 const double *b = (*(struct factor_data **) pb)->factors;
1653 for (i = 0; i < n_factors; i++, a++, b++)
1660 else if (*b == SYSMIS)
1663 return *a - *b < 0 ? -1 : 1;
1669 /* Write out the data for the current split file to the active
1672 wr_output_data (void)
1677 for (i = 0; i < default_dict.n_splits; i++)
1678 temp_case->data[default_dict.splits[i]->fv].f = split_values[i];
1681 /* Sort the wr_data list. */
1683 struct factor_data **factors;
1684 struct factor_data *iter;
1687 factors = xmalloc (sizeof *factors * cells);
1689 for (i = 0, iter = wr_data; iter; iter = iter->next, i++)
1692 qsort (factors, cells, sizeof *factors, compare_factors);
1694 wr_data = factors[0];
1695 for (i = 0; i < cells - 1; i++)
1696 factors[i]->next = factors[i + 1];
1697 factors[cells - 1]->next = NULL;
1702 /* Write out records for every set of factor values. */
1704 struct factor_data *iter;
1706 for (iter = wr_data; iter; iter = iter->next)
1711 for (factor = 0; factor < n_factors; factor++)
1713 temp_case->data[factors[factor]->fv].f
1714 = iter->factors[factor];
1715 debug_printf (("f:%s ", factors[factor]->name));
1722 for (content = 0; content <= PROX; content++)
1724 if (!iter->n_rows[content])
1728 int type = content_type[content];
1729 int n_lines = (type == 1
1731 - (section != FULL && diag == NODIAGONAL))
1734 if (n_lines != iter->n_rows[content])
1736 msg (SE, _("Expected %d lines of data for %s content; "
1737 "actually saw %d lines. No data will be "
1738 "output for this content."),
1739 n_lines, content_names[content],
1740 iter->n_rows[content]);
1745 fill_matrix (content, iter->data[content]);
1747 dump_cell_content (content, iter->data[content]);
1753 pool_destroy (container);
1754 container = pool_create ();
1756 wr_data = wr_current = NULL;
1761 /* Read ROWTYPE_ from the data file. Return success. */
1763 wr_read_rowtype (void)
1765 if (wr_content != -1)
1767 msg (SE, _("Multiply specified ROWTYPE_ %s."), context ());
1772 msg (SE, _("Syntax error %s expecting ROWTYPE_ string."), context ());
1780 memcpy (s, mtokstr, min (15, mtoklen));
1781 s[min (15, mtoklen)] = 0;
1783 for (cp = s; *cp; cp++)
1784 *cp = toupper ((unsigned char) *cp);
1786 wr_content = string_to_content_type (s, NULL);
1789 if (wr_content == -1)
1791 msg (SE, _("Syntax error %s."), context ());
1798 /* Read the factors for the current row. Select a set of factors and
1799 point wr_current to it. */
1801 wr_read_factors (void)
1803 double *factor_values = local_alloc (sizeof *factor_values * n_factors);
1809 for (i = 0; i < n_factors; i++)
1815 if (!wr_read_rowtype ())
1822 msg (SE, _("Syntax error expecting factor value %s."),
1827 factor_values[i] = mtokval;
1830 if (wr_content == -1)
1834 if (!wr_read_rowtype ())
1838 /* Try the most recent factor first as a simple caching
1844 for (i = 0; i < n_factors; i++)
1845 if (factor_values[i] != wr_current->factors[i])
1850 /* Linear search through the list. */
1853 struct factor_data *iter;
1855 for (iter = wr_data; iter; iter = iter->next)
1859 for (i = 0; i < n_factors; i++)
1860 if (factor_values[i] != iter->factors[i])
1870 /* Not found. Make a new item. */
1872 struct factor_data *new = pool_alloc (container, sizeof *new);
1874 new->factors = pool_alloc (container, sizeof *new->factors * n_factors);
1879 for (i = 0; i < n_factors; i++)
1880 new->factors[i] = factor_values[i];
1886 for (i = 0; i <= PROX; i++)
1889 new->data[i] = NULL;
1893 new->next = wr_data;
1894 wr_data = wr_current = new;
1899 local_free (factor_values);
1903 local_free (factor_values);
1907 /* Read the independent variables into wr_current. */
1909 wr_read_indeps (void)
1911 struct factor_data *c = wr_current;
1912 const int type = content_type[wr_content];
1913 const int n_rows = c->n_rows[wr_content];
1917 /* Allocate room for data if necessary. */
1918 if (c->data[wr_content] == NULL)
1920 int n_items = n_continuous;
1922 n_items *= n_continuous;
1924 c->data[wr_content] = pool_alloc (container,
1925 sizeof **c->data * n_items);
1928 cp = &c->data[wr_content][n_rows * n_continuous];
1930 /* Figure out how much to read from this line. */
1937 msg (SE, _("Duplicate specification for %s."),
1938 content_names[wr_content]);
1942 n_cols = n_continuous;
1947 if (n_rows >= n_continuous - (section != FULL && diag == NODIAGONAL))
1949 msg (SE, _("Too many rows of matrix data for %s."),
1950 content_names[wr_content]);
1957 n_cols = n_rows + 1;
1958 if (diag == NODIAGONAL)
1963 n_cols = n_continuous - n_rows;
1964 if (diag == NODIAGONAL)
1971 n_cols = n_continuous;
1980 c->n_rows[wr_content]++;
1982 debug_printf ((" (c=%p,r=%d,n=%d)", c, n_rows + 1, n_cols));
1984 /* Read N_COLS items at CP. */
1988 for (j = 0; j < n_cols; j++)
1994 msg (SE, _("Syntax error expecting value for %s %s."),
1995 default_dict.var[first_continuous + j]->name, context ());
2001 if (!force_eol (content_names[wr_content]))
2003 debug_printf (("\n"));
2009 /* Matrix source. */
2011 struct case_stream matrix_data_source =