X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fmatrix-data.c;h=609e5eb1fd5156365093ac9c70851ab2dbcfce5b;hb=b6d66ec3f328d0e8bf35b71f29332695121f7173;hp=fb214fd644a399c517652e854ad2c2ba31be804b;hpb=bc37c3f489947cf081a930a5d4e58dbd133eb563;p=pspp

diff --git a/src/language/data-io/matrix-data.c b/src/language/data-io/matrix-data.c
index fb214fd644..609e5eb1fd 100644
--- a/src/language/data-io/matrix-data.c
+++ b/src/language/data-io/matrix-data.c
@@ -34,6 +34,7 @@
 #include "language/lexer/variable-parser.h"
 #include "libpspp/i18n.h"
 #include "libpspp/message.h"
+#include "libpspp/misc.h"
 
 #include "gl/xsize.h"
 #include "gl/xalloc.h"
@@ -65,6 +66,8 @@ enum triangle
     FULL
   };
 
+static const int ROWTYPE_WIDTH = 8; /* Width, in bytes, of the ROWTYPE_ string variable. */
+
 struct matrix_format
 {
   enum triangle triangle;
@@ -74,6 +77,7 @@ struct matrix_format
   int n_continuous_vars;
   struct variable **split_vars;
   size_t n_split_vars;
+  long n;
 };
 
 /*
@@ -94,19 +98,27 @@ valid rowtype_ values:
   PROX.
 */
 
-/* Sets the value of OUTCASE which corresponds to MFORMAT's varname variable
-   to the string STR. VAR must be of type string.
+/* Stores STR in OUTCASE as the value of the string variable VNAME, truncated
+   or right-padded with spaces to VNAME's width (strncpy would pad with NULs).
  */
 static void
-set_varname_column (struct ccase *outcase, const struct matrix_format *mformat,
-     const char *str, int len)
+set_varname_column (struct ccase *outcase, const struct variable *vname,
+     const char *str)
 {
-  const struct variable *var = mformat->varname;
-  uint8_t *s = value_str_rw (case_data_rw (outcase, var), len);
+  int len = var_get_width (vname);
 
-  strncpy ((char *) s, str, len);
+  value_copy_buf_rpad (case_data_rw (outcase, vname), len,
+                       (const uint8_t *) str, strlen (str), ' ');
 }
 
+/* Fills OUTCASE's value for the string variable VNAME with spaces. */
+static void
+blank_varname_column (struct ccase *outcase, const struct variable *vname)
+{
+  const int width = var_get_width (vname);
+
+  memset (value_str_rw (case_data_rw (outcase, vname), width), ' ', width);
+}
 
 static struct casereader *
 preprocess (struct casereader *casereader0, const struct dictionary *dict, void *aux)
@@ -115,7 +127,7 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void
   const struct caseproto *proto = casereader_get_proto (casereader0);
   struct casewriter *writer;
   writer = autopaging_writer_create (proto);
-
+  struct ccase *prev_case = NULL;
   double **matrices = NULL;
   size_t n_splits = 0;
 
@@ -155,10 +167,17 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void
       if (mformat->triangle == UPPER && mformat->diagonal == NO_DIAGONAL)
 	c_offset++;
       const union value *v = case_data (c, mformat->rowtype);
-      const char *val = (const char *) value_str (v, 8);
-      if (0 == strncasecmp (val, "corr    ", 8) ||
-	  0 == strncasecmp (val, "cov     ", 8))
+      const char *val = (const char *) value_str (v, ROWTYPE_WIDTH);
+      if (0 == strncasecmp (val, "corr    ", ROWTYPE_WIDTH) ||
+	  0 == strncasecmp (val, "cov     ", ROWTYPE_WIDTH))
 	{
+	  if (row >= mformat->n_continuous_vars)
+	    {
+	      msg (SE,
+		   _("There are %d variable declared but the data has at least %d matrix rows."),
+		   mformat->n_continuous_vars, row + 1);
+	      goto error;
+	    }
 	  int col;
 	  for (col = c_offset; col < mformat->n_continuous_vars; ++col)
 	    {
@@ -171,9 +190,12 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void
 	      if (e == SYSMIS)
 	      	continue;
 
-
+	      /* Fill in the lower triangle */
 	      (matrices[n_splits-1])[col + mformat->n_continuous_vars * row] = e;
-	      (matrices[n_splits-1]) [row + mformat->n_continuous_vars * col] = e;
+
+	      if (mformat->triangle != FULL)
+		/* Fill in the upper triangle */
+		(matrices[n_splits-1]) [row + mformat->n_continuous_vars * col] = e;
 	    }
 	  row++;
 	}
@@ -184,7 +206,25 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void
      temporary matrix */
   const int idx = var_get_dict_index (mformat->varname);
   row = 0;
-  struct ccase *prev_case = NULL;
+
+  if (mformat->n >= 0)
+    {
+      int col;
+      struct ccase *outcase = case_create (proto);
+      union value *v = case_data_rw (outcase, mformat->rowtype);
+      uint8_t *n = value_str_rw (v, ROWTYPE_WIDTH);
+      strncpy ((char *) n, "N        ", ROWTYPE_WIDTH);
+      blank_varname_column (outcase, mformat->varname);
+      for (col = 0; col < mformat->n_continuous_vars; ++col)
+	{
+	  union value *dest_val =
+	    case_data_rw_idx (outcase,
+			      1 + col + var_get_dict_index (mformat->varname));
+	  dest_val->f = mformat->n;
+	}
+      casewriter_write (writer, outcase);
+    }
+
   prev_split_hash = 1;
   n_splits = 0;
   for (; (c = casereader_read (casereader0)) != NULL; prev_case = c)
@@ -204,19 +244,30 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void
 	}
 
       prev_split_hash = split_hash;
-
       case_unref (prev_case);
+      const union value *v = case_data (c, mformat->rowtype);
+      const char *val = (const char *) value_str (v, ROWTYPE_WIDTH);
+      if (mformat->n >= 0)
+	{
+	  if (0 == strncasecmp (val, "n       ", ROWTYPE_WIDTH) ||
+	      0 == strncasecmp (val, "n_vector", ROWTYPE_WIDTH))
+	    {
+	      msg (SW,
+		   _("The N subcommand was specified, but a N record was also found in the data.  The N record will be ignored."));
+	      continue;
+	    }
+	}
+
       struct ccase *outcase = case_create (proto);
       case_copy (outcase, 0, c, 0, caseproto_get_n_widths (proto));
-      const union value *v = case_data (c, mformat->rowtype);
-      const char *val = (const char *) value_str (v, 8);
-      if (0 == strncasecmp (val, "corr    ", 8) ||
-	  0 == strncasecmp (val, "cov     ", 8))
+
+      if (0 == strncasecmp (val, "corr    ", ROWTYPE_WIDTH) ||
+	  0 == strncasecmp (val, "cov     ", ROWTYPE_WIDTH))
 	{
 	  int col;
 	  const struct variable *var = dict_get_var (dict, idx + 1 + row);
-	  set_varname_column (outcase, mformat, var_get_name (var), 8);
-	  value_copy (case_data_rw (outcase, mformat->rowtype), v, 8);
+	  set_varname_column (outcase, mformat->varname, var_get_name (var));
+	  value_copy (case_data_rw (outcase, mformat->rowtype), v, ROWTYPE_WIDTH);
 
 	  for (col = 0; col < mformat->n_continuous_vars; ++col)
 	    {
@@ -231,18 +282,18 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void
 	}
       else
 	{
-	  set_varname_column (outcase, mformat, "        ", 8);
+	  blank_varname_column (outcase, mformat->varname);
 	}
 
       /* Special case for SD and N_VECTOR: Rewrite as STDDEV and N respectively */
-      if (0 == strncasecmp (val, "sd      ", 8))
+      if (0 == strncasecmp (val, "sd      ", ROWTYPE_WIDTH))
 	{
-	  value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), 8,
+	  value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), ROWTYPE_WIDTH,
 			       (uint8_t *) "STDDEV", 6, ' ');
 	}
-      else if (0 == strncasecmp (val, "n_vector", 8))
+      else if (0 == strncasecmp (val, "n_vector", ROWTYPE_WIDTH))
 	{
-	  value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), 8,
+	  value_copy_buf_rpad (case_data_rw (outcase, mformat->rowtype), ROWTYPE_WIDTH,
 			       (uint8_t *) "N", 1, ' ');
 	}
 
@@ -258,9 +309,8 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void
       if (prev_case)
 	case_copy (outcase, 0, prev_case, 0, caseproto_get_n_widths (proto));
 
-
       const struct variable *var = dict_get_var (dict, idx + 1 + row);
-      set_varname_column (outcase, mformat, var_get_name (var), 8);
+      set_varname_column (outcase, mformat->varname, var_get_name (var));
 
       for (col = 0; col < mformat->n_continuous_vars; ++col)
 	{
@@ -275,6 +325,7 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void
       casewriter_write (writer, outcase);
     }
 
+
   if (prev_case)
     case_unref (prev_case);
 
@@ -285,6 +336,17 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void
   struct casereader *reader1 = casewriter_make_reader (writer);
   casereader_destroy (casereader0);
   return reader1;
+
+
+error:
+  if (prev_case)
+    case_unref (prev_case);
+
+  for (i = 0 ; i < n_splits; ++i)
+    free (matrices[i]);
+  free (matrices);
+  casereader_destroy (casereader0);
+  return NULL;
 }
 
 int
@@ -302,6 +364,9 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds)
 
   mformat.triangle = LOWER;
   mformat.diagonal = DIAGONAL;
+  mformat.n_split_vars = 0;
+  mformat.split_vars = NULL;
+  mformat.n = -1;
 
   dict = (in_input_program ()
           ? dataset_dict (ds)
@@ -313,8 +378,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds)
   data_parser_set_warn_missing_fields (parser, false);
   data_parser_set_span (parser, false);
 
-  mformat.rowtype = dict_create_var (dict, "ROWTYPE_", 8);
-  mformat.varname = dict_create_var (dict, "VARNAME_", 8);
+  mformat.rowtype = dict_create_var (dict, "ROWTYPE_", ROWTYPE_WIDTH);
 
   mformat.n_continuous_vars = 0;
   mformat.n_split_vars = 0;
@@ -324,7 +388,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds)
 
   lex_match (lexer, T_EQUALS);
 
-  if (! parse_mixed_vars (lexer, dict, &names, &n_names, 0))
+  if (! parse_mixed_vars (lexer, dict, &names, &n_names, PV_NO_DUPLICATE))
     {
       int i;
       for (i = 0; i < n_names; ++i)
@@ -333,6 +397,15 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds)
       goto error;
     }
 
+  int longest_name = 0;
+  for (i = 0; i < n_names; ++i)
+    {
+      maximize_int (&longest_name, strlen (names[i]));
+    }
+
+  mformat.varname = dict_create_var (dict, "VARNAME_",
+				     8 * DIV_RND_UP (longest_name, 8));
+
   for (i = 0; i < n_names; ++i)
     {
       if (0 == strcasecmp (names[i], "ROWTYPE_"))
@@ -364,7 +437,22 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds)
       if (! lex_force_match (lexer, T_SLASH))
 	goto error;
 
-      if (lex_match_id (lexer, "FORMAT"))
+      if (lex_match_id (lexer, "N"))
+	{
+	  lex_match (lexer, T_EQUALS);
+
+	  if (! lex_force_int (lexer))
+	    goto error;
+
+	  mformat.n = lex_integer (lexer);
+	  if (mformat.n < 0)
+	    {
+	      msg (SE, _("%s must not be negative."), "N");
+	      goto error;
+	    }
+	  lex_get (lexer);
+	}
+      else if (lex_match_id (lexer, "FORMAT"))
 	{
 	  lex_match (lexer, T_EQUALS);
 
@@ -475,6 +563,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds)
 
   fh_unref (fh);
   free (encoding);
+  free (mformat.split_vars);
 
   return CMD_DATA_LIST;
 
@@ -484,6 +573,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds)
     dict_destroy (dict);
   fh_unref (fh);
   free (encoding);
+  free (mformat.split_vars);
   return CMD_CASCADING_FAILURE;
 }