X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Flanguage%2Fdata-io%2Fmatrix-data.c;h=7676bb1de0157d45c0c74ead6f2a18c387378cc6;hb=a49b940e58f148bf111c647d9b4822025636ff80;hp=752a0be3d26105c5930aa8e1b15b0b093ffcebd1;hpb=b4e3d932f4dfbdf3e51c81b78daabb40e23528b2;p=pspp

diff --git a/src/language/data-io/matrix-data.c b/src/language/data-io/matrix-data.c
index 752a0be3d2..7676bb1de0 100644
--- a/src/language/data-io/matrix-data.c
+++ b/src/language/data-io/matrix-data.c
@@ -77,6 +77,7 @@ struct matrix_format
   int n_continuous_vars;
   struct variable **split_vars;
   size_t n_split_vars;
+  long n;
 };
 
 /*
@@ -105,16 +106,16 @@ set_varname_column (struct ccase *outcase, const struct variable *vname,
      const char *str)
 {
   int len = var_get_width (vname);
-  uint8_t *s = value_str_rw (case_data_rw (outcase, vname), len);
+  uint8_t *s = case_str_rw (outcase, vname);
 
-  strncpy ((char *) s, str, len);
+  strncpy (CHAR_CAST (char *, s), str, len);
 }
 
 static void
 blank_varname_column (struct ccase *outcase, const struct variable *vname)
 {
   int len = var_get_width (vname);
-  uint8_t *s = value_str_rw (case_data_rw (outcase, vname), len);
+  uint8_t *s = case_str_rw (outcase, vname);
 
   memset (s, ' ', len);
 }
@@ -124,8 +125,7 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void
 {
   struct matrix_format *mformat = aux;
   const struct caseproto *proto = casereader_get_proto (casereader0);
-  struct casewriter *writer;
-  writer = autopaging_writer_create (proto);
+  struct casewriter *writer = autopaging_writer_create (proto);
   struct ccase *prev_case = NULL;
   double **matrices = NULL;
   size_t n_splits = 0;
@@ -137,20 +137,30 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void
   /* Make an initial pass to populate our temporary matrix */
   struct casereader *pass0 = casereader_clone (casereader0);
   struct ccase *c;
-  unsigned int prev_split_hash = 1;
+  union value *prev_values = xcalloc (mformat->n_split_vars, sizeof *prev_values);
   int row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ? 1 : 0;
+  bool first_case = true;
   for (; (c = casereader_read (pass0)) != NULL; case_unref (c))
     {
       int s;
-      unsigned int split_hash = 0;
-      for (s = 0; s < mformat->n_split_vars; ++s)
+      bool match = false;
+      if (!first_case)
 	{
-	  const struct variable *svar = mformat->split_vars[s];
-	  const union value *sv = case_data (c, svar);
-	  split_hash = value_hash (sv, var_get_width (svar), split_hash);
+	  match = true;
+	  for (s = 0; s < mformat->n_split_vars; ++s)
+	    {
+	      const struct variable *svar = mformat->split_vars[s];
+	      const union value *sv = case_data (c, svar);
+	      if (! value_equal (prev_values + s, sv, var_get_width (svar)))
+		{
+		  match = false;
+		  break;
+		}
+	    }
 	}
+      first_case = false;
 
-      if (matrices == NULL || prev_split_hash != split_hash)
+      if (matrices == NULL || ! match)
 	{
 	  row = (mformat->triangle == LOWER && mformat->diagonal == NO_DIAGONAL) ?
 	    1 : 0;
@@ -160,13 +170,18 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void
 	  matrices[n_splits - 1] = xmalloc (sizeof_matrix);
 	}
 
-      prev_split_hash = split_hash;
+      for (s = 0; s < mformat->n_split_vars; ++s)
+	{
+	  const struct variable *svar = mformat->split_vars[s];
+	  const union value *sv = case_data (c, svar);
+	  value_clone (prev_values + s, sv, var_get_width (svar));
+	}
 
       int c_offset = (mformat->triangle == UPPER) ? row : 0;
       if (mformat->triangle == UPPER && mformat->diagonal == NO_DIAGONAL)
 	c_offset++;
       const union value *v = case_data (c, mformat->rowtype);
-      const char *val = (const char *) value_str (v, ROWTYPE_WIDTH);
+      const char *val = CHAR_CAST (const char *, v->s);
       if (0 == strncasecmp (val, "corr    ", ROWTYPE_WIDTH) ||
 	  0 == strncasecmp (val, "cov     ", ROWTYPE_WIDTH))
 	{
@@ -175,6 +190,9 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void
 	      msg (SE,
 		   _("There are %d variable declared but the data has at least %d matrix rows."),
 		   mformat->n_continuous_vars, row + 1);
+	      case_unref (c);
+	      casereader_destroy (pass0);
+	      free (prev_values);
 	      goto error;
 	    }
 	  int col;
@@ -200,37 +218,82 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void
 	}
     }
   casereader_destroy (pass0);
+  free (prev_values);
 
   /* Now make a second pass to fill in the other triangle from our
      temporary matrix */
   const int idx = var_get_dict_index (mformat->varname);
   row = 0;
 
-  prev_split_hash = 1;
+  if (mformat->n >= 0)
+    {
+      int col;
+      struct ccase *outcase = case_create (proto);
+      union value *v = case_data_rw (outcase, mformat->rowtype);
+      memcpy (v->s, "N       ", ROWTYPE_WIDTH);
+      blank_varname_column (outcase, mformat->varname);
+      for (col = 0; col < mformat->n_continuous_vars; ++col)
+	{
+	  union value *dest_val =
+	    case_data_rw_idx (outcase,
+			      1 + col + var_get_dict_index (mformat->varname));
+	  dest_val->f = mformat->n;
+	}
+      casewriter_write (writer, outcase);
+    }
+
   n_splits = 0;
+  prev_values = xcalloc (mformat->n_split_vars, sizeof *prev_values);
+  first_case = true;
   for (; (c = casereader_read (casereader0)) != NULL; prev_case = c)
     {
       int s;
-      unsigned int split_hash = 0;
-      for (s = 0; s < mformat->n_split_vars; ++s)
+      bool match = false;
+      if (!first_case)
 	{
-	  const struct variable *svar = mformat->split_vars[s];
-	  const union value *sv = case_data (c, svar);
-	  split_hash = value_hash (sv, var_get_width (svar), split_hash);
+	  match = true;
+	  for (s = 0; s < mformat->n_split_vars; ++s)
+	    {
+	      const struct variable *svar = mformat->split_vars[s];
+	      const union value *sv = case_data (c, svar);
+	      if (! value_equal (prev_values + s, sv, var_get_width (svar)))
+		{
+		  match = false;
+		  break;
+		}
+	    }
 	}
-      if (prev_split_hash != split_hash)
+      first_case = false;
+      if (! match)
 	{
 	  n_splits++;
 	  row = 0;
 	}
 
-      prev_split_hash = split_hash;
+      for (s = 0; s < mformat->n_split_vars; ++s)
+	{
+	  const struct variable *svar = mformat->split_vars[s];
+	  const union value *sv = case_data (c, svar);
+	  value_clone (prev_values + s, sv, var_get_width (svar));
+	}
 
       case_unref (prev_case);
+      const union value *v = case_data (c, mformat->rowtype);
+      const char *val = CHAR_CAST (const char *, v->s);
+      if (mformat->n >= 0)
+	{
+	  if (0 == strncasecmp (val, "n       ", ROWTYPE_WIDTH) ||
+	      0 == strncasecmp (val, "n_vector", ROWTYPE_WIDTH))
+	    {
+	      msg (SW,
+		   _("The N subcommand was specified, but a N record was also found in the data.  The N record will be ignored."));
+	      continue;
+	    }
+	}
+
       struct ccase *outcase = case_create (proto);
       case_copy (outcase, 0, c, 0, caseproto_get_n_widths (proto));
-      const union value *v = case_data (c, mformat->rowtype);
-      const char *val = (const char *) value_str (v, ROWTYPE_WIDTH);
+
       if (0 == strncasecmp (val, "corr    ", ROWTYPE_WIDTH) ||
 	  0 == strncasecmp (val, "cov     ", ROWTYPE_WIDTH))
 	{
@@ -294,7 +357,7 @@ preprocess (struct casereader *casereader0, const struct dictionary *dict, void
 
       casewriter_write (writer, outcase);
     }
-
+  free (prev_values);
 
   if (prev_case)
     case_unref (prev_case);
@@ -316,6 +379,7 @@ error:
     free (matrices[i]);
   free (matrices);
   casereader_destroy (casereader0);
+  casewriter_destroy (writer);
   return NULL;
 }
 
@@ -336,6 +400,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds)
   mformat.diagonal = DIAGONAL;
   mformat.n_split_vars = 0;
   mformat.split_vars = NULL;
+  mformat.n = -1;
 
   dict = (in_input_program ()
           ? dataset_dict (ds)
@@ -406,7 +471,22 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds)
       if (! lex_force_match (lexer, T_SLASH))
 	goto error;
 
-      if (lex_match_id (lexer, "FORMAT"))
+      if (lex_match_id (lexer, "N"))
+	{
+	  lex_match (lexer, T_EQUALS);
+
+	  if (! lex_force_int (lexer))
+	    goto error;
+
+	  mformat.n = lex_integer (lexer);
+	  if (mformat.n < 0)
+	    {
+	      msg (SE, _("%s must not be negative."), "N");
+	      goto error;
+	    }
+	  lex_get (lexer);
+	}
+      else if (lex_match_id (lexer, "FORMAT"))
 	{
 	  lex_match (lexer, T_EQUALS);
 
@@ -512,7 +592,8 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds)
     }
   else
     {
-      data_parser_make_active_file (parser, ds, reader, dict, preprocess, &mformat);
+      data_parser_make_active_file (parser, ds, reader, dict, preprocess,
+				    &mformat);
     }
 
   fh_unref (fh);
@@ -524,7 +605,7 @@ cmd_matrix (struct lexer *lexer, struct dataset *ds)
  error:
   data_parser_destroy (parser);
   if (!in_input_program ())
-    dict_destroy (dict);
+    dict_unref (dict);
   fh_unref (fh);
   free (encoding);
   free (mformat.split_vars);
@@ -580,4 +661,3 @@ data_list_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED)
 
   return retval;
 }
-