SPLIT FILE: Add LAYERED vs. SEPARATE distinction to the dictionary.

author Ben Pfaff <blp@cs.stanford.edu>

Fri, 8 Jul 2022 22:45:40 +0000 (15:45 -0700)

committer Ben Pfaff <blp@cs.stanford.edu>

Fri, 8 Jul 2022 22:48:09 +0000 (15:48 -0700)
author Ben Pfaff <blp@cs.stanford.edu>
Fri, 8 Jul 2022 22:45:40 +0000 (15:45 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Fri, 8 Jul 2022 22:48:09 +0000 (15:48 -0700)
diff --git a/doc/data-selection.texi b/doc/data-selection.texi

index c2f3ab31a4ddf0fdd33797a83981acd820e35590..b92d1826efe79d804d582aeae8384197e2064a13 100644 (file)
--- a/doc/data-selection.texi
+++ b/doc/data-selection.texi
@@ -208,12 +208,15 @@ An independent analysis is carried out for each group of cases, and the
  variable values for the group are printed along with the analysis.
  
  When a list of variable names is specified, one of the keywords
-@subcmd{LAYERED} or @subcmd{SEPARATE} may also be specified.  If provided, either
-keyword are ignored.
+@subcmd{LAYERED} or @subcmd{SEPARATE} may also be specified.  With
+@subcmd{LAYERED}, which is the default, the analyses for all of the
+groups are presented together in a single table.  With
+@subcmd{SEPARATE}, each group's analysis is presented in its own table.
+Not all procedures honor the distinction.
  
  Groups are formed only by @emph{adjacent} cases.  To create a split
  using a variable where like values are not adjacent in the working file,
-you should first sort the data by that variable (@pxref{SORT CASES}).
+first sort the data by that variable (@pxref{SORT CASES}).
  
  Specify @subcmd{OFF} to disable @cmd{SPLIT FILE} and resume analysis of the
  entire active dataset as a single group of data.
diff --git a/src/data/dictionary.c b/src/data/dictionary.c

index 9d39ab0267b331ce662cc6b0b2c0f0205fc17e61..c331ea234550f461d0ad07d3ff8300a109af440e 100644 (file)
--- a/src/data/dictionary.c
+++ b/src/data/dictionary.c
@@ -67,6 +67,7 @@ struct dictionary
      int next_value_idx;         /* Index of next `union value' to allocate. */
      const struct variable **split;    /* SPLIT FILE vars. */
      size_t n_splits;            /* SPLIT FILE count. */
+    enum split_type split_type;
      struct variable *weight;    /* WEIGHT variable. */
      struct variable *filter;    /* FILTER variable. */
      casenumber case_limit;      /* Current case limit (N command). */
@@ -251,13 +252,16 @@ dict_copy_callbacks (struct dictionary *dest,
  struct dictionary *
  dict_create (const char *encoding)
  {
-  struct dictionary *d = XZALLOC (struct dictionary);
+  struct dictionary *d = xmalloc (sizeof *d);
  
-  d->encoding = xstrdup (encoding);
-  d->names_must_be_ids = true;
-  hmap_init (&d->name_map);
-  attrset_init (&d->attributes);
-  d->ref_cnt = 1;
+  *d = (struct dictionary) {
+    .encoding = xstrdup (encoding),
+    .names_must_be_ids = true,
+    .name_map = HMAP_INITIALIZER (d->name_map),
+    .attributes = ATTRSET_INITIALIZER (d->attributes),
+    .split_type = SPLIT_LAYERED,
+    .ref_cnt = 1,
+  };
  
    return d;
  }
@@ -299,9 +303,10 @@ dict_clone (const struct dictionary *s)
    if (d->n_splits > 0)
      {
         d->split = xnmalloc (d->n_splits, sizeof *d->split);
-      for (i = 0; i < d->n_splits; i++)
-        d->split[i] = dict_lookup_var_assert (d, var_get_name (s->split[i]));
+       for (i = 0; i < d->n_splits; i++)
+         d->split[i] = dict_lookup_var_assert (d, var_get_name (s->split[i]));
      }
+  d->split_type = s->split_type;
  
    if (s->weight != NULL)
      dict_set_weight (d, dict_lookup_var_assert (d, var_get_name (s->weight)));
@@ -383,11 +388,12 @@ dict_unset_split_var (struct dictionary *d, struct variable *v, bool skip_callba
  static void
  dict_set_split_vars__ (struct dictionary *d,
                         struct variable *const *split, size_t n,
-                       bool skip_callbacks)
+                       enum split_type type, bool skip_callbacks)
  {
    assert (n == 0 || split != NULL);
  
    d->n_splits = n;
+  d->split_type = type;
    if (n > 0)
     {
      d->split = xnrealloc (d->split, n, sizeof *d->split) ;
@@ -410,11 +416,17 @@ dict_set_split_vars__ (struct dictionary *d,
-/* Sets N split vars SPLIT in dictionary D. */
+/* Sets N split vars SPLIT in dictionary D, with split type TYPE. */
  void
  dict_set_split_vars (struct dictionary *d,
-                     struct variable *const *split, size_t n)
+                     struct variable *const *split, size_t n,
+                     enum split_type type)
  {
-  dict_set_split_vars__ (d, split, n, false);
+  dict_set_split_vars__ (d, split, n, type, false);
  }
  
+void
+dict_clear_split_vars (struct dictionary *d)
+{
+  dict_set_split_vars (d, NULL, 0, SPLIT_LAYERED);
+}
  \f
  
  /* Deletes variable V from dictionary D and frees V.
@@ -611,7 +623,7 @@ dict_clear__ (struct dictionary *d, bool skip_callbacks)
    invalidate_proto (d);
    hmap_clear (&d->name_map);
    d->next_value_idx = 0;
-  dict_set_split_vars__ (d, NULL, 0, skip_callbacks);
+  dict_set_split_vars__ (d, NULL, 0, SPLIT_LAYERED, skip_callbacks);
  
    if (skip_callbacks)
      {
diff --git a/src/data/dictionary.h b/src/data/dictionary.h

index d1f7f2828bc1b902603dc93d749c26dcbda0f0f1..067142cfd08f18761d6b086f7cbd1edb8c4bb5d9 100644 (file)
--- a/src/data/dictionary.h
+++ b/src/data/dictionary.h
@@ -124,10 +124,18 @@ struct caseproto *dict_get_compacted_proto (const struct dictionary *,
                                              unsigned int exclude_classes);
  
  /* SPLIT FILE variables. */
+enum split_type
+  {
+    SPLIT_SEPARATE,             /* Produce separate output for each split. */
+    SPLIT_LAYERED,              /* Output splits in same table.  */
+  };
  const struct variable *const *dict_get_split_vars (const struct dictionary *);
  size_t dict_get_n_splits (const struct dictionary *);
+enum split_type dict_get_split_type (const struct dictionary *);
  void dict_set_split_vars (struct dictionary *,
-                          struct variable *const *, size_t n);
+                          struct variable *const *, size_t n,
+                          enum split_type);
+void dict_clear_split_vars (struct dictionary *);
  
  /* File label. */
  const char *dict_get_label (const struct dictionary *);
diff --git a/src/language/data-io/matrix-data.c b/src/language/data-io/matrix-data.c

index f510ec25162d194273de45aa9572077a8a718af3..df8647480f9986593ec2351df20dd90c201ea938 100644 (file)
--- a/src/language/data-io/matrix-data.c
+++ b/src/language/data-io/matrix-data.c
@@ -1158,7 +1158,7 @@ cmd_matrix_data (struct lexer *lexer, struct dataset *ds)
    dict_reorder_vars (dict, order, n_order);
    free (order);
  
-  dict_set_split_vars (dict, mf.svars, mf.n_svars);
+  dict_set_split_vars (dict, mf.svars, mf.n_svars, SPLIT_LAYERED);
  
    schedule_matrices (&mf);
  
diff --git a/src/language/dictionary/split-file.c b/src/language/dictionary/split-file.c

index 1ebd7ae9ca6fa415cd2b13a737ea255827562244..8b4c8783371a8d165a1243c2896a9618f542f557 100644 (file)
--- a/src/language/dictionary/split-file.c
+++ b/src/language/dictionary/split-file.c
@@ -43,20 +43,22 @@ int
  cmd_split_file (struct lexer *lexer, struct dataset *ds)
  {
    if (lex_match_id (lexer, "OFF"))
-    dict_set_split_vars (dataset_dict (ds), NULL, 0);
+    dict_clear_split_vars (dataset_dict (ds));
    else
      {
        struct variable **v;
        size_t n;
  
-      /* For now, ignore SEPARATE and LAYERED. */
-      (void) (lex_match_id (lexer, "SEPARATE") || lex_match_id (lexer, "LAYERED"));
+      enum split_type type = (!lex_match_id (lexer, "LAYERED")
+                              && lex_match_id (lexer, "SEPARATE")
+                              ? SPLIT_SEPARATE
+                              : SPLIT_LAYERED);
  
        lex_match (lexer, T_BY);
        if (!parse_variables (lexer, dataset_dict (ds), &v, &n, PV_NO_DUPLICATE))
         return CMD_CASCADING_FAILURE;
  
-      dict_set_split_vars (dataset_dict (ds), v, n);
+      dict_set_split_vars (dataset_dict (ds), v, n, type);
        free (v);
      }
  
diff --git a/src/language/stats/aggregate.c b/src/language/stats/aggregate.c

index 640746991c3246209c56efae14df3302d8fccfd9..4151860e109bc4e1c8cf49af8f65789140e25d48 100644 (file)
--- a/src/language/stats/aggregate.c
+++ b/src/language/stats/aggregate.c
@@ -269,7 +269,7 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds)
      dict_clear_documents (agr.dict);
  
    /* Cancel SPLIT FILE. */
-  dict_set_split_vars (agr.dict, NULL, 0);
+  dict_clear_split_vars (agr.dict);
  
    /* Initialize. */
    agr.n_cases = 0;
author	Ben Pfaff <blp@cs.stanford.edu>
	Fri, 8 Jul 2022 22:45:40 +0000 (15:45 -0700)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Fri, 8 Jul 2022 22:48:09 +0000 (15:48 -0700)
doc/data-selection.texi		patch \| blob \| history
src/data/dictionary.c		patch \| blob \| history
src/data/dictionary.h		patch \| blob \| history
src/language/data-io/matrix-data.c		patch \| blob \| history
src/language/dictionary/split-file.c		patch \| blob \| history
src/language/stats/aggregate.c		patch \| blob \| history