From: Ben Pfaff Date: Fri, 8 Jul 2022 22:45:40 +0000 (-0700) Subject: SPLIT FILE: Add LAYERED vs. SEPARATE distinction to the dictionary. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=93261332cde187e1392b6935234b2b1d8b9a1d51 SPLIT FILE: Add LAYERED vs. SEPARATE distinction to the dictionary. --- diff --git a/doc/data-selection.texi b/doc/data-selection.texi index c2f3ab31a4..b92d1826ef 100644 --- a/doc/data-selection.texi +++ b/doc/data-selection.texi @@ -208,12 +208,15 @@ An independent analysis is carried out for each group of cases, and the variable values for the group are printed along with the analysis. When a list of variable names is specified, one of the keywords -@subcmd{LAYERED} or @subcmd{SEPARATE} may also be specified. If provided, either -keyword are ignored. +@subcmd{LAYERED} or @subcmd{SEPARATE} may also be specified. With +@subcmd{LAYERED}, which is the default, the separate analyses for each +group are presented together in a single table. With +@subcmd{SEPARATE}, each analysis is presented in a separate table. +Not all procedures honor the distinction. Groups are formed only by @emph{adjacent} cases. To create a split using a variable where like values are not adjacent in the working file, -you should first sort the data by that variable (@pxref{SORT CASES}). +first sort the data by that variable (@pxref{SORT CASES}). Specify @subcmd{OFF} to disable @cmd{SPLIT FILE} and resume analysis of the entire active dataset as a single group of data. diff --git a/src/data/dictionary.c b/src/data/dictionary.c index 9d39ab0267..c331ea2345 100644 --- a/src/data/dictionary.c +++ b/src/data/dictionary.c @@ -67,6 +67,7 @@ struct dictionary int next_value_idx; /* Index of next `union value' to allocate. */ const struct variable **split; /* SPLIT FILE vars. */ size_t n_splits; /* SPLIT FILE count. */ + enum split_type split_type; struct variable *weight; /* WEIGHT variable. */ struct variable *filter; /* FILTER variable. */ casenumber case_limit; /* Current case limit (N command). */ @@ -251,13 +252,16 @@ dict_copy_callbacks (struct dictionary *dest, struct dictionary * dict_create (const char *encoding) { - struct dictionary *d = XZALLOC (struct dictionary); + struct dictionary *d = xmalloc (sizeof *d); - d->encoding = xstrdup (encoding); - d->names_must_be_ids = true; - hmap_init (&d->name_map); - attrset_init (&d->attributes); - d->ref_cnt = 1; + *d = (struct dictionary) { + .encoding = xstrdup (encoding), + .names_must_be_ids = true, + .name_map = HMAP_INITIALIZER (d->name_map), + .attributes = ATTRSET_INITIALIZER (d->attributes), + .split_type = SPLIT_LAYERED, + .ref_cnt = 1, + }; return d; } @@ -299,9 +303,10 @@ dict_clone (const struct dictionary *s) if (d->n_splits > 0) { d->split = xnmalloc (d->n_splits, sizeof *d->split); - for (i = 0; i < d->n_splits; i++) - d->split[i] = dict_lookup_var_assert (d, var_get_name (s->split[i])); + for (i = 0; i < d->n_splits; i++) + d->split[i] = dict_lookup_var_assert (d, var_get_name (s->split[i])); } + d->split_type = s->split_type; if (s->weight != NULL) dict_set_weight (d, dict_lookup_var_assert (d, var_get_name (s->weight))); @@ -383,11 +388,12 @@ dict_unset_split_var (struct dictionary *d, struct variable *v, bool skip_callba static void dict_set_split_vars__ (struct dictionary *d, struct variable *const *split, size_t n, - bool skip_callbacks) + enum split_type type, bool skip_callbacks) { assert (n == 0 || split != NULL); d->n_splits = n; + d->split_type = type; if (n > 0) { d->split = xnrealloc (d->split, n, sizeof *d->split) ; @@ -410,11 +416,17 @@ dict_set_split_vars__ (struct dictionary *d, /* Sets N split vars SPLIT in dictionary D. */ void dict_set_split_vars (struct dictionary *d, - struct variable *const *split, size_t n) + struct variable *const *split, size_t n, + enum split_type type) { - dict_set_split_vars__ (d, split, n, false); + dict_set_split_vars__ (d, split, n, type, false); } +void +dict_clear_split_vars (struct dictionary *d) +{ + dict_set_split_vars (d, NULL, 0, SPLIT_LAYERED); +} /* Deletes variable V from dictionary D and frees V. @@ -611,7 +623,7 @@ dict_clear__ (struct dictionary *d, bool skip_callbacks) invalidate_proto (d); hmap_clear (&d->name_map); d->next_value_idx = 0; - dict_set_split_vars__ (d, NULL, 0, skip_callbacks); + dict_set_split_vars__ (d, NULL, 0, SPLIT_LAYERED, skip_callbacks); if (skip_callbacks) { diff --git a/src/data/dictionary.h b/src/data/dictionary.h index d1f7f2828b..067142cfd0 100644 --- a/src/data/dictionary.h +++ b/src/data/dictionary.h @@ -124,10 +124,18 @@ struct caseproto *dict_get_compacted_proto (const struct dictionary *, unsigned int exclude_classes); /* SPLIT FILE variables. */ +enum split_type + { + SPLIT_SEPARATE, /* Produce separate output for each split. */ + SPLIT_LAYERED, /* Output splits in same table. */ + }; const struct variable *const *dict_get_split_vars (const struct dictionary *); size_t dict_get_n_splits (const struct dictionary *); +enum split_type dict_get_split_type (const struct dictionary *); void dict_set_split_vars (struct dictionary *, - struct variable *const *, size_t n); + struct variable *const *, size_t n, + enum split_type); +void dict_clear_split_vars (struct dictionary *); /* File label. */ const char *dict_get_label (const struct dictionary *); diff --git a/src/language/data-io/matrix-data.c b/src/language/data-io/matrix-data.c index f510ec2516..df8647480f 100644 --- a/src/language/data-io/matrix-data.c +++ b/src/language/data-io/matrix-data.c @@ -1158,7 +1158,7 @@ cmd_matrix_data (struct lexer *lexer, struct dataset *ds) dict_reorder_vars (dict, order, n_order); free (order); - dict_set_split_vars (dict, mf.svars, mf.n_svars); + dict_set_split_vars (dict, mf.svars, mf.n_svars, SPLIT_LAYERED); schedule_matrices (&mf); diff --git a/src/language/dictionary/split-file.c b/src/language/dictionary/split-file.c index 1ebd7ae9ca..8b4c878337 100644 --- a/src/language/dictionary/split-file.c +++ b/src/language/dictionary/split-file.c @@ -43,20 +43,22 @@ int cmd_split_file (struct lexer *lexer, struct dataset *ds) { if (lex_match_id (lexer, "OFF")) - dict_set_split_vars (dataset_dict (ds), NULL, 0); + dict_clear_split_vars (dataset_dict (ds)); else { struct variable **v; size_t n; - /* For now, ignore SEPARATE and LAYERED. */ - (void) (lex_match_id (lexer, "SEPARATE") || lex_match_id (lexer, "LAYERED")); + enum split_type type = (!lex_match_id (lexer, "LAYERED") + && lex_match_id (lexer, "SEPARATE") + ? SPLIT_SEPARATE + : SPLIT_LAYERED); lex_match (lexer, T_BY); if (!parse_variables (lexer, dataset_dict (ds), &v, &n, PV_NO_DUPLICATE)) return CMD_CASCADING_FAILURE; - dict_set_split_vars (dataset_dict (ds), v, n); + dict_set_split_vars (dataset_dict (ds), v, n, type); free (v); } diff --git a/src/language/stats/aggregate.c b/src/language/stats/aggregate.c index 640746991c..4151860e10 100644 --- a/src/language/stats/aggregate.c +++ b/src/language/stats/aggregate.c @@ -269,7 +269,7 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds) dict_clear_documents (agr.dict); /* Cancel SPLIT FILE. */ - dict_set_split_vars (agr.dict, NULL, 0); + dict_clear_split_vars (agr.dict); /* Initialize. */ agr.n_cases = 0;