2 /* PSPP - a program for statistical analysis.
3 Copyright (C) 2011, 2012 Free Software Foundation, Inc.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation, either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 #include "data/case.h"
21 #include "data/casegrouper.h"
22 #include "data/casereader.h"
23 #include "data/dataset.h"
24 #include "data/dictionary.h"
25 #include "data/format.h"
26 #include "data/variable.h"
28 #include "language/command.h"
29 #include "language/lexer/lexer.h"
30 #include "language/lexer/variable-parser.h"
32 #include "libpspp/misc.h"
33 #include "libpspp/pool.h"
35 #include "math/categoricals.h"
36 #include "math/interaction.h"
37 #include "math/moments.h"
39 #include "output/tab.h"
/* Translate MSGID at run time through gettext. */
44 #define _(msgid) gettext (msgid)
/* Expands to MSGID unchanged.  Used for strings such as the cell_spec
   titles, which are passed to gettext only when they are output. */
45 #define N_(msgid) (msgid)
/* Signature of a statistic's constructor: returns a new opaque state
   object for one statistic.  The implementations in this file obtain
   it from POOL with pool_alloc. */
57 typedef void *stat_create (struct pool *pool);
/* Signature of a statistic's accumulator: folds the observation X into
   the state STAT.  W is presumably the case weight; several
   implementations in this file ignore it. */
58 typedef void stat_update (void *stat, double w, double x);
/* Signature of a statistic's accessor: returns the statistic's value.
   Callers in this file pass the per-variable data together with the
   state object made by the matching stat_create (AUX); implementations
   read whichever of the two they need. */
59 typedef double stat_get (const struct per_var_data *, void *aux);
63 /* Printable title for output */
66 /* Keyword for syntax */
81 harmonic_create (struct pool *pool)
83 struct harmonic_mean *hm = pool_alloc (pool, sizeof *hm);
93 harmonic_update (void *stat, double w, double x)
95 struct harmonic_mean *hm = stat;
102 harmonic_get (const struct per_var_data *pvd UNUSED, void *stat)
104 struct harmonic_mean *hm = stat;
106 return hm->n / hm->rsum;
111 struct geometric_mean
119 geometric_create (struct pool *pool)
121 struct geometric_mean *gm = pool_alloc (pool, sizeof *gm);
131 geometric_update (void *stat, double w, double x)
133 struct geometric_mean *gm = stat;
134 gm->prod *= pow (x, w);
140 geometric_get (const struct per_var_data *pvd UNUSED, void *stat)
142 struct geometric_mean *gm = stat;
144 return pow (gm->prod, 1.0 / gm->n);
150 sum_get (const struct per_var_data *pvd, void *stat UNUSED)
154 moments1_calculate (pvd->mom, &n, &mean, 0, 0, 0);
161 n_get (const struct per_var_data *pvd, void *stat UNUSED)
165 moments1_calculate (pvd->mom, &n, 0, 0, 0, 0);
171 arithmean_get (const struct per_var_data *pvd, void *stat UNUSED)
175 moments1_calculate (pvd->mom, &n, &mean, 0, 0, 0);
181 variance_get (const struct per_var_data *pvd, void *stat UNUSED)
183 double n, mean, variance;
185 moments1_calculate (pvd->mom, &n, &mean, &variance, 0, 0);
192 stddev_get (const struct per_var_data *pvd, void *stat)
194 return sqrt (variance_get (pvd, stat));
201 skew_get (const struct per_var_data *pvd, void *stat UNUSED)
205 moments1_calculate (pvd->mom, NULL, NULL, NULL, &skew, 0);
211 sekurt_get (const struct per_var_data *pvd, void *stat UNUSED)
215 moments1_calculate (pvd->mom, &n, NULL, NULL, NULL, NULL);
217 return calc_sekurt (n);
221 seskew_get (const struct per_var_data *pvd, void *stat UNUSED)
225 moments1_calculate (pvd->mom, &n, NULL, NULL, NULL, NULL);
227 return calc_seskew (n);
231 kurt_get (const struct per_var_data *pvd, void *stat UNUSED)
235 moments1_calculate (pvd->mom, NULL, NULL, NULL, NULL, &kurt);
241 semean_get (const struct per_var_data *pvd, void *stat UNUSED)
245 moments1_calculate (pvd->mom, &n, NULL, &var, NULL, NULL);
247 return sqrt (var / n);
253 min_create (struct pool *pool)
255 double *r = pool_alloc (pool, sizeof *r);
263 min_update (void *stat, double w UNUSED, double x)
272 min_get (const struct per_var_data *pvd UNUSED, void *stat)
280 max_create (struct pool *pool)
282 double *r = pool_alloc (pool, sizeof *r);
290 max_update (void *stat, double w UNUSED, double x)
299 max_get (const struct per_var_data *pvd UNUSED, void *stat)
315 range_create (struct pool *pool)
317 struct range *r = pool_alloc (pool, sizeof *r);
326 range_update (void *stat, double w UNUSED, double x)
328 struct range *r = stat;
338 range_get (const struct per_var_data *pvd UNUSED, void *stat)
340 struct range *r = stat;
342 return r->max - r->min;
348 last_create (struct pool *pool)
350 double *l = pool_alloc (pool, sizeof *l);
356 last_update (void *stat, double w UNUSED, double x)
364 last_get (const struct per_var_data *pvd UNUSED, void *stat)
373 first_create (struct pool *pool)
375 double *f = pool_alloc (pool, sizeof *f);
383 first_update (void *stat, double w UNUSED, double x)
392 first_get (const struct per_var_data *pvd UNUSED, void *stat)
406 /* Table of cell_specs */
407 static const struct cell_spec cell_spec[] = {
408 {N_("Mean"), "MEAN", NULL, NULL, arithmean_get},
409 {N_("N"), "COUNT", NULL, NULL, n_get},
410 {N_("Std. Deviation"), "STDDEV", NULL, NULL, stddev_get},
412 {N_("Median"), "MEDIAN", NULL, NULL, NULL},
413 {N_("Group Median"), "GMEDIAN", NULL, NULL, NULL},
415 {N_("S.E. Mean"), "SEMEAN", NULL, NULL, semean_get},
416 {N_("Sum"), "SUM", NULL, NULL, sum_get},
417 {N_("Min"), "MIN", min_create, min_update, min_get},
418 {N_("Max"), "MAX", max_create, max_update, max_get},
419 {N_("Range"), "RANGE", range_create, range_update, range_get},
420 {N_("Variance"), "VARIANCE", NULL, NULL, variance_get},
421 {N_("Kurtosis"), "KURT", NULL, NULL, kurt_get},
422 {N_("S.E. Kurt"), "SEKURT", NULL, NULL, sekurt_get},
423 {N_("Skewness"), "SKEW", NULL, NULL, skew_get},
424 {N_("S.E. Skew"), "SESKEW", NULL, NULL, seskew_get},
425 {N_("First"), "FIRST", first_create, first_update, first_get},
426 {N_("Last"), "LAST", last_create, last_update, last_get},
428 {N_("Percent N"), "NPCT", NULL, NULL, NULL},
429 {N_("Percent Sum"), "SPCT", NULL, NULL, NULL},
431 {N_("Harmonic Mean"), "HARMONIC", harmonic_create, harmonic_update, harmonic_get},
432 {N_("Geom. Mean"), "GEOMETRIC", geometric_create, geometric_update, geometric_get}
/* Number of entries in the cell_spec table. */
435 #define n_C (sizeof (cell_spec) / sizeof (struct cell_spec))
441 casenumber non_missing;
447 size_t n_factor_vars;
448 const struct variable **factor_vars;
451 /* The thing parsed after TABLES= */
455 const struct variable **dep_vars;
458 struct layer *layers;
460 struct interaction **interactions;
461 struct summary *summary;
465 struct categoricals *cats;
470 const struct dictionary *dict;
472 struct mtable *table;
475 /* Missing value class for categorical variables */
476 enum mv_class exclude;
478 /* Missing value class for dependent variables */
479 enum mv_class dep_exclude;
481 bool listwise_exclude;
483 /* an array indicating which statistics are to be calculated */
489 /* Pool on which cell functions may allocate data */
495 run_means (struct means *cmd, struct casereader *input,
496 const struct dataset *ds);
501 parse_means_table_syntax (struct lexer *lexer, const struct means *cmd, struct mtable *table)
505 table->layers = NULL;
507 /* Dependent variable(s) */
508 if (!parse_variables_const_pool (lexer, cmd->pool, cmd->dict,
509 &table->dep_vars, &table->n_dep_vars,
510 PV_NO_DUPLICATE | PV_NUMERIC))
513 /* Factor variable(s) */
514 while (lex_token (lexer) != T_ENDCMD && lex_token (lexer) != T_SLASH)
516 if (lex_match (lexer, T_BY))
520 pool_realloc (cmd->pool, table->layers,
521 sizeof (*table->layers) * table->n_layers);
523 if (!parse_variables_const_pool
524 (lexer, cmd->pool, cmd->dict,
525 &table->layers[table->n_layers - 1].factor_vars,
526 &table->layers[table->n_layers - 1].n_factor_vars,
533 /* There is always at least one layer.
534 However the final layer is the total, and not
535 normally considered by the user as a
541 pool_realloc (cmd->pool, table->layers,
542 sizeof (*table->layers) * table->n_layers);
543 table->layers[table->n_layers - 1].factor_vars = NULL;
544 table->layers[table->n_layers - 1].n_factor_vars = 0;
550 Tests whether the token N places ahead names a variable in DICT; the variable itself is not returned.
551 Returns true if successful */
553 lex_is_variable (struct lexer *lexer, const struct dictionary *dict,
557 if (lex_next_token (lexer, n) != T_ID)
560 tstr = lex_next_tokcstr (lexer, n);
562 if (NULL == dict_lookup_var (dict, tstr) )
570 cmd_means (struct lexer *lexer, struct dataset *ds)
576 bool more_tables = true;
578 means.pool = pool_create ();
580 means.exclude = MV_ANY;
581 means.dep_exclude = MV_ANY;
582 means.listwise_exclude = false;
586 means.dict = dataset_dict (ds);
589 means.cells = pool_calloc (means.pool, means.n_cells, sizeof (*means.cells));
592 /* The first three items (MEAN, COUNT, STDDEV) are the default */
593 for (i = 0; i < 3; ++i)
597 /* Optional TABLES = */
598 if (lex_match_id (lexer, "TABLES"))
600 lex_force_match (lexer, T_EQUALS);
605 /* Parse the "tables" */
609 means.table = pool_realloc (means.pool, means.table, means.n_tables * sizeof (*means.table));
611 if (! parse_means_table_syntax (lexer, &means,
612 &means.table[means.n_tables - 1]))
617 /* Look ahead to see if there are more tables to be parsed */
619 if ( T_SLASH == lex_next_token (lexer, 0) )
621 if (lex_is_variable (lexer, means.dict, 1) )
624 lex_force_match (lexer, T_SLASH);
629 /* /MISSING subcommand */
630 while (lex_token (lexer) != T_ENDCMD)
632 lex_match (lexer, T_SLASH);
634 if (lex_match_id (lexer, "MISSING"))
637 If no MISSING subcommand is specified, each combination of
638 a dependent variable and categorical variables is handled
641 lex_match (lexer, T_EQUALS);
642 if (lex_match_id (lexer, "INCLUDE"))
645 Use the subcommand "/MISSING=INCLUDE" to include user-missing
646 values in the analysis.
649 means.exclude = MV_SYSTEM;
650 means.dep_exclude = MV_SYSTEM;
652 else if (lex_match_id (lexer, "TABLE"))
654 This is the default. (I think).
655 Every case containing a complete set of variables for a given
656 table. If any variable, categorical or dependent, in a table
657 is missing (as defined by what?), then that variable will
658 be dropped FOR THAT TABLE ONLY.
661 means.listwise_exclude = true;
663 else if (lex_match_id (lexer, "DEPENDENT"))
665 Use the command "/MISSING=DEPENDENT" to
666 include user-missing values for the categorical variables,
667 while excluding them for the dependent variables.
669 Cases are dropped only when user-missing values
670 appear in dependent variables. User-missing
671 values for categorical variables are treated according to
674 Cases are ALWAYS dropped when System Missing values appear
675 in the categorical variables.
678 means.dep_exclude = MV_ANY;
679 means.exclude = MV_SYSTEM;
683 lex_error (lexer, NULL);
687 else if (lex_match_id (lexer, "CELLS"))
689 lex_match (lexer, T_EQUALS);
691 /* The default values are overwritten */
693 while (lex_token (lexer) != T_ENDCMD
694 && lex_token (lexer) != T_SLASH)
697 if (lex_match (lexer, T_ALL))
701 pool_realloc (means.pool, means.cells,
702 (means.n_cells += n_C) * sizeof (*means.cells));
704 for (x = 0; x < n_C; ++x)
705 means.cells[means.n_cells - (n_C - 1 - x) - 1] = x;
707 else if (lex_match_id (lexer, "NONE"))
711 else if (lex_match_id (lexer, "DEFAULT"))
714 pool_realloc (means.pool, means.cells,
715 (means.n_cells += 3) * sizeof (*means.cells));
717 means.cells[means.n_cells - 2 - 1] = MEANS_MEAN;
718 means.cells[means.n_cells - 1 - 1] = MEANS_N;
719 means.cells[means.n_cells - 0 - 1] = MEANS_STDDEV;
725 if (lex_match_id (lexer, cell_spec[k].keyword))
728 pool_realloc (means.pool, means.cells,
729 ++means.n_cells * sizeof (*means.cells));
731 means.cells[means.n_cells - 1] = k;
738 lex_error (lexer, NULL);
745 lex_error (lexer, NULL);
752 for (t = 0; t < means.n_tables; ++t)
754 struct mtable *table = &means.table[t];
756 table->interactions =
757 pool_calloc (means.pool, table->n_layers, sizeof (*table->interactions));
760 pool_calloc (means.pool, table->n_dep_vars * table->n_layers, sizeof (*table->summary));
762 for (l = 0; l < table->n_layers; ++l)
765 const struct layer *lyr = &table->layers[l];
766 const int n_vars = lyr->n_factor_vars;
767 table->interactions[l] = interaction_create (NULL);
768 for (v = 0 ; v < n_vars ; ++v)
770 interaction_add_variable (table->interactions[l],
771 lyr->factor_vars[v]);
777 struct casegrouper *grouper;
778 struct casereader *group;
781 grouper = casegrouper_create_splits (proc_open (ds), means.dict);
782 while (casegrouper_get_next_group (grouper, &group))
784 run_means (&means, group, ds);
786 ok = casegrouper_destroy (grouper);
787 ok = proc_commit (ds) && ok;
791 pool_destroy (means.pool);
796 pool_destroy (means.pool);
802 is_missing (const struct means *cmd,
803 const struct variable *dvar,
804 const struct interaction *iact,
805 const struct ccase *c)
807 if ( interaction_case_is_missing (iact, c, cmd->exclude) )
811 if (var_is_value_missing (dvar,
819 static void output_case_processing_summary (const struct mtable *);
821 static void output_report (const struct means *, int, const struct mtable *);
826 struct per_var_data *pvd;
832 create_n (const void *aux1, void *aux2)
835 const struct means *means = aux1;
836 struct mtable *table = aux2;
837 struct per_cat_data *per_cat_data = pool_malloc (means->pool, sizeof *per_cat_data);
839 struct per_var_data *pvd = pool_calloc (means->pool, table->n_dep_vars, sizeof *pvd);
841 for (v = 0; v < table->n_dep_vars; ++v)
843 enum moment maxmom = MOMENT_KURTOSIS;
844 struct per_var_data *pp = &pvd[v];
846 pp->cell_stats = pool_calloc (means->pool, means->n_cells, sizeof *pp->cell_stats);
849 for (i = 0; i < means->n_cells; ++i)
851 int csi = means->cells[i];
852 const struct cell_spec *cs = &cell_spec[csi];
855 pp->cell_stats[i] = cs->sc (means->pool);
858 pp->mom = moments1_create (maxmom);
862 per_cat_data->pvd = pvd;
863 per_cat_data->warn = true;
868 update_n (const void *aux1, void *aux2, void *user_data, const struct ccase *c, double weight)
872 const struct means *means = aux1;
873 struct mtable *table = aux2;
874 struct per_cat_data *per_cat_data = user_data;
876 for (v = 0; v < table->n_dep_vars; ++v)
878 struct per_var_data *pvd = &per_cat_data->pvd[v];
880 const double x = case_data (c, table->dep_vars[v])->f;
882 for (i = 0; i < table->n_layers; ++i)
884 if ( is_missing (means, table->dep_vars[v],
885 table->interactions[i], c))
889 for (i = 0; i < means->n_cells; ++i)
891 const int csi = means->cells[i];
892 const struct cell_spec *cs = &cell_spec[csi];
896 cs->su (pvd->cell_stats[i],
900 moments1_add (pvd->mom, x, weight);
908 calculate_n (const void *aux1, void *aux2, void *user_data)
912 struct per_cat_data *per_cat_data = user_data;
913 const struct means *means = aux1;
914 struct mtable *table = aux2;
916 for (v = 0; v < table->n_dep_vars; ++v)
918 struct per_var_data *pvd = &per_cat_data->pvd[v];
919 for (i = 0; i < means->n_cells; ++i)
921 int csi = means->cells[i];
922 const struct cell_spec *cs = &cell_spec[csi];
925 cs->sd (pvd, pvd->cell_stats[i]);
931 run_means (struct means *cmd, struct casereader *input,
932 const struct dataset *ds UNUSED)
935 const struct variable *wv = dict_get_weight (cmd->dict);
937 struct casereader *reader;
939 struct payload payload;
940 payload.create = create_n;
941 payload.update = update_n;
942 payload.calculate = calculate_n;
943 payload.destroy = NULL;
945 for (t = 0; t < cmd->n_tables; ++t)
947 struct mtable *table = &cmd->table[t];
949 = categoricals_create (table->interactions,
950 table->n_layers, wv, cmd->dep_exclude, cmd->exclude);
952 categoricals_set_payload (table->cats, &payload, cmd, table);
956 (c = casereader_read (reader)) != NULL; case_unref (c))
958 for (t = 0; t < cmd->n_tables; ++t)
960 bool something_missing = false;
962 struct mtable *table = &cmd->table[t];
964 for (v = 0; v < table->n_dep_vars; ++v)
967 for (i = 0; i < table->n_layers; ++i)
970 is_missing (cmd, table->dep_vars[v],
971 table->interactions[i], c);
974 something_missing = true;
975 table->summary[v * table->n_layers + i].missing++;
978 table->summary[v * table->n_layers + i].non_missing++;
981 if ( something_missing && cmd->listwise_exclude)
984 categoricals_update (table->cats, c);
987 casereader_destroy (reader);
989 for (t = 0; t < cmd->n_tables; ++t)
991 struct mtable *table = &cmd->table[t];
993 categoricals_done (table->cats);
997 for (t = 0; t < cmd->n_tables; ++t)
1000 const struct mtable *table = &cmd->table[t];
1002 output_case_processing_summary (table);
1004 for (i = 0; i < table->n_layers; ++i)
1006 output_report (cmd, i, table);
1008 categoricals_destroy (table->cats);
1016 output_case_processing_summary (const struct mtable *table)
1019 const int heading_columns = 1;
1020 const int heading_rows = 3;
1021 struct tab_table *t;
1023 const int nr = heading_rows + table->n_layers * table->n_dep_vars;
1026 t = tab_create (nc, nr);
1027 tab_title (t, _("Case Processing Summary"));
1029 tab_headers (t, heading_columns, 0, heading_rows, 0);
1031 tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1);
1033 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
1034 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
1037 tab_joint_text (t, heading_columns, 0,
1038 nc - 1, 0, TAB_CENTER | TAT_TITLE, _("Cases"));
1040 tab_joint_text (t, 1, 1, 2, 1, TAB_CENTER | TAT_TITLE, _("Included"));
1041 tab_joint_text (t, 3, 1, 4, 1, TAB_CENTER | TAT_TITLE, _("Excluded"));
1042 tab_joint_text (t, 5, 1, 6, 1, TAB_CENTER | TAT_TITLE, _("Total"));
1044 tab_hline (t, TAL_1, heading_columns, nc - 1, 1);
1045 tab_hline (t, TAL_1, heading_columns, nc - 1, 2);
1048 for (i = 0; i < 3; ++i)
1050 tab_text (t, heading_columns + i * 2, 2, TAB_CENTER | TAT_TITLE,
1052 tab_text (t, heading_columns + i * 2 + 1, 2, TAB_CENTER | TAT_TITLE,
1056 for (v = 0; v < table->n_dep_vars; ++v)
1058 const struct variable *var = table->dep_vars[v];
1059 const char *dv_name = var_to_string (var);
1060 for (i = 0; i < table->n_layers; ++i)
1062 const int row = v * table->n_layers + i;
1063 const struct interaction *iact = table->interactions[i];
1067 ds_init_cstr (&str, dv_name);
1068 ds_put_cstr (&str, ": ");
1070 interaction_to_string (iact, &str);
1072 tab_text (t, 0, row + heading_rows,
1073 TAB_LEFT | TAT_TITLE, ds_cstr (&str));
1076 n_total = table->summary[row].missing +
1077 table->summary[row].non_missing;
1079 tab_double (t, 1, row + heading_rows,
1080 0, table->summary[row].non_missing, &F_8_0);
1082 tab_text_format (t, 2, row + heading_rows,
1084 table->summary[row].non_missing / (double) n_total * 100.0);
1087 tab_double (t, 3, row + heading_rows,
1088 0, table->summary[row].missing, &F_8_0);
1091 tab_text_format (t, 4, row + heading_rows,
1093 table->summary[row].missing / (double) n_total * 100.0);
1096 tab_double (t, 5, row + heading_rows,
1097 0, table->summary[row].missing +
1098 table->summary[row].non_missing, &F_8_0);
1100 tab_text_format (t, 6, row + heading_rows,
1102 n_total / (double) n_total * 100.0);
1114 output_report (const struct means *cmd, int iact_idx,
1115 const struct mtable *table)
1120 const struct interaction *iact = table->interactions[iact_idx];
1122 const int heading_columns = 1 + iact->n_vars;
1123 const int heading_rows = 1;
1124 struct tab_table *t;
1126 const int n_cats = categoricals_n_count (table->cats, iact_idx);
1128 const int nr = n_cats * table->n_dep_vars + heading_rows;
1130 const int nc = heading_columns + cmd->n_cells;
1132 t = tab_create (nc, nr);
1133 tab_title (t, _("Report"));
1135 tab_headers (t, heading_columns, 0, heading_rows, 0);
1137 tab_box (t, TAL_2, TAL_2, -1, TAL_1, 0, 0, nc - 1, nr - 1);
1139 tab_hline (t, TAL_2, 0, nc - 1, heading_rows);
1140 tab_vline (t, TAL_2, heading_columns, 0, nr - 1);
1142 for (i = 0; i < iact->n_vars; ++i)
1144 tab_text (t, 1 + i, 0, TAB_CENTER | TAT_TITLE,
1145 var_to_string (iact->vars[i]));
1148 for (i = 0; i < cmd->n_cells; ++i)
1150 tab_text (t, heading_columns + i, 0,
1151 TAB_CENTER | TAT_TITLE,
1152 gettext (cell_spec[cmd->cells[i]].title));
1156 for (i = 0; i < n_cats; ++i)
1159 const struct ccase *c =
1160 categoricals_get_case_by_category_real (table->cats, iact_idx, i);
1162 for (dv = 0; dv < table->n_dep_vars; ++dv)
1165 heading_rows + dv * n_cats,
1166 TAB_RIGHT | TAT_TITLE,
1167 var_to_string (table->dep_vars[dv])
1171 tab_hline (t, TAL_1, 0, nc - 1, heading_rows + dv * n_cats);
1173 for (v = 0; v < iact->n_vars; ++v)
1175 const struct variable *var = iact->vars[v];
1176 const union value *val = case_data (c, var);
1178 ds_init_empty (&str);
1179 var_append_value_name (var, val, &str);
1181 tab_text (t, 1 + v, heading_rows + dv * n_cats + i,
1182 TAB_RIGHT | TAT_TITLE, ds_cstr (&str));
1189 for (grp = 0; grp < n_cats; ++grp)
1192 struct per_cat_data *per_cat_data =
1193 categoricals_get_user_data_by_category_real (table->cats, iact_idx, grp);
1195 for (dv = 0; dv < table->n_dep_vars; ++dv)
1197 const struct per_var_data *pvd = &per_cat_data->pvd[dv];
1198 for (i = 0; i < cmd->n_cells; ++i)
1200 const int csi = cmd->cells[i];
1201 const struct cell_spec *cs = &cell_spec[csi];
1203 double result = cs->sd (pvd, pvd->cell_stats[i]);
1205 tab_double (t, heading_columns + i,
1206 heading_rows + grp + dv * n_cats,