work on configurable summaries for totals
author Ben Pfaff <blp@cs.stanford.edu>
Thu, 13 Jan 2022 05:51:10 +0000 (21:51 -0800)
committer Ben Pfaff <blp@cs.stanford.edu>
Sat, 2 Apr 2022 01:48:55 +0000 (18:48 -0700)
src/language/stats/ctables.c

index 74afa0136da77b7dd10570f38c8be122f247d827..e5c1328f039dd3f9bcb3639c2cd497d967f3bc67 100644 (file)
@@ -181,6 +181,7 @@ struct ctables_cell
     struct ctables_domain *domains[N_CTDTS];
 
     bool hide;
+    bool total;
 
     struct
       {
@@ -276,6 +277,17 @@ enum ctables_label_position
     CTLP_LAYER,
   };
 
+struct ctables_summary_spec_set
+  {
+    struct ctables_summary_spec *summaries;
+    size_t n;
+    size_t allocated;
+
+    struct variable *var;
+  };
+
+static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
+
 struct var_array
   {
     struct variable **vars;
@@ -284,9 +296,8 @@ struct var_array
     size_t *domains[N_CTDTS];
     size_t n_domains[N_CTDTS];
 
-    struct ctables_summary_spec *summaries;
-    size_t n_summaries;
-    struct variable *summary_var;
+    struct ctables_summary_spec_set cell_summaries;
+    struct ctables_summary_spec_set total_summaries;
   };
 
 struct var_array2
@@ -487,9 +498,8 @@ struct ctables_axis
           {
             struct ctables_var var;
             bool scale;
-            struct ctables_summary_spec *summaries;
-            size_t n_summaries;
-            size_t allocated_summaries;
+            struct ctables_summary_spec_set cell_summaries;
+            struct ctables_summary_spec_set total_summaries;
           };
 
         /* Nonterminals. */
@@ -530,6 +540,14 @@ ctables_summary_spec_uninit (struct ctables_summary_spec *s)
     free (s->label);
 }
 
+static void
+ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
+{
+  for (size_t i = 0; i < set->n; i++)
+    ctables_summary_spec_uninit (&set->summaries[i]);
+  free (set->summaries);
+}
+
 static bool
 parse_col_width (struct lexer *lexer, const char *name, double *width)
 {
@@ -623,9 +641,8 @@ ctables_axis_destroy (struct ctables_axis *axis)
   switch (axis->op)
     {
     case CTAO_VAR:
-      for (size_t i = 0; i < axis->n_summaries; i++)
-        ctables_summary_spec_uninit (&axis->summaries[i]);
-      free (axis->summaries);
+      ctables_summary_spec_set_uninit (&axis->cell_summaries);
+      ctables_summary_spec_set_uninit (&axis->total_summaries);
       break;
 
     case CTAO_STACK:
@@ -716,15 +733,10 @@ static bool
 add_summary_spec (struct ctables_axis *axis,
                   enum ctables_summary_function function, double percentile,
                   const char *label, const struct fmt_spec *format,
-                  const struct msg_location *loc)
+                  const struct msg_location *loc, bool totals)
 {
   if (axis->op == CTAO_VAR)
     {
-      if (axis->n_summaries >= axis->allocated_summaries)
-        axis->summaries = x2nrealloc (axis->summaries,
-                                      &axis->allocated_summaries,
-                                      sizeof *axis->summaries);
-
       const char *function_name = ctables_summary_function_name (function);
       const char *var_name = ctables_var_name (&axis->var);
       switch (ctables_function_availability (function))
@@ -756,7 +768,13 @@ add_summary_spec (struct ctables_axis *axis,
           break;
         }
 
-      struct ctables_summary_spec *dst = &axis->summaries[axis->n_summaries++];
+      struct ctables_summary_spec_set *set = (totals ? &axis->total_summaries
+                                              : &axis->cell_summaries);
+      if (set->n >= set->allocated)
+        set->summaries = x2nrealloc (set->summaries, &set->allocated,
+                                     sizeof *set->summaries);
+
+      struct ctables_summary_spec *dst = &set->summaries[set->n++];
       *dst = (struct ctables_summary_spec) {
         .function = function,
         .percentile = percentile,
@@ -770,7 +788,7 @@ add_summary_spec (struct ctables_axis *axis,
     {
       for (size_t i = 0; i < 2; i++)
         if (!add_summary_spec (axis->subs[i], function, percentile, label,
-                               format, loc))
+                               format, loc, totals))
           return false;
       return true;
     }
@@ -851,7 +869,8 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
   if (!sub || !lex_match (ctx->lexer, T_LBRACK))
     return sub;
 
-  do
+  bool totals = false;
+  for (;;)
     {
       int start_ofs = lex_ofs (ctx->lexer);
 
@@ -899,15 +918,28 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
 
       struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
                                                    lex_ofs (ctx->lexer) - 1);
-      add_summary_spec (sub, function, percentile, label, formatp, loc);
+      add_summary_spec (sub, function, percentile, label, formatp, loc,
+                        totals);
       free (label);
       msg_location_destroy (loc);
 
-      lex_match (ctx->lexer, T_COMMA);
+      if (lex_match (ctx->lexer, T_COMMA))
+        {
+          if (!totals && lex_match_id (ctx->lexer, "TOTALS"))
+            {
+              if (!lex_force_match (ctx->lexer, T_LBRACK))
+                goto error;
+            }
+        }
+      else if (lex_force_match (ctx->lexer, T_RBRACK))
+        {
+          if (totals && !lex_force_match (ctx->lexer, T_RBRACK))
+            goto error;
+          return sub;
+        }
+      else
+        goto error;
     }
-  while (!lex_match (ctx->lexer, T_RBRACK));
-
-  return sub;
 
 error:
   ctables_axis_destroy (sub);
@@ -947,7 +979,7 @@ find_categorical_summary_spec (const struct ctables_axis *axis)
   if (!axis)
     return NULL;
   else if (axis->op == CTAO_VAR)
-    return !axis->scale && axis->n_summaries ? axis : NULL;
+    return !axis->scale && axis->cell_summaries.n ? axis : NULL;
   else
     {
       for (size_t i = 0; i < 2; i++)
@@ -1446,9 +1478,9 @@ nest_fts (struct var_array2 va0, struct var_array2 va1)
         assert (n == allocate);
 
         const struct var_array *summary_src;
-        if (!a->summary_var)
+        if (!a->cell_summaries.var)
           summary_src = b;
-        else if (!b->summary_var)
+        else if (!b->cell_summaries.var)
           summary_src = a;
         else
           NOT_REACHED ();
@@ -1458,9 +1490,8 @@ nest_fts (struct var_array2 va0, struct var_array2 va1)
                         : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
                         : SIZE_MAX),
           .n = n,
-          .summaries = summary_src->summaries,
-          .n_summaries = summary_src->n_summaries,
-          .summary_var = summary_src->summary_var,
+          .cell_summaries = summary_src->cell_summaries,
+          .total_summaries = summary_src->total_summaries,
         };
       }
   var_array2_uninit (&va0);
@@ -1502,11 +1533,12 @@ enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
         .n = 1,
         .scale_idx = a->scale ? 0 : SIZE_MAX,
       };
-      if (a->n_summaries || a->scale)
+      if (a->cell_summaries.n || a->scale)
         {
-          va->summaries = a->summaries;
-          va->n_summaries = a->n_summaries;
-          va->summary_var = a->var.var;
+          va->cell_summaries = a->cell_summaries;
+          va->total_summaries = a->total_summaries;
+          va->cell_summaries.var = a->var.var;
+          va->total_summaries.var = a->var.var;
         }
       return (struct var_array2) { .vas = va, .n = 1 };
 
@@ -2180,6 +2212,7 @@ ctables_cell_insert__ (struct ctables_table *t, const struct ccase *c,
   const struct var_array *ss = &t->vaas[t->summary_axis].vas[ix[t->summary_axis]];
 
   size_t hash = 0;
+  bool total = false;
   for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
     {
       const struct var_array *va = &t->vaas[a].vas[ix[a]];
@@ -2193,6 +2226,8 @@ ctables_cell_insert__ (struct ctables_table *t, const struct ccase *c,
                 && cats[a][i]->type != CCT_HSUBTOTAL)
               hash = value_hash (case_data (c, va->vars[i]),
                                  var_get_width (va->vars[i]), hash);
+            else
+              total = true;
           }
     }
 
@@ -2223,6 +2258,7 @@ ctables_cell_insert__ (struct ctables_table *t, const struct ccase *c,
 
   cell = xmalloc (sizeof *cell);
   cell->hide = false;
+  cell->total = total;
   for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
     {
       const struct var_array *va = &t->vaas[a].vas[ix[a]];
@@ -2244,17 +2280,22 @@ ctables_cell_insert__ (struct ctables_table *t, const struct ccase *c,
                        var_get_width (va->vars[i]));
         }
     }
-  cell->summaries = xmalloc (ss->n_summaries * sizeof *cell->summaries);
-  for (size_t i = 0; i < ss->n_summaries; i++)
-    ctables_summary_init (&cell->summaries[i], &ss->summaries[i]);
+
+  {
+    const struct ctables_summary_spec_set *sss = &ss->cell_summaries;
+    cell->summaries = xmalloc (sss->n * sizeof *cell->summaries);
+    for (size_t i = 0; i < sss->n; i++)
+      ctables_summary_init (&cell->summaries[i], &sss->summaries[i]);
+  }
   for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
     cell->domains[dt] = ctables_domain_insert (t, cell, dt);
   hmap_insert (&t->cells, &cell->node, hash);
 
-summarize:
-  for (size_t i = 0; i < ss->n_summaries; i++)
-    ctables_summary_add (&cell->summaries[i], &ss->summaries[i], ss->summary_var,
-                         case_data (c, ss->summary_var), weight);
+summarize: ;
+  const struct ctables_summary_spec_set *sss = &ss->cell_summaries;
+  for (size_t i = 0; i < sss->n; i++)
+    ctables_summary_add (&cell->summaries[i], &sss->summaries[i], sss->var,
+                         case_data (c, sss->var), weight);
   for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
     cell->domains[dt]->valid += weight;
 }
@@ -2422,22 +2463,25 @@ ctables_execute (struct dataset *ds, struct ctables *ct)
       for (size_t i = 0; i < t->vaas[t->summary_axis].n; i++)
         {
           struct var_array *va = &t->vaas[t->summary_axis].vas[i];
-          if (!va->n_summaries)
+          if (!va->cell_summaries.n)
             {
-              va->summaries = xmalloc (sizeof *va->summaries);
-              va->n_summaries = 1;
+              struct ctables_summary_spec_set *css = &va->cell_summaries;
+              css->summaries = xmalloc (sizeof *css->summaries);
+              css->n = 1;
 
               enum ctables_summary_function function
-                = va->summary_var ? CTSF_MEAN : CTSF_COUNT;
-              struct ctables_var var = { .is_mrset = false, .var = va->summary_var };
+                = css->var ? CTSF_MEAN : CTSF_COUNT;
+              struct ctables_var var = { .is_mrset = false, .var = css->var };
 
-              *va->summaries = (struct ctables_summary_spec) {
+              *css->summaries = (struct ctables_summary_spec) {
                 .function = function,
                 .format = ctables_summary_default_format (function, &var),
                 .label = ctables_summary_default_label (function, 0),
               };
-              if (!va->summary_var)
-                va->summary_var = va->vars[0];
+              if (!css->var)
+                css->var = va->vars[0];
+
+              va->total_summaries = va->cell_summaries;
             }
         }
     }
@@ -2579,10 +2623,10 @@ ctables_execute (struct dataset *ds, struct ctables *ct)
                         {
                           if (label)
                             parent = pivot_category_create_group__ (parent, label);
-                          for (size_t m = 0; m < va->n_summaries; m++)
+                          for (size_t m = 0; m < va->cell_summaries.n; m++)
                             {
                               int leaf = pivot_category_create_leaf (
-                                parent, pivot_value_new_text (va->summaries[m].label));
+                                parent, pivot_value_new_text (va->cell_summaries.summaries[m].label));
                               if (m == 0)
                                 prev_leaf = leaf;
                             }
@@ -2619,8 +2663,8 @@ ctables_execute (struct dataset *ds, struct ctables *ct)
           if (cell->hide)
             continue;
 
-          const struct var_array *ss = &t->vaas[t->summary_axis].vas[cell->axes[t->summary_axis].vaa_idx];
-          for (size_t j = 0; j < ss->n_summaries; j++)
+          const struct ctables_summary_spec_set *sss = &t->vaas[t->summary_axis].vas[cell->axes[t->summary_axis].vaa_idx].cell_summaries;
+          for (size_t j = 0; j < sss->n; j++)
             {
               size_t dindexes[3];
               size_t n_dindexes = 0;
@@ -2634,9 +2678,9 @@ ctables_execute (struct dataset *ds, struct ctables *ct)
                     dindexes[n_dindexes++] = leaf;
                   }
 
-              double d = ctables_summary_value (cell, &cell->summaries[j], &ss->summaries[j]);
+              double d = ctables_summary_value (cell, &cell->summaries[j], &sss->summaries[j]);
               struct pivot_value *value = pivot_value_new_number (d);
-              value->numeric.format = ss->summaries[j].format;
+              value->numeric.format = sss->summaries[j].format;
               pivot_table_put (pt, dindexes, n_dindexes, value);
             }
         }