CTABLES work on distinguishing scale variables in summaries
[pspp] / src / language / stats / ctables.c
index 418496df928711a23b063690ff2c0cf876fffaf5..f7b2691ac0a2bc3a526c79cbbba2b7bb1668b619 100644 (file)
@@ -115,8 +115,8 @@ enum ctables_vlabel
     S(CTSF_LAYERPCT_SUM, "LAYERPCT.SUM", N_("Layer Sum %"), CTF_PERCENT, CTFA_SCALE) \
     S(CTSF_LAYERROWPCT_SUM, "LAYERROWPCT.SUM", N_("Layer Row Sum %"), CTF_PERCENT, CTFA_SCALE) \
     S(CTSF_LAYERCOLPCT_SUM, "LAYERCOLPCT.SUM", N_("Layer Column Sum %"), CTF_PERCENT, CTFA_SCALE) \
-                                                                        \
-    /* Multiple response sets. */                                       \
+
+#if 0         /* Multiple response sets not yet implemented. */
   S(CTSF_RESPONSES, "RESPONSES", N_("Responses"), CTF_COUNT, CTFA_MRSETS) \
     S(CTSF_ROWPCT_RESPONSES, "ROWPCT.RESPONSES", N_("Row Responses %"), CTF_PERCENT, CTFA_MRSETS) \
     S(CTSF_COLPCT_RESPONSES, "COLPCT.RESPONSES", N_("Column Responses %"), CTF_PERCENT, CTFA_MRSETS) \
@@ -139,6 +139,7 @@ enum ctables_vlabel
     S(CTSF_LAYERPCT_COUNT_RESPONSES, "LAYERPCT.COUNT.RESPONSES", N_("Layer Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
     S(CTSF_LAYERROWPCT_COUNT_RESPONSES, "LAYERROWPCT.COUNT.RESPONSES", N_("Layer Row Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS) \
     S(CTSF_LAYERCOLPCT_COUNT_RESPONSES, "LAYERCOLPCT.RESPONSES.COUNT", N_("Layer Column Count % (Base: Responses)"), CTF_PERCENT, CTFA_MRSETS)
+#endif
 
 enum ctables_summary_function
   {
@@ -336,7 +337,7 @@ struct ctables_summary_spec_set
     size_t n;
     size_t allocated;
 
-    struct variable *var;
+    struct variable *scale_var;
   };
 
 static void ctables_summary_spec_set_clone (struct ctables_summary_spec_set *,
@@ -729,7 +730,7 @@ ctables_summary_spec_set_clone (struct ctables_summary_spec_set *dst,
     .specs = specs,
     .n = src->n,
     .allocated = src->n,
-    .var = src->var
+    .scale_var = src->scale_var
   };
 }
 
@@ -788,32 +789,59 @@ ctables_function_availability (enum ctables_summary_function f)
 static bool
 ctables_summary_function_is_count (enum ctables_summary_function f)
 {
-  static const bool is_count[N_CTSF_FUNCTIONS] = {
-    [CTSF_COUNT] = true,
-    [CTSF_ECOUNT] = true,
-    [CTSF_ROWPCT_COUNT] = true,
-    [CTSF_COLPCT_COUNT] = true,
-    [CTSF_TABLEPCT_COUNT] = true,
-    [CTSF_SUBTABLEPCT_COUNT] = true,
-    [CTSF_LAYERPCT_COUNT] = true,
-    [CTSF_LAYERROWPCT_COUNT] = true,
-    [CTSF_LAYERCOLPCT_COUNT] = true,
-    [CTSF_ROWPCT_RESPONSES_COUNT] = true,
-    [CTSF_COLPCT_RESPONSES_COUNT] = true,
-    [CTSF_TABLEPCT_RESPONSES_COUNT] = true,
-    [CTSF_SUBTABLEPCT_RESPONSES_COUNT] = true,
-    [CTSF_LAYERPCT_RESPONSES_COUNT] = true,
-    [CTSF_LAYERROWPCT_RESPONSES_COUNT] = true,
-    [CTSF_LAYERCOLPCT_RESPONSES_COUNT] = true,
-    [CTSF_ROWPCT_COUNT_RESPONSES] = true,
-    [CTSF_COLPCT_COUNT_RESPONSES] = true,
-    [CTSF_TABLEPCT_COUNT_RESPONSES] = true,
-    [CTSF_SUBTABLEPCT_COUNT_RESPONSES] = true,
-    [CTSF_LAYERPCT_COUNT_RESPONSES] = true,
-    [CTSF_LAYERROWPCT_COUNT_RESPONSES] = true,
-    [CTSF_LAYERCOLPCT_COUNT_RESPONSES] = true,
-  };
-  return is_count[f];
+  switch (f)  /* No default label: each function below is classified explicitly. */
+    {
+    case CTSF_COUNT:
+    case CTSF_ECOUNT:
+    case CTSF_ROWPCT_COUNT:
+    case CTSF_COLPCT_COUNT:
+    case CTSF_TABLEPCT_COUNT:
+    case CTSF_SUBTABLEPCT_COUNT:
+    case CTSF_LAYERPCT_COUNT:
+    case CTSF_LAYERROWPCT_COUNT:
+    case CTSF_LAYERCOLPCT_COUNT:
+      return true;  /* COUNT, ECOUNT, and the *PCT_COUNT functions are counts. */
+
+    case CTSF_ROWPCT_VALIDN:
+    case CTSF_COLPCT_VALIDN:
+    case CTSF_TABLEPCT_VALIDN:
+    case CTSF_SUBTABLEPCT_VALIDN:
+    case CTSF_LAYERPCT_VALIDN:
+    case CTSF_LAYERROWPCT_VALIDN:
+    case CTSF_LAYERCOLPCT_VALIDN:
+    case CTSF_ROWPCT_TOTALN:
+    case CTSF_COLPCT_TOTALN:
+    case CTSF_TABLEPCT_TOTALN:
+    case CTSF_SUBTABLEPCT_TOTALN:
+    case CTSF_LAYERPCT_TOTALN:
+    case CTSF_LAYERROWPCT_TOTALN:
+    case CTSF_LAYERCOLPCT_TOTALN:
+    case CTSF_MAXIMUM:
+    case CTSF_MEAN:
+    case CTSF_MEDIAN:
+    case CTSF_MINIMUM:
+    case CTSF_MISSING:
+    case CTSF_MODE:
+    case CTSF_PTILE:
+    case CTSF_RANGE:
+    case CTSF_SEMEAN:
+    case CTSF_STDDEV:
+    case CTSF_SUM:
+    case CSTF_TOTALN:  /* NOTE(review): spelled "CSTF", not "CTSF"; ctables_summary_add uses the same spelling, so presumably it matches the enum declaration -- confirm. */
+    case CTSF_ETOTALN:
+    case CTSF_VALIDN:
+    case CTSF_EVALIDN:
+    case CTSF_VARIANCE:
+    case CTSF_ROWPCT_SUM:
+    case CTSF_COLPCT_SUM:
+    case CTSF_TABLEPCT_SUM:
+    case CTSF_SUBTABLEPCT_SUM:
+    case CTSF_LAYERPCT_SUM:
+    case CTSF_LAYERROWPCT_SUM:
+    case CTSF_LAYERCOLPCT_SUM:
+      return false;  /* None of the functions in this group is a count. */
+  }
+  NOT_REACHED ();  /* Only reached if F matched none of the cases above. */
 }
 
 
@@ -1947,9 +1975,9 @@ nest_fts (struct ctables_stack s0, struct ctables_stack s1)
         assert (n == allocate);
 
         const struct ctables_nest *summary_src;
-        if (!a->specs[CSV_CELL].var)
+        if (!a->specs[CSV_CELL].n && !a->specs[CSV_CELL].scale_var)
           summary_src = b;
-        else if (!b->specs[CSV_CELL].var)
+        else if (!b->specs[CSV_CELL].n && !b->specs[CSV_CELL].scale_var)
           summary_src = a;
         else
           NOT_REACHED ();
@@ -2008,7 +2036,7 @@ enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
         for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
           {
             ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
-            nest->specs[sv].var = a->var.var;
+            nest->specs[sv].scale_var = a->var.var;
           }
       return (struct ctables_stack) { .nests = nest, .n = 1 };
 
@@ -2130,30 +2158,6 @@ ctables_summary_init (union ctables_summary *s,
         s->ovalue = SYSMIS;
       }
       break;
-
-    case CTSF_RESPONSES:
-    case CTSF_ROWPCT_RESPONSES:
-    case CTSF_COLPCT_RESPONSES:
-    case CTSF_TABLEPCT_RESPONSES:
-    case CTSF_SUBTABLEPCT_RESPONSES:
-    case CTSF_LAYERPCT_RESPONSES:
-    case CTSF_LAYERROWPCT_RESPONSES:
-    case CTSF_LAYERCOLPCT_RESPONSES:
-    case CTSF_ROWPCT_RESPONSES_COUNT:
-    case CTSF_COLPCT_RESPONSES_COUNT:
-    case CTSF_TABLEPCT_RESPONSES_COUNT:
-    case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
-    case CTSF_LAYERPCT_RESPONSES_COUNT:
-    case CTSF_LAYERROWPCT_RESPONSES_COUNT:
-    case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
-    case CTSF_ROWPCT_COUNT_RESPONSES:
-    case CTSF_COLPCT_COUNT_RESPONSES:
-    case CTSF_TABLEPCT_COUNT_RESPONSES:
-    case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
-    case CTSF_LAYERPCT_COUNT_RESPONSES:
-    case CTSF_LAYERROWPCT_COUNT_RESPONSES:
-    case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
-      NOT_REACHED ();
     }
 }
 
@@ -2218,45 +2222,40 @@ ctables_summary_uninit (union ctables_summary *s,
     case CTSF_PTILE:
       casewriter_destroy (s->writer);
       break;
-
-    case CTSF_RESPONSES:
-    case CTSF_ROWPCT_RESPONSES:
-    case CTSF_COLPCT_RESPONSES:
-    case CTSF_TABLEPCT_RESPONSES:
-    case CTSF_SUBTABLEPCT_RESPONSES:
-    case CTSF_LAYERPCT_RESPONSES:
-    case CTSF_LAYERROWPCT_RESPONSES:
-    case CTSF_LAYERCOLPCT_RESPONSES:
-    case CTSF_ROWPCT_RESPONSES_COUNT:
-    case CTSF_COLPCT_RESPONSES_COUNT:
-    case CTSF_TABLEPCT_RESPONSES_COUNT:
-    case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
-    case CTSF_LAYERPCT_RESPONSES_COUNT:
-    case CTSF_LAYERROWPCT_RESPONSES_COUNT:
-    case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
-    case CTSF_ROWPCT_COUNT_RESPONSES:
-    case CTSF_COLPCT_COUNT_RESPONSES:
-    case CTSF_TABLEPCT_COUNT_RESPONSES:
-    case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
-    case CTSF_LAYERPCT_COUNT_RESPONSES:
-    case CTSF_LAYERROWPCT_COUNT_RESPONSES:
-    case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
-      NOT_REACHED ();
     }
 }
 
 static void
 ctables_summary_add (union ctables_summary *s,
                      const struct ctables_summary_spec *ss,
-                     const struct variable *var, const union value *value,
+                     const struct variable *scale_var, const union value *value,
                      double d_weight, double e_weight)
 {
+  /* To determine whether a case is included in a given table for a particular
+     kind of summary, consider the following charts for each variable in the
+     table.  Only if "yes" appears for every variable for the summary is the
+     case counted.
+
+     Categorical variables:                    VALIDN   COUNT   TOTALN
+       Valid values in included categories       yes     yes      yes
+       Missing values in included categories     ---     yes      yes
+       Missing values in excluded categories     ---     ---      yes
+       Valid values in excluded categories       ---     ---      ---
+
+     Scale variables:                          VALIDN   COUNT   TOTALN
+       Valid value                               yes     yes      yes
+       Missing value                             ---     yes      yes
+
+     Missing values include both user- and system-missing.  (The system-missing
+     value is always in an excluded category.)
+  */
   switch (ss->function)
     {
     case CTSF_COUNT:
+
     case CSTF_TOTALN:
     case CTSF_VALIDN:
-      if (var_is_value_missing (var, value))
+      if (scale_var && var_is_value_missing (scale_var, value))
         s->missing += d_weight;
       else
         s->valid += d_weight;
@@ -2287,7 +2286,7 @@ ctables_summary_add (union ctables_summary *s,
     case CTSF_MISSING:
     case CTSF_ETOTALN:
     case CTSF_EVALIDN:
-      if (var_is_value_missing (var, value))
+      if (scale_var && var_is_value_missing (scale_var, value))
         s->missing += e_weight;
       else
         s->valid += e_weight;
@@ -2296,9 +2295,9 @@ ctables_summary_add (union ctables_summary *s,
     case CTSF_MAXIMUM:
     case CTSF_MINIMUM:
     case CTSF_RANGE:
-      if (!var_is_value_missing (var, value))
+      if (!var_is_value_missing (scale_var, value))
         {
-          assert (!var_is_alpha (var)); /* XXX? */
+          assert (!var_is_alpha (scale_var)); /* XXX? */
           if (s->min == SYSMIS || value->f < s->min)
             s->min = value->f;
           if (s->max == SYSMIS || value->f > s->max)
@@ -2318,14 +2317,14 @@ ctables_summary_add (union ctables_summary *s,
     case CTSF_LAYERPCT_SUM:
     case CTSF_LAYERROWPCT_SUM:
     case CTSF_LAYERCOLPCT_SUM:
-      if (!var_is_value_missing (var, value))
+      if (!var_is_value_missing (scale_var, value))
         moments1_add (s->moments, value->f, e_weight);
       break;
 
     case CTSF_MEDIAN:
     case CTSF_MODE:
     case CTSF_PTILE:
-      if (var_is_value_missing (var, value))
+      if (var_is_value_missing (scale_var, value))
         {
           s->ovalid += e_weight;
 
@@ -2335,30 +2334,6 @@ ctables_summary_add (union ctables_summary *s,
           casewriter_write (s->writer, c);
         }
       break;
-
-    case CTSF_RESPONSES:
-    case CTSF_ROWPCT_RESPONSES:
-    case CTSF_COLPCT_RESPONSES:
-    case CTSF_TABLEPCT_RESPONSES:
-    case CTSF_SUBTABLEPCT_RESPONSES:
-    case CTSF_LAYERPCT_RESPONSES:
-    case CTSF_LAYERROWPCT_RESPONSES:
-    case CTSF_LAYERCOLPCT_RESPONSES:
-    case CTSF_ROWPCT_RESPONSES_COUNT:
-    case CTSF_COLPCT_RESPONSES_COUNT:
-    case CTSF_TABLEPCT_RESPONSES_COUNT:
-    case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
-    case CTSF_LAYERPCT_RESPONSES_COUNT:
-    case CTSF_LAYERROWPCT_RESPONSES_COUNT:
-    case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
-    case CTSF_ROWPCT_COUNT_RESPONSES:
-    case CTSF_COLPCT_COUNT_RESPONSES:
-    case CTSF_TABLEPCT_COUNT_RESPONSES:
-    case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
-    case CTSF_LAYERPCT_COUNT_RESPONSES:
-    case CTSF_LAYERROWPCT_COUNT_RESPONSES:
-    case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
-      NOT_REACHED ();
     }
 }
 
@@ -2385,67 +2360,45 @@ ctables_function_domain (enum ctables_summary_function function)
     case CTSF_MEDIAN:
     case CTSF_PTILE:
     case CTSF_MODE:
-    case CTSF_RESPONSES:
       NOT_REACHED ();
 
     case CTSF_COLPCT_COUNT:
-    case CTSF_COLPCT_COUNT_RESPONSES:
-    case CTSF_COLPCT_RESPONSES:
-    case CTSF_COLPCT_RESPONSES_COUNT:
     case CTSF_COLPCT_SUM:
     case CTSF_COLPCT_TOTALN:
     case CTSF_COLPCT_VALIDN:
       return CTDT_COL;
 
     case CTSF_LAYERCOLPCT_COUNT:
-    case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
-    case CTSF_LAYERCOLPCT_RESPONSES:
-    case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
     case CTSF_LAYERCOLPCT_SUM:
     case CTSF_LAYERCOLPCT_TOTALN:
     case CTSF_LAYERCOLPCT_VALIDN:
       return CTDT_LAYERCOL;
 
     case CTSF_LAYERPCT_COUNT:
-    case CTSF_LAYERPCT_COUNT_RESPONSES:
-    case CTSF_LAYERPCT_RESPONSES:
-    case CTSF_LAYERPCT_RESPONSES_COUNT:
     case CTSF_LAYERPCT_SUM:
     case CTSF_LAYERPCT_TOTALN:
     case CTSF_LAYERPCT_VALIDN:
       return CTDT_LAYER;
 
     case CTSF_LAYERROWPCT_COUNT:
-    case CTSF_LAYERROWPCT_COUNT_RESPONSES:
-    case CTSF_LAYERROWPCT_RESPONSES:
-    case CTSF_LAYERROWPCT_RESPONSES_COUNT:
     case CTSF_LAYERROWPCT_SUM:
     case CTSF_LAYERROWPCT_TOTALN:
     case CTSF_LAYERROWPCT_VALIDN:
       return CTDT_LAYERROW;
 
     case CTSF_ROWPCT_COUNT:
-    case CTSF_ROWPCT_COUNT_RESPONSES:
-    case CTSF_ROWPCT_RESPONSES:
-    case CTSF_ROWPCT_RESPONSES_COUNT:
     case CTSF_ROWPCT_SUM:
     case CTSF_ROWPCT_TOTALN:
     case CTSF_ROWPCT_VALIDN:
       return CTDT_ROW;
 
     case CTSF_SUBTABLEPCT_COUNT:
-    case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
-    case CTSF_SUBTABLEPCT_RESPONSES:
-    case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
     case CTSF_SUBTABLEPCT_SUM:
     case CTSF_SUBTABLEPCT_TOTALN:
     case CTSF_SUBTABLEPCT_VALIDN:
       return CTDT_SUBTABLE;
 
     case CTSF_TABLEPCT_COUNT:
-    case CTSF_TABLEPCT_COUNT_RESPONSES:
-    case CTSF_TABLEPCT_RESPONSES:
-    case CTSF_TABLEPCT_RESPONSES_COUNT:
     case CTSF_TABLEPCT_SUM:
     case CTSF_TABLEPCT_TOTALN:
     case CTSF_TABLEPCT_VALIDN:
@@ -2589,30 +2542,6 @@ ctables_summary_value (const struct ctables_cell *cell,
           statistic_destroy (&mode->parent.parent);
         }
       return s->ovalue;
-
-    case CTSF_RESPONSES:
-    case CTSF_ROWPCT_RESPONSES:
-    case CTSF_COLPCT_RESPONSES:
-    case CTSF_TABLEPCT_RESPONSES:
-    case CTSF_SUBTABLEPCT_RESPONSES:
-    case CTSF_LAYERPCT_RESPONSES:
-    case CTSF_LAYERROWPCT_RESPONSES:
-    case CTSF_LAYERCOLPCT_RESPONSES:
-    case CTSF_ROWPCT_RESPONSES_COUNT:
-    case CTSF_COLPCT_RESPONSES_COUNT:
-    case CTSF_TABLEPCT_RESPONSES_COUNT:
-    case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
-    case CTSF_LAYERPCT_RESPONSES_COUNT:
-    case CTSF_LAYERROWPCT_RESPONSES_COUNT:
-    case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
-    case CTSF_ROWPCT_COUNT_RESPONSES:
-    case CTSF_COLPCT_COUNT_RESPONSES:
-    case CTSF_TABLEPCT_COUNT_RESPONSES:
-    case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
-    case CTSF_LAYERPCT_COUNT_RESPONSES:
-    case CTSF_LAYERROWPCT_COUNT_RESPONSES:
-    case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
-      NOT_REACHED ();
     }
 
   NOT_REACHED ();
@@ -2917,8 +2846,12 @@ ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
 
   const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
   for (size_t i = 0; i < specs->n; i++)
-    ctables_summary_add (&cell->summaries[i], &specs->specs[i], specs->var,
-                         case_data (c, specs->var), d_weight, e_weight);
+    {
+      const struct variable *scale_var = specs->scale_var;
+      const union value *value = scale_var ? case_data (c, scale_var) : NULL;
+      ctables_summary_add (&cell->summaries[i], &specs->specs[i],
+                           scale_var, value, d_weight, e_weight);
+    }
   if (cell->contributes_to_domains)
     {
       for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
@@ -3877,16 +3810,14 @@ ctables_prepare_table (struct ctables_table *t)
           specs->n = 1;
 
           enum ctables_summary_function function
-            = specs->var ? CTSF_MEAN : CTSF_COUNT;
-          struct ctables_var var = { .is_mrset = false, .var = specs->var };
+            = specs->scale_var ? CTSF_MEAN : CTSF_COUNT;
+          struct ctables_var var = { .var = specs->scale_var };
 
           *specs->specs = (struct ctables_summary_spec) {
             .function = function,
             .format = ctables_summary_default_format (function, &var),
             .label = ctables_summary_default_label (function, 0),
           };
-          if (!specs->var)
-            specs->var = nest->vars[0];
 
           ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
                                           &nest->specs[CSV_CELL]);