simplify weighting
[pspp] / src / language / stats / ctables.c
index 7f5ac97f8bb9de9b0f2aac8a059aec589c4e481c..7e899d1b668a640355ead9e7ccf83fd8c340490f 100644 (file)
@@ -63,18 +63,32 @@ enum ctables_vlabel
     CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
   };
 
+enum ctables_weighting
+  {
+    CTW_EFFECTIVE,              /* Effective weight. */
+    CTW_DICTIONARY,             /* Dictionary weight. */
+    CTW_UNWEIGHTED              /* Unweighted. */
+#define N_CTWS 3                /* Number of weightings. */
+  };
+
 enum ctables_function_type
   {
-    /* A function that operates on data in a single cell.  The function does
-       not have an unweighted version. */
+    /* A function that operates on data in a single cell.  It operates on
+       effective weights.  It does not have an unweighted version. */
     CTFT_CELL,
 
-    /* A function that operates on data in a single cell.  The function has an
-       unweighted version. */
+    /* A function that operates on data in a single cell.  The function
+       operates on effective weights and has a U-prefixed unweighted
+       version. */
     CTFT_UCELL,
 
-    /* A function that operates on an area of cells.  The function has an
-       unweighted version. */
+    /* A function that operates on data in a single cell.  It operates on
+       dictionary weights and has a U-prefixed unweighted version and an
+       E-prefixed effective-weight version. */
+    CTFT_UECELL,
+
+    /* A function that operates on an area of cells.  It operates on effective
+       weights and has a U-prefixed unweighted version. */
     CTFT_AREA,
   };
 
@@ -113,8 +127,9 @@ struct ctables_function_info
     enum ctables_format format;
     enum ctables_function_availability availability;
 
-    bool may_be_unweighted;
-    bool is_area;
+    bool u_prefix;              /* Accepts a 'U' prefix (for unweighted)? */
+    bool e_prefix;              /* Accepts an 'E' prefix (for effective)? */
+    bool is_area;               /* Needs an area prefix. */
   };
 static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS] = {
 #define S(ENUM, NAME, TYPE, FORMAT, AVAILABILITY)                       \
@@ -123,15 +138,14 @@ static const struct ctables_function_info ctables_function_info[N_CTSF_FUNCTIONS
     .type = TYPE,                                                       \
     .format = FORMAT,                                                   \
     .availability = AVAILABILITY,                                       \
-    .may_be_unweighted = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_AREA,   \
+    .u_prefix = (TYPE) == CTFT_UCELL || (TYPE) == CTFT_UECELL || (TYPE) == CTFT_AREA, \
+    .e_prefix = (TYPE) == CTFT_UECELL,                                  \
     .is_area = (TYPE) == CTFT_AREA                                      \
   },
 #include "ctables.inc"
 #undef S
 };
 
-static bool ctables_summary_function_is_count (enum ctables_summary_function);
-
 enum ctables_area_type
   {
     /* Within a section, where stacked variables divide one section from
@@ -169,22 +183,15 @@ struct ctables_area
     const struct ctables_cell *example;
 
     size_t sequence;
-    double d_valid;             /* Dictionary weight. */
-    double d_count;
-    double d_total;
-    double e_valid;             /* Effective weight */
-    double e_count;
-    double e_total;
-    double u_valid;             /* Unweighted. */
-    double u_count;
-    double u_total;
+    double count[N_CTWS];
+    double valid[N_CTWS];
+    double total[N_CTWS];
     struct ctables_sum *sums;
   };
 
 struct ctables_sum
   {
-    double e_sum;
-    double u_sum;
+    double sum[N_CTWS];         /* Indexed by enum ctables_weighting. */
   };
 
 enum ctables_summary_variant
@@ -373,6 +380,7 @@ struct ctables_nest
     struct variable **vars;
     size_t n;
     size_t scale_idx;
+    size_t summary_idx;
     size_t *areas[N_CTATS];
     size_t n_areas[N_CTATS];
     size_t group_head;
@@ -542,7 +550,7 @@ struct ctables_category
 
             /* CCT_FUNCTION. */
             enum ctables_summary_function sort_function;
-            bool weighted;
+            enum ctables_weighting weighting;
             enum ctables_area_type area;
             struct variable *sort_var;
             double percentile;
@@ -745,7 +753,7 @@ struct ctables_summary_spec
        cell, it must be 0).  For CTSF_PTILE only, 'percentile' is the
        percentile between 0 and 100 (for other functions it must be 0). */
     enum ctables_summary_function function;
-    bool weighted;
+    enum ctables_weighting weighting;
     enum ctables_area_type calc_area;
     double percentile;          /* CTSF_PTILE only. */
 
@@ -855,23 +863,18 @@ ctables_function_availability (enum ctables_summary_function f)
   return availability[f];
 }
 
-static bool
-ctables_summary_function_is_count (enum ctables_summary_function f)
-{
-  return f == CTSF_COUNT || f == CTSF_ECOUNT;
-}
-
 static bool
 parse_ctables_summary_function (struct lexer *lexer,
                                 enum ctables_summary_function *function,
-                                bool *weighted,
+                                enum ctables_weighting *weighting,
                                 enum ctables_area_type *area)
 {
   if (!lex_force_id (lexer))
     return false;
 
   struct substring name = lex_tokss (lexer);
-  *weighted = !(ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u'));
+  bool u = ss_match_byte (&name, 'U') || ss_match_byte (&name, 'u');
+  bool e = !u && (ss_match_byte (&name, 'E') || ss_match_byte (&name, 'e'));
 
   bool has_area = false;
   *area = 0;
@@ -885,6 +888,7 @@ parse_ctables_summary_function (struct lexer *lexer,
           {
             /* Special case where .COUNT suffix is omitted. */
             *function = CTSF_areaPCT_COUNT;
+            *weighting = CTW_EFFECTIVE;
             lex_get (lexer);
             return true;
           }
@@ -897,11 +901,13 @@ parse_ctables_summary_function (struct lexer *lexer,
       if (ss_equals_case (cfi->basename, name))
         {
           *function = f;
-          if (!*weighted && !cfi->may_be_unweighted)
-            break;
-          if (has_area != cfi->is_area)
+          if ((u && !cfi->u_prefix) || (e && !cfi->e_prefix) || (has_area != cfi->is_area))
             break;
 
+          *weighting = (e ? CTW_EFFECTIVE
+                        : u ? CTW_UNWEIGHTED
+                        : cfi->e_prefix ? CTW_DICTIONARY
+                        : CTW_EFFECTIVE);
           lex_get (lexer);
           return true;
         }
@@ -985,15 +991,15 @@ ctables_summary_default_format (enum ctables_summary_function function,
 static const char *
 ctables_summary_label__ (const struct ctables_summary_spec *spec)
 {
-  bool w = spec->weighted;
+  bool w = spec->weighting != CTW_UNWEIGHTED;
+  bool d = spec->weighting == CTW_DICTIONARY;
   enum ctables_area_type a = spec->user_area;
   switch (spec->function)
     {
     case CTSF_COUNT:
-      return w ? N_("Count") : N_("Unweighted Count");
-
-    case CTSF_ECOUNT:
-      return N_("Adjusted Count");
+      return (d ? N_("Count")
+              : w ? N_("Adjusted Count")
+              : N_("Unweighted Count"));
 
     case CTSF_areaPCT_COUNT:
       switch (a)
@@ -1036,20 +1042,22 @@ ctables_summary_label__ (const struct ctables_summary_spec *spec)
 
     case CTSF_MAXIMUM: return N_("Maximum");
     case CTSF_MEAN: return w ? N_("Mean") : N_("Unweighted Mean");
-    case CTSF_MEDIAN: return N_("Median");
+    case CTSF_MEDIAN: return w ? N_("Median") : N_("Unweighted Median");
     case CTSF_MINIMUM: return N_("Minimum");
-    case CTSF_MISSING: return N_("Missing");
-    case CTSF_MODE: return N_("Mode");
+    case CTSF_MISSING: return w ? N_("Missing") : N_("Unweighted Missing");
+    case CTSF_MODE: return w ? N_("Mode") : N_("Unweighted Mode");
     case CTSF_PTILE: NOT_REACHED ();
     case CTSF_RANGE: return N_("Range");
-    case CTSF_SEMEAN: return N_("Std Error of Mean");
-    case CTSF_STDDEV: return N_("Std Deviation");
-    case CTSF_SUM: return N_("Sum");
-    case CTSF_TOTALN: return N_("Total N");
-    case CTSF_ETOTALN: return N_("Adjusted Total N");
-    case CTSF_VALIDN: return N_("Valid N");
-    case CTSF_EVALIDN: return N_("Adjusted Valid N");
-    case CTSF_VARIANCE: return N_("Variance");
+    case CTSF_SEMEAN: return w ? N_("Std Error of Mean") : N_("Unweighted Std Error of Mean");
+    case CTSF_STDDEV: return w ? N_("Std Deviation") : N_("Unweighted Std Deviation");
+    case CTSF_SUM: return w ? N_("Sum") : N_("Unweighted Sum");
+    case CTSF_TOTALN: return (d ? N_("Total N")
+                              : w ? N_("Adjusted Total N")
+                              : N_("Unweighted Total N"));
+    case CTSF_VALIDN: return (d ? N_("Valid N")
+                              : w ? N_("Adjusted Valid N")
+                              : N_("Unweighted Valid N"));
+    case CTSF_VARIANCE: return w ? N_("Variance") : N_("Unweighted Variance");
     case CTSF_areaPCT_SUM:
       switch (a)
         {
@@ -1089,7 +1097,7 @@ ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
       if (spec->function == CTSF_PTILE)
         {
           double p = spec->percentile;
-          char *s = (spec->weighted
+          char *s = (spec->weighting != CTW_UNWEIGHTED
                      ? xasprintf (_("Percentile %.2f"), p)
                      : xasprintf (_("Unweighted Percentile %.2f"), p));
           return pivot_value_new_user_text_nocopy (s);
@@ -1119,13 +1127,16 @@ ctables_summary_label (const struct ctables_summary_spec *spec, double cilevel)
 
 static const char *
 ctables_summary_function_name (enum ctables_summary_function function,
-                               bool weighted,
+                               enum ctables_weighting weighting,
                                enum ctables_area_type area,
                                char *buffer, size_t bufsize)
 {
   const struct ctables_function_info *cfi = &ctables_function_info[function];
   snprintf (buffer, bufsize, "%s%s%s",
-            weighted ? "" : "U",
+            (weighting == CTW_UNWEIGHTED ? "U"
+             : weighting == CTW_DICTIONARY ? ""
+             : cfi->e_prefix ? "E"
+             : ""),
             cfi->is_area ? ctables_area_type_name[area] : "",
             cfi->basename.string);
   return buffer;
@@ -1133,7 +1144,8 @@ ctables_summary_function_name (enum ctables_summary_function function,
 
 static bool
 add_summary_spec (struct ctables_axis *axis,
-                  enum ctables_summary_function function, bool weighted,
+                  enum ctables_summary_function function,
+                  enum ctables_weighting weighting,
                   enum ctables_area_type area, double percentile,
                   const char *label, const struct fmt_spec *format,
                   bool is_ctables_format, const struct msg_location *loc,
@@ -1142,7 +1154,7 @@ add_summary_spec (struct ctables_axis *axis,
   if (axis->op == CTAO_VAR)
     {
       char function_name[128];
-      ctables_summary_function_name (function, weighted, area,
+      ctables_summary_function_name (function, weighting, area,
                                      function_name, sizeof function_name);
       const char *var_name = var_get_name (axis->var);
       switch (ctables_function_availability (function))
@@ -1180,7 +1192,7 @@ add_summary_spec (struct ctables_axis *axis,
       struct ctables_summary_spec *dst = &set->specs[set->n++];
       *dst = (struct ctables_summary_spec) {
         .function = function,
-        .weighted = weighted,
+        .weighting = weighting,
         .calc_area = area,
         .user_area = area,
         .percentile = percentile,
@@ -1194,7 +1206,7 @@ add_summary_spec (struct ctables_axis *axis,
   else
     {
       for (size_t i = 0; i < 2; i++)
-        if (!add_summary_spec (axis->subs[i], function, weighted, area,
+        if (!add_summary_spec (axis->subs[i], function, weighting, area,
                                percentile, label, format, is_ctables_format,
                                loc, sv))
           return false;
@@ -1312,9 +1324,9 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
 
       /* Parse function. */
       enum ctables_summary_function function;
-      bool weighted;
+      enum ctables_weighting weighting;
       enum ctables_area_type area;
-      if (!parse_ctables_summary_function (ctx->lexer, &function, &weighted,
+      if (!parse_ctables_summary_function (ctx->lexer, &function, &weighting,
                                            &area))
         goto error;
 
@@ -1356,7 +1368,7 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
 
       struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
                                                    lex_ofs (ctx->lexer) - 1);
-      add_summary_spec (sub, function, weighted, area, percentile, label,
+      add_summary_spec (sub, function, weighting, area, percentile, label,
                         formatp, is_ctables_format, loc, sv);
       free (label);
       msg_location_destroy (loc);
@@ -2106,7 +2118,7 @@ ctables_table_parse_categories (struct lexer *lexer, struct dictionary *dict,
             {
               cat.type = CCT_FUNCTION;
               if (!parse_ctables_summary_function (lexer, &cat.sort_function,
-                                                   &cat.weighted, &cat.area))
+                                                   &cat.weighting, &cat.area))
                 goto error;
 
               if (lex_match (lexer, T_LPAREN))
@@ -2421,6 +2433,9 @@ nest_fts (struct ctables_stack s0, struct ctables_stack s1)
           .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
                         : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
                         : SIZE_MAX),
+          .summary_idx = (a->summary_idx != SIZE_MAX ? a->summary_idx
+                          : b->summary_idx != SIZE_MAX ? a->n + b->summary_idx
+                          : SIZE_MAX),
           .n = n,
         };
         for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
@@ -2455,13 +2470,15 @@ var_fts (const struct ctables_axis *a)
   struct variable **vars = xmalloc (sizeof *vars);
   *vars = a->var;
 
+  bool is_summary = a->specs[CSV_CELL].n || a->scale;
   struct ctables_nest *nest = xmalloc (sizeof *nest);
   *nest = (struct ctables_nest) {
     .vars = vars,
     .n = 1,
     .scale_idx = a->scale ? 0 : SIZE_MAX,
+    .summary_idx = is_summary ? 0 : SIZE_MAX,
   };
-  if (a->specs[CSV_CELL].n || a->scale)
+  if (is_summary)
     for (enum ctables_summary_variant sv = 0; sv < N_CSVS; sv++)
       {
         ctables_summary_spec_set_clone (&nest->specs[sv], &a->specs[sv]);
@@ -2529,15 +2546,12 @@ ctables_summary_init (union ctables_summary *s,
   switch (ss->function)
     {
     case CTSF_COUNT:
-    case CTSF_ECOUNT:
     case CTSF_areaPCT_COUNT:
     case CTSF_areaPCT_VALIDN:
     case CTSF_areaPCT_TOTALN:
     case CTSF_MISSING:
     case CTSF_TOTALN:
-    case CTSF_ETOTALN:
     case CTSF_VALIDN:
-    case CTSF_EVALIDN:
       s->count = 0;
       break;
 
@@ -2551,11 +2565,14 @@ ctables_summary_init (union ctables_summary *s,
       break;
 
     case CTSF_MEAN:
+    case CTSF_SUM:
+    case CTSF_areaPCT_SUM:
+      s->moments = moments1_create (MOMENT_MEAN);
+      break;
+
     case CTSF_SEMEAN:
     case CTSF_STDDEV:
-    case CTSF_SUM:
     case CTSF_VARIANCE:
-    case CTSF_areaPCT_SUM:
       s->moments = moments1_create (MOMENT_VARIANCE);
       break;
 
@@ -2587,15 +2604,12 @@ ctables_summary_uninit (union ctables_summary *s,
   switch (ss->function)
     {
     case CTSF_COUNT:
-    case CTSF_ECOUNT:
     case CTSF_areaPCT_COUNT:
     case CTSF_areaPCT_VALIDN:
     case CTSF_areaPCT_TOTALN:
     case CTSF_MISSING:
     case CTSF_TOTALN:
-    case CTSF_ETOTALN:
     case CTSF_VALIDN:
-    case CTSF_EVALIDN:
       break;
 
     case CTSF_areaID:
@@ -2628,8 +2642,8 @@ ctables_summary_add (union ctables_summary *s,
                      const struct ctables_summary_spec *ss,
                      const struct variable *var, const union value *value,
                      bool is_scale, bool is_scale_missing,
-                     bool is_missing, bool excluded_missing,
-                     double d_weight, double e_weight)
+                     bool is_missing, bool is_included,
+                     double weight)
 {
   /* To determine whether a case is included in a given table for a particular
      kind of summary, consider the following charts for each variable in the
@@ -2652,22 +2666,35 @@ ctables_summary_add (union ctables_summary *s,
   switch (ss->function)
     {
     case CTSF_TOTALN:
+      s->count += weight;
+      break;
+
     case CTSF_areaPCT_TOTALN:
-      s->count += ss->weighted ? d_weight : 1.0;
+      s->count += weight;
       break;
 
     case CTSF_COUNT:
+      if (is_scale || is_included)
+        s->count += weight;
+      break;
+
     case CTSF_areaPCT_COUNT:
-      if (is_scale || !excluded_missing)
-        s->count += ss->weighted ? d_weight : 1.0;
+      if (is_scale || is_included)
+        s->count += weight;
       break;
 
     case CTSF_VALIDN:
+      if (is_scale
+          ? !is_scale_missing
+          : !is_missing)
+        s->count += weight;
+      break;
+
     case CTSF_areaPCT_VALIDN:
       if (is_scale
           ? !is_scale_missing
           : !is_missing)
-        s->count += ss->weighted ? d_weight : 1.0;
+        s->count += weight;
       break;
 
     case CTSF_areaID:
@@ -2677,23 +2704,7 @@ ctables_summary_add (union ctables_summary *s,
       if (is_scale
           ? is_scale_missing
           : is_missing)
-        s->count += ss->weighted ? d_weight : 1.0;
-      break;
-
-    case CTSF_ECOUNT:
-      if (is_scale || !excluded_missing)
-        s->count += e_weight;
-      break;
-
-    case CTSF_EVALIDN:
-      if (is_scale
-          ? !is_scale_missing
-          : !is_missing)
-        s->count += e_weight;
-      break;
-
-    case CTSF_ETOTALN:
-      s->count += e_weight;
+        s->count += weight;
       break;
 
     case CTSF_MAXIMUM:
@@ -2714,9 +2725,13 @@ ctables_summary_add (union ctables_summary *s,
     case CTSF_STDDEV:
     case CTSF_SUM:
     case CTSF_VARIANCE:
-    case CTSF_areaPCT_SUM:
       if (!is_scale_missing)
-        moments1_add (s->moments, value->f, ss->weighted ? e_weight : 1.0);
+        moments1_add (s->moments, value->f, weight);
+      break;
+
+    case CTSF_areaPCT_SUM:
+      if (!is_missing && !is_scale_missing)
+        moments1_add (s->moments, value->f, weight);
       break;
 
     case CTSF_MEDIAN:
@@ -2724,12 +2739,11 @@ ctables_summary_add (union ctables_summary *s,
     case CTSF_PTILE:
       if (!is_scale_missing)
         {
-          double w = ss->weighted ? e_weight : 1.0;
-          s->ovalid += w;
+          s->ovalid += weight;
 
           struct ccase *c = case_create (casewriter_get_proto (s->writer));
           *case_num_rw_idx (c, 0) = value->f;
-          *case_num_rw_idx (c, 1) = w;
+          *case_num_rw_idx (c, 1) = weight;
           casewriter_write (s->writer, c);
         }
       break;
@@ -2744,7 +2758,6 @@ ctables_summary_value (const struct ctables_cell *cell,
   switch (ss->function)
     {
     case CTSF_COUNT:
-    case CTSF_ECOUNT:
       return s->count;
 
     case CTSF_areaID:
@@ -2753,29 +2766,27 @@ ctables_summary_value (const struct ctables_cell *cell,
     case CTSF_areaPCT_COUNT:
       {
         const struct ctables_area *a = cell->areas[ss->calc_area];
-        double a_count = ss->weighted ? a->e_count : a->u_count;
+        double a_count = a->count[ss->weighting];
         return a_count ? s->count / a_count * 100 : SYSMIS;
       }
 
     case CTSF_areaPCT_VALIDN:
       {
         const struct ctables_area *a = cell->areas[ss->calc_area];
-        double a_valid = ss->weighted ? a->e_valid : a->u_valid;
+        double a_valid = a->valid[ss->weighting];
         return a_valid ? s->count / a_valid * 100 : SYSMIS;
       }
 
     case CTSF_areaPCT_TOTALN:
       {
         const struct ctables_area *a = cell->areas[ss->calc_area];
-        double a_total = ss->weighted ? a->e_total : a->u_total;
+        double a_total = a->total[ss->weighting];
         return a_total ? s->count / a_total * 100 : SYSMIS;
       }
 
     case CTSF_MISSING:
     case CTSF_TOTALN:
-    case CTSF_ETOTALN:
     case CTSF_VALIDN:
-    case CTSF_EVALIDN:
       return s->count;
 
     case CTSF_MAXIMUM:
@@ -2831,7 +2842,7 @@ ctables_summary_value (const struct ctables_cell *cell,
 
         const struct ctables_area *a = cell->areas[ss->calc_area];
         const struct ctables_sum *sum = &a->sums[ss->sum_var_idx];
-        double denom = ss->weighted ? sum->e_sum : sum->u_sum;
+        double denom = sum->sum[ss->weighting];
         return denom != 0 ? weight * mean / denom * 100 : SYSMIS;
       }
 
@@ -3280,15 +3291,9 @@ ctables_cell_insert__ (struct ctables_section *s, const struct ccase *c,
 }
 
 static bool
-is_scale_missing (const struct ctables_summary_spec_set *specs,
-                  const struct ccase *c)
+is_listwise_missing (const struct ctables_summary_spec_set *specs,
+                     const struct ccase *c)
 {
-  if (!specs->is_scale)
-    return false;
-
-  if (var_is_num_missing (specs->var, case_num (c, specs->var)))
-    return true;
-
   for (size_t i = 0; i < specs->n_listwise_vars; i++)
     {
       const struct variable *var = specs->listwise_vars[i];
@@ -3299,41 +3304,42 @@ is_scale_missing (const struct ctables_summary_spec_set *specs,
   return false;
 }
 
+static void
+add_weight (double dst[N_CTWS], const double src[N_CTWS])
+{
+  for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
+    dst[wt] += src[wt];
+}
+
 static void
 ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
                     const struct ctables_category *cats[PIVOT_N_AXES][10],
-                    bool is_missing, bool excluded_missing,
-                    double d_weight, double e_weight)
+                    bool is_included, double weight[N_CTWS])
 {
   struct ctables_cell *cell = ctables_cell_insert__ (s, c, cats);
   const struct ctables_nest *ss = s->nests[s->table->summary_axis];
 
   const struct ctables_summary_spec_set *specs = &ss->specs[cell->sv];
+  const union value *value = case_data (c, specs->var);
+  bool is_missing = var_is_value_missing (specs->var, value);
+  bool scale_missing = specs->is_scale && (is_missing || is_listwise_missing (specs, c));
 
-  bool scale_missing = is_scale_missing (specs, c);
   for (size_t i = 0; i < specs->n; i++)
-    ctables_summary_add (&cell->summaries[i], &specs->specs[i],
-                         specs->var, case_data (c, specs->var), specs->is_scale,
-                         scale_missing, is_missing, excluded_missing,
-                         d_weight, e_weight);
+     ctables_summary_add (&cell->summaries[i], &specs->specs[i],
+                          specs->var, value, specs->is_scale,
+                          scale_missing, is_missing, is_included,
+                          weight[specs->specs[i].weighting]);
   for (enum ctables_area_type at = 0; at < N_CTATS; at++)
     if (!(cell->omit_areas && (1u << at)))
       {
         struct ctables_area *a = cell->areas[at];
-        a->d_total += d_weight;
-        a->e_total += e_weight;
-        a->u_total += 1.0;
-        if (!excluded_missing)
-          {
-            a->d_count += d_weight;
-            a->e_count += e_weight;
-            a->u_count += 1.0;
-          }
+
+        add_weight (a->total, weight);
+        if (is_included)
+          add_weight (a->count, weight);
         if (!is_missing)
           {
-            a->d_valid += d_weight;
-            a->e_valid += e_weight;
-            a->u_count += 1.0;
+            add_weight (a->valid, weight);
 
             for (size_t i = 0; i < s->table->n_sum_vars; i++)
               {
@@ -3343,8 +3349,8 @@ ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
                 if (!var_is_num_missing (var, addend))
                   {
                     struct ctables_sum *sum = &a->sums[i];
-                    sum->e_sum += addend * e_weight;
-                    sum->u_sum += addend;
+                    for (enum ctables_weighting wt = 0; wt < N_CTWS; wt++)
+                      sum->sum[wt] += addend * weight[wt];
                   }
               }
           }
@@ -3354,8 +3360,7 @@ ctables_cell_add__ (struct ctables_section *s, const struct ccase *c,
 static void
 recurse_totals (struct ctables_section *s, const struct ccase *c,
                 const struct ctables_category *cats[PIVOT_N_AXES][10],
-                bool is_missing, bool excluded_missing,
-                double d_weight, double e_weight,
+                bool is_included, double weight[N_CTWS],
                 enum pivot_axis_type start_axis, size_t start_nest)
 {
   for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
@@ -3374,10 +3379,8 @@ recurse_totals (struct ctables_section *s, const struct ccase *c,
             {
               const struct ctables_category *save = cats[a][i];
               cats[a][i] = total;
-              ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
-                                  d_weight, e_weight);
-              recurse_totals (s, c, cats, is_missing, excluded_missing,
-                              d_weight, e_weight, a, i + 1);
+              ctables_cell_add__ (s, c, cats, is_included, weight);
+              recurse_totals (s, c, cats, is_included, weight, a, i + 1);
               cats[a][i] = save;
             }
         }
@@ -3388,8 +3391,7 @@ recurse_totals (struct ctables_section *s, const struct ccase *c,
 static void
 recurse_subtotals (struct ctables_section *s, const struct ccase *c,
                    const struct ctables_category *cats[PIVOT_N_AXES][10],
-                   bool is_missing, bool excluded_missing,
-                   double d_weight, double e_weight,
+                   bool is_included, double weight[N_CTWS],
                    enum pivot_axis_type start_axis, size_t start_nest)
 {
   for (enum pivot_axis_type a = start_axis; a < PIVOT_N_AXES; a++)
@@ -3404,10 +3406,8 @@ recurse_subtotals (struct ctables_section *s, const struct ccase *c,
           if (save->subtotal)
             {
               cats[a][i] = save->subtotal;
-              ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
-                                  d_weight, e_weight);
-              recurse_subtotals (s, c, cats, is_missing, excluded_missing,
-                                 d_weight, e_weight, a, i + 1);
+              ctables_cell_add__ (s, c, cats, is_included, weight);
+              recurse_subtotals (s, c, cats, is_included, weight, a, i + 1);
               cats[a][i] = save;
             }
         }
@@ -3435,53 +3435,43 @@ ctables_add_occurrence (const struct variable *var,
 }
 
 static void
-ctables_cell_insert (struct ctables_section *s,
-                     const struct ccase *c,
-                     double d_weight, double e_weight)
+ctables_cell_insert (struct ctables_section *s, const struct ccase *c,
+                     double weight[N_CTWS])
 {
   const struct ctables_category *cats[PIVOT_N_AXES][10]; /* XXX */
 
-  /* Does at least one categorical variable have a missing value in an included
-     or excluded category? */
-  bool is_missing = false;
-
-  /* Does at least one categorical variable have a missing value in an excluded
-     category? */
-  bool excluded_missing = false;
+  bool is_included = true;
 
   for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
     {
       const struct ctables_nest *nest = s->nests[a];
       for (size_t i = 0; i < nest->n; i++)
-        {
-          if (i == nest->scale_idx)
-            continue;
-
-          const struct variable *var = nest->vars[i];
-          const union value *value = case_data (c, var);
-
-          bool var_missing = var_is_value_missing (var, value) != 0;
-          if (var_missing)
-            is_missing = true;
-
-          cats[a][i] = ctables_categories_match (
-            s->table->categories[var_get_dict_index (var)], value, var);
-          if (!cats[a][i])
-            {
-              if (!var_missing)
-                return;
+        if (i != nest->scale_idx)
+          {
+            const struct variable *var = nest->vars[i];
+            const union value *value = case_data (c, var);
 
-              static const struct ctables_category cct_excluded_missing = {
-                .type = CCT_EXCLUDED_MISSING,
-                .hide = true,
-              };
-              cats[a][i] = &cct_excluded_missing;
-              excluded_missing = true;
-            }
+            cats[a][i] = ctables_categories_match (
+              s->table->categories[var_get_dict_index (var)], value, var);
+            if (!cats[a][i])
+              {
+                if (i != nest->summary_idx)
+                  return;
+
+                if (!var_is_value_missing (var, value))
+                  return;
+
+                static const struct ctables_category cct_excluded_missing = {
+                  .type = CCT_EXCLUDED_MISSING,
+                  .hide = true,
+                };
+                cats[a][i] = &cct_excluded_missing;
+                is_included = false;
+              }
         }
     }
 
-  if (!excluded_missing)
+  if (is_included)
     for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
       {
         const struct ctables_nest *nest = s->nests[a];
@@ -3494,16 +3484,9 @@ ctables_cell_insert (struct ctables_section *s,
             }
       }
 
-  ctables_cell_add__ (s, c, cats, is_missing, excluded_missing,
-                      d_weight, e_weight);
-
-  //if (!excluded_missing)
-    {
-      recurse_totals (s, c, cats, is_missing, excluded_missing,
-                      d_weight, e_weight, 0, 0);
-      recurse_subtotals (s, c, cats, is_missing, excluded_missing,
-                         d_weight, e_weight, 0, 0);
-    }
+  ctables_cell_add__ (s, c, cats, is_included, weight);
+  recurse_totals (s, c, cats, is_included, weight, 0, 0);
+  recurse_subtotals (s, c, cats, is_included, weight, 0, 0);
 }
 
 struct merge_item
@@ -3519,8 +3502,8 @@ merge_item_compare_3way (const struct merge_item *a, const struct merge_item *b)
   const struct ctables_summary_spec *bs = &b->set->specs[b->ofs];
   if (as->function != bs->function)
     return as->function > bs->function ? 1 : -1;
-  else if (as->weighted != bs->weighted)
-    return as->weighted > bs->weighted ? 1 : -1;
+  else if (as->weighting != bs->weighting)
+    return as->weighting > bs->weighting ? 1 : -1;
   else if (as->calc_area != bs->calc_area)
     return as->calc_area > bs->calc_area ? 1 : -1;
   else if (as->percentile != bs->percentile)
@@ -3726,8 +3709,8 @@ ctables_table_add_section (struct ctables_table *t, enum pivot_axis_type a,
             for (size_t i = 0; i < nest->n; i++)
               hmap_init (&s->occurrences[a][i]);
         }
-      for (size_t i = 0; i < N_CTATS; i++)
-        hmap_init (&s->areas[i]);
+      for (enum ctables_area_type at = 0; at < N_CTATS; at++)
+        hmap_init (&s->areas[at]);
     }
 }
 
@@ -4017,7 +4000,7 @@ ctables_cell_calculate_postcompute (const struct ctables_section *s,
         {
           const struct ctables_summary_spec *ss2 = &pc->specs->specs[i];
           if (ss->function == ss2->function
-              && ss->weighted == ss2->weighted
+              && ss->weighting == ss2->weighting
               && ss->calc_area == ss2->calc_area
               && ss->percentile == ss2->percentile)
             {
@@ -4465,7 +4448,7 @@ ctables_table_output (struct ctables *ct, struct ctables_table *t)
               struct pivot_value *value;
               if (ct->hide_threshold != 0
                   && d < ct->hide_threshold
-                  && ctables_summary_function_is_count (ss->function))
+                  && ss->function == CTSF_COUNT)
                 {
                   value = pivot_value_new_user_text_nocopy (
                     xasprintf ("<%d", ct->hide_threshold));
@@ -4665,98 +4648,76 @@ ctables_prepare_table (struct ctables_table *t)
                 nest->areas[at] = xmalloc (nest->n * sizeof *nest->areas[at]);
                 nest->n_areas[at] = 0;
 
-                bool add_vars = (at == CTAT_LAYER ? a == PIVOT_AXIS_LAYER
-                                 : at == CTAT_LAYERROW ? a != PIVOT_AXIS_COLUMN
-                                 : at == CTAT_LAYERCOL ? a != PIVOT_AXIS_ROW
-                                 : at == CTAT_TABLE ? false
-                                 : true);
-                if (add_vars)
-                  for (size_t k = 0; k < nest->n; k++)
-                    {
-                      if (k == nest->scale_idx)
-                        continue;
-                      nest->areas[at][nest->n_areas[at]++] = k;
-                    }
-                else if ((at == CTAT_LAYERCOL && a == PIVOT_AXIS_ROW && t->label_axis[a] != a)
-                         || (at == CTAT_LAYERROW && a == PIVOT_AXIS_COLUMN && t->label_axis[a] != a)
-                         || (at == CTAT_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER))
-                  {
-                    for (size_t k = nest->n - 1; k < nest->n; k--)
-                      {
-                        if (k == nest->scale_idx)
-                          continue;
-                        nest->areas[at][nest->n_areas[at]++] = k;
-                        break;
-                      }
-                    continue;
-                  }
-
-                size_t n_drop;
-                bool drop_inner = false;
-                if (at == CTAT_SUBTABLE
-                    && t->clabels_from_axis != PIVOT_AXIS_LAYER
-                    && t->clabels_to_axis != PIVOT_AXIS_LAYER)
-                  n_drop = a == t->clabels_from_axis ? 2 : 0;
-                else if (at == CTAT_SUBTABLE && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_LAYER)
-                  {
-                    if (a == PIVOT_AXIS_COLUMN)
-                      n_drop = 1;
-                    else
-                      {
-                        if (a == PIVOT_AXIS_ROW)
-                          drop_inner = true;
-                        n_drop = 0;
-                      }
-                  }
-                else if (at == CTAT_SUBTABLE && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_LAYER)
-                  {
-                    if (a == PIVOT_AXIS_ROW)
-                      n_drop = 1;
-                    else
-                      {
-                        if (a == PIVOT_AXIS_COLUMN)
-                          drop_inner = true;
-                        n_drop = 0;
-                      }
-                  }
-                else if (at == CTAT_ROW && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN)
-                  n_drop = 0;
-                else if (at == CTAT_COL && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN)
+                enum pivot_axis_type ata, atb;
+                if (at == CTAT_ROW || at == CTAT_LAYERROW)
                   {
-                    drop_inner = true;
-                    n_drop = 0;
-                  }
-                else if (at == CTAT_COL && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
-                  n_drop = 0;
-                else if (at == CTAT_ROW && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW)
-                  {
-                    drop_inner = true;
-                    n_drop = 0;
+                    ata = PIVOT_AXIS_ROW;
+                    atb = PIVOT_AXIS_COLUMN;
                   }
-                else if (at == CTAT_COL && a == PIVOT_AXIS_ROW && t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_LAYER)
+                else if (at == CTAT_COL || at == CTAT_LAYERCOL)
                   {
-                    drop_inner = true;
-                    n_drop = 0;
+                    ata = PIVOT_AXIS_COLUMN;
+                    atb = PIVOT_AXIS_ROW;
                   }
-                else if (at == CTAT_ROW && a == PIVOT_AXIS_COLUMN && t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_LAYER)
+
+                if (at == CTAT_LAYER
+                    ? a != PIVOT_AXIS_LAYER && t->label_axis[a] == PIVOT_AXIS_LAYER
+                    : at == CTAT_LAYERCOL || at == CTAT_LAYERROW
+                    ? a == atb && t->label_axis[a] != a
+                    : false)
                   {
-                    drop_inner = true;
-                    n_drop = 0;
+                    for (size_t k = nest->n - 1; k < nest->n; k--)
+                      if (k != nest->scale_idx)
+                        {
+                          nest->areas[at][nest->n_areas[at]++] = k;
+                          break;
+                        }
+                    continue;
                   }
-                else
+
+                if (at == CTAT_LAYER ? a != PIVOT_AXIS_LAYER
+                    : at == CTAT_LAYERROW || at == CTAT_LAYERCOL ? a == atb
+                    : at == CTAT_TABLE ? true
+                    : false)
+                  continue;
+
+                for (size_t k = 0; k < nest->n; k++)
+                  if (k != nest->scale_idx)
+                    nest->areas[at][nest->n_areas[at]++] = k;
+
+                int n_drop;
+                switch (at)
                   {
+                  case CTAT_SUBTABLE:
+#define L PIVOT_AXIS_LAYER
+                    n_drop = (t->clabels_from_axis == L ? a != L
+                              : t->clabels_to_axis == L ? (t->clabels_from_axis == a ? -1 : a != L)
+                              : t->clabels_from_axis == a ? 2
+                              : 0);
+#undef L
+                    break;
+
+                  case CTAT_LAYERROW:
+                  case CTAT_LAYERCOL:
+                    n_drop = a == ata && t->label_axis[ata] == atb;
+                    break;
+
+                  case CTAT_ROW:
+                  case CTAT_COL:
+                    n_drop = (a == ata ? t->label_axis[ata] == atb
+                              : a != atb ? 0
+                              : t->clabels_from_axis == atb ? -1
+                              : t->clabels_to_axis != atb ? 1
+                              : 0);
+                    break;
+
+                  case CTAT_LAYER:
+                  case CTAT_TABLE:
                     n_drop = 0;
-                    if (at == CTAT_SUBTABLE ? a != PIVOT_AXIS_LAYER
-                        : at == CTAT_ROW ? a == PIVOT_AXIS_COLUMN
-                        : at == CTAT_COL ? a == PIVOT_AXIS_ROW
-                        : false)
-                      n_drop++;
-                    if ((t->label_axis[PIVOT_AXIS_ROW] == PIVOT_AXIS_COLUMN && (at == CTAT_ROW || at == CTAT_LAYERROW) && a == PIVOT_AXIS_ROW)
-                        || (t->label_axis[PIVOT_AXIS_COLUMN] == PIVOT_AXIS_ROW && (at == CTAT_COL || at == CTAT_LAYERCOL) && a == PIVOT_AXIS_COLUMN))
-                    n_drop++;
+                    break;
                   }
 
-                if (drop_inner)
+                if (n_drop < 0)
                   {
                     size_t n = nest->n_areas[at];
                     if (n > 1)
@@ -4765,16 +4726,23 @@ ctables_prepare_table (struct ctables_table *t)
                         nest->n_areas[at]--;
                       }
                   }
-                for (size_t i = 0; i < n_drop; i++)
-                  if (nest->n_areas[at] > 0)
-                    nest->n_areas[at]--;
+                else
+                  {
+                    for (int i = 0; i < n_drop; i++)
+                      if (nest->n_areas[at] > 0)
+                        nest->n_areas[at]--;
+                  }
               }
           }
       }
     else
       {
         struct ctables_nest *nest = xmalloc (sizeof *nest);
-        *nest = (struct ctables_nest) { .n = 0 };
+        *nest = (struct ctables_nest) {
+          .n = 0,
+          .scale_idx = SIZE_MAX,
+          .summary_idx = SIZE_MAX
+        };
         t->stacks[a] = (struct ctables_stack) { .nests = nest, .n = 1 };
 
         /* There's no point in moving labels away from an axis that has no
@@ -4788,20 +4756,23 @@ ctables_prepare_table (struct ctables_table *t)
       struct ctables_nest *nest = &stack->nests[i];
       if (!nest->specs[CSV_CELL].n)
         {
-          struct ctables_summary_spec_set *specs = &nest->specs[CSV_CELL];
-          specs->specs = xmalloc (sizeof *specs->specs);
-          specs->n = 1;
+          struct ctables_summary_spec_set *ss = &nest->specs[CSV_CELL];
+          ss->specs = xmalloc (sizeof *ss->specs);
+          ss->n = 1;
 
           enum ctables_summary_function function
-            = specs->is_scale ? CTSF_MEAN : CTSF_COUNT;
+            = ss->is_scale ? CTSF_MEAN : CTSF_COUNT;
 
-          *specs->specs = (struct ctables_summary_spec) {
+          if (!ss->var)
+            {
+              nest->summary_idx = nest->n - 1;
+              ss->var = nest->vars[nest->summary_idx];
+            }
+          *ss->specs = (struct ctables_summary_spec) {
             .function = function,
-            .weighted = true,
-            .format = ctables_summary_default_format (function, specs->var),
+            .weighting = ss->is_scale ? CTW_EFFECTIVE : CTW_DICTIONARY,
+            .format = ctables_summary_default_format (function, ss->var),
           };
-          if (!specs->var)
-            specs->var = nest->vars[0];
 
           ctables_summary_spec_set_clone (&nest->specs[CSV_TOTAL],
                                           &nest->specs[CSV_CELL]);
@@ -5208,8 +5179,8 @@ ctables_section_uninit (struct ctables_section *s)
     }
 
   hmap_destroy (&s->cells);
-  for (size_t i = 0; i < N_CTATS; i++)
-    hmap_destroy (&s->areas[i]);
+  for (enum ctables_area_type at = 0; at < N_CTATS; at++)
+    hmap_destroy (&s->areas[at]);
 }
 
 static void
@@ -5276,19 +5247,24 @@ ctables_execute (struct dataset *ds, struct casereader *input,
       for (struct ccase *c = casereader_read (group); c;
            case_unref (c), c = casereader_read (group))
         {
-          double d_weight = dict_get_case_weight (dict, c, &warn_on_invalid);
+          double d_weight = dict_get_rounded_case_weight (dict, c, &warn_on_invalid);
           double e_weight = (ct->e_weight
                              ? var_force_valid_weight (ct->e_weight,
                                                        case_num (c, ct->e_weight),
                                                        &warn_on_invalid)
                              : d_weight);
+          double weight[] = {
+            [CTW_DICTIONARY] = d_weight,
+            [CTW_EFFECTIVE] = e_weight,
+            [CTW_UNWEIGHTED] = 1.0,
+          };
 
           for (size_t i = 0; i < ct->n_tables; i++)
             {
               struct ctables_table *t = ct->tables[i];
 
               for (size_t j = 0; j < t->n_sections; j++)
-                ctables_cell_insert (&t->sections[j], c, d_weight, e_weight);
+                ctables_cell_insert (&t->sections[j], c, weight);
 
               for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
                 if (t->label_axis[a] != a)
@@ -5786,9 +5762,9 @@ ctables_parse_pproperties_format (struct lexer *lexer,
     {
       /* Parse function. */
       enum ctables_summary_function function;
-      bool weighted;
+      enum ctables_weighting weighting;
       enum ctables_area_type area;
-      if (!parse_ctables_summary_function (lexer, &function, &weighted, &area))
+      if (!parse_ctables_summary_function (lexer, &function, &weighting, &area))
         goto error;
 
       /* Parse percentile. */
@@ -5812,7 +5788,7 @@ ctables_parse_pproperties_format (struct lexer *lexer,
                                  sizeof *sss->specs);
       sss->specs[sss->n++] = (struct ctables_summary_spec) {
         .function = function,
-        .weighted = weighted,
+        .weighting = weighting,
         .calc_area = area,
         .user_area = area,
         .percentile = percentile,