CTABLES: implement some simple features
author Ben Pfaff <blp@cs.stanford.edu>
Sat, 4 Jun 2022 03:08:03 +0000 (20:08 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sat, 25 Jun 2022 04:19:07 +0000 (21:19 -0700)
src/language/stats/ctables.c
tests/language/stats/ctables.at

index 4f231caa7a9f8f6ace7596be97576b52f6dfce3a..ce49e591020f73fa9bd2e587b6f5439ae1855731 100644 (file)
@@ -152,6 +152,8 @@ enum {
 #undef S
 };
 
+static bool ctables_summary_function_is_count (enum ctables_summary_function);
+
 enum ctables_domain_type
   {
     /* Within a section, where stacked variables divide one section from
@@ -771,6 +773,38 @@ ctables_function_availability (enum ctables_summary_function f)
   return availability[f];
 }
 
+static bool
+ctables_summary_function_is_count (enum ctables_summary_function f)
+{ /* True only for the summary functions flagged in this table. */
+  static const bool is_count[N_CTSF_FUNCTIONS] = { /* Indexed by function. */
+    [CTSF_COUNT] = true,
+    [CTSF_ECOUNT] = true,
+    [CTSF_ROWPCT_COUNT] = true,
+    [CTSF_COLPCT_COUNT] = true,
+    [CTSF_TABLEPCT_COUNT] = true,
+    [CTSF_SUBTABLEPCT_COUNT] = true,
+    [CTSF_LAYERPCT_COUNT] = true,
+    [CTSF_LAYERROWPCT_COUNT] = true,
+    [CTSF_LAYERCOLPCT_COUNT] = true,
+    [CTSF_ROWPCT_RESPONSES_COUNT] = true,
+    [CTSF_COLPCT_RESPONSES_COUNT] = true,
+    [CTSF_TABLEPCT_RESPONSES_COUNT] = true,
+    [CTSF_SUBTABLEPCT_RESPONSES_COUNT] = true,
+    [CTSF_LAYERPCT_RESPONSES_COUNT] = true,
+    [CTSF_LAYERROWPCT_RESPONSES_COUNT] = true,
+    [CTSF_LAYERCOLPCT_RESPONSES_COUNT] = true,
+    [CTSF_ROWPCT_COUNT_RESPONSES] = true,
+    [CTSF_COLPCT_COUNT_RESPONSES] = true,
+    [CTSF_TABLEPCT_COUNT_RESPONSES] = true,
+    [CTSF_SUBTABLEPCT_COUNT_RESPONSES] = true,
+    [CTSF_LAYERPCT_COUNT_RESPONSES] = true,
+    [CTSF_LAYERROWPCT_COUNT_RESPONSES] = true,
+    [CTSF_LAYERCOLPCT_COUNT_RESPONSES] = true,
+  };
+  return is_count[f]; /* Entries not listed above are false. */
+}
+
+
 static bool
 parse_ctables_summary_function (struct lexer *lexer,
                                 enum ctables_summary_function *f)
@@ -3580,11 +3614,30 @@ ctables_table_output (struct ctables *ct, struct ctables_table *t)
                     dindexes[n_dindexes++] = leaf;
                   }
 
+              const struct ctables_summary_spec *ss = &specs->specs[j];
+
               double d = (cell->postcompute
                           ? ctables_cell_calculate_postcompute (s, cell)
-                          : ctables_summary_value (cell, &cell->summaries[j], &specs->specs[j]));
-              struct pivot_value *value = pivot_value_new_number (d);
-              value->numeric.format = specs->specs[j].format;
+                          : ctables_summary_value (cell, &cell->summaries[j], ss));
+              struct pivot_value *value;
+              if (ct->hide_threshold != 0
+                  && d < ct->hide_threshold
+                  && (cell->postcompute
+                      ? false /* XXX */
+                      : ctables_summary_function_is_count (ss->function)))
+                {
+                  value = pivot_value_new_user_text_nocopy (
+                    xasprintf ("<%d", ct->hide_threshold));
+                }
+              else if (d == 0 && ct->zero)
+                value = pivot_value_new_user_text (ct->zero, SIZE_MAX);
+              else if (d == SYSMIS && ct->missing)
+                value = pivot_value_new_user_text (ct->missing, SIZE_MAX);
+              else
+                {
+                  value = pivot_value_new_number (d);
+                  value->numeric.format = specs->specs[j].format;
+                }
               pivot_table_put (pt, dindexes, n_dindexes, value);
             }
         }
@@ -4628,7 +4681,6 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds)
                                         pivot_table_look_get_default ())),
     .vlabels = vlabels,
     .postcomputes = HMAP_INITIALIZER (ct->postcomputes),
-    .hide_threshold = 5,
   };
   ct->look->omit_empty = false;
 
@@ -4806,15 +4858,19 @@ cmd_ctables (struct lexer *lexer, struct dataset *ds)
           if (!ct->e_weight)
             goto error;
         }
-      else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
+      else if (lex_match_id (lexer, "HIDESMALLCOUNTS"))
         {
-          if (!lex_force_match_id (lexer, "COUNT"))
-            goto error;
-          lex_match (lexer, T_EQUALS);
-          if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT", 2, INT_MAX))
-            goto error;
-          ct->hide_threshold = lex_integer (lexer);
-          lex_get (lexer);
+          if (lex_match_id (lexer, "COUNT"))
+            {
+              lex_match (lexer, T_EQUALS);
+              if (!lex_force_int_range (lexer, "HIDESMALLCOUNTS COUNT",
+                                        2, INT_MAX))
+                goto error;
+              ct->hide_threshold = lex_integer (lexer);
+              lex_get (lexer);
+            }
+          else if (ct->hide_threshold == 0) /* No COUNT and none set yet. */
+            ct->hide_threshold = 5;
         }
       else
         {
index abc18bbb1203f589308eaa0792c0b67641398a45..35d7c7ffbe3685c40a57adfca92d6220556fdd15 100644 (file)
@@ -1,12 +1,8 @@
 AT_BANNER([CTABLES])
 
-dnl TODO:
+dnl Features not yet implemented:
 dnl
-dnl - Parsing (positive and negative)
-dnl - String variables and values
 dnl - Date/time variables and values
-dnl - Multiple-response sets.
-dnl   * MRSETS subcommand.
 dnl - SPLIT FILE with SEPARATE splits
 dnl - Definition of columns/rows when labels are rotated from one axis to another.
 dnl - Preprocessing to distinguish categorical from scale.
@@ -18,26 +14,18 @@ dnl   * .LCL and .UCL suffixes.
 dnl   * .SE suffixes.
 dnl   * Separate summary functions for totals and subtotals.
 dnl - Special formats for summary functions: NEGPAREN, NEQUAL, PAREN, PCTPAREN.
-dnl - Testing details of missing value handling in summaries.
-dnl - test CLABELS ROWLABELS=LAYER.
 dnl - CATEGORIES:
-dnl   * Special case for explicit category specifications and multiple dichotomy sets
 dnl   * THRU
 dnl   * OTHERNM
 dnl   * String values
 dnl   * Date values
 dnl   * Data-dependent sorting.
 dnl - TITLES: )DATE, )TIME, )TABLE.
-dnl - SIGTEST
-dnl - COMPARETEST
 dnl - FORMAT:
 dnl   * MINCOLWIDTH, MAXCOLWIDTH, UNITS.
 dnl   * EMPTY.
 dnl   * MISSING.
-dnl - Test VLABELS.
 dnl - SMISSING (see documentation).
-dnl - Test WEIGHT and adjustment weights.
-dnl - Test PCOMPUTE and PPROPERTIES.
 dnl - PCOMPUTE:
 dnl   * multi-dimensional
 dnl   * MISSING, OTHERNM
@@ -48,6 +36,23 @@ dnl   * summary statistics and formats?
 dnl - HIDESMALLCOUNTS.
 dnl - Are string ranges a thing?
 dnl
+dnl Features not yet tested:
+dnl - Parsing (positive and negative)
+dnl - String variables and values
+dnl - Testing details of missing value handling in summaries.
+dnl - test CLABELS ROWLABELS=LAYER.
+dnl - Test VLABELS.
+dnl - Test WEIGHT and adjustment weights.
+dnl - Test PCOMPUTE and PPROPERTIES.
+dnl
+dnl Not for v1:
+dnl - Multiple response sets
+dnl - MRSETS subcommand.
+dnl - CATEGORIES: Special case for explicit category specifications and multiple dichotomy sets.
+dnl - SIGTEST
+dnl - COMPARETEST
+dnl
+dnl
 dnl Bug:
 dnl     CTABLES /TABLE=qnd1 [MEAN, MEDIAN] BY qns3a.
 dnl produces a bad median: