#include <config.h>
+#include <math.h>
+
#include "data/casereader.h"
#include "data/dataset.h"
#include "data/dictionary.h"
#include "language/lexer/format-parser.h"
#include "language/lexer/lexer.h"
#include "language/lexer/variable-parser.h"
-#include "language/stats/freq.h"
+#include "libpspp/array.h"
#include "libpspp/assertion.h"
#include "libpspp/hmap.h"
#include "libpspp/message.h"
#include "libpspp/string-array.h"
+#include "math/moments.h"
#include "output/pivot-table.h"
#include "gl/minmax.h"
enum ctables_vlabel
{
- CTVL_DEFAULT = SETTINGS_VALUE_SHOW_DEFAULT,
+ CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT, /* Shares a value with
+      SETTINGS_VALUE_SHOW_DEFAULT, so that every enumerator here equals an
+      enum settings_value_show constant; code elsewhere in this file casts
+      a settings value directly to this type. */
CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
- CTVL_NONE,
};
-static void UNUSED
-ctables_vlabel_unique (enum ctables_vlabel vlabel)
-{
- /* This ensures that all of the values are unique. */
- switch (vlabel)
- {
- case CTVL_DEFAULT:
- case CTVL_NAME:
- case CTVL_LABEL:
- case CTVL_BOTH:
- case CTVL_NONE:
- abort ();
- }
-}
/* XXX:
- unweighted summaries (U*)
struct ctables_chisq *chisq;
struct ctables_pairwise *pairwise;
+
+ struct ctables_freqtab **fts;
+ size_t n_fts;
};
struct ctables_var
{
struct ctables_var var;
bool scale;
- struct ctables_summary *summaries;
+ struct ctables_summary_spec *summaries;
size_t n_summaries;
size_t allocated_summaries;
};
CTFA_MRSETS, /* Only multiple-response sets */
};
-struct ctables_summary
+struct ctables_summary_spec
{
enum ctables_summary_function function;
double percentile; /* CTSF_PTILE only. */
};
static void
-ctables_summary_uninit (struct ctables_summary *s)
+ctables_summary_spec_uninit (struct ctables_summary_spec *s)
{
if (s)
free (s->label);
{
case CTAO_VAR:
for (size_t i = 0; i < axis->n_summaries; i++)
- ctables_summary_uninit (&axis->summaries[i]);
+ ctables_summary_spec_uninit (&axis->summaries[i]);
free (axis->summaries);
break;
}
static bool
-add_summary (struct ctables_axis *axis,
- enum ctables_summary_function function, double percentile,
- const char *label, const struct fmt_spec *format,
- const struct msg_location *loc)
+add_summary_spec (struct ctables_axis *axis,
+ enum ctables_summary_function function, double percentile,
+ const char *label, const struct fmt_spec *format,
+ const struct msg_location *loc)
{
if (axis->op == CTAO_VAR)
{
break;
}
- struct ctables_summary *dst = &axis->summaries[axis->n_summaries++];
- *dst = (struct ctables_summary) {
+ struct ctables_summary_spec *dst = &axis->summaries[axis->n_summaries++];
+ *dst = (struct ctables_summary_spec) {
.function = function,
.percentile = percentile,
.label = xstrdup (label),
else
{
for (size_t i = 0; i < 2; i++)
- if (!add_summary (axis->subs[i], function, percentile, label, format,
- loc))
+ if (!add_summary_spec (axis->subs[i], function, percentile, label,
+ format, loc))
return false;
return true;
}
struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
lex_ofs (ctx->lexer) - 1);
- add_summary (sub, function, percentile, label, formatp, loc);
+ add_summary_spec (sub, function, percentile, label, formatp, loc);
free (label);
msg_location_destroy (loc);
}
static const struct ctables_axis *
-find_categorical_summary (const struct ctables_axis *axis)
+find_categorical_summary_spec (const struct ctables_axis *axis)
{
if (!axis)
return NULL;
for (size_t i = 0; i < 2; i++)
{
const struct ctables_axis *sum
- = find_categorical_summary (axis->subs[i]);
+ = find_categorical_summary_spec (axis->subs[i]);
if (sum)
return sum;
}
return NULL;
}
- const struct ctables_axis *outer_sum = find_categorical_summary (lhs);
+ const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
if (outer_sum)
{
msg_at (SE, nest->loc,
struct var_array
{
+ const struct ctables_axis *summary; /* Axis node that supplies the summaries
+      (a scale variable or one with explicit summary specs), or NULL if
+      none.  Nesting asserts that at most one side has one. */
struct variable **vars;
+ enum pivot_axis_type *axes; /* Parallel to VARS: axes[i] is the pivot axis
+      that vars[i] was enumerated from.  Freed together with VARS. */
size_t n;
};
var_array_uninit (struct var_array *va)
{
if (va)
- free (va->vars);
+ {
+ free (va->vars);
+ free (va->axes);
+ }
}
struct var_array2
for (size_t i = 0; i < va0.n; i++)
for (size_t j = 0; j < va1.n; j++)
{
- size_t allocate = va0.vas[i].n + va1.vas[j].n;
+ const struct var_array *a = &va0.vas[i];
+ const struct var_array *b = &va1.vas[j];
+
+ size_t allocate = a->n + b->n;
struct variable **vars = xnmalloc (allocate, sizeof *vars);
+ enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
size_t n = 0;
- for (size_t k = 0; k < va0.vas[i].n; k++)
- vars[n++] = va0.vas[i].vars[k];
- for (size_t k = 0; k < va1.vas[j].n; k++)
- vars[n++] = va1.vas[j].vars[k];
+ for (size_t k = 0; k < a->n; k++)
+ {
+ vars[n] = a->vars[k];
+ axes[n] = a->axes[k];
+ n++;
+ }
+ for (size_t k = 0; k < b->n; k++)
+ {
+ vars[n] = b->vars[k];
+ axes[n] = b->axes[k];
+ n++;
+ }
assert (n == allocate);
- vaa.vas[vaa.n++] = (struct var_array) { .vars = vars, n = n };
+ assert (!(a->summary && b->summary));
+ vaa.vas[vaa.n++] = (struct var_array) {
+ .summary = a->summary ? a->summary : b->summary,
+ .vars = vars,
+ .axes = axes,
+ .n = n
+ };
}
var_array2_uninit (&va0);
var_array2_uninit (&va1);
}
static struct var_array2
-enumerate_fts (const struct ctables_axis *a)
+enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
{
if (!a)
return (struct var_array2) { .n = 0 };
{
case CTAO_VAR:
assert (!a->var.is_mrset);
- struct variable **v = xmalloc (sizeof *v);
- *v = a->var.var;
struct var_array *va = xmalloc (sizeof *va);
- *va = (struct var_array) { .vars = v, .n = 1 };
+ if (a->scale)
+ *va = (struct var_array) { .n = 0 };
+ else
+ {
+ struct variable **vars = xmalloc (sizeof *vars);
+ *vars = a->var.var;
+ enum pivot_axis_type *axes = xmalloc (sizeof *axes);
+ *axes = axis_type;
+ *va = (struct var_array) { .vars = vars, .axes = axes, .n = 1 };
+ }
+ va->summary = a->scale || a->n_summaries ? a : NULL;
return (struct var_array2) { .vas = va, .n = 1 };
case CTAO_STACK:
- return stack_fts (enumerate_fts (a->subs[0]),
- enumerate_fts (a->subs[1]));
+ return stack_fts (enumerate_fts (axis_type, a->subs[0]),
+ enumerate_fts (axis_type, a->subs[1]));
case CTAO_NEST:
- return nest_fts (enumerate_fts (a->subs[0]),
- enumerate_fts (a->subs[1]));
+ return nest_fts (enumerate_fts (axis_type, a->subs[0]),
+ enumerate_fts (axis_type, a->subs[1]));
+ }
+
+ NOT_REACHED ();
+}
+
+union ctables_summary /* Accumulator for one summary function.  The function
+   named by the matching ctables_summary_spec decides which member is in
+   use; see ctables_summary_init(). */
+ {
+ /* COUNT, VALIDN, TOTALN (and the percentage functions that are
+    initialized the same way). */
+ struct
+ {
+ double valid; /* Total weight of values that were not missing. */
+ double missing; /* Total weight of values that were missing. */
+ };
+
+ /* MINIMUM, MAXIMUM, RANGE.  Both start out as SYSMIS and are only updated
+    from non-missing values. */
+ struct
+ {
+ double min;
+ double max;
+ };
+
+ /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM.  Created by
+    ctables_summary_init() and destroyed by ctables_summary_uninit(). */
+ struct moments1 *moments;
+
+ /* XXX percentiles, median, mode, multiple response */
+ };
+
+/* Prepares S to accumulate the summary function that SS specifies, by
+   resetting whichever member of the union that function uses.  Functions
+   that are not implemented yet are treated as unreachable. */
+static void
+ctables_summary_init (union ctables_summary *s,
+                      const struct ctables_summary_spec *ss)
+{
+  switch (ss->function)
+    {
+      /* Functions backed by a moments1 accumulator. */
+    case CTSF_SUM:
+    case CTSF_MEAN:
+    case CTSF_SEMEAN:
+    case CTSF_VARIANCE:
+    case CTSF_STDDEV:
+    case CTSF_ROWPCT_SUM:
+    case CTSF_COLPCT_SUM:
+    case CTSF_TABLEPCT_SUM:
+    case CTSF_SUBTABLEPCT_SUM:
+    case CTSF_LAYERPCT_SUM:
+    case CTSF_LAYERROWPCT_SUM:
+    case CTSF_LAYERCOLPCT_SUM:
+      s->moments = moments1_create (MOMENT_VARIANCE);
+      break;
+
+      /* Functions backed by the running minimum and maximum. */
+    case CTSF_MINIMUM:
+    case CTSF_MAXIMUM:
+    case CTSF_RANGE:
+      s->max = SYSMIS;
+      s->min = SYSMIS;
+      break;
+
+      /* Functions backed by the valid and missing weight totals. */
+    case CTSF_COUNT:
+    case CTSF_ECOUNT:
+    case CTSF_VALIDN:
+    case CTSF_EVALIDN:
+    case CSTF_TOTALN:
+    case CTSF_ETOTALN:
+    case CTSF_ROWPCT_COUNT:
+    case CTSF_COLPCT_COUNT:
+    case CTSF_TABLEPCT_COUNT:
+    case CTSF_SUBTABLEPCT_COUNT:
+    case CTSF_LAYERPCT_COUNT:
+    case CTSF_LAYERROWPCT_COUNT:
+    case CTSF_LAYERCOLPCT_COUNT:
+    case CTSF_ROWPCT_VALIDN:
+    case CTSF_COLPCT_VALIDN:
+    case CTSF_TABLEPCT_VALIDN:
+    case CTSF_SUBTABLEPCT_VALIDN:
+    case CTSF_LAYERPCT_VALIDN:
+    case CTSF_LAYERROWPCT_VALIDN:
+    case CTSF_LAYERCOLPCT_VALIDN:
+    case CTSF_ROWPCT_TOTALN:
+    case CTSF_COLPCT_TOTALN:
+    case CTSF_TABLEPCT_TOTALN:
+    case CTSF_SUBTABLEPCT_TOTALN:
+    case CTSF_LAYERPCT_TOTALN:
+    case CTSF_LAYERROWPCT_TOTALN:
+    case CTSF_LAYERCOLPCT_TOTALN:
+      s->valid = 0;
+      s->missing = 0;
+      break;
+
+      /* Not implemented: order statistics, MISSING, and everything that
+         involves multiple response sets. */
+    case CTSF_MEDIAN:
+    case CTSF_MISSING:
+    case CTSF_MODE:
+    case CTSF_PTILE:
+    case CTSF_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES:
+    case CTSF_COLPCT_RESPONSES:
+    case CTSF_TABLEPCT_RESPONSES:
+    case CTSF_SUBTABLEPCT_RESPONSES:
+    case CTSF_LAYERPCT_RESPONSES:
+    case CTSF_LAYERROWPCT_RESPONSES:
+    case CTSF_LAYERCOLPCT_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES_COUNT:
+    case CTSF_COLPCT_RESPONSES_COUNT:
+    case CTSF_TABLEPCT_RESPONSES_COUNT:
+    case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
+    case CTSF_LAYERPCT_RESPONSES_COUNT:
+    case CTSF_LAYERROWPCT_RESPONSES_COUNT:
+    case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
+    case CTSF_ROWPCT_COUNT_RESPONSES:
+    case CTSF_COLPCT_COUNT_RESPONSES:
+    case CTSF_TABLEPCT_COUNT_RESPONSES:
+    case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
+    case CTSF_LAYERPCT_COUNT_RESPONSES:
+    case CTSF_LAYERROWPCT_COUNT_RESPONSES:
+    case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
+      NOT_REACHED ();
+    }
+}
+
+/* Releases whatever ctables_summary_init() allocated in S for the function
+   that SS specifies.  Only the moments-based functions own heap data; the
+   counting and min/max functions need no cleanup. */
+static void
+ctables_summary_uninit (union ctables_summary *s,
+                        const struct ctables_summary_spec *ss)
+{
+  switch (ss->function)
+    {
+      /* Functions that own a moments1 accumulator. */
+    case CTSF_SUM:
+    case CTSF_MEAN:
+    case CTSF_SEMEAN:
+    case CTSF_VARIANCE:
+    case CTSF_STDDEV:
+    case CTSF_ROWPCT_SUM:
+    case CTSF_COLPCT_SUM:
+    case CTSF_TABLEPCT_SUM:
+    case CTSF_SUBTABLEPCT_SUM:
+    case CTSF_LAYERPCT_SUM:
+    case CTSF_LAYERROWPCT_SUM:
+    case CTSF_LAYERCOLPCT_SUM:
+      moments1_destroy (s->moments);
+      break;
+
+      /* Plain doubles only: nothing to free. */
+    case CTSF_MINIMUM:
+    case CTSF_MAXIMUM:
+    case CTSF_RANGE:
+    case CTSF_COUNT:
+    case CTSF_ECOUNT:
+    case CTSF_VALIDN:
+    case CTSF_EVALIDN:
+    case CSTF_TOTALN:
+    case CTSF_ETOTALN:
+    case CTSF_ROWPCT_COUNT:
+    case CTSF_COLPCT_COUNT:
+    case CTSF_TABLEPCT_COUNT:
+    case CTSF_SUBTABLEPCT_COUNT:
+    case CTSF_LAYERPCT_COUNT:
+    case CTSF_LAYERROWPCT_COUNT:
+    case CTSF_LAYERCOLPCT_COUNT:
+    case CTSF_ROWPCT_VALIDN:
+    case CTSF_COLPCT_VALIDN:
+    case CTSF_TABLEPCT_VALIDN:
+    case CTSF_SUBTABLEPCT_VALIDN:
+    case CTSF_LAYERPCT_VALIDN:
+    case CTSF_LAYERROWPCT_VALIDN:
+    case CTSF_LAYERCOLPCT_VALIDN:
+    case CTSF_ROWPCT_TOTALN:
+    case CTSF_COLPCT_TOTALN:
+    case CTSF_TABLEPCT_TOTALN:
+    case CTSF_SUBTABLEPCT_TOTALN:
+    case CTSF_LAYERPCT_TOTALN:
+    case CTSF_LAYERROWPCT_TOTALN:
+    case CTSF_LAYERCOLPCT_TOTALN:
+      break;
+
+      /* Never initialized (see ctables_summary_init()), so never
+         uninitialized either. */
+    case CTSF_MEDIAN:
+    case CTSF_MISSING:
+    case CTSF_MODE:
+    case CTSF_PTILE:
+    case CTSF_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES:
+    case CTSF_COLPCT_RESPONSES:
+    case CTSF_TABLEPCT_RESPONSES:
+    case CTSF_SUBTABLEPCT_RESPONSES:
+    case CTSF_LAYERPCT_RESPONSES:
+    case CTSF_LAYERROWPCT_RESPONSES:
+    case CTSF_LAYERCOLPCT_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES_COUNT:
+    case CTSF_COLPCT_RESPONSES_COUNT:
+    case CTSF_TABLEPCT_RESPONSES_COUNT:
+    case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
+    case CTSF_LAYERPCT_RESPONSES_COUNT:
+    case CTSF_LAYERROWPCT_RESPONSES_COUNT:
+    case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
+    case CTSF_ROWPCT_COUNT_RESPONSES:
+    case CTSF_COLPCT_COUNT_RESPONSES:
+    case CTSF_TABLEPCT_COUNT_RESPONSES:
+    case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
+    case CTSF_LAYERPCT_COUNT_RESPONSES:
+    case CTSF_LAYERROWPCT_COUNT_RESPONSES:
+    case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
+      NOT_REACHED ();
+    }
+}
+
+/* Folds one observation into summary S, which accumulates the function
+   that SS specifies.
+
+   VAR is the variable being summarized, VALUE is its value in the current
+   case, and WEIGHT is the case's weight.
+
+   Counting functions tally WEIGHT as valid or missing according to whether
+   VALUE is missing for VAR.  Every other implemented function ignores
+   missing values entirely: a missing value must not move the minimum or
+   maximum, and it must not be fed to the moments accumulator, where the
+   system-missing sentinel (or a user-missing code) would corrupt the mean,
+   sum, and variance. */
+static void
+ctables_summary_add (union ctables_summary *s,
+                     const struct ctables_summary_spec *ss,
+                     const struct variable *var, const union value *value,
+                     double weight)
+{
+  switch (ss->function)
+    {
+    case CTSF_COUNT:
+    case CTSF_ECOUNT:
+    case CTSF_ROWPCT_COUNT:
+    case CTSF_COLPCT_COUNT:
+    case CTSF_TABLEPCT_COUNT:
+    case CTSF_SUBTABLEPCT_COUNT:
+    case CTSF_LAYERPCT_COUNT:
+    case CTSF_LAYERROWPCT_COUNT:
+    case CTSF_LAYERCOLPCT_COUNT:
+    case CTSF_ROWPCT_VALIDN:
+    case CTSF_COLPCT_VALIDN:
+    case CTSF_TABLEPCT_VALIDN:
+    case CTSF_SUBTABLEPCT_VALIDN:
+    case CTSF_LAYERPCT_VALIDN:
+    case CTSF_LAYERROWPCT_VALIDN:
+    case CTSF_LAYERCOLPCT_VALIDN:
+    case CTSF_ROWPCT_TOTALN:
+    case CTSF_COLPCT_TOTALN:
+    case CTSF_TABLEPCT_TOTALN:
+    case CTSF_SUBTABLEPCT_TOTALN:
+    case CTSF_LAYERPCT_TOTALN:
+    case CTSF_LAYERROWPCT_TOTALN:
+    case CTSF_LAYERCOLPCT_TOTALN:
+    case CSTF_TOTALN:
+    case CTSF_ETOTALN:
+    case CTSF_VALIDN:
+    case CTSF_EVALIDN:
+      if (var_is_value_missing (var, value))
+        s->missing += weight;
+      else
+        s->valid += weight;
+      break;
+
+    case CTSF_MAXIMUM:
+    case CTSF_MINIMUM:
+    case CTSF_RANGE:
+      if (!var_is_value_missing (var, value))
+        {
+          assert (!var_is_alpha (var)); /* XXX? */
+          if (s->min == SYSMIS || value->f < s->min)
+            s->min = value->f;
+          if (s->max == SYSMIS || value->f > s->max)
+            s->max = value->f;
+        }
+      break;
+
+    case CTSF_MEAN:
+    case CTSF_SEMEAN:
+    case CTSF_STDDEV:
+    case CTSF_SUM:
+    case CTSF_VARIANCE:
+    case CTSF_ROWPCT_SUM:
+    case CTSF_COLPCT_SUM:
+    case CTSF_TABLEPCT_SUM:
+    case CTSF_SUBTABLEPCT_SUM:
+    case CTSF_LAYERPCT_SUM:
+    case CTSF_LAYERROWPCT_SUM:
+    case CTSF_LAYERCOLPCT_SUM:
+      /* Skip missing values, as the min/max case above does, so that they
+         cannot contaminate the moments. */
+      if (!var_is_value_missing (var, value))
+        moments1_add (s->moments, value->f, weight);
+      break;
+
+    case CTSF_MEDIAN:
+    case CTSF_MISSING:
+    case CTSF_MODE:
+    case CTSF_PTILE:
+      NOT_REACHED ();
+
+    case CTSF_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES:
+    case CTSF_COLPCT_RESPONSES:
+    case CTSF_TABLEPCT_RESPONSES:
+    case CTSF_SUBTABLEPCT_RESPONSES:
+    case CTSF_LAYERPCT_RESPONSES:
+    case CTSF_LAYERROWPCT_RESPONSES:
+    case CTSF_LAYERCOLPCT_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES_COUNT:
+    case CTSF_COLPCT_RESPONSES_COUNT:
+    case CTSF_TABLEPCT_RESPONSES_COUNT:
+    case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
+    case CTSF_LAYERPCT_RESPONSES_COUNT:
+    case CTSF_LAYERROWPCT_RESPONSES_COUNT:
+    case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
+    case CTSF_ROWPCT_COUNT_RESPONSES:
+    case CTSF_COLPCT_COUNT_RESPONSES:
+    case CTSF_TABLEPCT_COUNT_RESPONSES:
+    case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
+    case CTSF_LAYERPCT_COUNT_RESPONSES:
+    case CTSF_LAYERROWPCT_COUNT_RESPONSES:
+    case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
+      NOT_REACHED ();
+    }
+}
+
+
+
+/* Returns the current result of the summary function that SS specifies,
+   computed from the data accumulated in S.  Results that cannot be computed
+   from the accumulated data come back as SYSMIS where the code checks for
+   it (RANGE, STDDEV, SUM).  Percentage, order-statistic, MISSING, and
+   multiple-response functions are not implemented and are treated as
+   unreachable. */
+static double
+ctables_summary_value (union ctables_summary *s,
+                       const struct ctables_summary_spec *ss)
+{
+  switch (ss->function)
+    {
+      /* Weight totals. */
+    case CTSF_COUNT:
+    case CTSF_ECOUNT:
+    case CTSF_VALIDN:
+    case CTSF_EVALIDN:
+      return s->valid;
+
+    case CSTF_TOTALN:
+    case CTSF_ETOTALN:
+      return s->valid + s->missing;
+
+      /* Extremes. */
+    case CTSF_MINIMUM:
+      return s->min;
+
+    case CTSF_MAXIMUM:
+      return s->max;
+
+    case CTSF_RANGE:
+      if (s->max == SYSMIS || s->min == SYSMIS)
+        return SYSMIS;
+      return s->max - s->min;
+
+      /* Moments-based statistics. */
+    case CTSF_MEAN:
+      {
+        double m;
+        moments1_calculate (s->moments, NULL, &m, NULL, NULL, NULL);
+        return m;
+      }
+
+    case CTSF_SUM:
+      {
+        double w, m;
+        moments1_calculate (s->moments, &w, &m, NULL, NULL, NULL);
+        if (w == SYSMIS || m == SYSMIS)
+          return SYSMIS;
+        return w * m;
+      }
+
+    case CTSF_VARIANCE:
+      {
+        double var;
+        moments1_calculate (s->moments, NULL, NULL, &var, NULL, NULL);
+        return var;
+      }
+
+    case CTSF_STDDEV:
+      {
+        double var;
+        moments1_calculate (s->moments, NULL, NULL, &var, NULL, NULL);
+        if (var == SYSMIS)
+          return SYSMIS;
+        return sqrt (var);
+      }
+
+    case CTSF_SEMEAN:
+      {
+        double w, var;
+        moments1_calculate (s->moments, &w, NULL, &var, NULL, NULL);
+        return calc_semean (var, w);
+      }
+
+      /* Everything below is not implemented yet. */
+    case CTSF_ROWPCT_COUNT:
+    case CTSF_COLPCT_COUNT:
+    case CTSF_TABLEPCT_COUNT:
+    case CTSF_SUBTABLEPCT_COUNT:
+    case CTSF_LAYERPCT_COUNT:
+    case CTSF_LAYERROWPCT_COUNT:
+    case CTSF_LAYERCOLPCT_COUNT:
+    case CTSF_ROWPCT_VALIDN:
+    case CTSF_COLPCT_VALIDN:
+    case CTSF_TABLEPCT_VALIDN:
+    case CTSF_SUBTABLEPCT_VALIDN:
+    case CTSF_LAYERPCT_VALIDN:
+    case CTSF_LAYERROWPCT_VALIDN:
+    case CTSF_LAYERCOLPCT_VALIDN:
+    case CTSF_ROWPCT_TOTALN:
+    case CTSF_COLPCT_TOTALN:
+    case CTSF_TABLEPCT_TOTALN:
+    case CTSF_SUBTABLEPCT_TOTALN:
+    case CTSF_LAYERPCT_TOTALN:
+    case CTSF_LAYERROWPCT_TOTALN:
+    case CTSF_LAYERCOLPCT_TOTALN:
+    case CTSF_ROWPCT_SUM:
+    case CTSF_COLPCT_SUM:
+    case CTSF_TABLEPCT_SUM:
+    case CTSF_SUBTABLEPCT_SUM:
+    case CTSF_LAYERPCT_SUM:
+    case CTSF_LAYERROWPCT_SUM:
+    case CTSF_LAYERCOLPCT_SUM:
+    case CTSF_MEDIAN:
+    case CTSF_MISSING:
+    case CTSF_MODE:
+    case CTSF_PTILE:
+    case CTSF_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES:
+    case CTSF_COLPCT_RESPONSES:
+    case CTSF_TABLEPCT_RESPONSES:
+    case CTSF_SUBTABLEPCT_RESPONSES:
+    case CTSF_LAYERPCT_RESPONSES:
+    case CTSF_LAYERROWPCT_RESPONSES:
+    case CTSF_LAYERCOLPCT_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES_COUNT:
+    case CTSF_COLPCT_RESPONSES_COUNT:
+    case CTSF_TABLEPCT_RESPONSES_COUNT:
+    case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
+    case CTSF_LAYERPCT_RESPONSES_COUNT:
+    case CTSF_LAYERROWPCT_RESPONSES_COUNT:
+    case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
+    case CTSF_ROWPCT_COUNT_RESPONSES:
+    case CTSF_COLPCT_COUNT_RESPONSES:
+    case CTSF_TABLEPCT_COUNT_RESPONSES:
+    case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
+    case CTSF_LAYERPCT_COUNT_RESPONSES:
+    case CTSF_LAYERROWPCT_COUNT_RESPONSES:
+    case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
+      NOT_REACHED ();
}
NOT_REACHED ();
}
+struct ctables_freq /* One cell of a ctables_freqtab: a distinct combination
+   of values of the table's variables plus the summaries accumulated for
+   the cases that have that combination. */
+ {
+ struct hmap_node node; /* Element in the owning ctables_freqtab's 'data'
+                           hash table. */
+ union ctables_summary *summaries; /* Separately allocated array with one
+                                      element per summary spec of the owning
+                                      ctables_freqtab (n_summaries). */
+ union value values[]; /* The values, one per variable in the owning
+                          ctables_freqtab's 'vars', in the same order. */
+ };
+
struct ctables_freqtab
{
struct var_array vars;
- struct hmap data; /* Contains "struct freq"s. */
+ struct hmap data; /* Contains "struct ctables_freq"s, hashed on their
+                      values. */
+ const struct ctables_summary_spec *summaries; /* Functions to compute for
+      each cell.  Not freed with the table: it points either at an axis's
+      spec array or at a static default spec. */
+ size_t n_summaries; /* Number of elements in 'summaries'. */
+ const struct variable *summary_var; /* Variable whose value in each case is
+      passed to the summaries. */
+ struct ctables_freq **sorted; /* The entries of 'data' in sorted order;
+      allocated, filled in, and freed by ctables_execute(). */
};
+/* Allocates and returns a new cell for FT, with room for one value per
+   variable in FT and with one freshly initialized summary per summary spec
+   in FT.  The values themselves and the hash node are left for the caller
+   to fill in. */
+static struct ctables_freq *
+ctables_freq_create (struct ctables_freqtab *ft)
+{
+  size_t n_values = ft->vars.n;
+  size_t n_specs = ft->n_summaries;
+
+  struct ctables_freq *cell = xmalloc (sizeof *cell
+                                       + n_values * sizeof *cell->values);
+  cell->summaries = xmalloc (n_specs * sizeof *cell->summaries);
+
+  /* Walk the summaries and their specs in lockstep. */
+  union ctables_summary *summary = cell->summaries;
+  const struct ctables_summary_spec *spec = ft->summaries;
+  for (size_t left = n_specs; left > 0; left--)
+    ctables_summary_init (summary++, spec++);
+
+  return cell;
+}
+
+
+/* Feeds VALUE, the value of VAR in the current case, with case weight
+   WEIGHT, to every summary in cell F, pairing each summary with the
+   corresponding summary spec in FT. */
+static void
+ctables_freq_add (struct ctables_freqtab *ft, struct ctables_freq *f,
+                  const struct variable *var, const union value *value,
+                  double weight)
+{
+  union ctables_summary *summary = f->summaries;
+  const struct ctables_summary_spec *spec = ft->summaries;
+
+  for (size_t left = ft->n_summaries; left > 0; left--)
+    ctables_summary_add (summary++, spec++, var, value, weight);
+}
+
+
+/* Comparison callback for sorting an array of "struct ctables_freq *".
+   A_ and B_ point to two elements of that array and VARS_ points to the
+   "struct var_array" that describes their values.  Compares the cells value
+   by value, in variable order, and returns the first nonzero result, or 0
+   if all the values compare equal. */
+static int
+ctables_freq_compare_3way (const void *a_, const void *b_, const void *vars_)
+{
+  const struct var_array *vars = vars_;
+  const struct ctables_freq *a = *(struct ctables_freq *const *) a_;
+  const struct ctables_freq *b = *(struct ctables_freq *const *) b_;
+
+  int cmp = 0;
+  for (size_t i = 0; cmp == 0 && i < vars->n; i++)
+    cmp = value_compare_3way (&a->values[i], &b->values[i],
+                              var_get_width (vars->vars[i]));
+  return cmp;
+}
+
+
static bool
ctables_execute (struct dataset *ds, struct ctables *ct)
{
- struct ctables_freqtab **fts = NULL;
- size_t n_fts = 0;
- size_t allocated_fts = 0;
for (size_t i = 0; i < ct->n_tables; i++)
{
+ size_t allocated_fts = 0;
+
struct ctables_table *t = &ct->tables[i];
- struct var_array2 vaa = enumerate_fts (t->axes[PIVOT_AXIS_ROW]);
- vaa = nest_fts (vaa, enumerate_fts (t->axes[PIVOT_AXIS_COLUMN]));
- vaa = nest_fts (vaa, enumerate_fts (t->axes[PIVOT_AXIS_LAYER]));
+ struct var_array2 vaa = { .n = 0 };
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ vaa = nest_fts (vaa, enumerate_fts (a, t->axes[a]));
for (size_t i = 0; i < vaa.n; i++)
{
for (size_t j = 0; j < vaa.vas[i].n; j++)
{
if (j)
fputs (", ", stdout);
- fputs (var_get_name (vaa.vas[i].vars[j]), stdout);
+ printf ("%s (%c)", var_get_name (vaa.vas[i].vars[j]),
+ vaa.vas[i].axes[j] == PIVOT_AXIS_ROW ? 'r'
+ : vaa.vas[i].axes[j] == PIVOT_AXIS_COLUMN ? 'c'
+ : vaa.vas[i].axes[j] == PIVOT_AXIS_LAYER ? 'l'
+ : '?');
}
putchar ('\n');
}
for (size_t j = 0; j < vaa.n; j++)
{
- if (n_fts >= allocated_fts)
- fts = x2nrealloc (fts, &allocated_fts, sizeof *fts);
+ const struct var_array *va = &vaa.vas[j];
+ const struct ctables_summary_spec *summaries;
+ size_t n_summaries;
+ const struct variable *summary_var;
+ if (!va->summary)
+ {
+ static const struct ctables_summary_spec count = {
+ .function = CTSF_COUNT,
+ .label = (char *) N_("Count"),
+ .format = { .type = FMT_F, .w = 40 },
+ };
+ summaries = &count;
+ n_summaries = 1;
+ summary_var = va->vars[0];
+ }
+ else if (va->summary->n_summaries)
+ {
+ summaries = va->summary->summaries;
+ n_summaries = va->summary->n_summaries;
+ summary_var = va->summary->var.var;
+ }
+ else
+ {
+ static const struct ctables_summary_spec mean = {
+ .function = CTSF_MEAN,
+ .label = (char *) N_("Mean"),
+ .format = { .type = FMT_F, .w = 40, .d = 2}, /* XXX */
+ };
+ summaries = &mean;
+ n_summaries = 1;
+ summary_var = va->summary->var.var;
+ };
struct ctables_freqtab *ft = xmalloc (sizeof *ft);
*ft = (struct ctables_freqtab) {
- .vars = vaa.vas[j],
+ .vars = *va,
+ .summaries = summaries,
+ .n_summaries = n_summaries,
+ .summary_var = summary_var,
.data = HMAP_INITIALIZER (ft->data),
};
- fts[n_fts++] = ft;
+
+ if (t->n_fts >= allocated_fts)
+ t->fts = x2nrealloc (t->fts, &allocated_fts, sizeof *t->fts);
+ t->fts[t->n_fts++] = ft;
}
free (vaa.vas);
{
double weight = dict_get_case_weight (dataset_dict (ds), c,
&warn_on_invalid);
- for (size_t i = 0; i < n_fts; i++)
- {
- struct ctables_freqtab *ft = fts[i];
- size_t hash = 0;
+ for (size_t i = 0; i < ct->n_tables; i++)
+ {
+ struct ctables_table *t = &ct->tables[i];
- for (size_t j = 0; j < ft->vars.n; j++)
+ for (size_t j = 0; j < t->n_fts; j++)
{
- const struct variable *var = ft->vars.vars[j];
- hash = value_hash (case_data (c, var), var_get_width (var), hash);
- }
+ struct ctables_freqtab *ft = t->fts[j];
- struct freq *f;
- HMAP_FOR_EACH_WITH_HASH (f, struct freq, node, hash, &ft->data)
- {
- for (size_t j = 0; j < ft->vars.n; j++)
+ for (size_t k = 0; k < ft->vars.n; k++)
{
- const struct variable *var = ft->vars.vars[j];
- if (!value_equal (case_data (c, var), &f->values[j],
- var_get_width (var)))
- goto next_hash_node;
+ const struct variable *var = ft->vars.vars[k];
+ switch (var_is_value_missing (var, case_data (c, var)))
+ {
+ case MV_SYSTEM:
+ goto next_ft;
+
+ case MV_USER:
+ if (!t->categories[var_get_dict_index (var)]
+ || !t->categories[var_get_dict_index (var)]->include_missing)
+ goto next_ft;
+ break;
+ }
+ }
+ size_t hash = 0;
+ for (size_t k = 0; k < ft->vars.n; k++)
+ {
+ const struct variable *var = ft->vars.vars[k];
+ hash = value_hash (case_data (c, var), var_get_width (var), hash);
}
- f->count += weight;
- goto next_ft;
+ struct ctables_freq *f;
+ HMAP_FOR_EACH_WITH_HASH (f, struct ctables_freq, node, hash, &ft->data)
+ {
+ for (size_t k = 0; k < ft->vars.n; k++)
+ {
+ const struct variable *var = ft->vars.vars[k];
+ if (!value_equal (case_data (c, var), &f->values[k],
+ var_get_width (var)))
+ goto next_hash_node;
+ }
+ goto found;
+
+ next_hash_node: ;
+ }
- next_hash_node: ;
- }
+ f = ctables_freq_create (ft);
+ for (size_t k = 0; k < ft->vars.n; k++)
+ {
+ const struct variable *var = ft->vars.vars[k];
+ value_clone (&f->values[k], case_data (c, var),
+ var_get_width (var));
+ }
+ hmap_insert (&ft->data, &f->node, hash);
- f = xmalloc (table_entry_size (ft->vars.n));
- f->count = weight;
- for (size_t j = 0; j < ft->vars.n; j++)
- {
- const struct variable *var = ft->vars.vars[j];
- value_clone (&f->values[j], case_data (c, var),
- var_get_width (var));
- }
- hmap_insert (&ft->data, &f->node, hash);
+ found:
+ ctables_freq_add (ft, f, ft->summary_var,
+ case_data (c, ft->summary_var), weight);
- next_ft: ;
+ next_ft: ;
+ }
}
}
casereader_destroy (input);
- for (size_t i = 0; i < n_fts; i++)
+ for (size_t i = 0; i < ct->n_tables; i++)
+ {
+ struct ctables_table *t = &ct->tables[i];
+
+ struct pivot_table *pt = pivot_table_create (N_("Custom Tables"));
+ struct pivot_dimension *d = pivot_dimension_create (
+ pt, PIVOT_AXIS_ROW, N_("Rows"));
+ for (size_t j = 0; j < t->n_fts; j++)
+ {
+ struct ctables_freqtab *ft = t->fts[j];
+ ft->sorted = xnmalloc (ft->data.count, sizeof *ft->sorted);
+
+ struct ctables_freq *f;
+ size_t n = 0;
+ HMAP_FOR_EACH (f, struct ctables_freq, node, &ft->data)
+ ft->sorted[n++] = f;
+ assert (n == ft->data.count);
+ sort (ft->sorted, n, sizeof *ft->sorted,
+ ctables_freq_compare_3way, &ft->vars);
+
+ struct pivot_category **groups = xnmalloc (ft->vars.n,
+ sizeof *groups);
+ for (size_t k = 0; k < n; k++)
+ {
+ struct ctables_freq *prev = k > 0 ? ft->sorted[k - 1] : NULL;
+ struct ctables_freq *f = ft->sorted[k];
+
+ size_t n_common = 0;
+ if (prev)
+ for (; n_common + 1 < ft->vars.n; n_common++)
+ if (!value_equal (&prev->values[n_common],
+ &f->values[n_common],
+ var_get_type (ft->vars.vars[n_common])))
+ break;
+
+ for (size_t m = n_common; m < ft->vars.n; m++)
+ {
+ struct pivot_category *parent = m > 0 ? groups[m - 1] : d->root;
+ const struct variable *var = ft->vars.vars[m];
+ enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (var)];
+
+ if (vlabel != CTVL_NONE)
+ parent = pivot_category_create_group__ (
+ parent, pivot_value_new_variable (ft->vars.vars[m]));
+
+ if (m + 1 < ft->vars.n)
+ parent = pivot_category_create_group__ (
+ parent,
+ pivot_value_new_var_value (ft->vars.vars[m], &f->values[m]));
+ groups[m] = parent;
+
+ if (m == ft->vars.n - 1)
+ {
+ struct pivot_category *c = pivot_category_create_group__ (
+ parent,
+ pivot_value_new_var_value (ft->vars.vars[ft->vars.n - 1],
+ &f->values[ft->vars.n - 1]));
+ for (size_t p = 0; p < ft->n_summaries; p++)
+ {
+ double value = ctables_summary_value (
+ &f->summaries[p], &ft->summaries[p]);
+ int leaf = pivot_category_create_leaf (
+ c, pivot_value_new_text (ft->summaries[p].label));
+ pivot_table_put1 (pt, leaf, pivot_value_new_number (value));
+ }
+ }
+ }
+ }
+ free (groups);
+ }
+ pivot_table_submit (pt);
+ }
+
+ for (size_t i = 0; i < ct->n_tables; i++)
{
- struct ctables_freqtab *ft = fts[i];
- struct freq *f, *next;
- HMAP_FOR_EACH_SAFE (f, next, struct freq, node, &ft->data)
+ struct ctables_table *t = &ct->tables[i];
+
+ for (size_t j = 0; j < t->n_fts; j++)
{
- hmap_delete (&ft->data, &f->node);
- for (size_t j = 0; j < ft->vars.n; j++)
+ struct ctables_freqtab *ft = t->fts[j];
+ struct ctables_freq *f, *next;
+ HMAP_FOR_EACH_SAFE (f, next, struct ctables_freq, node, &ft->data)
{
- const struct variable *var = ft->vars.vars[j];
- value_destroy (&f->values[j], var_get_width (var));
+ hmap_delete (&ft->data, &f->node);
+ for (size_t k = 0; k < ft->n_summaries; k++)
+ ctables_summary_uninit (&f->summaries[k], &ft->summaries[k]);
+ free (f->summaries);
+ for (size_t k = 0; k < ft->vars.n; k++)
+ {
+ const struct variable *var = ft->vars.vars[k];
+ value_destroy (&f->values[k], var_get_width (var));
+ }
+ free (f);
}
- free (f);
+ hmap_destroy (&ft->data);
+ free (ft->sorted);
+ var_array_uninit (&ft->vars);
+ free (ft);
}
- hmap_destroy (&ft->data);
- var_array_uninit (&ft->vars);
- free (ft);
+ free (t->fts);
}
- free (fts);
return proc_commit (ds);
}
{
size_t n_vars = dict_get_n_vars (dataset_dict (ds));
enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
+ enum settings_value_show tvars = settings_get_show_variables ();
for (size_t i = 0; i < n_vars; i++)
- vlabels[i] = CTVL_DEFAULT;
+ vlabels[i] = (enum ctables_vlabel) tvars;
struct ctables *ct = xmalloc (sizeof *ct);
*ct = (struct ctables) {
enum ctables_vlabel vlabel;
if (lex_match_id (lexer, "DEFAULT"))
- vlabel = CTVL_DEFAULT;
+ vlabel = (enum ctables_vlabel) settings_get_show_variables ();
else if (lex_match_id (lexer, "NAME"))
vlabel = CTVL_NAME;
else if (lex_match_id (lexer, "LABEL"))