#include <config.h>
+#include <math.h>
+
#include "data/casereader.h"
#include "data/dataset.h"
#include "data/dictionary.h"
#include "language/lexer/format-parser.h"
#include "language/lexer/lexer.h"
#include "language/lexer/variable-parser.h"
-#include "language/stats/freq.h"
+#include "libpspp/array.h"
#include "libpspp/assertion.h"
+#include "libpspp/hash-functions.h"
#include "libpspp/hmap.h"
#include "libpspp/message.h"
#include "libpspp/string-array.h"
+#include "math/moments.h"
#include "output/pivot-table.h"
#include "gl/minmax.h"
enum ctables_vlabel
{
- CTVL_DEFAULT = SETTINGS_VALUE_SHOW_DEFAULT,
+ CTVL_NONE = SETTINGS_VALUE_SHOW_DEFAULT,
CTVL_NAME = SETTINGS_VALUE_SHOW_VALUE,
CTVL_LABEL = SETTINGS_VALUE_SHOW_LABEL,
CTVL_BOTH = SETTINGS_VALUE_SHOW_BOTH,
- CTVL_NONE,
};
-static void UNUSED
-ctables_vlabel_unique (enum ctables_vlabel vlabel)
-{
- /* This ensures that all of the values are unique. */
- switch (vlabel)
- {
- case CTVL_DEFAULT:
- case CTVL_NAME:
- case CTVL_LABEL:
- case CTVL_BOTH:
- case CTVL_NONE:
- abort ();
- }
-}
/* XXX:
- unweighted summaries (U*)
#undef S
};
+/* The kinds of domains that a cell belongs to. Each value indexes the
+   N_CTDTS-element per-domain arrays; a domain's accumulated "valid" weight
+   is the denominator used by the matching *PCT_COUNT summary functions. */
+enum ctables_domain_type
+ {
+ /* Within a section, where stacked variables divide one section from
+ another. */
+ CTDT_TABLE, /* All layers of a whole section. */
+ CTDT_LAYER, /* One layer within a section. */
+ CTDT_LAYERROW, /* Row in one layer within a section. */
+ CTDT_LAYERCOL, /* Column in one layer within a section. */
+
+ /* Within a subtable, where a subtable pairs an innermost row variable with
+ an innermost column variable within a single layer. */
+ CTDT_SUBTABLE, /* Whole subtable. */
+ CTDT_ROW, /* Row within a subtable. */
+ CTDT_COL, /* Column within a subtable. */
+ /* Number of enumerators above; keep in sync when adding a domain type. */
+#define N_CTDTS 7
+ };
+
+/* Accumulated case weight for one domain (see enum ctables_domain_type). */
+struct ctables_domain
+ {
+ /* In the 'domains[]' hmap of struct ctables_table that corresponds to this
+    domain's type. */
+ struct hmap_node node;
+
+ /* A cell that lies in this domain. Its values for the domain's
+    distinguishing variables identify the domain during lookup. */
+ const struct ctables_freq *example;
+
+ /* Sum of the weights of every case added to a cell in this domain. */
+ double valid;
+ /* NOTE(review): never updated in the code visible here. */
+ double missing;
+ };
+
+/* One cell of a table: a distinct combination of variable arrays and
+   category values on every axis, plus the summaries accumulated for it. */
+struct ctables_freq
+ {
+ /* In struct ctables's 'ft' hmap. Indexed by all the values in all the
+ axes (except the scalar variable, if any). */
+ struct hmap_node node;
+
+ /* The domains that contain this cell, one per domain type. */
+ struct ctables_domain *domains[N_CTDTS];
+
+ /* Per-axis identity of the cell. */
+ struct
+ {
+ /* Index of the cell's var_array within the axis's var_array2. */
+ size_t vaa_idx;
+ /* One value per variable in that var_array (null if it has none). */
+ union value *values;
+ /* NOTE(review): not assigned in the code visible here. */
+ int leaf;
+ }
+ axes[PIVOT_N_AXES];
+
+ /* One accumulator per summary specification of the summary axis's
+    var_array. */
+ union ctables_summary *summaries;
+ };
+
struct ctables
{
struct pivot_table_look *look;
struct variable *base_weight; /* WEIGHT. */
int hide_threshold; /* HIDESMALLCOUNTS. */
- struct ctables_table *tables;
+ struct ctables_table **tables;
size_t n_tables;
};
CTLP_LAYER,
};
+/* One combination of variables along a single axis, as produced by
+   enumerating the axis's stacked and nested variables. */
+struct var_array
+ {
+ /* The variables. When two arrays are nested, the left operand's variables
+    come first. */
+ struct variable **vars;
+ size_t n; /* Number of elements in 'vars'. */
+ /* Index in 'vars' of the scale variable, or SIZE_MAX if there is none. */
+ size_t scale_idx;
+ /* For each domain type, the indexes into 'vars' of the variables whose
+    values distinguish one domain of that type from another, and how many
+    such indexes there are. */
+ size_t *domains[N_CTDTS];
+ size_t n_domains[N_CTDTS];
+
+ /* Summary specifications attached to this combination and the variable
+    they summarize. These are borrowed from the axis variable that carries
+    summaries or is a scale variable; 'summary_var' is null if none does. */
+ struct ctables_summary_spec *summaries;
+ size_t n_summaries;
+ struct variable *summary_var;
+ };
+
+/* A collection of var_arrays: every variable combination that one axis
+   expands to. */
+struct var_array2
+ {
+ struct var_array *vas; /* The combinations. */
+ size_t n; /* Number of elements in 'vas'. */
+ };
+
struct ctables_table
{
struct ctables_axis *axes[PIVOT_N_AXES];
+ struct var_array2 vaas[PIVOT_N_AXES];
+ enum pivot_axis_type summary_axis;
+ struct hmap ft;
+ struct hmap domains[N_CTDTS];
enum pivot_axis_type slabels_position;
bool slabels_visible;
};
};
+static const struct ctables_cat_value *ctables_categories_match (
+ const struct ctables_categories *, const union value *,
+ const struct variable *);
+
static void
ctables_cat_value_uninit (struct ctables_cat_value *cv)
{
{
struct ctables_var var;
bool scale;
- struct ctables_summary *summaries;
+ struct ctables_summary_spec *summaries;
size_t n_summaries;
size_t allocated_summaries;
};
CTFA_MRSETS, /* Only multiple-response sets */
};
-struct ctables_summary
+struct ctables_summary_spec
{
enum ctables_summary_function function;
double percentile; /* CTSF_PTILE only. */
};
static void
-ctables_summary_uninit (struct ctables_summary *s)
+ctables_summary_spec_uninit (struct ctables_summary_spec *s)
{
if (s)
free (s->label);
{
case CTAO_VAR:
for (size_t i = 0; i < axis->n_summaries; i++)
- ctables_summary_uninit (&axis->summaries[i]);
+ ctables_summary_spec_uninit (&axis->summaries[i]);
free (axis->summaries);
break;
}
}
+/* Returns a newly allocated, translated default label for summary FUNCTION.
+   PERCENTILE is consulted only for CTSF_PTILE, where it is formatted into
+   the label. The caller is responsible for freeing the result. */
+static char *
+ctables_summary_default_label (enum ctables_summary_function function,
+                               double percentile)
+{
+  /* Untranslated labels, indexed by summary function. */
+  static const char *default_labels[] = {
+#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
+    SUMMARIES
+#undef S
+  };
+
+  if (function == CTSF_PTILE)
+    return xasprintf (_("Percentile %.2f"), percentile);
+
+  return xstrdup (gettext (default_labels[function]));
+}
+
static const char *
ctables_summary_function_name (enum ctables_summary_function function)
{
}
static bool
-add_summary (struct ctables_axis *axis,
- enum ctables_summary_function function, double percentile,
- const char *label, const struct fmt_spec *format,
- const struct msg_location *loc)
+add_summary_spec (struct ctables_axis *axis,
+ enum ctables_summary_function function, double percentile,
+ const char *label, const struct fmt_spec *format,
+ const struct msg_location *loc)
{
if (axis->op == CTAO_VAR)
{
break;
}
- struct ctables_summary *dst = &axis->summaries[axis->n_summaries++];
- *dst = (struct ctables_summary) {
+ struct ctables_summary_spec *dst = &axis->summaries[axis->n_summaries++];
+ *dst = (struct ctables_summary_spec) {
.function = function,
.percentile = percentile,
.label = xstrdup (label),
else
{
for (size_t i = 0; i < 2; i++)
- if (!add_summary (axis->subs[i], function, percentile, label, format,
- loc))
+ if (!add_summary_spec (axis->subs[i], function, percentile, label,
+ format, loc))
return false;
return true;
}
label = ss_xstrdup (lex_tokss (ctx->lexer));
lex_get (ctx->lexer);
}
- else if (function == CTSF_PTILE)
- label = xasprintf (_("Percentile %.2f"), percentile);
else
- {
- static const char *default_labels[] = {
-#define S(ENUM, NAME, LABEL, FORMAT, AVAILABILITY) [ENUM] = LABEL,
- SUMMARIES
-#undef S
- };
- label = xstrdup (gettext (default_labels[function]));
- }
+ label = ctables_summary_default_label (function, percentile);
/* Parse format. */
struct fmt_spec format;
struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
lex_ofs (ctx->lexer) - 1);
- add_summary (sub, function, percentile, label, formatp, loc);
+ add_summary_spec (sub, function, percentile, label, formatp, loc);
free (label);
msg_location_destroy (loc);
}
static const struct ctables_axis *
-find_categorical_summary (const struct ctables_axis *axis)
+find_categorical_summary_spec (const struct ctables_axis *axis)
{
if (!axis)
return NULL;
for (size_t i = 0; i < 2; i++)
{
const struct ctables_axis *sum
- = find_categorical_summary (axis->subs[i]);
+ = find_categorical_summary_spec (axis->subs[i]);
if (sum)
return sum;
}
return NULL;
}
- const struct ctables_axis *outer_sum = find_categorical_summary (lhs);
+ const struct ctables_axis *outer_sum = find_categorical_summary_spec (lhs);
if (outer_sum)
{
msg_at (SE, nest->loc,
}
static void
-ctables_table_uninit (struct ctables_table *t)
+ctables_table_destroy (struct ctables_table *t)
{
if (!t)
return;
free (t->title);
ctables_chisq_destroy (t->chisq);
ctables_pairwise_destroy (t->pairwise);
+ free (t);
}
static void
free (ct->missing);
free (ct->vlabels);
for (size_t i = 0; i < ct->n_tables; i++)
- ctables_table_uninit (&ct->tables[i]);
+ ctables_table_destroy (ct->tables[i]);
free (ct->tables);
free (ct);
}
return true;
}
-struct var_array
- {
- struct variable **vars;
- size_t n;
- };
-
static void
var_array_uninit (struct var_array *va)
{
free (va->vars);
}
-struct var_array2
- {
- struct var_array *vas;
- size_t n;
- };
-
static void
var_array2_uninit (struct var_array2 *vaa)
{
for (size_t i = 0; i < va0.n; i++)
for (size_t j = 0; j < va1.n; j++)
{
- size_t allocate = va0.vas[i].n + va1.vas[j].n;
+ const struct var_array *a = &va0.vas[i];
+ const struct var_array *b = &va1.vas[j];
+
+ size_t allocate = a->n + b->n;
struct variable **vars = xnmalloc (allocate, sizeof *vars);
+ enum pivot_axis_type *axes = xnmalloc (allocate, sizeof *axes);
size_t n = 0;
- for (size_t k = 0; k < va0.vas[i].n; k++)
- vars[n++] = va0.vas[i].vars[k];
- for (size_t k = 0; k < va1.vas[j].n; k++)
- vars[n++] = va1.vas[j].vars[k];
+ for (size_t k = 0; k < a->n; k++)
+ vars[n++] = a->vars[k];
+ for (size_t k = 0; k < b->n; k++)
+ vars[n++] = b->vars[k];
assert (n == allocate);
- vaa.vas[vaa.n++] = (struct var_array) { .vars = vars, n = n };
+ const struct var_array *summary_src;
+ if (!a->summary_var)
+ summary_src = b;
+ else if (!b->summary_var)
+ summary_src = a;
+ else
+ NOT_REACHED ();
+ vaa.vas[vaa.n++] = (struct var_array) {
+ .vars = vars,
+ .scale_idx = (a->scale_idx != SIZE_MAX ? a->scale_idx
+ : b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
+ : SIZE_MAX),
+ .n = n,
+ .summaries = summary_src->summaries,
+ .n_summaries = summary_src->n_summaries,
+ .summary_var = summary_src->summary_var,
+ };
}
var_array2_uninit (&va0);
var_array2_uninit (&va1);
}
static struct var_array2
-enumerate_fts (const struct ctables_axis *a)
+enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
{
if (!a)
return (struct var_array2) { .n = 0 };
{
case CTAO_VAR:
assert (!a->var.is_mrset);
- struct variable **v = xmalloc (sizeof *v);
- *v = a->var.var;
+
+ struct variable **vars = xmalloc (sizeof *vars);
+ *vars = a->var.var;
+
struct var_array *va = xmalloc (sizeof *va);
- *va = (struct var_array) { .vars = v, .n = 1 };
+ *va = (struct var_array) {
+ .vars = vars,
+ .n = 1,
+ .scale_idx = a->scale ? 0 : SIZE_MAX,
+ };
+ if (a->n_summaries || a->scale)
+ {
+ va->summaries = a->summaries;
+ va->n_summaries = a->n_summaries;
+ va->summary_var = a->var.var;
+ }
return (struct var_array2) { .vas = va, .n = 1 };
case CTAO_STACK:
- return stack_fts (enumerate_fts (a->subs[0]),
- enumerate_fts (a->subs[1]));
+ return stack_fts (enumerate_fts (axis_type, a->subs[0]),
+ enumerate_fts (axis_type, a->subs[1]));
case CTAO_NEST:
- return nest_fts (enumerate_fts (a->subs[0]),
- enumerate_fts (a->subs[1]));
+ return nest_fts (enumerate_fts (axis_type, a->subs[0]),
+ enumerate_fts (axis_type, a->subs[1]));
}
NOT_REACHED ();
}
-struct ctables_freqtab
+/* Accumulator for one summary function in one cell. Which member is active
+   depends on the function in the associated struct ctables_summary_spec. */
+union ctables_summary
{
- struct var_array vars;
- struct hmap data; /* Contains "struct freq"s. */
+ /* COUNT, VALIDN, TOTALN. */
+ struct
+ {
+ double valid;
+ double missing;
+ };
+
+ /* MINIMUM, MAXIMUM, RANGE. */
+ struct
+ {
+ double min;
+ double max;
+ };
+
+ /* MEAN, SEMEAN, STDDEV, SUM, VARIANCE, *.SUM. */
+ struct moments1 *moments;
+
+ /* XXX percentiles, median, mode, multiple response */
};
-static bool
-ctables_execute (struct dataset *ds, struct ctables *ct)
+/* Initializes S as an empty accumulator for the summary function in SS.
+   Summary functions that are not implemented yet hit NOT_REACHED. */
+static void
+ctables_summary_init (union ctables_summary *s,
+ const struct ctables_summary_spec *ss)
{
- struct ctables_freqtab **fts = NULL;
- size_t n_fts = 0;
- size_t allocated_fts = 0;
- for (size_t i = 0; i < ct->n_tables; i++)
+ switch (ss->function)
+ {
+ case CTSF_COUNT:
+ case CTSF_ECOUNT:
+ case CTSF_ROWPCT_COUNT:
+ case CTSF_COLPCT_COUNT:
+ case CTSF_TABLEPCT_COUNT:
+ case CTSF_SUBTABLEPCT_COUNT:
+ case CTSF_LAYERPCT_COUNT:
+ case CTSF_LAYERROWPCT_COUNT:
+ case CTSF_LAYERCOLPCT_COUNT:
+ case CTSF_ROWPCT_VALIDN:
+ case CTSF_COLPCT_VALIDN:
+ case CTSF_TABLEPCT_VALIDN:
+ case CTSF_SUBTABLEPCT_VALIDN:
+ case CTSF_LAYERPCT_VALIDN:
+ case CTSF_LAYERROWPCT_VALIDN:
+ case CTSF_LAYERCOLPCT_VALIDN:
+ case CTSF_ROWPCT_TOTALN:
+ case CTSF_COLPCT_TOTALN:
+ case CTSF_TABLEPCT_TOTALN:
+ case CTSF_SUBTABLEPCT_TOTALN:
+ case CTSF_LAYERPCT_TOTALN:
+ case CTSF_LAYERROWPCT_TOTALN:
+ case CTSF_LAYERCOLPCT_TOTALN:
+ case CSTF_TOTALN:
+ case CTSF_ETOTALN:
+ case CTSF_VALIDN:
+ case CTSF_EVALIDN:
+ /* Weight counters start at zero. */
+ s->missing = s->valid = 0;
+ break;
+
+ case CTSF_MAXIMUM:
+ case CTSF_MINIMUM:
+ case CTSF_RANGE:
+ /* SYSMIS marks "no value seen yet" for both extremes. */
+ s->min = s->max = SYSMIS;
+ break;
+
+ case CTSF_MEAN:
+ case CTSF_SEMEAN:
+ case CTSF_STDDEV:
+ case CTSF_SUM:
+ case CTSF_VARIANCE:
+ case CTSF_ROWPCT_SUM:
+ case CTSF_COLPCT_SUM:
+ case CTSF_TABLEPCT_SUM:
+ case CTSF_SUBTABLEPCT_SUM:
+ case CTSF_LAYERPCT_SUM:
+ case CTSF_LAYERROWPCT_SUM:
+ case CTSF_LAYERCOLPCT_SUM:
+ /* Allocates a moments accumulator; ctables_summary_uninit destroys it. */
+ s->moments = moments1_create (MOMENT_VARIANCE);
+ break;
+
+ case CTSF_MEDIAN:
+ case CTSF_MISSING:
+ case CTSF_MODE:
+ case CTSF_PTILE:
+ /* Not implemented yet. */
+ NOT_REACHED ();
+
+ case CTSF_RESPONSES:
+ case CTSF_ROWPCT_RESPONSES:
+ case CTSF_COLPCT_RESPONSES:
+ case CTSF_TABLEPCT_RESPONSES:
+ case CTSF_SUBTABLEPCT_RESPONSES:
+ case CTSF_LAYERPCT_RESPONSES:
+ case CTSF_LAYERROWPCT_RESPONSES:
+ case CTSF_LAYERCOLPCT_RESPONSES:
+ case CTSF_ROWPCT_RESPONSES_COUNT:
+ case CTSF_COLPCT_RESPONSES_COUNT:
+ case CTSF_TABLEPCT_RESPONSES_COUNT:
+ case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
+ case CTSF_LAYERPCT_RESPONSES_COUNT:
+ case CTSF_LAYERROWPCT_RESPONSES_COUNT:
+ case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
+ case CTSF_ROWPCT_COUNT_RESPONSES:
+ case CTSF_COLPCT_COUNT_RESPONSES:
+ case CTSF_TABLEPCT_COUNT_RESPONSES:
+ case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
+ case CTSF_LAYERPCT_COUNT_RESPONSES:
+ case CTSF_LAYERROWPCT_COUNT_RESPONSES:
+ case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
+ /* Multiple-response summaries are not implemented yet. */
+ NOT_REACHED ();
+ }
+}
+
+/* Releases whatever ctables_summary_init allocated for S, given the same
+   summary specification SS. Only the moments-based functions own memory;
+   the counting and min/max functions need no cleanup. Summary functions
+   that are not implemented yet hit NOT_REACHED. */
+static void UNUSED
+ctables_summary_uninit (union ctables_summary *s,
+ const struct ctables_summary_spec *ss)
+{
+ switch (ss->function)
+ {
+ case CTSF_COUNT:
+ case CTSF_ECOUNT:
+ case CTSF_ROWPCT_COUNT:
+ case CTSF_COLPCT_COUNT:
+ case CTSF_TABLEPCT_COUNT:
+ case CTSF_SUBTABLEPCT_COUNT:
+ case CTSF_LAYERPCT_COUNT:
+ case CTSF_LAYERROWPCT_COUNT:
+ case CTSF_LAYERCOLPCT_COUNT:
+ case CTSF_ROWPCT_VALIDN:
+ case CTSF_COLPCT_VALIDN:
+ case CTSF_TABLEPCT_VALIDN:
+ case CTSF_SUBTABLEPCT_VALIDN:
+ case CTSF_LAYERPCT_VALIDN:
+ case CTSF_LAYERROWPCT_VALIDN:
+ case CTSF_LAYERCOLPCT_VALIDN:
+ case CTSF_ROWPCT_TOTALN:
+ case CTSF_COLPCT_TOTALN:
+ case CTSF_TABLEPCT_TOTALN:
+ case CTSF_SUBTABLEPCT_TOTALN:
+ case CTSF_LAYERPCT_TOTALN:
+ case CTSF_LAYERROWPCT_TOTALN:
+ case CTSF_LAYERCOLPCT_TOTALN:
+ case CSTF_TOTALN:
+ case CTSF_ETOTALN:
+ case CTSF_VALIDN:
+ case CTSF_EVALIDN:
+ /* Plain doubles: nothing to free. */
+ break;
+
+ case CTSF_MAXIMUM:
+ case CTSF_MINIMUM:
+ case CTSF_RANGE:
+ /* Plain doubles: nothing to free. */
+ break;
+
+ case CTSF_MEAN:
+ case CTSF_SEMEAN:
+ case CTSF_STDDEV:
+ case CTSF_SUM:
+ case CTSF_VARIANCE:
+ case CTSF_ROWPCT_SUM:
+ case CTSF_COLPCT_SUM:
+ case CTSF_TABLEPCT_SUM:
+ case CTSF_SUBTABLEPCT_SUM:
+ case CTSF_LAYERPCT_SUM:
+ case CTSF_LAYERROWPCT_SUM:
+ case CTSF_LAYERCOLPCT_SUM:
+ moments1_destroy (s->moments);
+ break;
+
+ case CTSF_MEDIAN:
+ case CTSF_MISSING:
+ case CTSF_MODE:
+ case CTSF_PTILE:
+ /* Not implemented yet. */
+ NOT_REACHED ();
+
+ case CTSF_RESPONSES:
+ case CTSF_ROWPCT_RESPONSES:
+ case CTSF_COLPCT_RESPONSES:
+ case CTSF_TABLEPCT_RESPONSES:
+ case CTSF_SUBTABLEPCT_RESPONSES:
+ case CTSF_LAYERPCT_RESPONSES:
+ case CTSF_LAYERROWPCT_RESPONSES:
+ case CTSF_LAYERCOLPCT_RESPONSES:
+ case CTSF_ROWPCT_RESPONSES_COUNT:
+ case CTSF_COLPCT_RESPONSES_COUNT:
+ case CTSF_TABLEPCT_RESPONSES_COUNT:
+ case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
+ case CTSF_LAYERPCT_RESPONSES_COUNT:
+ case CTSF_LAYERROWPCT_RESPONSES_COUNT:
+ case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
+ case CTSF_ROWPCT_COUNT_RESPONSES:
+ case CTSF_COLPCT_COUNT_RESPONSES:
+ case CTSF_TABLEPCT_COUNT_RESPONSES:
+ case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
+ case CTSF_LAYERPCT_COUNT_RESPONSES:
+ case CTSF_LAYERROWPCT_COUNT_RESPONSES:
+ case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
+ /* Multiple-response summaries are not implemented yet. */
+ NOT_REACHED ();
+ }
+}
+
+/* Accumulates VALUE, which is VAR's value in one case that has the given
+   WEIGHT, into summary S according to the summary function in SS.
+
+   Counting functions add WEIGHT to either the valid or the missing total,
+   depending on whether VALUE is missing for VAR.  Min/max and moments-based
+   functions ignore missing values entirely, so that system-missing and
+   user-missing values cannot distort the extremes, mean, sum, or variance.
+   Summary functions that are not implemented yet hit NOT_REACHED. */
+static void
+ctables_summary_add (union ctables_summary *s,
+                     const struct ctables_summary_spec *ss,
+                     const struct variable *var, const union value *value,
+                     double weight)
+{
+  switch (ss->function)
+    {
+    case CTSF_COUNT:
+    case CTSF_ECOUNT:
+    case CTSF_ROWPCT_COUNT:
+    case CTSF_COLPCT_COUNT:
+    case CTSF_TABLEPCT_COUNT:
+    case CTSF_SUBTABLEPCT_COUNT:
+    case CTSF_LAYERPCT_COUNT:
+    case CTSF_LAYERROWPCT_COUNT:
+    case CTSF_LAYERCOLPCT_COUNT:
+    case CTSF_ROWPCT_VALIDN:
+    case CTSF_COLPCT_VALIDN:
+    case CTSF_TABLEPCT_VALIDN:
+    case CTSF_SUBTABLEPCT_VALIDN:
+    case CTSF_LAYERPCT_VALIDN:
+    case CTSF_LAYERROWPCT_VALIDN:
+    case CTSF_LAYERCOLPCT_VALIDN:
+    case CTSF_ROWPCT_TOTALN:
+    case CTSF_COLPCT_TOTALN:
+    case CTSF_TABLEPCT_TOTALN:
+    case CTSF_SUBTABLEPCT_TOTALN:
+    case CTSF_LAYERPCT_TOTALN:
+    case CTSF_LAYERROWPCT_TOTALN:
+    case CTSF_LAYERCOLPCT_TOTALN:
+    case CSTF_TOTALN:
+    case CTSF_ETOTALN:
+    case CTSF_VALIDN:
+    case CTSF_EVALIDN:
+      if (var_is_value_missing (var, value))
+        s->missing += weight;
+      else
+        s->valid += weight;
+      break;
+
+    case CTSF_MAXIMUM:
+    case CTSF_MINIMUM:
+    case CTSF_RANGE:
+      if (!var_is_value_missing (var, value))
+        {
+          assert (!var_is_alpha (var)); /* XXX? */
+          /* SYSMIS in min/max means no valid value has been seen yet. */
+          if (s->min == SYSMIS || value->f < s->min)
+            s->min = value->f;
+          if (s->max == SYSMIS || value->f > s->max)
+            s->max = value->f;
+        }
+      break;
+
+    case CTSF_MEAN:
+    case CTSF_SEMEAN:
+    case CTSF_STDDEV:
+    case CTSF_SUM:
+    case CTSF_VARIANCE:
+    case CTSF_ROWPCT_SUM:
+    case CTSF_COLPCT_SUM:
+    case CTSF_TABLEPCT_SUM:
+    case CTSF_SUBTABLEPCT_SUM:
+    case CTSF_LAYERPCT_SUM:
+    case CTSF_LAYERROWPCT_SUM:
+    case CTSF_LAYERCOLPCT_SUM:
+      /* Skip missing values, as the min/max case does: feeding SYSMIS or a
+         user-missing code into the moments would corrupt every statistic
+         derived from them. */
+      if (!var_is_value_missing (var, value))
+        moments1_add (s->moments, value->f, weight);
+      break;
+
+    case CTSF_MEDIAN:
+    case CTSF_MISSING:
+    case CTSF_MODE:
+    case CTSF_PTILE:
+      /* Not implemented yet. */
+      NOT_REACHED ();
+
+    case CTSF_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES:
+    case CTSF_COLPCT_RESPONSES:
+    case CTSF_TABLEPCT_RESPONSES:
+    case CTSF_SUBTABLEPCT_RESPONSES:
+    case CTSF_LAYERPCT_RESPONSES:
+    case CTSF_LAYERROWPCT_RESPONSES:
+    case CTSF_LAYERCOLPCT_RESPONSES:
+    case CTSF_ROWPCT_RESPONSES_COUNT:
+    case CTSF_COLPCT_RESPONSES_COUNT:
+    case CTSF_TABLEPCT_RESPONSES_COUNT:
+    case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
+    case CTSF_LAYERPCT_RESPONSES_COUNT:
+    case CTSF_LAYERROWPCT_RESPONSES_COUNT:
+    case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
+    case CTSF_ROWPCT_COUNT_RESPONSES:
+    case CTSF_COLPCT_COUNT_RESPONSES:
+    case CTSF_TABLEPCT_COUNT_RESPONSES:
+    case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
+    case CTSF_LAYERPCT_COUNT_RESPONSES:
+    case CTSF_LAYERROWPCT_COUNT_RESPONSES:
+    case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
+      /* Multiple-response summaries are not implemented yet. */
+      NOT_REACHED ();
+    }
+}
+
+/* Returns the final numeric value of summary S, which belongs to cell F and
+   was accumulated according to SS.
+
+   The *PCT_COUNT functions divide the cell's valid weight by the valid
+   weight of the enclosing domain of the matching type and scale to percent,
+   yielding SYSMIS when the domain's weight is zero. Summary functions that
+   are not implemented yet hit NOT_REACHED. */
+static double
+ctables_summary_value (const struct ctables_freq *f,
+ union ctables_summary *s,
+ const struct ctables_summary_spec *ss)
+{
+ switch (ss->function)
+ {
+ case CTSF_COUNT:
+ case CTSF_ECOUNT:
+ return s->valid;
+
+ case CTSF_SUBTABLEPCT_COUNT:
+ return f->domains[CTDT_SUBTABLE]->valid ? s->valid / f->domains[CTDT_SUBTABLE]->valid * 100 : SYSMIS;
+
+ case CTSF_ROWPCT_COUNT:
+ return f->domains[CTDT_ROW]->valid ? s->valid / f->domains[CTDT_ROW]->valid * 100 : SYSMIS;
+
+ case CTSF_COLPCT_COUNT:
+ return f->domains[CTDT_COL]->valid ? s->valid / f->domains[CTDT_COL]->valid * 100 : SYSMIS;
+
+ case CTSF_TABLEPCT_COUNT:
+ return f->domains[CTDT_TABLE]->valid ? s->valid / f->domains[CTDT_TABLE]->valid * 100 : SYSMIS;
+
+ case CTSF_LAYERPCT_COUNT:
+ return f->domains[CTDT_LAYER]->valid ? s->valid / f->domains[CTDT_LAYER]->valid * 100 : SYSMIS;
+
+ case CTSF_LAYERROWPCT_COUNT:
+ return f->domains[CTDT_LAYERROW]->valid ? s->valid / f->domains[CTDT_LAYERROW]->valid * 100 : SYSMIS;
+
+ case CTSF_LAYERCOLPCT_COUNT:
+ return f->domains[CTDT_LAYERCOL]->valid ? s->valid / f->domains[CTDT_LAYERCOL]->valid * 100 : SYSMIS;
+
+ case CTSF_ROWPCT_VALIDN:
+ case CTSF_COLPCT_VALIDN:
+ case CTSF_TABLEPCT_VALIDN:
+ case CTSF_SUBTABLEPCT_VALIDN:
+ case CTSF_LAYERPCT_VALIDN:
+ case CTSF_LAYERROWPCT_VALIDN:
+ case CTSF_LAYERCOLPCT_VALIDN:
+ case CTSF_ROWPCT_TOTALN:
+ case CTSF_COLPCT_TOTALN:
+ case CTSF_TABLEPCT_TOTALN:
+ case CTSF_SUBTABLEPCT_TOTALN:
+ case CTSF_LAYERPCT_TOTALN:
+ case CTSF_LAYERROWPCT_TOTALN:
+ case CTSF_LAYERCOLPCT_TOTALN:
+ /* Accumulated by ctables_summary_add, but no value is computed yet. */
+ NOT_REACHED ();
+
+ case CSTF_TOTALN:
+ case CTSF_ETOTALN:
+ return s->valid + s->missing;
+
+ case CTSF_VALIDN:
+ case CTSF_EVALIDN:
+ return s->valid;
+
+ case CTSF_MAXIMUM:
+ return s->max;
+
+ case CTSF_MINIMUM:
+ return s->min;
+
+ case CTSF_RANGE:
+ /* SYSMIS in either extreme means no valid value was seen. */
+ return s->max != SYSMIS && s->min != SYSMIS ? s->max - s->min : SYSMIS;
+
+ case CTSF_MEAN:
+ {
+ double mean;
+ moments1_calculate (s->moments, NULL, &mean, NULL, NULL, NULL);
+ return mean;
+ }
+
+ case CTSF_SEMEAN:
+ {
+ double weight, variance;
+ moments1_calculate (s->moments, &weight, NULL, &variance, NULL, NULL);
+ return calc_semean (variance, weight);
+ }
+
+ case CTSF_STDDEV:
+ {
+ double variance;
+ moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
+ return variance != SYSMIS ? sqrt (variance) : SYSMIS;
+ }
+
+ case CTSF_SUM:
+ {
+ /* The sum is recovered as total weight times mean. */
+ double weight, mean;
+ moments1_calculate (s->moments, &weight, &mean, NULL, NULL, NULL);
+ return weight != SYSMIS && mean != SYSMIS ? weight * mean : SYSMIS;
+ }
+
+ case CTSF_VARIANCE:
+ {
+ double variance;
+ moments1_calculate (s->moments, NULL, NULL, &variance, NULL, NULL);
+ return variance;
+ }
+
+ case CTSF_ROWPCT_SUM:
+ case CTSF_COLPCT_SUM:
+ case CTSF_TABLEPCT_SUM:
+ case CTSF_SUBTABLEPCT_SUM:
+ case CTSF_LAYERPCT_SUM:
+ case CTSF_LAYERROWPCT_SUM:
+ case CTSF_LAYERCOLPCT_SUM:
+ /* Accumulated by ctables_summary_add, but no value is computed yet. */
+ NOT_REACHED ();
+
+ case CTSF_MEDIAN:
+ case CTSF_MISSING:
+ case CTSF_MODE:
+ case CTSF_PTILE:
+ /* Not implemented yet. */
+ NOT_REACHED ();
+
+ case CTSF_RESPONSES:
+ case CTSF_ROWPCT_RESPONSES:
+ case CTSF_COLPCT_RESPONSES:
+ case CTSF_TABLEPCT_RESPONSES:
+ case CTSF_SUBTABLEPCT_RESPONSES:
+ case CTSF_LAYERPCT_RESPONSES:
+ case CTSF_LAYERROWPCT_RESPONSES:
+ case CTSF_LAYERCOLPCT_RESPONSES:
+ case CTSF_ROWPCT_RESPONSES_COUNT:
+ case CTSF_COLPCT_RESPONSES_COUNT:
+ case CTSF_TABLEPCT_RESPONSES_COUNT:
+ case CTSF_SUBTABLEPCT_RESPONSES_COUNT:
+ case CTSF_LAYERPCT_RESPONSES_COUNT:
+ case CTSF_LAYERROWPCT_RESPONSES_COUNT:
+ case CTSF_LAYERCOLPCT_RESPONSES_COUNT:
+ case CTSF_ROWPCT_COUNT_RESPONSES:
+ case CTSF_COLPCT_COUNT_RESPONSES:
+ case CTSF_TABLEPCT_COUNT_RESPONSES:
+ case CTSF_SUBTABLEPCT_COUNT_RESPONSES:
+ case CTSF_LAYERPCT_COUNT_RESPONSES:
+ case CTSF_LAYERROWPCT_COUNT_RESPONSES:
+ case CTSF_LAYERCOLPCT_COUNT_RESPONSES:
+ /* Multiple-response summaries are not implemented yet. */
+ NOT_REACHED ();
+ }
+
+ NOT_REACHED ();
+}
+
+/* Auxiliary data passed to ctables_freq_compare_3way. */
+struct ctables_freq_sort_aux
+ {
+ const struct ctables_table *t; /* Table whose cells are being sorted. */
+ enum pivot_axis_type a; /* Axis whose values determine the order. */
+ };
+
+/* Sort callback that orders two cells along one axis.  A_ and B_ each point
+   to a "struct ctables_freq *"; AUX_ points to a struct
+   ctables_freq_sort_aux naming the table and the axis.
+
+   Cells are ordered first by the index of their variable array on the axis,
+   then by the first non-scale variable whose values differ.  When that
+   variable has an explicit category list, the position of the matching
+   categories in the list decides the order; otherwise the values' natural
+   order does.  Returns a negative, zero, or positive result. */
+static int
+ctables_freq_compare_3way (const void *a_, const void *b_, const void *aux_)
+{
+  const struct ctables_freq_sort_aux *aux = aux_;
+  const struct ctables_table *table = aux->t;
+  enum pivot_axis_type axis = aux->a;
+  const struct ctables_freq *a = *(struct ctables_freq *const *) a_;
+  const struct ctables_freq *b = *(struct ctables_freq *const *) b_;
+
+  size_t a_idx = a->axes[axis].vaa_idx;
+  size_t b_idx = b->axes[axis].vaa_idx;
+  if (a_idx < b_idx)
+    return -1;
+  if (a_idx > b_idx)
+    return 1;
+
+  const struct var_array *va = &table->vaas[axis].vas[a_idx];
+  for (size_t i = 0; i < va->n; i++)
+    {
+      /* The scale variable does not take part in the ordering. */
+      if (i == va->scale_idx)
+        continue;
+
+      const struct variable *var = va->vars[i];
+      const union value *val_a = &a->axes[axis].values[i];
+      const union value *val_b = &b->axes[axis].values[i];
+      int cmp = value_compare_3way (val_a, val_b, var_get_width (var));
+      if (cmp == 0)
+        continue;
+
+      const struct ctables_categories *cats
+        = table->categories[var_get_dict_index (var)];
+      if (!cats || !cats->n_values)
+        return cmp;
+
+      /* Explicit categories: order by position in the category list,
+         falling back to value order within a single category. */
+      const struct ctables_cat_value *a_cv
+        = ctables_categories_match (cats, val_a, var);
+      const struct ctables_cat_value *b_cv
+        = ctables_categories_match (cats, val_b, var);
+      assert (a_cv && b_cv);
+      if (a_cv == b_cv)
+        return cmp;
+      return a_cv > b_cv ? 1 : -1;
+    }
+  return 0;
+}
+
+/* Algorithm:
+
+ For each case (that is, each row of the input data):
+ For each ctables_table:
+ For each combination of row vars:
+ For each combination of column vars:
+ For each combination of layer vars:
+ Add entry
+ Make a table of row values:
+ Sort entries by row values
+ Assign a 0-based index to each actual value
+ Construct a dimension
+ Make a table of column values
+ Make a table of layer values
+ For each entry:
+ Fill the table entry using the indexes from before.
+ */
+
+/* Returns the domain of type DOMAIN in table T that contains cell F,
+   creating and inserting a new one, with F as its example cell, if none
+   exists yet.
+
+   Two cells share a domain when, on every axis, they use the same variable
+   array and agree on the values of the variables listed for DOMAIN in that
+   variable array. */
+static struct ctables_domain *
+ctables_domain_insert (struct ctables_table *t, struct ctables_freq *f,
+ enum ctables_domain_type domain)
+{
+ /* Hash the variable array indexes and the domain-defining values. */
+ size_t hash = 0;
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ {
+ size_t idx = f->axes[a].vaa_idx;
+ const struct var_array *va = &t->vaas[a].vas[idx];
+ hash = hash_int (idx, hash);
+ for (size_t i = 0; i < va->n_domains[domain]; i++)
+ {
+ size_t v_idx = va->domains[domain][i];
+ hash = value_hash (&f->axes[a].values[v_idx],
+ var_get_width (va->vars[v_idx]), hash);
+ }
+ }
+
+ /* Look for an existing domain whose example cell matches F. */
+ struct ctables_domain *d;
+ HMAP_FOR_EACH_WITH_HASH (d, struct ctables_domain, node, hash, &t->domains[domain])
{
- struct ctables_table *t = &ct->tables[i];
- struct var_array2 vaa = enumerate_fts (t->axes[PIVOT_AXIS_ROW]);
- vaa = nest_fts (vaa, enumerate_fts (t->axes[PIVOT_AXIS_COLUMN]));
- vaa = nest_fts (vaa, enumerate_fts (t->axes[PIVOT_AXIS_LAYER]));
- for (size_t i = 0; i < vaa.n; i++)
+ const struct ctables_freq *df = d->example;
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
{
- for (size_t j = 0; j < vaa.vas[i].n; j++)
+ size_t idx = f->axes[a].vaa_idx;
+ if (idx != df->axes[a].vaa_idx)
+ goto not_equal;
+
+ const struct var_array *va = &t->vaas[a].vas[idx];
+ for (size_t i = 0; i < va->n_domains[domain]; i++)
{
- if (j)
- fputs (", ", stdout);
- fputs (var_get_name (vaa.vas[i].vars[j]), stdout);
+ size_t v_idx = va->domains[domain][i];
+ if (!value_equal (&df->axes[a].values[v_idx],
+ &f->axes[a].values[v_idx],
+ var_get_width (va->vars[v_idx])))
+ goto not_equal;
}
- putchar ('\n');
}
+ return d;
+
+ not_equal: ;
+ }
+
+ /* No match: start a new domain with zero weight, exemplified by F. */
+ d = xmalloc (sizeof *d);
+ *d = (struct ctables_domain) { .example = f };
+ hmap_insert (&t->domains[domain], &d->node, hash);
+ return d;
+}
+
+/* Returns the category in CATS that value V of variable VAR falls into, or
+   a null pointer if there is none.
+
+   The category list is scanned from last to first, so when several explicit
+   categories match, the one listed last wins. If no explicit category
+   matches, a non-missing V falls into the OTHERNM category listed last, if
+   any; a missing V that matched nothing yields a null pointer. Subtotal
+   entries never match. String categories are not supported yet and hit
+   NOT_REACHED. */
+static const struct ctables_cat_value *
+ctables_categories_match (const struct ctables_categories *cats,
+ const union value *v, const struct variable *var)
+{
+ const struct ctables_cat_value *othernm = NULL;
+ for (size_t i = cats->n_values; i-- > 0; )
+ {
+ const struct ctables_cat_value *cv = &cats->values[i];
+ switch (cv->type)
+ {
+ case CCVT_NUMBER:
+ if (cv->number == v->f)
+ return cv;
+ break;
+
+ case CCVT_STRING:
+ NOT_REACHED ();
+
+ case CCVT_RANGE:
+ /* -DBL_MAX and DBL_MAX act as open lower and upper bounds. */
+ if ((cv->range[0] == -DBL_MAX || v->f >= cv->range[0])
+ && (cv->range[1] == DBL_MAX || v->f <= cv->range[1]))
+ return cv;
+ break;
+
+ case CCVT_MISSING:
+ if (var_is_value_missing (var, v))
+ return cv;
+ break;
+
+ case CCVT_OTHERNM:
+ /* Remember only the first one seen in this reverse scan. */
+ if (!othernm)
+ othernm = cv;
+ break;
+
+ case CCVT_SUBTOTAL:
+ case CCVT_HSUBTOTAL:
+ break;
+ }
+ }
- for (size_t j = 0; j < vaa.n; j++)
+ return var_is_value_missing (var, v) ? NULL : othernm;
+}
+
+/* Adds case C, with the given WEIGHT, to the cell of table T selected by
+   the variable arrays at indexes IR, IC, and IL on the row, column, and
+   layer axes, respectively.
+
+   The case is dropped if any non-scale variable that has an explicit
+   category list holds a value that matches none of its categories.
+   Otherwise the cell for the case's values is found in T's 'ft' hmap, or
+   created along with its summaries and domains, and WEIGHT is added to each
+   of the cell's summaries and to each domain that contains the cell. */
+static void
+ctables_freqtab_insert (struct ctables_table *t,
+ const struct ccase *c,
+ size_t ir, size_t ic, size_t il,
+ double weight)
+{
+ size_t ix[PIVOT_N_AXES] = {
+ [PIVOT_AXIS_ROW] = ir,
+ [PIVOT_AXIS_COLUMN] = ic,
+ [PIVOT_AXIS_LAYER] = il,
+ };
+ /* The variable array that supplies the summary specifications. */
+ const struct var_array *ss = &t->vaas[t->summary_axis].vas[ix[t->summary_axis]];
+
+ /* Reject the case if a value falls outside an explicit category list. */
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ {
+ const struct var_array *va = &t->vaas[a].vas[ix[a]];
+ for (size_t i = 0; i < va->n; i++)
{
- if (n_fts >= allocated_fts)
- fts = x2nrealloc (fts, &allocated_fts, sizeof *fts);
+ if (i == va->scale_idx)
+ continue;
- struct ctables_freqtab *ft = xmalloc (sizeof *ft);
- *ft = (struct ctables_freqtab) {
- .vars = vaa.vas[j],
- .data = HMAP_INITIALIZER (ft->data),
- };
- fts[n_fts++] = ft;
+ const struct ctables_categories *cats = t->categories[var_get_dict_index (va->vars[i])];
+ if (!cats || !cats->n_values)
+ continue;
+
+ if (!ctables_categories_match (cats, case_data (c, va->vars[i]), va->vars[i]))
+ return;
+ }
+ }
+
+ /* Hash the variable array indexes and all the non-scale values. */
+ size_t hash = 0;
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ {
+ const struct var_array *va = &t->vaas[a].vas[ix[a]];
+ hash = hash_int (ix[a], hash);
+ for (size_t i = 0; i < va->n; i++)
+ if (i != va->scale_idx)
+ hash = value_hash (case_data (c, va->vars[i]),
+ var_get_width (va->vars[i]), hash);
+ }
+
+ /* Look for an existing cell with the same indexes and values. */
+ struct ctables_freq *f;
+ HMAP_FOR_EACH_WITH_HASH (f, struct ctables_freq, node, hash, &t->ft)
+ {
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ {
+ const struct var_array *va = &t->vaas[a].vas[ix[a]];
+ if (f->axes[a].vaa_idx != ix[a])
+ goto not_equal;
+ for (size_t i = 0; i < va->n; i++)
+ if (i != va->scale_idx
+ && !value_equal (case_data (c, va->vars[i]),
+ &f->axes[a].values[i],
+ var_get_width (va->vars[i])))
+ goto not_equal;
}
- free (vaa.vas);
+ goto summarize;
+
+ not_equal: ;
+ }
+
+ /* No existing cell: create one, copying the case's values for every
+    variable on every axis. */
+ f = xmalloc (sizeof *f);
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ {
+ const struct var_array *va = &t->vaas[a].vas[ix[a]];
+ f->axes[a].vaa_idx = ix[a];
+ f->axes[a].values = (va->n
+ ? xnmalloc (va->n, sizeof *f->axes[a].values)
+ : NULL);
+ for (size_t i = 0; i < va->n; i++)
+ value_clone (&f->axes[a].values[i], case_data (c, va->vars[i]),
+ var_get_width (va->vars[i]));
+ }
+ f->summaries = xmalloc (ss->n_summaries * sizeof *f->summaries);
+ for (size_t i = 0; i < ss->n_summaries; i++)
+ ctables_summary_init (&f->summaries[i], &ss->summaries[i]);
+ /* The domain lookup reads the cell's values, so it must follow the copy
+    above. */
+ for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
+ f->domains[dt] = ctables_domain_insert (t, f, dt);
+ hmap_insert (&t->ft, &f->node, hash);
+
+summarize:
+ for (size_t i = 0; i < ss->n_summaries; i++)
+ ctables_summary_add (&f->summaries[i], &ss->summaries[i], ss->summary_var,
+ case_data (c, ss->summary_var), weight);
+ /* Every case counts toward the domain's "valid" weight here, whether or
+    not the summarized value is missing. */
+ for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
+ f->domains[dt]->valid += weight;
+}
+
+static bool
+ctables_execute (struct dataset *ds, struct ctables *ct)
+{
+ for (size_t i = 0; i < ct->n_tables; i++)
+ {
+ struct ctables_table *t = ct->tables[i];
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ if (t->axes[a])
+ {
+ t->vaas[a] = enumerate_fts (a, t->axes[a]);
+
+ for (size_t j = 0; j < t->vaas[a].n; j++)
+ {
+ struct var_array *va = &t->vaas[a].vas[j];
+ for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
+ {
+ va->domains[dt] = xmalloc (va->n * sizeof *va->domains[dt]);
+ va->n_domains[dt] = 0;
+
+ for (size_t k = 0; k < va->n; k++)
+ {
+ if (k == va->scale_idx)
+ continue;
+
+ switch (dt)
+ {
+ case CTDT_TABLE:
+ continue;
+
+ case CTDT_LAYER:
+ if (a != PIVOT_AXIS_LAYER)
+ continue;
+ break;
+
+ case CTDT_SUBTABLE:
+ case CTDT_ROW:
+ case CTDT_COL:
+ if (dt == CTDT_SUBTABLE ? a != PIVOT_AXIS_LAYER
+ : dt == CTDT_ROW ? a == PIVOT_AXIS_COLUMN
+ : a == PIVOT_AXIS_ROW)
+ {
+ if (k == va->n - 1
+ || (va->scale_idx == va->n - 1
+ && k == va->n - 2))
+ continue;
+ }
+ break;
+
+ case CTDT_LAYERROW:
+ if (a == PIVOT_AXIS_COLUMN)
+ continue;
+ break;
+
+ case CTDT_LAYERCOL:
+ if (a == PIVOT_AXIS_ROW)
+ continue;
+ break;
+ }
+
+ va->domains[dt][va->n_domains[dt]++] = k;
+ }
+ }
+ }
+ }
+ else
+ {
+ struct var_array *va = xmalloc (sizeof *va);
+ *va = (struct var_array) { .n = 0 };
+ t->vaas[a] = (struct var_array2) { .vas = va, .n = 1 };
+ }
+
+ for (size_t i = 0; i < t->vaas[t->summary_axis].n; i++)
+ {
+ struct var_array *va = &t->vaas[t->summary_axis].vas[i];
+ if (!va->n_summaries)
+ {
+ va->summaries = xmalloc (sizeof *va->summaries);
+ va->n_summaries = 1;
+
+ enum ctables_summary_function function
+ = va->summary_var ? CTSF_MEAN : CTSF_COUNT;
+ struct ctables_var var = { .is_mrset = false, .var = va->summary_var };
+
+ *va->summaries = (struct ctables_summary_spec) {
+ .function = function,
+ .format = ctables_summary_default_format (function, &var),
+ .label = ctables_summary_default_label (function, 0),
+ };
+ if (!va->summary_var)
+ va->summary_var = va->vars[0];
+ }
+ }
}
struct casereader *input = casereader_create_filter_weight (proc_open (ds),
dataset_dict (ds),
NULL, NULL);
bool warn_on_invalid = true;
+ double total_weight = 0;
for (struct ccase *c = casereader_read (input); c;
case_unref (c), c = casereader_read (input))
{
double weight = dict_get_case_weight (dataset_dict (ds), c,
&warn_on_invalid);
- for (size_t i = 0; i < n_fts; i++)
+ total_weight += weight;
+
+ for (size_t i = 0; i < ct->n_tables; i++)
{
- struct ctables_freqtab *ft = fts[i];
+ struct ctables_table *t = ct->tables[i];
- size_t hash = 0;
+ for (size_t ir = 0; ir < t->vaas[PIVOT_AXIS_ROW].n; ir++)
+ for (size_t ic = 0; ic < t->vaas[PIVOT_AXIS_COLUMN].n; ic++)
+ for (size_t il = 0; il < t->vaas[PIVOT_AXIS_LAYER].n; il++)
+ ctables_freqtab_insert (t, c, ir, ic, il, weight);
+ }
+ }
+ casereader_destroy (input);
- for (size_t j = 0; j < ft->vars.n; j++)
+ for (size_t i = 0; i < ct->n_tables; i++)
+ {
+ struct ctables_table *t = ct->tables[i];
+
+ struct pivot_table *pt = pivot_table_create__ (
+ (t->title
+ ? pivot_value_new_user_text (t->title, SIZE_MAX)
+ : pivot_value_new_text (N_("Custom Tables"))),
+ NULL);
+ if (t->caption)
+ pivot_table_set_caption (
+ pt, pivot_value_new_user_text (t->caption, SIZE_MAX));
+ /* The corner text is a separate part of the table from the caption.
+ Calling pivot_table_set_caption() here would replace the caption set
+ just above and leave the corner empty. */
+ if (t->corner)
+ pivot_table_set_corner_text (
+ pt, pivot_value_new_user_text (t->corner, SIZE_MAX));
+
+ pivot_table_set_look (pt, ct->look);
+ struct pivot_dimension *d[PIVOT_N_AXES];
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ {
+ static const char *names[] = {
+ [PIVOT_AXIS_ROW] = N_("Rows"),
+ [PIVOT_AXIS_COLUMN] = N_("Columns"),
+ [PIVOT_AXIS_LAYER] = N_("Layers"),
+ };
+ d[a] = (t->axes[a] || a == t->summary_axis
+ ? pivot_dimension_create (pt, a, names[a])
+ : NULL);
+ if (!d[a])
+ continue;
+
+ assert (t->axes[a]);
+
+ struct ctables_freq **sorted = xnmalloc (t->ft.count, sizeof *sorted);
+
+ struct ctables_freq *f;
+ size_t n = 0;
+ HMAP_FOR_EACH (f, struct ctables_freq, node, &t->ft)
+ sorted[n++] = f;
+ assert (n == t->ft.count);
+
+ struct ctables_freq_sort_aux aux = { .t = t, .a = a };
+ sort (sorted, n, sizeof *sorted, ctables_freq_compare_3way, &aux);
+
+ size_t max_depth = 0;
+ for (size_t j = 0; j < t->vaas[a].n; j++)
+ if (t->vaas[a].vas[j].n > max_depth)
+ max_depth = t->vaas[a].vas[j].n;
+
+ struct pivot_category **groups = xnmalloc (max_depth, sizeof *groups);
+ struct pivot_category *top = NULL;
+ int prev_leaf = 0;
+ for (size_t j = 0; j < n; j++)
{
- const struct variable *var = ft->vars.vars[j];
- hash = value_hash (case_data (c, var), var_get_width (var), hash);
- }
+ struct ctables_freq *f = sorted[j];
+ const struct var_array *va = &t->vaas[a].vas[f->axes[a].vaa_idx];
- struct freq *f;
- HMAP_FOR_EACH_WITH_HASH (f, struct freq, node, hash, &ft->data)
- {
- for (size_t j = 0; j < ft->vars.n; j++)
+ size_t n_common = 0;
+ bool new_subtable = false;
+ if (j > 0)
{
- const struct variable *var = ft->vars.vars[j];
- if (!value_equal (case_data (c, var), &f->values[j],
- var_get_width (var)))
- goto next_hash_node;
+ struct ctables_freq *prev = sorted[j - 1];
+ if (prev->axes[a].vaa_idx == f->axes[a].vaa_idx)
+ {
+ /* Count how many leading values this cell shares with the
+ previous one, skipping the scale variable. value_equal()'s
+ third argument is the value's width, so pass the variable's
+ width rather than its type. */
+ for (; n_common < va->n; n_common++)
+ if (n_common != va->scale_idx
+ && !value_equal (&prev->axes[a].values[n_common],
+ &f->axes[a].values[n_common],
+ var_get_width (va->vars[n_common])))
+ break;
+ }
+ else
+ new_subtable = true;
}
+ else
+ new_subtable = true;
- f->count += weight;
- goto next_ft;
+ if (new_subtable)
+ {
+ enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (va->vars[0])];
+ top = d[a]->root;
+ if (vlabel != CTVL_NONE)
+ top = pivot_category_create_group__ (
+ top, pivot_value_new_variable (va->vars[0]));
+ }
+ if (n_common == va->n)
+ {
+ f->axes[a].leaf = prev_leaf;
+ continue;
+ }
- next_hash_node: ;
- }
+ for (size_t k = n_common; k < va->n; k++)
+ {
+ struct pivot_category *parent = k > 0 ? groups[k - 1] : top;
+
+ struct pivot_value *label
+ = (k != va->scale_idx
+ ? pivot_value_new_var_value (va->vars[k],
+ &f->axes[a].values[k])
+ : NULL);
+ if (k == va->n - 1)
+ {
+ if (a == t->summary_axis)
+ {
+ if (label)
+ parent = pivot_category_create_group__ (parent, label);
+ for (size_t m = 0; m < va->n_summaries; m++)
+ {
+ int leaf = pivot_category_create_leaf (
+ parent, pivot_value_new_text (va->summaries[m].label));
+ if (m == 0)
+ prev_leaf = leaf;
+ }
+ }
+ else
+ {
+ /* This assertion is true as long as the summary axis
+ is the axis where the summaries are displayed. */
+ assert (label);
- f = xmalloc (table_entry_size (ft->vars.n));
- f->count = weight;
- for (size_t j = 0; j < ft->vars.n; j++)
- {
- const struct variable *var = ft->vars.vars[j];
- value_clone (&f->values[j], case_data (c, var),
- var_get_width (var));
- }
- hmap_insert (&ft->data, &f->node, hash);
+ prev_leaf = pivot_category_create_leaf (parent, label);
+ }
+ break;
+ }
+
+ if (label)
+ parent = pivot_category_create_group__ (parent, label);
- next_ft: ;
+ enum ctables_vlabel vlabel = ct->vlabels[var_get_dict_index (va->vars[k + 1])];
+ if (vlabel != CTVL_NONE)
+ parent = pivot_category_create_group__ (
+ parent, pivot_value_new_variable (va->vars[k + 1]));
+ groups[k] = parent;
+ }
+
+ f->axes[a].leaf = prev_leaf;
+ }
+ free (sorted);
+ free (groups);
+ }
+ struct ctables_freq *f;
+ HMAP_FOR_EACH (f, struct ctables_freq, node, &t->ft)
+ {
+ const struct var_array *ss = &t->vaas[t->summary_axis].vas[f->axes[t->summary_axis].vaa_idx];
+ /* Emit one table cell per summary of this frequency entry. */
+ for (size_t j = 0; j < ss->n_summaries; j++)
+ {
+ /* One index per dimension that was actually created, so at most
+ PIVOT_N_AXES of them. */
+ size_t dindexes[PIVOT_N_AXES];
+ size_t n_dindexes = 0;
+
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ if (d[a])
+ {
+ /* On the summary axis, the leaves for the summaries follow
+ the first one consecutively, so offset by J. */
+ int leaf = f->axes[a].leaf;
+ if (a == t->summary_axis)
+ leaf += j;
+ dindexes[n_dindexes++] = leaf;
+ }
+
+ /* Named so that it does not shadow the d[] dimension array. */
+ double summary = ctables_summary_value (f, &f->summaries[j], &ss->summaries[j]);
+ struct pivot_value *value = pivot_value_new_number (summary);
+ value->numeric.format = ss->summaries[j].format;
+ pivot_table_put (pt, dindexes, n_dindexes, value);
+ }
}
+
+ pivot_table_submit (pt);
}
- casereader_destroy (input);
- for (size_t i = 0; i < n_fts; i++)
+#if 0
+ for (size_t i = 0; i < ct->n_tables; i++)
{
- struct ctables_freqtab *ft = fts[i];
- struct freq *f, *next;
- HMAP_FOR_EACH_SAFE (f, next, struct freq, node, &ft->data)
+ struct ctables_table *t = ct->tables[i];
+
+ for (size_t j = 0; j < t->n_fts; j++)
{
- hmap_delete (&ft->data, &f->node);
- for (size_t j = 0; j < ft->vars.n; j++)
+ struct ctables_freqtab *ft = t->fts[j];
+ struct ctables_freq *f, *next;
+ HMAP_FOR_EACH_SAFE (f, next, struct ctables_freq, node, &ft->data)
{
- const struct variable *var = ft->vars.vars[j];
- value_destroy (&f->values[j], var_get_width (var));
+ hmap_delete (&ft->data, &f->node);
+ for (size_t k = 0; k < ft->n_summaries; k++)
+ ctables_summary_uninit (&f->summaries[k], &ft->summaries[k]);
+ free (f->summaries);
+ for (size_t k = 0; k < ft->vars.n; k++)
+ {
+ const struct variable *var = ft->vars.vars[k];
+ value_destroy (&f->values[k], var_get_width (var));
+ }
+ free (f);
}
- free (f);
+ hmap_destroy (&ft->data);
+ var_array_uninit (&ft->vars);
+ free (ft);
}
- hmap_destroy (&ft->data);
- var_array_uninit (&ft->vars);
- free (ft);
+ free (t->fts);
}
- free (fts);
-
+#endif
+
return proc_commit (ds);
}
{
size_t n_vars = dict_get_n_vars (dataset_dict (ds));
enum ctables_vlabel *vlabels = xnmalloc (n_vars, sizeof *vlabels);
+ enum settings_value_show tvars = settings_get_show_variables ();
for (size_t i = 0; i < n_vars; i++)
- vlabels[i] = CTVL_DEFAULT;
+ vlabels[i] = (enum ctables_vlabel) tvars;
struct ctables *ct = xmalloc (sizeof *ct);
*ct = (struct ctables) {
.vlabels = vlabels,
.hide_threshold = 5,
};
+ ct->look->omit_empty = false;
if (!lex_force_match (lexer, T_SLASH))
goto error;
enum ctables_vlabel vlabel;
if (lex_match_id (lexer, "DEFAULT"))
- vlabel = CTVL_DEFAULT;
+ vlabel = (enum ctables_vlabel) settings_get_show_variables ();
else if (lex_match_id (lexer, "NAME"))
vlabel = CTVL_NAME;
else if (lex_match_id (lexer, "LABEL"))
ct->tables = x2nrealloc (ct->tables, &allocated_tables,
sizeof *ct->tables);
- struct ctables_table *t = &ct->tables[ct->n_tables++];
+ struct ctables_table *t = xmalloc (sizeof *t);
*t = (struct ctables_table) {
+ .ft = HMAP_INITIALIZER (t->ft),
.slabels_position = PIVOT_AXIS_COLUMN,
.slabels_visible = true,
.row_labels = CTLP_NORMAL,
.n_categories = dict_get_n_vars (dataset_dict (ds)),
.cilevel = 95,
};
+ for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
+ hmap_init (&t->domains[dt]);
+ ct->tables[ct->n_tables++] = t;
lex_match (lexer, T_EQUALS);
if (!ctables_axis_parse (lexer, dataset_dict (ds), ct, t, PIVOT_AXIS_ROW))
const struct ctables_axis *scales[PIVOT_N_AXES];
size_t n_scales = 0;
- for (size_t i = 0; i < 3; i++)
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
{
- scales[i] = find_scale (t->axes[i]);
- if (scales[i])
+ scales[a] = find_scale (t->axes[a]);
+ if (scales[a])
n_scales++;
}
if (n_scales > 1)
{
- msg (SE, _("Scale variables may appear only on one dimension."));
+ msg (SE, _("Scale variables may appear only on one axis."));
if (scales[PIVOT_AXIS_ROW])
msg_at (SN, scales[PIVOT_AXIS_ROW]->loc,
- _("This scale variable appears in the rows dimension."));
+ _("This scale variable appears on the rows axis."));
if (scales[PIVOT_AXIS_COLUMN])
msg_at (SN, scales[PIVOT_AXIS_COLUMN]->loc,
- _("This scale variable appears in the columns dimension."));
+ _("This scale variable appears on the columns axis."));
if (scales[PIVOT_AXIS_LAYER])
msg_at (SN, scales[PIVOT_AXIS_LAYER]->loc,
- _("This scale variable appears in the layer dimension."));
+ _("This scale variable appears on the layer axis."));
goto error;
}
+ const struct ctables_axis *summaries[PIVOT_N_AXES];
+ size_t n_summaries = 0;
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
+ {
+ summaries[a] = (scales[a]
+ ? scales[a]
+ : find_categorical_summary_spec (t->axes[a]));
+ if (summaries[a])
+ n_summaries++;
+ }
+ if (n_summaries > 1)
+ {
+ msg (SE, _("Summaries may appear only on one axis."));
+ if (summaries[PIVOT_AXIS_ROW])
+ msg_at (SN, summaries[PIVOT_AXIS_ROW]->loc,
+ _("This variable on the rows axis has a summary."));
+ if (summaries[PIVOT_AXIS_COLUMN])
+ msg_at (SN, summaries[PIVOT_AXIS_COLUMN]->loc,
+ _("This variable on the columns axis has a summary."));
+ if (summaries[PIVOT_AXIS_LAYER])
+ msg_at (SN, summaries[PIVOT_AXIS_LAYER]->loc,
+ _("This variable on the layers axis has a summary."));
+ goto error;
+ }
+ for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++) /* Choose the summary axis: first match in axis order wins. */
+ if (n_summaries ? summaries[a] : t->axes[a]) /* Prefer the axis that has a summary; if none does, take the first non-null axis. */
+ {
+ t->summary_axis = a;
+ break;
+ }
+
if (lex_token (lexer) == T_ENDCMD)
break;
if (!lex_force_match (lexer, T_SLASH))