projects
/
pspp
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
f235067
)
work on configurable summaries for totals
author
Ben Pfaff
<blp@cs.stanford.edu>
Thu, 13 Jan 2022 05:51:10 +0000
(21:51 -0800)
committer
Ben Pfaff
<blp@cs.stanford.edu>
Sat, 2 Apr 2022 01:48:55 +0000
(18:48 -0700)
src/language/stats/ctables.c
patch
|
blob
|
history
diff --git a/src/language/stats/ctables.c b/src/language/stats/ctables.c
index 74afa0136da77b7dd10570f38c8be122f247d827..e5c1328f039dd3f9bcb3639c2cd497d967f3bc67 100644 (file)
--- a/src/language/stats/ctables.c
+++ b/src/language/stats/ctables.c
@@ -181,6 +181,7 @@ struct ctables_cell
struct ctables_domain *domains[N_CTDTS];
bool hide;
struct ctables_domain *domains[N_CTDTS];
bool hide;
+ bool total;
struct
{
struct
{
@@ -276,6 +277,17 @@ enum ctables_label_position
CTLP_LAYER,
};
CTLP_LAYER,
};
+struct ctables_summary_spec_set
+ {
+ struct ctables_summary_spec *summaries;
+ size_t n;
+ size_t allocated;
+
+ struct variable *var;
+ };
+
+static void ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *);
+
struct var_array
{
struct variable **vars;
struct var_array
{
struct variable **vars;
@@ -284,9 +296,8 @@ struct var_array
size_t *domains[N_CTDTS];
size_t n_domains[N_CTDTS];
size_t *domains[N_CTDTS];
size_t n_domains[N_CTDTS];
- struct ctables_summary_spec *summaries;
- size_t n_summaries;
- struct variable *summary_var;
+ struct ctables_summary_spec_set cell_summaries;
+ struct ctables_summary_spec_set total_summaries;
};
struct var_array2
};
struct var_array2
@@ -487,9 +498,8 @@ struct ctables_axis
{
struct ctables_var var;
bool scale;
{
struct ctables_var var;
bool scale;
- struct ctables_summary_spec *summaries;
- size_t n_summaries;
- size_t allocated_summaries;
+ struct ctables_summary_spec_set cell_summaries;
+ struct ctables_summary_spec_set total_summaries;
};
/* Nonterminals. */
};
/* Nonterminals. */
@@ -530,6 +540,14 @@ ctables_summary_spec_uninit (struct ctables_summary_spec *s)
free (s->label);
}
free (s->label);
}
+static void
+ctables_summary_spec_set_uninit (struct ctables_summary_spec_set *set)
+{
+ for (size_t i = 0; i < set->n; i++)
+ ctables_summary_spec_uninit (&set->summaries[i]);
+ free (set->summaries);
+}
+
static bool
parse_col_width (struct lexer *lexer, const char *name, double *width)
{
static bool
parse_col_width (struct lexer *lexer, const char *name, double *width)
{
@@ -623,9 +641,8 @@ ctables_axis_destroy (struct ctables_axis *axis)
switch (axis->op)
{
case CTAO_VAR:
switch (axis->op)
{
case CTAO_VAR:
- for (size_t i = 0; i < axis->n_summaries; i++)
- ctables_summary_spec_uninit (&axis->summaries[i]);
- free (axis->summaries);
+ ctables_summary_spec_set_uninit (&axis->cell_summaries);
+ ctables_summary_spec_set_uninit (&axis->total_summaries);
break;
case CTAO_STACK:
break;
case CTAO_STACK:
@@ -716,15 +733,10 @@ static bool
add_summary_spec (struct ctables_axis *axis,
enum ctables_summary_function function, double percentile,
const char *label, const struct fmt_spec *format,
add_summary_spec (struct ctables_axis *axis,
enum ctables_summary_function function, double percentile,
const char *label, const struct fmt_spec *format,
- const struct msg_location *loc)
+ const struct msg_location *loc, bool totals)
{
if (axis->op == CTAO_VAR)
{
{
if (axis->op == CTAO_VAR)
{
- if (axis->n_summaries >= axis->allocated_summaries)
- axis->summaries = x2nrealloc (axis->summaries,
- &axis->allocated_summaries,
- sizeof *axis->summaries);
-
const char *function_name = ctables_summary_function_name (function);
const char *var_name = ctables_var_name (&axis->var);
switch (ctables_function_availability (function))
const char *function_name = ctables_summary_function_name (function);
const char *var_name = ctables_var_name (&axis->var);
switch (ctables_function_availability (function))
@@ -756,7 +768,13 @@ add_summary_spec (struct ctables_axis *axis,
break;
}
break;
}
- struct ctables_summary_spec *dst = &axis->summaries[axis->n_summaries++];
+ struct ctables_summary_spec_set *set = (totals ? &axis->total_summaries
+ : &axis->cell_summaries);
+ if (set->n >= set->allocated)
+ set->summaries = x2nrealloc (set->summaries, &set->allocated,
+ sizeof *set->summaries);
+
+ struct ctables_summary_spec *dst = &set->summaries[set->n++];
*dst = (struct ctables_summary_spec) {
.function = function,
.percentile = percentile,
*dst = (struct ctables_summary_spec) {
.function = function,
.percentile = percentile,
@@ -770,7 +788,7 @@ add_summary_spec (struct ctables_axis *axis,
{
for (size_t i = 0; i < 2; i++)
if (!add_summary_spec (axis->subs[i], function, percentile, label,
{
for (size_t i = 0; i < 2; i++)
if (!add_summary_spec (axis->subs[i], function, percentile, label,
- format, loc))
+ format, loc, totals))
return false;
return true;
}
return false;
return true;
}
@@ -851,7 +869,8 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
if (!sub || !lex_match (ctx->lexer, T_LBRACK))
return sub;
if (!sub || !lex_match (ctx->lexer, T_LBRACK))
return sub;
- do
+ bool totals = false;
+ for (;;)
{
int start_ofs = lex_ofs (ctx->lexer);
{
int start_ofs = lex_ofs (ctx->lexer);
@@ -899,15 +918,28 @@ ctables_axis_parse_postfix (struct ctables_axis_parse_ctx *ctx)
struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
lex_ofs (ctx->lexer) - 1);
struct msg_location *loc = lex_ofs_location (ctx->lexer, start_ofs,
lex_ofs (ctx->lexer) - 1);
- add_summary_spec (sub, function, percentile, label, formatp, loc);
+ add_summary_spec (sub, function, percentile, label, formatp, loc,
+ totals);
free (label);
msg_location_destroy (loc);
free (label);
msg_location_destroy (loc);
- lex_match (ctx->lexer, T_COMMA);
+ if (lex_match (ctx->lexer, T_COMMA))
+ {
+ if (!totals && lex_match_id (ctx->lexer, "TOTALS"))
+ {
+ if (!lex_force_match (ctx->lexer, T_LBRACK))
+ goto error;
+ }
+ }
+ else if (lex_force_match (ctx->lexer, T_RBRACK))
+ {
+ if (totals && !lex_force_match (ctx->lexer, T_RBRACK))
+ goto error;
+ return sub;
+ }
+ else
+ goto error;
}
}
- while (!lex_match (ctx->lexer, T_RBRACK));
-
- return sub;
error:
ctables_axis_destroy (sub);
error:
ctables_axis_destroy (sub);
@@ -947,7 +979,7 @@ find_categorical_summary_spec (const struct ctables_axis *axis)
if (!axis)
return NULL;
else if (axis->op == CTAO_VAR)
if (!axis)
return NULL;
else if (axis->op == CTAO_VAR)
- return !axis->scale && axis->n_summaries ? axis : NULL;
+ return !axis->scale && axis->cell_summaries.n ? axis : NULL;
else
{
for (size_t i = 0; i < 2; i++)
else
{
for (size_t i = 0; i < 2; i++)
@@ -1446,9 +1478,9 @@ nest_fts (struct var_array2 va0, struct var_array2 va1)
assert (n == allocate);
const struct var_array *summary_src;
assert (n == allocate);
const struct var_array *summary_src;
- if (!a->summary_var)
+ if (!a->cell_summaries.var)
summary_src = b;
summary_src = b;
- else if (!b->summary_var)
+ else if (!b->cell_summaries.var)
summary_src = a;
else
NOT_REACHED ();
summary_src = a;
else
NOT_REACHED ();
@@ -1458,9 +1490,8 @@ nest_fts (struct var_array2 va0, struct var_array2 va1)
: b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
: SIZE_MAX),
.n = n,
: b->scale_idx != SIZE_MAX ? a->n + b->scale_idx
: SIZE_MAX),
.n = n,
- .summaries = summary_src->summaries,
- .n_summaries = summary_src->n_summaries,
- .summary_var = summary_src->summary_var,
+ .cell_summaries = summary_src->cell_summaries,
+ .total_summaries = summary_src->total_summaries,
};
}
var_array2_uninit (&va0);
};
}
var_array2_uninit (&va0);
@@ -1502,11 +1533,12 @@ enumerate_fts (enum pivot_axis_type axis_type, const struct ctables_axis *a)
.n = 1,
.scale_idx = a->scale ? 0 : SIZE_MAX,
};
.n = 1,
.scale_idx = a->scale ? 0 : SIZE_MAX,
};
- if (a->n_summaries || a->scale)
+ if (a->cell_summaries.n || a->scale)
{
{
- va->summaries = a->summaries;
- va->n_summaries = a->n_summaries;
- va->summary_var = a->var.var;
+ va->cell_summaries = a->cell_summaries;
+ va->total_summaries = a->total_summaries;
+ va->cell_summaries.var = a->var.var;
+ va->total_summaries.var = a->var.var;
}
return (struct var_array2) { .vas = va, .n = 1 };
}
return (struct var_array2) { .vas = va, .n = 1 };
@@ -2180,6 +2212,7 @@ ctables_cell_insert__ (struct ctables_table *t, const struct ccase *c,
const struct var_array *ss = &t->vaas[t->summary_axis].vas[ix[t->summary_axis]];
size_t hash = 0;
const struct var_array *ss = &t->vaas[t->summary_axis].vas[ix[t->summary_axis]];
size_t hash = 0;
+ bool total = false;
for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
{
const struct var_array *va = &t->vaas[a].vas[ix[a]];
for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
{
const struct var_array *va = &t->vaas[a].vas[ix[a]];
@@ -2193,6 +2226,8 @@ ctables_cell_insert__ (struct ctables_table *t, const struct ccase *c,
&& cats[a][i]->type != CCT_HSUBTOTAL)
hash = value_hash (case_data (c, va->vars[i]),
var_get_width (va->vars[i]), hash);
&& cats[a][i]->type != CCT_HSUBTOTAL)
hash = value_hash (case_data (c, va->vars[i]),
var_get_width (va->vars[i]), hash);
+ else
+ total = true;
}
}
}
}
@@ -2223,6 +2258,7 @@ ctables_cell_insert__ (struct ctables_table *t, const struct ccase *c,
cell = xmalloc (sizeof *cell);
cell->hide = false;
cell = xmalloc (sizeof *cell);
cell->hide = false;
+ cell->total = total;
for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
{
const struct var_array *va = &t->vaas[a].vas[ix[a]];
for (enum pivot_axis_type a = 0; a < PIVOT_N_AXES; a++)
{
const struct var_array *va = &t->vaas[a].vas[ix[a]];
@@ -2244,17 +2280,22 @@ ctables_cell_insert__ (struct ctables_table *t, const struct ccase *c,
var_get_width (va->vars[i]));
}
}
var_get_width (va->vars[i]));
}
}
- cell->summaries = xmalloc (ss->n_summaries * sizeof *cell->summaries);
- for (size_t i = 0; i < ss->n_summaries; i++)
- ctables_summary_init (&cell->summaries[i], &ss->summaries[i]);
+
+ {
+ const struct ctables_summary_spec_set *sss = &ss->cell_summaries;
+ cell->summaries = xmalloc (sss->n * sizeof *cell->summaries);
+ for (size_t i = 0; i < sss->n; i++)
+ ctables_summary_init (&cell->summaries[i], &sss->summaries[i]);
+ }
for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
cell->domains[dt] = ctables_domain_insert (t, cell, dt);
hmap_insert (&t->cells, &cell->node, hash);
for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
cell->domains[dt] = ctables_domain_insert (t, cell, dt);
hmap_insert (&t->cells, &cell->node, hash);
-summarize:
- for (size_t i = 0; i < ss->n_summaries; i++)
- ctables_summary_add (&cell->summaries[i], &ss->summaries[i], ss->summary_var,
- case_data (c, ss->summary_var), weight);
+summarize: ;
+ const struct ctables_summary_spec_set *sss = &ss->cell_summaries;
+ for (size_t i = 0; i < sss->n; i++)
+ ctables_summary_add (&cell->summaries[i], &sss->summaries[i], sss->var,
+ case_data (c, sss->var), weight);
for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
cell->domains[dt]->valid += weight;
}
for (enum ctables_domain_type dt = 0; dt < N_CTDTS; dt++)
cell->domains[dt]->valid += weight;
}
@@ -2422,22 +2463,25 @@ ctables_execute (struct dataset *ds, struct ctables *ct)
for (size_t i = 0; i < t->vaas[t->summary_axis].n; i++)
{
struct var_array *va = &t->vaas[t->summary_axis].vas[i];
for (size_t i = 0; i < t->vaas[t->summary_axis].n; i++)
{
struct var_array *va = &t->vaas[t->summary_axis].vas[i];
- if (!va->n_summaries)
+ if (!va->cell_summaries.n)
{
{
- va->summaries = xmalloc (sizeof *va->summaries);
- va->n_summaries = 1;
+ struct ctables_summary_spec_set *css = &va->cell_summaries;
+ css->summaries = xmalloc (sizeof *css->summaries);
+ css->n = 1;
enum ctables_summary_function function
enum ctables_summary_function function
- = va->summary_var ? CTSF_MEAN : CTSF_COUNT;
- struct ctables_var var = { .is_mrset = false, .var = va->summary_var };
+ = css->var ? CTSF_MEAN : CTSF_COUNT;
+ struct ctables_var var = { .is_mrset = false, .var = css->var };
- *va->summaries = (struct ctables_summary_spec) {
+ *css->summaries = (struct ctables_summary_spec) {
.function = function,
.format = ctables_summary_default_format (function, &var),
.label = ctables_summary_default_label (function, 0),
};
.function = function,
.format = ctables_summary_default_format (function, &var),
.label = ctables_summary_default_label (function, 0),
};
- if (!va->summary_var)
- va->summary_var = va->vars[0];
+ if (!css->var)
+ css->var = va->vars[0];
+
+ va->total_summaries = va->cell_summaries;
}
}
}
}
}
}
@@ -2579,10 +2623,10 @@ ctables_execute (struct dataset *ds, struct ctables *ct)
{
if (label)
parent = pivot_category_create_group__ (parent, label);
{
if (label)
parent = pivot_category_create_group__ (parent, label);
- for (size_t m = 0; m < va->n_summaries; m++)
+ for (size_t m = 0; m < va->cell_summaries.n; m++)
{
int leaf = pivot_category_create_leaf (
{
int leaf = pivot_category_create_leaf (
- parent, pivot_value_new_text (va->summaries[m].label));
+ parent, pivot_value_new_text (va->cell_summaries.summaries[m].label));
if (m == 0)
prev_leaf = leaf;
}
if (m == 0)
prev_leaf = leaf;
}
@@ -2619,8 +2663,8 @@ ctables_execute (struct dataset *ds, struct ctables *ct)
if (cell->hide)
continue;
if (cell->hide)
continue;
- const struct var_array *ss = &t->vaas[t->summary_axis].vas[cell->axes[t->summary_axis].vaa_idx];
- for (size_t j = 0; j < ss->n_summaries; j++)
+ const struct ctables_summary_spec_set *sss = &t->vaas[t->summary_axis].vas[cell->axes[t->summary_axis].vaa_idx].cell_summaries;
+ for (size_t j = 0; j < sss->n; j++)
{
size_t dindexes[3];
size_t n_dindexes = 0;
{
size_t dindexes[3];
size_t n_dindexes = 0;
@@ -2634,9 +2678,9 @@ ctables_execute (struct dataset *ds, struct ctables *ct)
dindexes[n_dindexes++] = leaf;
}
dindexes[n_dindexes++] = leaf;
}
- double d = ctables_summary_value (cell, &cell->summaries[j], &ss->summaries[j]);
+ double d = ctables_summary_value (cell, &cell->summaries[j], &sss->summaries[j]);
struct pivot_value *value = pivot_value_new_number (d);
struct pivot_value *value = pivot_value_new_number (d);
- value->numeric.format = ss->summaries[j].format;
+ value->numeric.format = sss->summaries[j].format;
pivot_table_put (pt, dindexes, n_dindexes, value);
}
}
pivot_table_put (pt, dindexes, n_dindexes, value);
}
}