From: Ben Pfaff Date: Mon, 15 Mar 2010 05:00:50 +0000 (-0700) Subject: work on pivot table and FREQUENCIES usage X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp;a=commitdiff_plain;h=refs%2Fheads%2Fpivot-table work on pivot table and FREQUENCIES usage --- diff --git a/src/language/stats/frequencies.q b/src/language/stats/frequencies.q index c360713b56..8b256c78a7 100644 --- a/src/language/stats/frequencies.q +++ b/src/language/stats/frequencies.q @@ -49,6 +49,7 @@ #include "output/chart-item.h" #include "output/charts/piechart.h" #include "output/charts/plot-hist.h" +#include "output/pivot-table.h" #include "output/tab.h" #include "gl/minmax.h" @@ -162,6 +163,7 @@ struct frq_chart struct freq_tab { struct casewriter *sorter; + struct casereader *data; struct freq *valid; /* Valid freqs. */ int n_valid; /* Number of valid freqs. */ @@ -596,6 +598,7 @@ postprocess_freq_tab (const struct frq_proc *frq, struct var_freqs *vf) /* Extract data from hash table. */ reader = casewriter_make_reader (ft->sorter); + ft->data = casereader_clone (reader); freqs = xnmalloc (casereader_count_cases (reader), sizeof *freqs); for (count = 0; (c = casereader_read (reader)) != NULL; count++) { @@ -649,6 +652,7 @@ cleanup_freq_tab (struct var_freqs *vf) for (i = 0; i < vf->tab.n_valid + vf->tab.n_missing; i++) value_destroy (&vf->tab.valid[i].value, vf->width); } + casereader_destroy (vf->tab.data); free (vf->tab.valid); } @@ -843,83 +847,68 @@ add_percentile (struct frq_proc *frq, double x, bool show, static void dump_freq_table (const struct var_freqs *vf, const struct variable *wv) { - const struct fmt_spec *wfmt = wv ? 
var_get_print_format (wv) : &F_8_0; - const struct freq_tab *ft = &vf->tab; - int n_categories; - struct freq *f; - struct tab_table *t; - int r, x; - double cum_total = 0.0; - double cum_freq = 0.0; - - static const char *headings[] = { - N_("Value Label"), - N_("Value"), - N_("Frequency"), - N_("Percent"), - N_("Valid Percent"), - N_("Cum Percent") - }; - - n_categories = ft->n_valid + ft->n_missing; - t = tab_create (6, n_categories + 2); - tab_headers (t, 0, 0, 1, 0); - - for (x = 0; x < 6; x++) - tab_text (t, x, 0, TAB_CENTER | TAT_TITLE, gettext (headings[x])); - - r = 1; - for (f = ft->valid; f < ft->missing; f++) - { - const char *label; - double percent, valid_percent; + struct variable *value_var, *freq_var; + struct pivot_table *pt; - cum_freq += f->count; + pt = xmalloc (sizeof *pt); - percent = f->count / ft->total_cases * 100.0; - valid_percent = f->count / ft->valid_cases * 100.0; - cum_total += valid_percent; + pt->data = casereader_clone (vf->tab.data); - label = var_lookup_value_label (vf->var, &f->value); - if (label != NULL) - tab_text (t, 0, r, TAB_LEFT, label); - - tab_value (t, 1, r, TAB_NONE, &f->value, ft->dict, &vf->print); - tab_double (t, 2, r, TAB_NONE, f->count, wfmt); - tab_double (t, 3, r, TAB_NONE, percent, NULL); - tab_double (t, 4, r, TAB_NONE, valid_percent, NULL); - tab_double (t, 5, r, TAB_NONE, cum_total, NULL); - r++; - } - for (; f < &ft->valid[n_categories]; f++) + pt->dict = dict_create (); + value_var = dict_clone_var_assert (pt->dict, vf->var); + if (wv != NULL) + freq_var = dict_clone_var_assert (pt->dict, wv); + else { - const char *label; - - cum_freq += f->count; - - label = var_lookup_value_label (vf->var, &f->value); - if (label != NULL) - tab_text (t, 0, r, TAB_LEFT, label); - - tab_value (t, 1, r, TAB_NONE, &f->value, ft->dict, &vf->print); - tab_double (t, 2, r, TAB_NONE, f->count, wfmt); - tab_double (t, 3, r, TAB_NONE, - f->count / ft->total_cases * 100.0, NULL); - tab_text (t, 4, r, TAB_NONE, _("Missing")); - 
r++; + freq_var = dict_create_var (pt->dict, "$Frequency", 0); + var_set_both_formats (freq_var, &F_8_0); + var_set_label (freq_var, _("Frequency")); } - tab_box (t, TAL_1, TAL_1, -1, TAL_1, 0, 0, 5, r); - tab_hline (t, TAL_2, 0, 5, 1); - tab_hline (t, TAL_2, 0, 5, r); - tab_joint_text (t, 0, r, 1, r, TAB_RIGHT | TAT_TITLE, _("Total")); - tab_vline (t, TAL_0, 1, r, r); - tab_double (t, 2, r, TAB_NONE, cum_freq, wfmt); - tab_fixed (t, 3, r, TAB_NONE, 100.0, 5, 1); - tab_fixed (t, 4, r, TAB_NONE, 100.0, 5, 1); - - tab_title (t, "%s", var_to_string (vf->var)); - tab_submit (t); + subcase_init_empty (&pt->split); + subcase_init_empty (&pt->dimensions[TABLE_HORZ]); + subcase_init_empty (&pt->dimensions[TABLE_VERT]); + subcase_add_var (&pt->dimensions[TABLE_VERT], value_var, SC_ASCEND); + + pt->pane.n[TABLE_HORZ] = 4; + pt->pane.n[TABLE_VERT] = 1; + pt->pane.cells = xmalloc (sizeof *pt->pane.cells); + pt->pane.cells[0] = xmalloc (4 * sizeof **pt->pane.cells); + + /* Frequency. */ + pivot_cell_init ( + &pt->pane.cells[0][0], + pivot_value_create (freq_var, PIVOT_SUM, 0, 0, PIVOT_INCLUDE_ALL)); + + /* Percent. */ + pivot_cell_init ( + &pt->pane.cells[0][1], + pivot_value_create (freq_var, PIVOT_SUM, 0, 0, PIVOT_INCLUDE_ALL)); + pt->pane.cells[0][1].cmp = PIVOT_PERCENT; + pt->pane.cells[0][1].cmp_args[0] = pivot_value_create ( + freq_var, PIVOT_SUM, 0, 1, PIVOT_INCLUDE_ALL); + pt->pane.cells[0][1].label = xstrdup ("Percent"); + + /* Valid Percent. */ + pivot_cell_init ( + &pt->pane.cells[0][2], + pivot_value_create (freq_var, PIVOT_SUM, 0, 0, PIVOT_INCLUDE_VALID)); + pt->pane.cells[0][2].cmp = PIVOT_PERCENT; + pt->pane.cells[0][2].cmp_args[0] = pivot_value_create ( + freq_var, PIVOT_SUM, 0, 1, PIVOT_INCLUDE_VALID); + pt->pane.cells[0][2].label = xstrdup ("Valid Percent"); + + /* Cumulative (Valid) Percent. 
*/ + pivot_cell_init ( + &pt->pane.cells[0][3], + pivot_value_create (freq_var, PIVOT_SUM, 0, 0, PIVOT_INCLUDE_VALID)); + pt->pane.cells[0][3].base->n_cum_vars[TABLE_VERT] = 1; + pt->pane.cells[0][3].cmp = PIVOT_PERCENT; + pt->pane.cells[0][3].cmp_args[0] = pivot_value_create ( + freq_var, PIVOT_SUM, 0, 1, PIVOT_INCLUDE_VALID); + pt->pane.cells[0][3].label = xstrdup ("Cum Percent"); + + pivot_table_dump (pt); } /* Statistical display. */ diff --git a/src/output/automake.mk b/src/output/automake.mk index e5781b0af7..5210b0b97e 100644 --- a/src/output/automake.mk +++ b/src/output/automake.mk @@ -39,6 +39,8 @@ src_output_liboutput_la_SOURCES = \ src/output/output-item-provider.h \ src/output/output-item.c \ src/output/output-item.h \ + src/output/pivot-table.c \ + src/output/pivot-table.h \ src/output/render.c \ src/output/render.h \ src/output/tab.c \ diff --git a/src/output/pivot-table.c b/src/output/pivot-table.c new file mode 100644 index 0000000000..54af3f9f03 --- /dev/null +++ b/src/output/pivot-table.c @@ -0,0 +1,166 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ + +#include <config.h> + +#include "output/pivot-table.h" + +#include "data/case.h" +#include "data/casereader.h" +#include "math/sort.h" + +#include "gl/xalloc.h" + +void +pivot_cell_init (struct pivot_cell *cell, struct pivot_value *value) +{ + cell->base = value; + cell->cmp = PIVOT_RAW; + cell->cmp_args[0] = cell->cmp_args[1] = cell->cmp_args[2] = NULL; + cell->label = NULL; + cell->format = F_8_0; +} + +struct pivot_value * +pivot_value_create (const struct variable *var, + enum pivot_operator operator, + int n_vars_horz, + int n_vars_vert, + enum pivot_value_include include) +{ + struct pivot_value *value = xmalloc (sizeof *value); + value->var = var; + value->operator = operator; + value->n_agg_vars[TABLE_HORZ] = n_vars_horz; + value->n_agg_vars[TABLE_VERT] = n_vars_vert; + value->include = include; + return value; +} + +static struct ccase * +keep_one (struct ccase *a, struct ccase *b, void *aux UNUSED) +{ + case_unref (b); + return a; +} + +static void +dump_projection (struct casereader *data, const struct subcase *split, + const struct subcase *vars) +{ + struct subcase sc; + struct ccase *c; + + subcase_clone (&sc, split); + subcase_concat_always (&sc, vars); + + data = casereader_project (data, &sc); + data = sort_distinct_execute (data, &sc, keep_one, NULL, NULL); + +#if 0 + for (; (c = casereader_read (data)) != NULL; case_unref (c)) + { + const struct caseproto *proto = case_get_proto (c); + size_t i; + + for (i = 0; i < caseproto_get_n_widths (proto); i++) + { + int width = caseproto_get_width (proto, i); + + if (i > 0) + putchar (' '); + + if (width == 0) + printf ("%8.2g", case_num_idx (c, i)); + else + printf ("\"%.*s\"", width, case_str_idx (c, i)); + } + printf ("\n"); + } +#endif + + subcase_destroy (&sc); +} + +static void +dump_data (struct pivot_table *pt) +{ + struct casereader *data; + struct subcase sc; + struct ccase *c; + + subcase_clone (&sc, &pt->split); + subcase_concat_always (&sc, &pt->dimensions[TABLE_HORZ]); + subcase_concat_always 
(&sc, &pt->dimensions[TABLE_VERT]); + + data = sort_distinct_execute (casereader_clone (pt->data), &sc, keep_one, NULL, NULL); + for (; (c = casereader_read (data)) != NULL; case_unref (c)) + { + const struct caseproto *proto = case_get_proto (c); + size_t i; + + for (i = 0; i < caseproto_get_n_widths (proto); i++) + { + int width = caseproto_get_width (proto, i); + + if (i > 0) + putchar (' '); + + if (width == 0) + printf ("%8.2f", case_num_idx (c, i)); + else + printf ("\"%.*s\"", width, case_str_idx (c, i)); + } + printf ("\n"); + } + +} + +void +pivot_table_dump (struct pivot_table *pt) +{ + /* Strategy: + + Determine row and column labels: + + Project onto (splits, rows) and sort, discarding duplicates. + + Project onto (splits, columns) and sort, discarding duplicates. + + Sort data on (splits, rows, columns). + + For each split group: + + For each row value: + + For each column value: + + For each value in the pane: + + Evaluate and print value. + + */ + + dump_projection (casereader_clone (pt->data), + &pt->split, &pt->dimensions[TABLE_HORZ]); + dump_projection (casereader_clone (pt->data), + &pt->split, &pt->dimensions[TABLE_VERT]); + + dump_data (pt); + + + casereader_destroy (pt->data); +} diff --git a/src/output/pivot-table.h b/src/output/pivot-table.h new file mode 100644 index 0000000000..504a6b7150 --- /dev/null +++ b/src/output/pivot-table.h @@ -0,0 +1,142 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef OUTPUT_PIVOT_TABLE_H +#define OUTPUT_PIVOT_TABLE_H 1 + +#include "data/format.h" +#include "data/subcase.h" +#include "output/table.h" + +struct pivot_pane + { + int n[TABLE_N_AXES]; + struct pivot_cell **cells; /* cells[y][x] */ + }; + +struct pivot_table + { + struct casereader *data; + struct dictionary *dict; + struct subcase split; + struct subcase dimensions[TABLE_N_AXES]; + struct pivot_pane pane; + struct pivot_pane *summaries[TABLE_N_AXES]; + }; + +void pivot_table_dump (struct pivot_table *); + +enum pivot_comparison_type + { + PIVOT_RAW, /* No comparison. */ + + /* 1-dimensional comparisons. */ + PIVOT_FRACTION, /* Fraction of an aggregate. */ + PIVOT_PERCENT, /* Percentage of an aggregate. */ + + /* 2-dimensional comparisons. */ + PIVOT_EXPECTED, /* row_sum * col_sum / grand_total */ + PIVOT_RESIDUAL, /* value - expected */ + PIVOT_SRESIDUAL, /* (value - expected) / sqrt(expected) */ + PIVOT_ASRESIDUAL /* (value - expected) + / sqrt(expected + * (1 - row_sum / grand_total) + * (1 - col_sum / grand_total)) */ + }; + +/* A value, possibly compared against other values. */ +struct pivot_cell + { + struct pivot_value *base; + + /* Comparison. */ + enum pivot_comparison_type cmp; + struct pivot_value *cmp_args[3]; + + /* Formatting. */ + char *label; + struct fmt_spec format; + }; + +void pivot_cell_init (struct pivot_cell *, struct pivot_value *); + +/* How to combine potentially multiple values. */ +enum pivot_operator + { + PIVOT_EXACTLY_ONE, /* SYSMIS if no values or more than one. 
*/ + PIVOT_SUM, + PIVOT_MEAN, + + PIVOT_N, + PIVOT_N_VALID, + PIVOT_N_MISSING, + + PIVOT_MODE, + + PIVOT_STDDEV, + PIVOT_VARIANCE, + + PIVOT_MINIMUM, + PIVOT_MAXIMUM, + PIVOT_RANGE, + PIVOT_MEDIAN, + PIVOT_PERCENTILE + }; + +enum pivot_value_include + { + PIVOT_INCLUDE_VALID = 1 << 0, + PIVOT_INCLUDE_SYSTEM_MISSING = 1 << 1, + PIVOT_INCLUDE_USER_MISSING = 1 << 2, + PIVOT_INCLUDE_ALL = 7 + }; + +/* A pivot_value specifies a function whose arguments are the values of: + + - The split and dimension variables of the pivot table. + + - The variable specified as part of the pivot_value. (PIVOT_N does not + require a variable.) + + A pivot_value is notionally evaluated as: + + Given a value for every variable: + For every row in the input data: + If the row's split variable equal their assigned values: + and the row's dimension variables equal their assigned values, + except that the aggregate variables, if any, are ignored, + and that the cumulative variables are equal or lexicographically + less than the assigned values, + and the value variable is included according to "include": + Submit the value to the operator. +*/ +struct pivot_value + { + const struct variable *var; + enum pivot_operator operator; + int n_agg_vars[TABLE_N_AXES]; + int n_cum_vars[TABLE_N_AXES]; + enum pivot_value_include include; + }; + +struct pivot_value *pivot_value_create (const struct variable *, + enum pivot_operator, + int n_vars_horz, + int n_vars_vert, + enum pivot_value_include); + + +#endif /* output/pivot-table.h */