From bd0fbcb4295cf0fbcfa5c2a8fc607842c958ad65 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Sun, 28 Sep 2014 20:52:36 -0700 Subject: [PATCH] pivot table procedure conceptually works --- examples/automake.mk | 3 +- examples/pivot.sps | 74 ++++++++++++ src/language/command.def | 3 +- src/language/data-io/automake.mk | 1 + src/language/data-io/pivot.c | 189 +++++++++++++++++++++++++++++++ 5 files changed, 268 insertions(+), 2 deletions(-) create mode 100644 examples/pivot.sps create mode 100644 src/language/data-io/pivot.c diff --git a/examples/automake.mk b/examples/automake.mk index 6f716e4132..9233eee278 100644 --- a/examples/automake.mk +++ b/examples/automake.mk @@ -10,6 +10,7 @@ examples_DATA = \ examples/physiology.sav \ examples/repairs.sav \ examples/regress.sps \ - examples/regress_categorical.sps + examples/regress_categorical.sps \ + examples/pivot.sps EXTRA_DIST += $(examples_DATA) diff --git a/examples/pivot.sps b/examples/pivot.sps new file mode 100644 index 0000000000..bad932c237 --- /dev/null +++ b/examples/pivot.sps @@ -0,0 +1,74 @@ +* Based on the tutorial at http://www.ats.ucla.edu/stat/spss/library/sp_pivot.htm. +data list list notable/sex tumor dose statistics (f1) data (f5.1). +value labels + /sex 0 'Female' 1 'Male' + /tumor 0 'Absent' 1 'Present' 2 'Total' + /dose 0 'Control' 1 'Low' 2 'Medium' 3 'High' 4 'Total' + /statistics 0 'Count' 1 'Tumor%'. +begin data. 
+0 0 0 0 6 +0 0 0 1 21.4 +0 0 1 0 11 +0 0 1 1 39.3 +0 0 2 0 6 +0 0 2 1 21.4 +0 0 3 0 5 +0 0 3 1 17.9 +0 0 4 0 28 +0 0 4 1 100 +0 1 0 0 6 +0 1 0 1 26.1 +0 1 1 0 5 +0 1 1 1 21.7 +0 1 2 0 6 +0 1 2 1 26.1 +0 1 3 0 6 +0 1 3 1 26.1 +0 1 4 0 23 +0 1 4 1 100 +0 2 0 0 12 +0 2 0 1 23.5 +0 2 1 0 16 +0 2 1 1 31.4 +0 2 2 0 12 +0 2 2 1 23.5 +0 2 3 0 11 +0 2 3 1 21.6 +0 2 4 0 51 +0 2 4 1 100 +1 0 0 0 18 +1 0 0 1 30.5 +1 0 1 0 11 +1 0 1 1 18.6 +1 0 2 0 17 +1 0 2 1 28.8 +1 0 3 0 13 +1 0 3 1 22.0 +1 0 4 0 59 +1 0 4 1 100 +1 1 0 0 3 +1 1 0 1 25.0 +1 1 1 0 3 +1 1 1 1 25.0 +1 1 2 0 2 +1 1 2 1 16.7 +1 1 3 0 4 +1 1 3 1 33.3 +1 1 4 0 12 +1 1 4 1 100 +1 2 0 0 21 +1 2 0 1 29.6 +1 2 1 0 14 +1 2 1 1 19.7 +1 2 2 0 19 +1 2 2 1 26.8 +1 2 3 0 17 +1 2 3 1 23.9 +1 2 4 0 71 +1 2 4 1 100 +end data. + +format sex tumor dose(f1). +*select if tumor=1 and statistics=1. +debug pivot /rows=sex tumor statistics/columns=dose/data=data. + diff --git a/src/language/command.def b/src/language/command.def index c7b6325073..79ca4fd3e0 100644 --- a/src/language/command.def +++ b/src/language/command.def @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2006, 2009, 2010, 2011, 2013 Free Software Foundation, Inc. + Copyright (C) 2006, 2009, 2010, 2011, 2013, 2014 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -158,6 +158,7 @@ DEF_CMD (S_ANY, F_TESTING, "DEBUG PAPER SIZE", cmd_debug_paper_size) DEF_CMD (S_ANY, F_TESTING, "DEBUG POOL", cmd_debug_pool) DEF_CMD (S_ANY, F_TESTING, "DEBUG FLOAT FORMAT", cmd_debug_float_format) DEF_CMD (S_ANY, F_TESTING, "DEBUG XFORM FAIL", cmd_debug_xform_fail) +DEF_CMD (S_DATA, F_TESTING, "DEBUG PIVOT", cmd_debug_pivot) /* Unimplemented commands. 
*/ UNIMPL_CMD ("2SLS", "Two stage least squares regression") diff --git a/src/language/data-io/automake.mk b/src/language/data-io/automake.mk index 906a8ed158..762e8f0985 100644 --- a/src/language/data-io/automake.mk +++ b/src/language/data-io/automake.mk @@ -19,6 +19,7 @@ language_data_io_sources = \ src/language/data-io/inpt-pgm.c \ src/language/data-io/inpt-pgm.h \ src/language/data-io/list.c \ + src/language/data-io/pivot.c \ src/language/data-io/placement-parser.c \ src/language/data-io/placement-parser.h \ src/language/data-io/print-space.c \ diff --git a/src/language/data-io/pivot.c b/src/language/data-io/pivot.c new file mode 100644 index 0000000000..22d7d1b2a6 --- /dev/null +++ b/src/language/data-io/pivot.c @@ -0,0 +1,189 @@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2014 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+
+#include <config.h>
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "data/case.h"
+#include "data/casereader.h"
+#include "data/dataset.h"
+#include "data/subcase.h"
+#include "data/value.h"
+#include "data/variable.h"
+#include "language/command.h"
+#include "language/lexer/lexer.h"
+#include "language/lexer/variable-parser.h"
+#include "libpspp/message.h"
+#include "math/sort.h"
+#include "output/tab.h"
+#include "output/table-item.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+/* Roles that a variable can play in the pivot table.  The values double as
+   indexes into the arrays in struct cmd_pivot. */
+enum
+  {
+    ROW_VAR,                    /* Named on the ROWS subcommand. */
+    COL_VAR,                    /* Named on the COLUMNS subcommand. */
+    DATA_VAR,                   /* Named on the DATA subcommand. */
+    N_PIVOT_ROLES
+  };
+
+/* Parsed DEBUG PIVOT syntax: one variable list per role.  Each list is
+   allocated by parse_variables_const() and released by pivot_uninit(). */
+struct cmd_pivot
+  {
+    const struct variable **vars[N_PIVOT_ROLES];
+    size_t n_vars[N_PIVOT_ROLES];
+  };
+
+/* Callback handed to sort_distinct_execute(): keeps FIRST and drops the
+   reference to SECOND.  AUX is unused.
+
+   NOTE(review): presumably invoked for pairs of cases that compare equal
+   under the sort ordering -- confirm against sort_distinct_execute(). */
+static struct ccase *
+take_first_case (struct ccase *first, struct ccase *second, void *aux UNUSED)
+{
+  case_unref (second);
+  return first;
+}
+
+/* Frees the variable lists in P and resets P to the empty state. */
+static void
+pivot_uninit (struct cmd_pivot *p)
+{
+  size_t i;
+
+  for (i = 0; i < N_PIVOT_ROLES; i++)
+    {
+      free (p->vars[i]);
+      p->vars[i] = NULL;
+      p->n_vars[i] = 0;
+    }
+}
+
+/* Parses the ROWS, COLUMNS and DATA subcommands from LEXER into P, looking
+   variables up in DICT.  Returns true on success.  On failure, reports an
+   error and returns false; the caller must still call pivot_uninit(). */
+static bool
+parse_pivot_syntax (struct lexer *lexer, const struct dictionary *dict,
+                    struct cmd_pivot *p)
+{
+  while (lex_token (lexer) != T_ENDCMD)
+    {
+      int opts = PV_NO_DUPLICATE | PV_NO_SCRATCH;
+      int role;
+
+      lex_match (lexer, T_SLASH);
+      if (lex_token (lexer) == T_ENDCMD)
+        break;                  /* Tolerate a trailing slash. */
+
+      if (lex_match_id (lexer, "ROWS"))
+        {
+          /* Row values are printed through their numeric member. */
+          role = ROW_VAR;
+          opts |= PV_NUMERIC;
+        }
+      else if (lex_match_id (lexer, "COLUMNS"))
+        role = COL_VAR;
+      else if (lex_match_id (lexer, "DATA"))
+        {
+          /* Cell values are read with case_num(). */
+          role = DATA_VAR;
+          opts |= PV_NUMERIC;
+        }
+      else
+        {
+          /* An unrecognized token must be rejected here: nothing else
+             consumes it, so looping again would never terminate. */
+          lex_error (lexer, NULL);
+          return false;
+        }
+      lex_match (lexer, T_EQUALS);
+
+      /* A repeated subcommand replaces the earlier list instead of
+         leaking it. */
+      free (p->vars[role]);
+      p->vars[role] = NULL;
+      p->n_vars[role] = 0;
+
+      if (!parse_variables_const (lexer, dict,
+                                  &p->vars[role], &p->n_vars[role], opts))
+        return false;
+    }
+
+  /* The cell value comes from the first DATA variable, so there must be
+     at least one. */
+  if (p->n_vars[DATA_VAR] == 0)
+    {
+      msg (SE, _("Required subcommand %s was not specified."), "DATA");
+      return false;
+    }
+
+  return true;
+}
+
+/* Prints the row header for ROW followed by "| ".  Leading row variables
+   whose values are the same as in PREV, the previous row, are left blank so
+   that repeated outer labels appear only once.  PREV is null for the first
+   row.  Every field is 8 characters wide, like the data cells. */
+static void
+print_row_labels (const struct cmd_pivot *p,
+                  const struct ccase *row, const struct ccase *prev)
+{
+  const struct variable **vars = p->vars[ROW_VAR];
+  size_t n = p->n_vars[ROW_VAR];
+  size_t i = 0;
+
+  if (prev != NULL)
+    for (; i < n; i++)
+      {
+        if (!value_equal (case_data (row, vars[i]),
+                          case_data (prev, vars[i]),
+                          var_get_width (vars[i])))
+          break;
+        printf ("%7s ", "");
+      }
+
+  for (; i < n; i++)
+    {
+      const union value *value = case_data (row, vars[i]);
+      const char *label = var_lookup_value_label (vars[i], value);
+
+      if (label != NULL)
+        printf ("%7s ", label);
+      else
+        printf ("%7.0f ", value->f);
+    }
+  printf ("| ");
+}
+
+/* DEBUG PIVOT /ROWS=vars /COLUMNS=vars /DATA=var.
+
+   Prints the active dataset on stdout as a crude pivot table: one text line
+   per distinct combination of the ROWS variables, one 8-character field per
+   distinct combination of the COLUMNS variables, each cell showing the
+   first DATA variable of the first case kept for that (row, column) pair.
+   Cells with no case are left blank.
+
+   Returns CMD_SUCCESS on success, CMD_FAILURE on a syntax error, or
+   CMD_CASCADING_FAILURE if reading or committing the data failed. */
+int
+cmd_debug_pivot (struct lexer *lexer, struct dataset *ds)
+{
+  const struct dictionary *dict = dataset_dict (ds);
+  struct subcase col_ordering, total_ordering, row_ordering;
+  struct casereader *reader, *columns, *cells, *columns_reader;
+  struct ccase *row, *prev, *column, *c;
+  struct cmd_pivot p;
+  bool ok;
+
+  memset (&p, 0, sizeof p);
+  if (!parse_pivot_syntax (lexer, dict, &p))
+    {
+      pivot_uninit (&p);
+      return CMD_FAILURE;
+    }
+
+  reader = proc_open (ds);
+
+  /* One case per distinct combination of column values, in sorted order. */
+  subcase_init_vars (&col_ordering, p.vars[COL_VAR], p.n_vars[COL_VAR]);
+  columns = sort_distinct_execute (casereader_clone (reader), &col_ordering,
+                                   take_first_case, NULL, NULL);
+  printf ("%lld column combinations\n",
+          (long long int) casereader_count_cases (columns));
+
+  /* One case per distinct (row, column) combination, sorted by row values
+     and then by column values. */
+  subcase_init_vars (&total_ordering, p.vars[ROW_VAR], p.n_vars[ROW_VAR]);
+  subcase_add_vars_always (&total_ordering,
+                           p.vars[COL_VAR], p.n_vars[COL_VAR]);
+  cells = sort_distinct_execute (reader, &total_ordering,
+                                 take_first_case, NULL, NULL);
+  printf ("%lld cells\n",
+          (long long int) casereader_count_cases (cells));
+
+  row = prev = column = NULL;
+  columns_reader = NULL;
+  subcase_init_vars (&row_ordering, p.vars[ROW_VAR], p.n_vars[ROW_VAR]);
+  while ((c = casereader_read (cells)) != NULL)
+    {
+      if (row == NULL
+          || !subcase_equal (&row_ordering, row, &row_ordering, c))
+        {
+          /* C starts a new output row. */
+          if (row != NULL)
+            putchar ('\n');
+          case_unref (prev);
+          prev = row;
+          row = case_ref (c);
+          print_row_labels (&p, row, prev);
+
+          /* Restart the walk over the column combinations. */
+          case_unref (column);
+          casereader_destroy (columns_reader);
+          columns_reader = casereader_clone (columns);
+          column = casereader_read (columns_reader);
+        }
+
+      /* Leave a blank field for each column combination that has no cell
+         in this row.  The null check keeps a prematurely exhausted column
+         reader from being dereferenced. */
+      while (column != NULL
+             && !subcase_equal (&col_ordering, column, &col_ordering, c))
+        {
+          case_unref (column);
+          column = casereader_read (columns_reader);
+          printf ("%7s ", "");
+        }
+
+      printf ("%7.0f ", case_num (c, p.vars[DATA_VAR][0]));
+
+      /* Advance past the column just printed, releasing both the column
+         case and the cell case. */
+      case_unref (column);
+      column = casereader_read (columns_reader);
+      case_unref (c);
+    }
+  if (row != NULL)
+    putchar ('\n');
+
+  case_unref (row);
+  case_unref (prev);
+  case_unref (column);
+  casereader_destroy (columns_reader);
+
+  ok = casereader_destroy (columns);
+  ok = casereader_destroy (cells) && ok;
+  subcase_destroy (&col_ordering);
+  subcase_destroy (&total_ordering);
+  subcase_destroy (&row_ordering);
+
+  ok = proc_commit (ds) && ok;
+  pivot_uninit (&p);
+
+  return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE;
+}
-- 
2.30.2