From d6f63e70f4ec5f70e25f8c0bb9f33f65f8dc2f34 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Thu, 22 Mar 2012 10:56:01 +0100 Subject: [PATCH] EXAMINE: Allow the /ID subcommand to take a non-numeric variable --- src/language/stats/examine.c | 104 +++++++++++++++++++------------- src/math/box-whisker.c | 27 +++++++-- src/math/box-whisker.h | 6 +- tests/language/stats/examine.at | 53 ++++++++++++++++ 4 files changed, 140 insertions(+), 50 deletions(-) diff --git a/src/language/stats/examine.c b/src/language/stats/examine.c index 5d9f642641..069f807c0a 100644 --- a/src/language/stats/examine.c +++ b/src/language/stats/examine.c @@ -110,11 +110,9 @@ struct examine bool missing_pw; - /* Test options require that casenumbers are known */ - bool casenumbers; - /* The case index of the ID value (or -1) if not applicable */ size_t id_idx; + int id_width; enum pc_alg pc_alg; double *ptiles; @@ -138,7 +136,7 @@ struct extremity /* Either the casenumber or the value of the variable specified by the /ID subcommand which corresponds to this extremity */ - double identity; + union value identity; }; struct exploratory_stats @@ -1150,13 +1148,21 @@ extremes_report (const struct examine *cmd, int iact_idx) &F_8_0); /* The casenumber */ - tab_double (t, + if (cmd->id_var) + tab_value (t, + heading_columns, + heading_rows + v * rows_per_var + i * rows_per_cat + e, + TAB_RIGHT, + &es->maxima[e].identity, + cmd->id_var, + NULL); + else + tab_double (t, heading_columns, - heading_rows + v * rows_per_var + i * rows_per_cat + e, - 0, - es->maxima[e].identity, - &F_8_0); - + heading_rows + v * rows_per_var + i * rows_per_cat + e, + TAB_RIGHT, + es->maxima[e].identity.f, + &F_8_0); tab_double (t, heading_columns + 1, @@ -1175,12 +1181,21 @@ extremes_report (const struct examine *cmd, int iact_idx) &F_8_0); /* The casenumber */ - tab_double (t, - heading_columns, - heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, - 0, - es->minima[e].identity, - &F_8_0); + if 
(cmd->id_var) + tab_value (t, + heading_columns, + heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, + TAB_RIGHT, + &es->minima[e].identity, + cmd->id_var, + NULL); + else + tab_double (t, + heading_columns, + heading_rows + v * rows_per_var + i * rows_per_cat + cmd->disp_extremes + e, + TAB_RIGHT, + es->minima[e].identity.f, + &F_8_0); tab_double (t, heading_columns + 1, @@ -1483,10 +1498,11 @@ update_n (const void *aux1, void *aux2 UNUSED, void *user_data, moments_pass_one (es[v].mom, x, weight); - /* Save the value and the casenumber to the writer */ + /* Save the value and the ID to the writer */ + assert (examine->id_idx != -1); case_data_rw_idx (outcase, EX_VAL)->f = x; - if ( examine->id_idx != -1) - case_data_rw_idx (outcase, EX_ID)->f = case_data_idx (c, examine->id_idx)->f; + value_copy (case_data_rw_idx (outcase, EX_ID), + case_data_idx (c, examine->id_idx), examine->id_width); case_data_rw_idx (outcase, EX_WT)->f = weight; @@ -1534,6 +1550,11 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) es[v].maxima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].maxima)); es[v].minima = pool_calloc (examine->pool, examine->calc_extremes, sizeof (*es[v].minima)); + for (i = 0; i < examine->calc_extremes; ++i) + { + value_init_pool (examine->pool, &es[v].maxima[i].identity, examine->id_width) ; + value_init_pool (examine->pool, &es[v].minima[i].identity, examine->id_width) ; + } for (reader = casereader_clone (es[v].sorted_reader); (c = casereader_read (reader)) != NULL; case_unref (c)) @@ -1553,7 +1574,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) { struct extremity *min = &es[v].minima[x]; min->val = val; - min->identity = case_data_idx (c, EX_ID)->f; + value_copy (&min->identity, case_data_idx (c, EX_ID), examine->id_width); } imin += wt; } @@ -1572,7 +1593,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) max = &es[v].maxima[x]; max->val = val; - 
max->identity = case_data_idx (c, EX_ID)->f; + value_copy (&max->identity, case_data_idx (c, EX_ID), examine->id_width); } } } @@ -1623,7 +1644,7 @@ calculate_n (const void *aux1, void *aux2 UNUSED, void *user_data) struct order_stats *os; es[v].box_whisker = box_whisker_create (es[v].hinges, - EX_ID); + EX_ID, examine->id_var); os = &es[v].box_whisker->parent; order_stats_accumulate_idx (&os, 1, @@ -1721,24 +1742,20 @@ run_examine (struct examine *cmd, struct casereader *input) cmd->wv = dict_get_weight (cmd->dict); - cmd->id_idx = -1; cmd->cats = categoricals_create (cmd->iacts, cmd->n_iacts, cmd->wv, cmd->exclude); categoricals_set_payload (cmd->cats, &payload, cmd, NULL); - if (cmd->casenumbers) + if (cmd->id_idx == -1) { struct ccase *c = casereader_peek (input, 0); - if (cmd->id_var) - cmd->id_idx = var_get_case_index (cmd->id_var); - else - { - cmd->id_idx = case_get_value_cnt (c); - input = casereader_create_arithmetic_sequence (input, 1.0, 1.0); - } + assert (cmd->id_var == NULL); + + cmd->id_idx = case_get_value_cnt (c); + input = casereader_create_arithmetic_sequence (input, 1.0, 1.0); case_unref (c); } @@ -1814,7 +1831,6 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) struct examine examine; bool percentiles_seen = false; - examine.casenumbers = false; examine.missing_pw = false; examine.disp_extremes = 0; examine.calc_extremes = 0; @@ -1823,13 +1839,12 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) examine.pc_alg = PC_HAVERAGE; examine.ptiles = NULL; examine.n_percentiles = 0; - examine.id_var = 0; + examine.id_idx = -1; + examine.id_width = 0; + examine.id_var = NULL; examine.boxplot_mode = BP_GROUPS; examine.ex_proto = caseproto_create (); - examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* value */ - examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* id */ - examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* weight */ examine.pool = pool_create (); @@ -2114,6 +2129,7 @@ cmd_examine 
(struct lexer *lexer, struct dataset *ds) } } + if ( totals_seen && nototals_seen) { msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL"); @@ -2133,18 +2149,22 @@ cmd_examine (struct lexer *lexer, struct dataset *ds) } - if (examine.disp_extremes > 0) + if ( examine.id_var ) { - examine.calc_extremes = examine.disp_extremes; - examine.casenumbers = true; + examine.id_idx = var_get_case_index (examine.id_var); + examine.id_width = var_get_width (examine.id_var); } - if (examine.boxplot) + examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* value */ + examine.ex_proto = caseproto_add_width (examine.ex_proto, examine.id_width); /* id */ + examine.ex_proto = caseproto_add_width (examine.ex_proto, 0); /* weight */ + + + if (examine.disp_extremes > 0) { - examine.casenumbers = true; + examine.calc_extremes = examine.disp_extremes; } - if (examine.descriptives && examine.calc_extremes == 0) { /* Descriptives always displays the max and min */ diff --git a/src/math/box-whisker.c b/src/math/box-whisker.c index fb5c2c62a2..0d893cb80c 100644 --- a/src/math/box-whisker.c +++ b/src/math/box-whisker.c @@ -22,6 +22,7 @@ #include #include "data/case.h" +#include "data/data-out.h" #include "data/val-type.h" #include "data/variable.h" #include "libpspp/assertion.h" @@ -82,9 +83,21 @@ acc (struct statistic *s, const struct ccase *cx, o->extreme = extreme; ds_init_empty (&o->label); - ds_put_format (&o->label, - "%ld", - (casenumber) case_data_idx (cx, bw->casenumber_idx)->f); + if (bw->id_var) + { + char *s = data_out (case_data_idx (cx, bw->id_idx), + var_get_encoding (bw->id_var), + var_get_print_format (bw->id_var)); + + ds_put_cstr (&o->label, s); + free (s); + } + else + { + ds_put_format (&o->label, + "%ld", + (casenumber) case_data_idx (cx, bw->id_idx)->f); + } ll_push_head (&bw->outliers, &o->ll); } @@ -115,12 +128,13 @@ box_whisker_outliers (const struct box_whisker *bw) TH are the tukey hinges of the dataset. 
- Casenumber_idx is the index into the casereader which will be used to label + id_idx is the index into the casereader which will be used to label outliers. + id_var is the variable from which that label came, or NULL */ struct box_whisker * box_whisker_create (const struct tukey_hinges *th, - size_t casenumber_idx) + size_t id_idx, const struct variable *id_var) { struct box_whisker *w = xzalloc (sizeof (*w)); struct order_stats *os = &w->parent; @@ -133,7 +147,8 @@ box_whisker_create (const struct tukey_hinges *th, tukey_hinges_calculate (th, w->hinges); - w->casenumber_idx = casenumber_idx; + w->id_idx = id_idx; + w->id_var = id_var; w->step = (w->hinges[2] - w->hinges[0]) * 1.5; diff --git a/src/math/box-whisker.h b/src/math/box-whisker.h index c9b2bef8d3..ccafef1fc9 100644 --- a/src/math/box-whisker.h +++ b/src/math/box-whisker.h @@ -26,6 +26,7 @@ However, there's no reason not to use it for other purposes too. */ struct tukey_hinges; +struct variable; struct outlier { @@ -47,11 +48,12 @@ struct box_whisker double step; - size_t casenumber_idx; + size_t id_idx; + const struct variable *id_var; }; struct box_whisker * box_whisker_create (const struct tukey_hinges *, - size_t); + size_t id_idx, const struct variable *id_var); void box_whisker_whiskers (const struct box_whisker *bw, double whiskers[2]); diff --git a/tests/language/stats/examine.at b/tests/language/stats/examine.at index 40c5c1a966..64f6d7c573 100644 --- a/tests/language/stats/examine.at +++ b/tests/language/stats/examine.at @@ -587,3 +587,56 @@ AT_CHECK([$PERL more-big-input.pl >> large.txt]) AT_CHECK([pspp -o pspp.csv examine.sps]) dnl Ignore output -- this is just a no-crash check. AT_CLEANUP + + +dnl Test that the ID command works with non-numeric variables +AT_SETUP([EXAMINE -- non-numeric ID]) + +AT_DATA([examine-id.sps], [dnl +data list notable list /x * y (a12). +begin data.
+1 one +2 two +3 three +4 four +5 five +6 six +7 seven +8 eight +9 nine +10 ten +11 eleven +12 twelve +30 thirty +300 threehundred +end data. + +examine x + /statistics = extreme + /id = y + /plot = boxplot + . +]) + +AT_CHECK([pspp -O format=csv examine-id.sps], [0], +[Table: Case Processing Summary +,Cases,,,,, +,Valid,,Missing,,Total, +,N,Percent,N,Percent,N,Percent +x,14,100%,0,0%,14,100% + +Table: Extreme Values +,,,y,Value +x,Highest,1,threehundred,300.00 +,,2,thirty ,30.00 +,,3,twelve ,12.00 +,,4,eleven ,11.00 +,,5,ten ,10.00 +,Lowest,1,one ,1.00 +,,2,two ,2.00 +,,3,three ,3.00 +,,4,four ,4.00 +,,5,five ,5.00 +]) + +AT_CLEANUP \ No newline at end of file -- 2.30.2