X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Ft-test.q;h=5fdb802d633be7246b49c2f639c130e63905cf41;hb=012b055f5f7b4ddd2a608e64c53eaae101b5634e;hp=a991f29ed6d09bb9bf4f2c0e99b8dc918a6ca619;hpb=c33f6387da9826d640ef1f21068bf5c94055ba77;p=pspp-builds.git diff --git a/src/t-test.q b/src/t-test.q index a991f29e..5fdb802d 100644 --- a/src/t-test.q +++ b/src/t-test.q @@ -16,8 +16,8 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ #include #include @@ -26,27 +26,34 @@ #include #include #include "alloc.h" -#include "str.h" #include "case.h" +#include "casefile.h" #include "command.h" -#include "lexer.h" +#include "dictionary.h" #include "error.h" +#include "group_proc.h" +#include "hash.h" +#include "levene.h" +#include "lexer.h" #include "magic.h" #include "misc.h" -#include "tab.h" +#include "size_max.h" #include "som.h" +#include "str.h" +#include "tab.h" #include "value-labels.h" #include "var.h" #include "vfm.h" -#include "hash.h" -#include "group_proc.h" -#include "casefile.h" -#include "levene.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +/* (headers) */ /* (specification) "T-TEST" (tts_): +groups=custom; - +testval=double; + testval=double; variables=varlist("PV_NO_SCRATCH | PV_NUMERIC"); pairs=custom; +missing=miss:!analysis/listwise, @@ -61,14 +68,16 @@ /* Function to use for testing for missing values */ -static is_missing_func value_is_missing; +static is_missing_func *value_is_missing; /* Variable for the GROUPS subcommand, if given. */ static struct variable *indep_var; -/* GROUPS: Number of values specified by the user; the values - specified if any. */ - +enum comparison + { + CMP_LE = -2, + CMP_EQ = 0, + }; struct group_properties { @@ -220,9 +229,6 @@ static int group_calc (const struct ccase *, struct cmd_t_test *); static void group_postcalc (struct cmd_t_test *); -static int compare_var_name (const void *a_, const void *b_, void *v_ UNUSED); -static unsigned hash_var_name (const void *a_, void *v_ UNUSED); - static void calculate(const struct casefile *cf, void *_mode); static int mode; @@ -234,11 +240,11 @@ static int bad_weight_warn; static int compare_group_binary(const struct group_statistics *a, const struct group_statistics *b, - struct group_properties *p); + const struct group_properties *p); static unsigned hash_group_binary(const struct group_statistics *g, - struct group_properties *p); + const struct group_properties *p); @@ -263,6 +269,7 @@ cmd_t_test(void) msg(SE, _("TESTVAL, GROUPS and PAIRS subcommands are mutually exclusive.") ); + free_t_test(&cmd); return CMD_FAILURE; } } @@ -279,6 +286,7 @@ cmd_t_test(void) if (cmd.sbc_variables) { msg(SE, _("VARIABLES subcommand is not appropriate with PAIRS")); + free_t_test(&cmd); return CMD_FAILURE; } else @@ -291,7 +299,7 @@ cmd_t_test(void) struct hsh_table *hash; struct variable *v; - hash=hsh_create(n_pairs,compare_var_name,hash_var_name,0,0); + hash = hsh_create (n_pairs, compare_var_names, hash_var_name, 0, 0); for (i=0; i < n_pairs; ++i) { @@ -302,8 +310,8 @@ cmd_t_test(void) assert(cmd.n_variables == 0); cmd.n_variables = hsh_count(hash); - cmd.v_variables = xrealloc(cmd.v_variables, - sizeof(struct variable) * cmd.n_variables); + cmd.v_variables = xnrealloc (cmd.v_variables, cmd.n_variables, + sizeof *cmd.v_variables); /* Iterate through the hash */ for (i=0,v = (struct variable *) hsh_first(hash,&hi); v != 0; @@ -316,15 +324,16 @@ cmd_t_test(void) else if ( !cmd.sbc_variables) { msg(SE, _("One or more VARIABLES must be specified.")); + free_t_test(&cmd); return CMD_FAILURE; } /* If /MISSING=INCLUDE is set, then user missing values are ignored */ if (cmd.incl == TTS_INCLUDE ) - value_is_missing = is_system_missing; + value_is_missing = mv_is_value_system_missing; else - value_is_missing = is_missing; + value_is_missing = mv_is_value_missing; bad_weight_warn = 1; @@ -340,11 +349,12 @@ cmd_t_test(void) /* Destroy any group statistics we created */ for (v = 0 ; v < cmd.n_variables ; ++v ) { - struct group_proc *grpp = &cmd.v_variables[v]->p.grp_data; - free(grpp->group_hash); + struct group_proc *grpp = group_proc_get (cmd.v_variables[v]); + hsh_destroy (grpp->group_hash); } } + free_t_test(&cmd); return CMD_SUCCESS; } @@ -355,14 +365,6 @@ tts_custom_groups (struct cmd_t_test *cmd UNUSED) lex_match('='); - if (token != T_ALL && - (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) - ) - { - msg(SE,_("`%s' is not a variable name"),tokid); - return 0; - } - indep_var = parse_variable (); if (!indep_var) { @@ -392,18 +394,25 @@ tts_custom_groups (struct cmd_t_test *cmd UNUSED) } else { - msg (SE, _("When applying GROUPS to a string variable, at " - "least one value must be specified.")); + msg (SE, _("When applying GROUPS to a string variable, two " + "values must be specified.")); return 0; } } - if (!parse_value (&gp.v.g_value[0],indep_var->type)) + if (!parse_value (&gp.v.g_value[0], indep_var->type)) return 0; lex_match (','); if (lex_match (')')) { + if (indep_var->type != NUMERIC) + { + + msg (SE, _("When applying GROUPS to a string variable, two " + "values must be specified.")); + return 0; + } gp.criterion = CMP_LE; gp.v.critical_value = gp.v.g_value[0].f; @@ -411,7 +420,7 @@ tts_custom_groups (struct cmd_t_test *cmd UNUSED) return 1; } - if (!parse_value (&gp.v.g_value[1],indep_var->type)) + if (!parse_value (&gp.v.g_value[1], indep_var->type)) return 0; n_group_values = 2; @@ -432,22 +441,15 @@ static int tts_custom_pairs (struct cmd_t_test *cmd UNUSED) { struct variable **vars; - int n_vars; - int n_pairs_local; + size_t n_vars; + size_t n_pairs_local; - int n_before_WITH ; - int n_after_WITH = -1; + size_t n_before_WITH; + size_t n_after_WITH = SIZE_MAX; int paired ; /* Was the PAIRED keyword given ? */ lex_match('='); - if ((token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) - && token != T_ALL) - { - msg(SE,_("`%s' is not a variable name"),tokid); - return 0; - } - n_vars=0; if (!parse_variables (default_dict, &vars, &n_vars, PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH)) @@ -457,7 +459,7 @@ tts_custom_pairs (struct cmd_t_test *cmd UNUSED) } assert (n_vars); - n_before_WITH=0; + n_before_WITH = 0; if (lex_match (T_WITH)) { n_before_WITH = n_vars; @@ -485,11 +487,11 @@ tts_custom_pairs (struct cmd_t_test *cmd UNUSED) n_before_WITH, n_after_WITH ); return 0; } - n_pairs_local=n_before_WITH; + n_pairs_local = n_before_WITH; } else if (n_before_WITH > 0) /* WITH keyword given, but not PAIRED keyword */ { - n_pairs_local=n_before_WITH * n_after_WITH ; + n_pairs_local = n_before_WITH * n_after_WITH ; } else /* Neither WITH nor PAIRED keyword given */ { @@ -502,29 +504,29 @@ tts_custom_pairs (struct cmd_t_test *cmd UNUSED) } /* how many ways can you pick 2 from n_vars ? */ - n_pairs_local = n_vars * (n_vars -1 ) /2 ; + n_pairs_local = n_vars * (n_vars - 1) / 2; } /* Allocate storage for the pairs */ - pairs = xrealloc(pairs, sizeof(struct pair) * (n_pairs + n_pairs_local) ); + pairs = xnrealloc (pairs, n_pairs + n_pairs_local, sizeof *pairs); /* Populate the pairs with the appropriate variables */ if ( paired ) { int i; - assert(n_pairs_local == n_vars/2); - for (i = 0; i < n_pairs_local ; ++i) + assert(n_pairs_local == n_vars / 2); + for (i = 0; i < n_pairs_local; ++i) { - pairs[i].v[n_pairs+0] = vars[i]; - pairs[i].v[n_pairs+1] = vars[i+n_pairs_local]; + pairs[i].v[n_pairs] = vars[i]; + pairs[i].v[n_pairs + 1] = vars[i + n_pairs_local]; } } else if (n_before_WITH > 0) /* WITH keyword given, but not PAIRED keyword */ { int i,j; - int p=n_pairs; + size_t p = n_pairs; for(i=0 ; i < n_before_WITH ; ++i ) { @@ -538,8 +540,8 @@ tts_custom_pairs (struct cmd_t_test *cmd UNUSED) } else /* Neither WITH nor PAIRED given */ { - int i,j; - int p=n_pairs; + size_t i,j; + size_t p=n_pairs; for(i=0 ; i < n_vars ; ++i ) { @@ -747,7 +749,7 @@ ssbox_independent_samples_populate(struct ssbox *ssb, for (i=0; i < cmd->n_variables; ++i) { struct variable *var = cmd->v_variables[i]; - struct hsh_table *grp_hash = var->p.grp_data.group_hash; + struct hsh_table *grp_hash = group_proc_get (var)->group_hash; int count=0; tab_text (ssb->t, 0, i*2+1, TAB_LEFT, cmd->v_variables[i]->name); @@ -847,7 +849,7 @@ ssbox_paired_populate(struct ssbox *ssb,struct cmd_t_test *cmd UNUSED) { struct group_statistics *gs; - gs=&pairs[i].v[j]->p.grp_data.ugs; + gs = &group_proc_get (pairs[i].v[j])->ugs; /* Titles */ @@ -873,8 +875,7 @@ ssbox_one_sample_populate(struct ssbox *ssb, struct cmd_t_test *cmd) for (i=0; i < cmd->n_variables; ++i) { - struct group_statistics *gs; - gs= &cmd->v_variables[i]->p.grp_data.ugs; + struct group_statistics *gs = &group_proc_get (cmd->v_variables[i])->ugs; tab_text (ssb->t, 0, i+1, TAB_LEFT, cmd->v_variables[i]->name); tab_float (ssb->t,1, i+1, TAB_RIGHT, gs->n, 2, 0); @@ -889,7 +890,7 @@ ssbox_one_sample_populate(struct ssbox *ssb, struct cmd_t_test *cmd) /* Implementation of the Test Results box struct */ -void trbox_base_init(struct trbox *self,int n_vars, int cols); +void trbox_base_init(struct trbox *self,size_t n_vars, int cols); void trbox_base_finalize(struct trbox *trb); void trbox_independent_samples_init(struct trbox *trb, @@ -1009,8 +1010,9 @@ trbox_independent_samples_populate(struct trbox *self, double mean_diff; struct variable *var = cmd->v_variables[i]; + struct group_proc *grp_data = group_proc_get (var); - struct hsh_table *grp_hash = var->p.grp_data.group_hash; + struct hsh_table *grp_hash = grp_data->group_hash; struct group_statistics *gs0 ; struct group_statistics *gs1 ; @@ -1039,12 +1041,11 @@ trbox_independent_samples_populate(struct trbox *self, tab_text (self->t, 1, i*2+3, TAB_LEFT, _("Equal variances assumed")); - tab_float(self->t, 2, i*2+3, TAB_CENTER, - cmd->v_variables[i]->p.grp_data.levene, 8,3); + tab_float(self->t, 2, i*2+3, TAB_CENTER, grp_data->levene, 8,3); /* Now work out the significance of the Levene test */ - df1 = 1; df2 = cmd->v_variables[i]->p.grp_data.ugs.n - 2; - q = gsl_cdf_fdist_Q(cmd->v_variables[i]->p.grp_data.levene, df1, df2); + df1 = 1; df2 = grp_data->ugs.n - 2; + q = gsl_cdf_fdist_Q(grp_data->levene, df1, df2); tab_float(self->t, 3, i*2+3, TAB_CENTER, q, 8,3 ); @@ -1246,7 +1247,7 @@ trbox_one_sample_init(struct trbox *self, struct cmd_t_test *cmd ) tab_vline(self->t, TAL_2, 1, 0, vsize - 1); tab_joint_text(self->t, 1, 0, hsize-1,0, TAB_CENTER | TAT_PRINTF, - _("Test Value = %f"),cmd->n_testval); + _("Test Value = %f"), cmd->n_testval[0]); tab_box(self->t, -1, -1, -1, TAL_1, 1,1,hsize-1,vsize-1); @@ -1280,13 +1281,12 @@ trbox_one_sample_populate(struct trbox *trb, struct cmd_t_test *cmd) double t; double p,q; double df; - struct group_statistics *gs; - gs= &cmd->v_variables[i]->p.grp_data.ugs; + struct group_statistics *gs = &group_proc_get (cmd->v_variables[i])->ugs; tab_text (trb->t, 0, i+3, TAB_LEFT, cmd->v_variables[i]->name); - t = (gs->mean - cmd->n_testval ) * sqrt(gs->n) / gs->std_dev ; + t = (gs->mean - cmd->n_testval[0] ) * sqrt(gs->n) / gs->std_dev ; tab_float (trb->t, 1, i+3, TAB_RIGHT, t, 8,3); @@ -1318,9 +1318,9 @@ trbox_one_sample_populate(struct trbox *trb, struct cmd_t_test *cmd) /* Base initializer for the generalized trbox */ void -trbox_base_init(struct trbox *self, int data_rows, int cols) +trbox_base_init(struct trbox *self, size_t data_rows, int cols) { - const int rows = 3 + data_rows; + const size_t rows = 3 + data_rows; self->finalize = trbox_base_finalize; self->t = tab_create (cols, rows, 0); @@ -1419,7 +1419,7 @@ common_calc (const struct ccase *c, void *_cmd) struct variable *v = cmd->v_variables[i]; const union value *val = case_data (c, v->fv); - if (value_is_missing(val,v) ) + if (value_is_missing(&v->miss, val) ) { return 0; } @@ -1430,7 +1430,7 @@ common_calc (const struct ccase *c, void *_cmd) if ( cmd->sbc_groups ) { const union value *gv = case_data (c, indep_var->fv); - if ( value_is_missing(gv,indep_var) ) + if ( value_is_missing(&indep_var->miss, gv) ) { return 0; } @@ -1443,9 +1443,9 @@ common_calc (const struct ccase *c, void *_cmd) struct variable *v = cmd->v_variables[i]; const union value *val = case_data (c, v->fv); - gs= &cmd->v_variables[i]->p.grp_data.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; - if (! value_is_missing(val,v) ) + if (! value_is_missing(&v->miss, val) ) { gs->n+=weight; gs->sum+=weight * val->f; @@ -1464,7 +1464,7 @@ common_precalc ( struct cmd_t_test *cmd ) for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; - gs= &cmd->v_variables[i]->p.grp_data.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; gs->sum=0; gs->n=0; @@ -1483,7 +1483,7 @@ common_postcalc ( struct cmd_t_test *cmd ) for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; - gs= &cmd->v_variables[i]->p.grp_data.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; gs->mean=gs->sum / gs->n; gs->s_std_dev= sqrt( @@ -1518,7 +1518,7 @@ one_sample_calc (const struct ccase *c, void *cmd_) struct variable *v = cmd->v_variables[i]; const union value *val = case_data (c, v->fv); - if (value_is_missing(val,v) ) + if (value_is_missing(&v->miss, val) ) { return 0; } @@ -1531,10 +1531,10 @@ one_sample_calc (const struct ccase *c, void *cmd_) struct variable *v = cmd->v_variables[i]; const union value *val = case_data (c, v->fv); - gs= &cmd->v_variables[i]->p.grp_data.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; - if ( ! value_is_missing(val,v)) - gs->sum_diff += weight * (val->f - cmd->n_testval); + if ( ! value_is_missing(&v->miss, val)) + gs->sum_diff += weight * (val->f - cmd->n_testval[0]); } return 0; @@ -1549,7 +1549,7 @@ one_sample_precalc ( struct cmd_t_test *cmd ) for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; - gs= &cmd->v_variables[i]->p.grp_data.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; gs->sum_diff=0; } @@ -1564,7 +1564,7 @@ one_sample_postcalc (struct cmd_t_test *cmd) for(i=0; i< cmd->n_variables ; ++i) { struct group_statistics *gs; - gs= &cmd->v_variables[i]->p.grp_data.ugs; + gs= &group_proc_get (cmd->v_variables[i])->ugs; gs->mean_diff = gs->sum_diff / gs->n ; } @@ -1572,25 +1572,6 @@ one_sample_postcalc (struct cmd_t_test *cmd) -static int -compare_var_name (const void *a_, const void *b_, void *v_ UNUSED) -{ - const struct variable *a = a_; - const struct variable *b = b_; - - return strcmp(a->name,b->name); -} - -static unsigned -hash_var_name (const void *a_, void *v_ UNUSED) -{ - const struct variable *a = a_; - - return hsh_hash_bytes (a->name, strlen(a->name)); -} - - - static void paired_precalc (struct cmd_t_test *cmd UNUSED) { @@ -1631,8 +1612,8 @@ paired_calc (const struct ccase *c, void *cmd_) const union value *val0 = case_data (c, v0->fv); const union value *val1 = case_data (c, v1->fv); - if ( value_is_missing(val0,v0) || - value_is_missing(val1,v1) ) + if ( value_is_missing(&v0->miss, val0) || + value_is_missing(&v1->miss, val1) ) { return 0; } @@ -1647,7 +1628,8 @@ paired_calc (const struct ccase *c, void *cmd_) const union value *val0 = case_data (c, v0->fv); const union value *val1 = case_data (c, v1->fv); - if ( ( !value_is_missing(val0,v0) && !value_is_missing(val1,v1) ) ) + if ( ( !value_is_missing(&v0->miss, val0) + && !value_is_missing(&v1->miss, val1) ) ) { pairs[i].n += weight; pairs[i].sum[0] += weight * val0->f; @@ -1713,7 +1695,7 @@ group_precalc (struct cmd_t_test *cmd ) for(i=0; i< cmd->n_variables ; ++i) { - struct group_proc *ttpr = &cmd->v_variables[i]->p.grp_data; + struct group_proc *ttpr = group_proc_get (cmd->v_variables[i]); /* There's always 2 groups for a T - TEST */ ttpr->n_groups = 2; @@ -1729,8 +1711,7 @@ group_precalc (struct cmd_t_test *cmd ) for (j=0 ; j < 2 ; ++j) { - struct group_statistics *gs = (struct group_statistics *) - xmalloc (sizeof(struct group_statistics)); + struct group_statistics *gs = xmalloc (sizeof *gs); gs->sum = 0; gs->n = 0; @@ -1764,7 +1745,7 @@ group_calc (const struct ccase *c, struct cmd_t_test *cmd) const double weight = dict_get_case_weight(default_dict,c,&bad_weight_warn); - if ( value_is_missing(gv,indep_var) ) + if ( value_is_missing(&indep_var->miss, gv) ) { return 0; } @@ -1776,7 +1757,7 @@ group_calc (const struct ccase *c, struct cmd_t_test *cmd) struct variable *v = cmd->v_variables[i]; const union value *val = case_data (c, v->fv); - if (value_is_missing(val,v) ) + if (value_is_missing(&v->miss, val) ) { return 0; } @@ -1789,7 +1770,7 @@ group_calc (const struct ccase *c, struct cmd_t_test *cmd) { struct variable *var = cmd->v_variables[i]; const union value *val = case_data (c, var->fv); - struct hsh_table *grp_hash = var->p.grp_data.group_hash; + struct hsh_table *grp_hash = group_proc_get (var)->group_hash; struct group_statistics *gs; gs = hsh_find(grp_hash, (void *) gv); @@ -1799,7 +1780,7 @@ group_calc (const struct ccase *c, struct cmd_t_test *cmd) if ( ! gs ) return 0; - if ( !value_is_missing(val,var) ) + if ( !value_is_missing(&var->miss, val) ) { gs->n+=weight; gs->sum+=weight * val->f; @@ -1819,7 +1800,7 @@ group_postcalc ( struct cmd_t_test *cmd ) for(i=0; i< cmd->n_variables ; ++i) { struct variable *var = cmd->v_variables[i]; - struct hsh_table *grp_hash = var->p.grp_data.group_hash; + struct hsh_table *grp_hash = group_proc_get (var)->group_hash; struct hsh_iterator g; struct group_statistics *gs; int count=0; @@ -1926,61 +1907,79 @@ calculate(const struct casefile *cf, void *cmd_) } +short which_group(const struct group_statistics *g, + const struct group_properties *p); /* Return -1 if the id of a is less than b; +1 if greater than and 0 if equal */ static int compare_group_binary(const struct group_statistics *a, const struct group_statistics *b, - struct group_properties *p) + const struct group_properties *p) { - short flag_a; short flag_b; - - assert(p->indep_width == 0 ) ; - + if ( p->criterion == CMP_LE ) { + /* less-than-or-equal comparision is not meaningfull for + alpha variables, so we shouldn't ever arrive here */ + assert(p->indep_width == 0 ) ; + flag_a = ( a->id.f < p->v.critical_value ) ; flag_b = ( b->id.f < p->v.critical_value ) ; } else { - flag_a = ( a->id.f == p->v.critical_value ) ; - flag_b = ( b->id.f == p->v.critical_value ) ; + flag_a = which_group(a, p); + flag_b = which_group(b, p); } - - if ( flag_a == flag_b) - return 0 ; - - return ( flag_a < flag_b); + if (flag_a < flag_b ) + return -1; + + return (flag_a > flag_b); } +/* This is a degenerate case of a hash, since it can only return three possible + values. It's really a comparison, being used as a hash function */ + static unsigned -hash_group_binary(const struct group_statistics *g, struct group_properties *p) +hash_group_binary(const struct group_statistics *g, + const struct group_properties *p) { short flag = -1; - assert(p->indep_width == 0 ) ; - - /* FIXME: should compare union values */ if ( p->criterion == CMP_LE ) { + /* Not meaningfull to do a less than compare for alpha values ? */ + assert(p->indep_width == 0 ) ; flag = ( g->id.f < p->v.critical_value ) ; } else if ( p->criterion == CMP_EQ) { - if ( g->id.f == p->v.g_value[0].f ) - flag = 0 ; - else if ( g->id.f == p->v.g_value[1].f ) - flag = 1; - else - flag = 2; + flag = which_group(g,p); } else assert(0); return flag; } + +/* return 0 if G belongs to group 0, + 1 if it belongs to group 1, + 2 if it belongs to neither group */ +short +which_group(const struct group_statistics *g, + const struct group_properties *p) +{ + + if ( 0 == compare_values (&g->id, &p->v.g_value[0], p->indep_width)) + return 0; + + if ( 0 == compare_values (&g->id, &p->v.g_value[1], p->indep_width)) + return 1; + + return 2; +} +