From: John Darrington
Date: Tue, 7 Apr 2009 11:30:23 +0000 (+0800)
Subject: Merge commit 'origin/stable'
X-Git-Tag: v0.7.3~176^2
X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7fbfc32fc3c636959b0a25b3e76609f86519e84a;hp=-c;p=pspp-builds.git

Merge commit 'origin/stable'

Conflicts:

	src/language/stats/crosstabs.q
	src/language/stats/examine.q
	src/language/stats/frequencies.q
	src/language/stats/oneway.q
	tests/command/examine-extremes.sh
	tests/command/examine.sh
---

7fbfc32fc3c636959b0a25b3e76609f86519e84a
diff --combined src/language/stats/binomial.c
index b7b672ef,7a0ac722..f4344b76
--- a/src/language/stats/binomial.c
+++ b/src/language/stats/binomial.c
@@@ -1,5 -1,5 +1,5 @@@
/* PSPP - a program for statistical analysis. - Copyright (C) 2006 Free Software Foundation, Inc. + Copyright (C) 2006, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by
@@@ -18,6 -18,7 +18,7 @@@
#include #include + #include #include #include #include
@@@ -98,51 -99,41 +99,51 @@@ do_binomial (const struct dictionary *d
bool warn = true; const struct one_sample_test *ost = (const struct one_sample_test *) bst; - struct ccase c; + struct ccase *c; - while (casereader_read(input, &c)) + while ((c = casereader_read(input)) != NULL) { int v; - double w = dict_get_case_weight (dict, &c, &warn); + double w = dict_get_case_weight (dict, c, &warn); for (v = 0 ; v < ost->n_vars ; ++v ) { const struct variable *var = ost->vars[v]; - const union value *value = case_data (&c, var); + const union value *value = case_data (c, var); int width = var_get_width (var); if (var_is_value_missing (var, value, exclude)) - break; + continue; - if ( NULL == cat1[v].value ) + if (bst->cutpoint != SYSMIS) { - cat1[v].value = value_dup (value, width); - cat1[v].count = w; + if ( compare_values_short (cat1[v].value, value, var) >= 0 ) + cat1[v].count += w; + else + cat2[v].count += w; } - else if ( 0 == compare_values (cat1[v].value, value, width)) - cat1[v].count += w; - else if ( NULL == cat2[v].value ) + else { - cat2[v].value = value_dup (value, width); - cat2[v].count = w; + if ( NULL == cat1[v].value ) + { + cat1[v].value = value_dup (value, width); + cat1[v].count = w; + } + else if ( 0 == compare_values_short (cat1[v].value, value, var)) + cat1[v].count += w; + else if ( NULL == cat2[v].value ) + { + cat2[v].value = value_dup (value, width); + cat2[v].count = w; + } + else if ( 0 == compare_values_short (cat2[v].value, value, var)) + cat2[v].count += w; + else if ( bst->category1 == SYSMIS) + msg (ME, _("Variable %s is not dichotomous"), var_get_name (var)); } - else if ( 0 == compare_values (cat2[v].value, value, width)) - cat2[v].count += w; - else if ( bst->category1 == SYSMIS) - msg (ME, _("Variable %s is not dichotomous"), var_get_name (var)); } - case_destroy (&c); + case_unref (c); } return casereader_destroy (input); }
@@@ -153,47 -144,38 +154,52 @@@ voi
binomial_execute (const struct dataset *ds, struct casereader *input, enum mv_class exclude, - const struct npar_test *test) + const struct npar_test *test, + bool exact UNUSED, + double timer UNUSED) { int v; + const struct dictionary *dict = dataset_dict (ds); const struct binomial_test *bst = (const struct binomial_test *) test; const struct one_sample_test *ost = (const struct one_sample_test*) test; struct freq_mutable *cat1 = xzalloc (sizeof (*cat1) * ost->n_vars); struct freq_mutable *cat2 = xzalloc (sizeof (*cat1) * ost->n_vars);
- assert ((bst->category1 == SYSMIS) == (bst->category2 == SYSMIS) ); + assert ((bst->category1 == SYSMIS) == (bst->category2 == SYSMIS) || bst->cutpoint != SYSMIS); - if ( bst->category1 != SYSMIS ) + if ( bst->cutpoint != SYSMIS ) { + int i; + union value v; + v.f = bst->cutpoint; + for (i = 0; i < ost->n_vars; i++) + cat1[i].value = value_dup (&v, 0); + } + else if ( bst->category1 != SYSMIS ) + { + int i; union value v; v.f = bst->category1; - cat1->value = value_dup (&v, 0); + for (i = 0; i < ost->n_vars; i++) + cat1[i].value = value_dup (&v, 0); } if ( bst->category2 != SYSMIS ) { + int i; union value v; v.f = bst->category2; - cat2->value = value_dup (&v, 0); + for (i = 0; i < ost->n_vars; i++) + cat2[i].value = value_dup (&v, 0); } - if (do_binomial (dataset_dict(ds), input, bst, cat1, cat2, exclude)) + if (do_binomial (dict, input, bst, cat1, cat2, exclude)) { + const struct variable *wvar = dict_get_weight (dict); + const struct fmt_spec *wfmt = wvar ? + var_get_print_format (wvar) : & F_8_0; + struct tab_table *table = tab_create (7, ost->n_vars * 3 + 1, 0); tab_dim (table, tab_natural_dimensions); @@@ -215,15 -197,8 +221,15 @@@ ds_init_empty (&catstr1); ds_init_empty (&catstr2); - var_append_value_name (var, cat1[v].value, &catstr1); - var_append_value_name (var, cat2[v].value, &catstr2); + if ( bst->cutpoint != SYSMIS) + { + ds_put_format (&catstr1, "<= %g", bst->cutpoint); + } + else + { + var_append_value_name (var, cat1[v].value, &catstr1); + var_append_value_name (var, cat2[v].value, &catstr2); + } tab_hline (table, TAL_1, 0, tab_nc (table) -1, 1 + v * 3); @@@ -234,30 -209,31 +240,31 @@@ tab_text (table, 1, 3 + v * 3, TAB_LEFT, _("Total")); /* Test Prop */ - tab_float (table, 5, 1 + v * 3, TAB_NONE, bst->p, 8, 3); + tab_double (table, 5, 1 + v * 3, TAB_NONE, bst->p, NULL); /* Category labels */ tab_text (table, 2, 1 + v * 3, TAB_NONE, ds_cstr (&catstr1)); tab_text (table, 2, 2 + v * 3, TAB_NONE, ds_cstr (&catstr2)); /* Observed N */ - tab_float (table, 3, 1 + v * 3, TAB_NONE, cat1[v].count, 8, 0); - tab_float (table, 3, 2 + v * 3, TAB_NONE, cat2[v].count, 8, 0); + tab_double (table, 3, 1 + v * 3, TAB_NONE, cat1[v].count, wfmt); + tab_double (table, 3, 2 + v * 3, TAB_NONE, cat2[v].count, wfmt); n_total = cat1[v].count + cat2[v].count; - tab_float (table, 3, 3 + v * 3, TAB_NONE, n_total, 8, 0); + tab_double (table, 3, 3 + v * 3, TAB_NONE, n_total, wfmt); /* Observed Proportions */ - tab_float (table, 4, 1 + v * 3, TAB_NONE, - cat1[v].count / n_total, 8, 3); - tab_float (table, 4, 2 + v * 3, TAB_NONE, - cat2[v].count / n_total, 8, 3); - tab_float (table, 4, 3 + v * 3, TAB_NONE, - (cat1[v].count + cat2[v].count) / n_total, 8, 2); + tab_double (table, 4, 1 + v * 3, TAB_NONE, + cat1[v].count / n_total, NULL); + tab_double (table, 4, 2 + v * 3, TAB_NONE, + cat2[v].count / n_total, NULL); + + tab_double (table, 4, 3 + v * 3, TAB_NONE, - (cat1[v].count + cat2[v].count) / n_total, wfmt); ++ (cat1[v].count + cat2[v].count) / n_total, NULL); /* Significance */ sig = calculate_binomial (cat1[v].count, cat2[v].count, bst->p); - tab_float (table, 6, 1 + v * 3, TAB_NONE, sig, 8, 3); + tab_double (table, 6, 1 + v * 3, TAB_NONE, sig, NULL); ds_destroy (&catstr1); ds_destroy (&catstr2); diff --combined src/language/stats/chisquare.c index 19496d7e,6287977b..6cb7fc0b --- a/src/language/stats/chisquare.c +++ b/src/language/stats/chisquare.c @@@ -1,5 -1,5 +1,5 @@@ /* PSPP - a program for statistical analysis. - Copyright (C) 2006, 2007 Free Software Foundation, Inc. 
+ Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@@ -21,6 -21,7 +21,7 @@@ #include #include + #include #include #include #include @@@ -57,7 -58,7 +58,7 @@@ create_freq_hash_with_range (const stru { bool warn = true; float i_d; - struct ccase c; + struct ccase *c; struct hsh_table *freq_hash = hsh_create (4, compare_freq, hash_freq, @@@ -78,21 -79,21 +79,21 @@@ hsh_insert (freq_hash, fr); } - while (casereader_read (input, &c)) + while ((c = casereader_read (input)) != NULL) { union value obs_value; struct freq **existing_fr; struct freq *fr = xmalloc(sizeof (*fr)); - fr->value = case_data (&c, var); + fr->value = case_data (c, var); - fr->count = dict_get_case_weight (dict, &c, &warn); + fr->count = dict_get_case_weight (dict, c, &warn); obs_value.f = trunc (fr->value->f); if ( obs_value.f < lo || obs_value.f > hi) { free (fr); - case_destroy (&c); + case_unref (c); continue; } @@@ -107,7 -108,7 +108,7 @@@ (*existing_fr)->count += fr->count; free (fr); - case_destroy (&c); + case_unref (c); } if (casereader_destroy (input)) return freq_hash; @@@ -130,20 -131,20 +131,20 @@@ create_freq_hash (const struct dictiona const struct variable *var) { bool warn = true; - struct ccase c; + struct ccase *c; struct hsh_table *freq_hash = hsh_create (4, compare_freq, hash_freq, free_freq_mutable_hash, (void *) var); - for (; casereader_read (input, &c); case_destroy (&c)) + for (; (c = casereader_read (input)) != NULL; case_unref (c)) { struct freq **existing_fr; struct freq *fr = xmalloc(sizeof (*fr)); - fr->value = case_data (&c, var); + fr->value = case_data (c, var); - fr->count = dict_get_case_weight (dict, &c, &warn); + fr->count = dict_get_case_weight (dict, c, &warn); existing_fr = (struct freq **) hsh_probe (freq_hash, fr); if ( *existing_fr) @@@ -269,8 -270,8 +270,8 @@@ create_combo_frequency_table (const str } for ( i = test->lo ; i <= test->hi ; ++i ) - tab_float (table, 0, 2 + i - test->lo, - TAB_LEFT, 1 + i - test->lo, 8, 0); + tab_fixed (table, 0, 2 + i - test->lo, + TAB_LEFT, 1 + i - test->lo, 8, 0); tab_headers (table, 1, 0, 2, 0); @@@ -320,9 -321,7 +321,9 @@@ voi chisquare_execute (const struct dataset *ds, struct casereader *input, enum mv_class exclude, - const struct npar_test *test) + const struct npar_test *test, + bool exact UNUSED, + double timer UNUSED) { const struct dictionary *dict = dataset_dict (ds); int v, i; @@@ -330,6 -329,9 +331,9 @@@ struct chisquare_test *cst = (struct chisquare_test *) test; int n_cells = 0; double total_expected = 0.0; + const struct variable *wvar = dict_get_weight (dict); + const struct fmt_spec *wfmt = wvar ? 
+ var_get_print_format (wvar) : & F_8_0; double *df = xzalloc (sizeof (*df) * ost->n_vars); double *xsq = xzalloc (sizeof (*df) * ost->n_vars); @@@ -346,8 -348,7 +350,8 @@@ struct hsh_table *freq_hash = NULL; struct casereader *reader = casereader_create_filter_missing (casereader_clone (input), - &ost->vars[v], 1, exclude, NULL); + &ost->vars[v], 1, exclude, + NULL, NULL); struct tab_table *freq_table = create_variable_frequency_table(dict, reader, cst, v, &freq_hash); @@@ -378,28 -379,28 +382,28 @@@ /* The observed N */ - tab_float (freq_table, 1, i + 1, TAB_NONE, - ff[i]->count, 8, 0); + tab_double (freq_table, 1, i + 1, TAB_NONE, + ff[i]->count, wfmt); if ( cst->n_expected > 0 ) exp = cst->expected[i] * total_obs / total_expected ; else exp = total_obs / (double) n_cells; - tab_float (freq_table, 2, i + 1, TAB_NONE, - exp, 8, 2); + tab_double (freq_table, 2, i + 1, TAB_NONE, + exp, NULL); /* The residual */ - tab_float (freq_table, 3, i + 1, TAB_NONE, - ff[i]->count - exp, 8, 2); + tab_double (freq_table, 3, i + 1, TAB_NONE, + ff[i]->count - exp, NULL); xsq[v] += (ff[i]->count - exp) * (ff[i]->count - exp) / exp; } df[v] = n_cells - 1.0; - tab_float (freq_table, 1, i + 1, TAB_NONE, - total_obs, 8, 0); + tab_double (freq_table, 1, i + 1, TAB_NONE, + total_obs, wfmt); tab_submit (freq_table); @@@ -417,8 -418,7 +421,8 @@@ double total_obs = 0.0; struct casereader *reader = casereader_create_filter_missing (casereader_clone (input), - &ost->vars[v], 1, exclude, NULL); + &ost->vars[v], 1, exclude, + NULL, NULL); struct hsh_table *freq_hash = create_freq_hash_with_range (dict, reader, ost->vars[v], cst->lo, cst->hi); @@@ -450,8 -450,8 +454,8 @@@ ds_destroy (&str); /* The observed N */ - tab_float (freq_table, v * 4 + 2, i + 2 , TAB_NONE, - ff[i]->count, 8, 0); + tab_double (freq_table, v * 4 + 2, i + 2 , TAB_NONE, + ff[i]->count, wfmt); if ( cst->n_expected > 0 ) exp = cst->expected[i] * total_obs / total_expected ; @@@ -459,19 -459,19 +463,19 @@@ exp = total_obs / (double) hsh_count (freq_hash); /* The expected N */ - tab_float (freq_table, v * 4 + 3, i + 2 , TAB_NONE, - exp, 8, 2); + tab_double (freq_table, v * 4 + 3, i + 2 , TAB_NONE, + exp, NULL); /* The residual */ - tab_float (freq_table, v * 4 + 4, i + 2 , TAB_NONE, - ff[i]->count - exp, 8, 2); + tab_double (freq_table, v * 4 + 4, i + 2 , TAB_NONE, + ff[i]->count - exp, NULL); xsq[v] += (ff[i]->count - exp) * (ff[i]->count - exp) / exp; } - tab_float (freq_table, v * 4 + 2, tab_nr (freq_table) - 1, TAB_NONE, - total_obs, 8, 0); + tab_double (freq_table, v * 4 + 2, tab_nr (freq_table) - 1, TAB_NONE, + total_obs, wfmt); df[v] = n_cells - 1.0; @@@ -494,11 -494,11 +498,11 @@@ tab_text (stats_table, 1 + v, 0, TAB_CENTER, var_get_name (var)); - tab_float (stats_table, 1 + v, 1, TAB_NONE, xsq[v], 8,3); - tab_float (stats_table, 1 + v, 2, TAB_NONE, df[v], 8,0); + tab_double (stats_table, 1 + v, 1, TAB_NONE, xsq[v], NULL); + tab_fixed (stats_table, 1 + v, 2, TAB_NONE, df[v], 8, 0); - tab_float (stats_table, 1 + v, 3, TAB_NONE, - gsl_cdf_chisq_Q (xsq[v], df[v]), 8,3); + tab_double (stats_table, 1 + v, 3, TAB_NONE, + gsl_cdf_chisq_Q (xsq[v], df[v]), NULL); } tab_submit (stats_table); } diff --combined src/language/stats/crosstabs.q index 68d11e30,186ee12b..309b27fc --- a/src/language/stats/crosstabs.q +++ b/src/language/stats/crosstabs.q @@@ -1,5 -1,5 +1,5 @@@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@@ -177,9 -177,9 +177,10 @@@ static struct pool *pl_col; /* For colu static int internal_cmd_crosstabs (struct lexer *lexer, struct dataset *ds); static void precalc (struct casereader *, const struct dataset *); -static void calc_general (struct ccase *, const struct dataset *); -static void calc_integer (struct ccase *, const struct dataset *); +static void calc_general (const struct ccase *, const struct dataset *); +static void calc_integer (const struct ccase *, const struct dataset *); - static void postcalc (void); + static void postcalc (const struct dataset *); ++ static void submit (struct tab_table *); static void format_short (char *s, const struct fmt_spec *fp, @@@ -305,20 -305,20 +306,20 @@@ internal_cmd_crosstabs (struct lexer *l grouper = casegrouper_create_splits (input, dataset_dict (ds)); while (casegrouper_get_next_group (grouper, &group)) { - struct ccase c; + struct ccase *c; precalc (group, ds); - for (; casereader_read (group, &c); case_destroy (&c)) + for (; (c = casereader_read (group)) != NULL; case_unref (c)) { if (mode == GENERAL) - calc_general (&c, ds); + calc_general (c, ds); else - calc_integer (&c, ds); + calc_integer (c, ds); } casereader_destroy (group); - postcalc (); + postcalc (ds); } ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; @@@ -518,13 -518,12 +519,13 @@@ static unsigned hash_table_entry (cons static void precalc (struct casereader *input, const struct dataset *ds) { - struct ccase c; + struct ccase *c; - if (casereader_peek (input, 0, &c)) + c = casereader_peek (input, 0); + if (c != NULL) { - output_split_file_values (ds, &c); - case_destroy (&c); + output_split_file_values (ds, c); + case_unref (c); } if (mode == GENERAL) @@@ -599,7 -598,7 +600,7 @@@ /* Form crosstabulations for general mode. */ static void -calc_general (struct ccase *c, const struct dataset *ds) +calc_general (const struct ccase *c, const struct dataset *ds) { /* Missing values to exclude. */ enum mv_class exclude = (cmd.miss == CRS_TABLE ? MV_ANY @@@ -673,7 -672,7 +674,7 @@@ } static void -calc_integer (struct ccase *c, const struct dataset *ds) +calc_integer (const struct ccase *c, const struct dataset *ds) { bool bad_warn = true; @@@ -789,12 -788,13 +790,13 @@@ static void enum_var_values (struct tab int var_idx, union value **values, int *value_cnt); static void output_pivot_table (struct table_entry **, struct table_entry **, + const struct dictionary *, double **, double **, double **, int *, int *, int *); - static void make_summary_table (void); + static void make_summary_table (const struct dictionary *); static void - postcalc (void) + postcalc (const struct dataset *ds) { if (mode == GENERAL) { @@@ -802,7 -802,7 +804,7 @@@ sorted_tab = (struct table_entry **) hsh_sort (gen_tab); } - make_summary_table (); + make_summary_table (dataset_dict (ds)); /* Identify all the individual crosstabulation tables, and deal with them. 
*/ @@@ -819,7 -819,8 +821,8 @@@ if (pe == NULL) break; - output_pivot_table (pb, pe, &mat, &row_tot, &col_tot, + output_pivot_table (pb, pe, dataset_dict (ds), + &mat, &row_tot, &col_tot, &maxrows, &maxcols, &maxcells); pb = pe; @@@ -842,11 -843,13 +845,13 @@@ } } - static void insert_summary (struct tab_table *, int tab_index, double valid); + static void insert_summary (struct tab_table *, int tab_index, + const struct dictionary *, + double valid); /* Output a table summarizing the cases processed. */ static void - make_summary_table (void) + make_summary_table (const struct dictionary *dict) { struct tab_table *summary; @@@ -885,7 -888,7 +890,7 @@@ break; while (cur_tab < (*pb)->table) - insert_summary (summary, cur_tab++, 0.); + insert_summary (summary, cur_tab++, dict, 0.); if (mode == GENERAL) for (valid = 0.; pb < pe; pb++) @@@ -906,13 -909,13 +911,13 @@@ valid += *data++; } } - insert_summary (summary, cur_tab++, valid); + insert_summary (summary, cur_tab++, dict, valid); pb = pe; } while (cur_tab < nxtab) - insert_summary (summary, cur_tab++, 0.); + insert_summary (summary, cur_tab++, dict, 0.); submit (summary); } @@@ -920,10 -923,15 +925,15 @@@ /* Inserts a line into T describing the crosstabulation at index TAB_INDEX, which has VALID valid observations. */ static void - insert_summary (struct tab_table *t, int tab_index, double valid) + insert_summary (struct tab_table *t, int tab_index, + const struct dictionary *dict, + double valid) { struct crosstab *x = xtab[tab_index]; + const struct variable *wv = dict_get_weight (dict); + const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0; + tab_hline (t, TAL_1, 0, 6, 0); /* Crosstabulation name. */ @@@ -956,7 -964,7 +966,7 @@@ for (i = 0; i < 3; i++) { - tab_float (t, i * 2 + 1, 0, TAB_RIGHT, n[i], 8, 0); + tab_double (t, i * 2 + 1, 0, TAB_RIGHT, n[i], wfmt); tab_text (t, i * 2 + 2, 0, TAB_RIGHT | TAT_PRINTF, "%.1f%%", n[i] / n[2] * 100.); } @@@ -1005,9 -1013,9 +1015,9 @@@ static double W; /* Grand total. * static void display_dimensions (struct tab_table *, int first_difference, struct table_entry *); static void display_crosstabulation (void); - static void display_chisq (void); - static void display_symmetric (void); - static void display_risk (void); + static void display_chisq (const struct dictionary *); + static void display_symmetric (const struct dictionary *); + static void display_risk (const struct dictionary *); static void display_directional (void); static void crosstabs_dim (struct tab_table *, struct outp_driver *); static void table_value_missing (struct tab_table *table, int c, int r, @@@ -1020,6 -1028,7 +1030,7 @@@ static void delete_missing (void) hold *MAXROWS entries. */ static void output_pivot_table (struct table_entry **pb, struct table_entry **pe, + const struct dictionary *dict, double **matp, double **row_totp, double **col_totp, int *maxrows, int *maxcols, int *maxcells) { @@@ -1426,11 -1435,11 +1437,11 @@@ if (cmd.miss == CRS_REPORT) delete_missing (); if (chisq) - display_chisq (); + display_chisq (dict); if (sym) - display_symmetric (); + display_symmetric (dict); if (risk) - display_risk (); + display_risk (dict); if (direct) display_directional (); @@@ -1970,8 -1979,11 +1981,11 @@@ static void calc_chisq (double[N_CHISQ] /* Display chi-square statistics. */ static void - display_chisq (void) + display_chisq (const struct dictionary *dict) { + const struct variable *wv = dict_get_weight (dict); + const struct fmt_spec *wfmt = wv ? 
var_get_print_format (wv) : & F_8_0; + static const char *chisq_stats[N_CHISQ] = { N_("Pearson Chi-Square"), @@@ -2001,22 -2013,22 +2015,22 @@@ tab_text (chisq, 0, 0, TAB_LEFT, gettext (chisq_stats[i])); if (i != 2) { - tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3); - tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0); - tab_float (chisq, 3, 0, TAB_RIGHT, - gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3); + tab_double (chisq, 1, 0, TAB_RIGHT, chisq_v[i], NULL); + tab_double (chisq, 2, 0, TAB_RIGHT, df[i], wfmt); + tab_double (chisq, 3, 0, TAB_RIGHT, + gsl_cdf_chisq_Q (chisq_v[i], df[i]), NULL); } else { chisq_fisher = 1; - tab_float (chisq, 4, 0, TAB_RIGHT, fisher2, 8, 3); - tab_float (chisq, 5, 0, TAB_RIGHT, fisher1, 8, 3); + tab_double (chisq, 4, 0, TAB_RIGHT, fisher2, NULL); + tab_double (chisq, 5, 0, TAB_RIGHT, fisher1, NULL); } tab_next_row (chisq); } tab_text (chisq, 0, 0, TAB_LEFT, _("N of Valid Cases")); - tab_float (chisq, 1, 0, TAB_RIGHT, W, 8, 0); + tab_double (chisq, 1, 0, TAB_RIGHT, W, wfmt); tab_next_row (chisq); tab_offset (chisq, 0, -1); @@@ -2027,8 -2039,11 +2041,11 @@@ static int calc_symmetric (double[N_SYM /* Display symmetric measures. */ static void - display_symmetric (void) + display_symmetric (const struct dictionary *dict) { + const struct variable *wv = dict_get_weight (dict); + const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0; + static const char *categories[] = { N_("Nominal by Nominal"), @@@ -2076,17 -2091,17 +2093,17 @@@ } tab_text (sym, 1, 0, TAB_LEFT, gettext (stats[i])); - tab_float (sym, 2, 0, TAB_RIGHT, sym_v[i], 8, 3); + tab_double (sym, 2, 0, TAB_RIGHT, sym_v[i], NULL); if (sym_ase[i] != SYSMIS) - tab_float (sym, 3, 0, TAB_RIGHT, sym_ase[i], 8, 3); + tab_double (sym, 3, 0, TAB_RIGHT, sym_ase[i], NULL); if (sym_t[i] != SYSMIS) - tab_float (sym, 4, 0, TAB_RIGHT, sym_t[i], 8, 3); - /*tab_float (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), 8, 3);*/ + tab_double (sym, 4, 0, TAB_RIGHT, sym_t[i], NULL); + /*tab_double (sym, 5, 0, TAB_RIGHT, normal_sig (sym_v[i]), NULL);*/ tab_next_row (sym); } tab_text (sym, 0, 0, TAB_LEFT, _("N of Valid Cases")); - tab_float (sym, 2, 0, TAB_RIGHT, W, 8, 0); + tab_double (sym, 2, 0, TAB_RIGHT, W, wfmt); tab_next_row (sym); tab_offset (sym, 0, -1); @@@ -2096,8 -2111,11 +2113,11 @@@ static int calc_risk (double[], double[ /* Display risk estimate. */ static void - display_risk (void) + display_risk (const struct dictionary *dict) { + const struct variable *wv = dict_get_weight (dict); + const struct fmt_spec *wfmt = wv ? 
var_get_print_format (wv) : & F_8_0; + char buf[256]; double risk_v[3], lower[3], upper[3]; union value c[2]; @@@ -2138,14 -2156,14 +2158,14 @@@ } tab_text (risk, 0, 0, TAB_LEFT, buf); - tab_float (risk, 1, 0, TAB_RIGHT, risk_v[i], 8, 3); - tab_float (risk, 2, 0, TAB_RIGHT, lower[i], 8, 3); - tab_float (risk, 3, 0, TAB_RIGHT, upper[i], 8, 3); + tab_double (risk, 1, 0, TAB_RIGHT, risk_v[i], NULL); + tab_double (risk, 2, 0, TAB_RIGHT, lower[i], NULL); + tab_double (risk, 3, 0, TAB_RIGHT, upper[i], NULL); tab_next_row (risk); } tab_text (risk, 0, 0, TAB_LEFT, _("N of Valid Cases")); - tab_float (risk, 1, 0, TAB_RIGHT, W, 8, 0); + tab_double (risk, 1, 0, TAB_RIGHT, W, wfmt); tab_next_row (risk); tab_offset (risk, 0, -1); @@@ -2258,12 -2276,12 +2278,12 @@@ display_directional (void } } - tab_float (direct, 3, 0, TAB_RIGHT, direct_v[i], 8, 3); + tab_double (direct, 3, 0, TAB_RIGHT, direct_v[i], NULL); if (direct_ase[i] != SYSMIS) - tab_float (direct, 4, 0, TAB_RIGHT, direct_ase[i], 8, 3); + tab_double (direct, 4, 0, TAB_RIGHT, direct_ase[i], NULL); if (direct_t[i] != SYSMIS) - tab_float (direct, 5, 0, TAB_RIGHT, direct_t[i], 8, 3); - /*tab_float (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), 8, 3);*/ + tab_double (direct, 5, 0, TAB_RIGHT, direct_t[i], NULL); + /*tab_double (direct, 6, 0, TAB_RIGHT, normal_sig (direct_v[i]), NULL);*/ tab_next_row (direct); } @@@ -2456,7 -2474,7 +2476,7 @@@ calc_r (double *X, double *Y, double *r for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++) { sum_Xr += X[i] * row_tot[i]; - sum_X2r += X[i] * X[i] * row_tot[i]; + sum_X2r += pow2 (X[i]) * row_tot[i]; } Xbar = sum_Xr / W; @@@ -2468,11 -2486,11 +2488,11 @@@ Ybar = sum_Yc / W; S = sum_XYf - sum_Xr * sum_Yc / W; - SX = sum_X2r - sum_Xr * sum_Xr / W; - SY = sum_Y2c - sum_Yc * sum_Yc / W; + SX = sum_X2r - pow2 (sum_Xr) / W; + SY = sum_Y2c - pow2 (sum_Yc) / W; T = sqrt (SX * SY); *r = S / T; - *ase_0 = sqrt ((sum_X2Y2f - (sum_XYf * sum_XYf) / W) / (sum_X2r * sum_Y2c)); + *ase_0 = sqrt ((sum_X2Y2f - pow2 (sum_XYf) / W) / (sum_X2r * sum_Y2c)); { double s, c, y, t; @@@ -2562,9 -2580,9 +2582,9 @@@ calc_symmetric (double v[N_SYMMETRIC], Dr = Dc = W * W; for (r = 0; r < n_rows; r++) - Dr -= row_tot[r] * row_tot[r]; + Dr -= pow2 (row_tot[r]); for (c = 0; c < n_cols; c++) - Dc -= col_tot[c] * col_tot[c]; + Dc -= pow2 (col_tot[c]); } { @@@ -3073,10 -3091,10 +3093,10 @@@ calc_directional (double v[N_DIRECTIONA } for (sum_ri2 = 0., i = 0; i < n_rows; i++) - sum_ri2 += row_tot[i] * row_tot[i]; + sum_ri2 += pow2 (row_tot[i]); for (sum_cj2 = 0., j = 0; j < n_cols; j++) - sum_cj2 += col_tot[j] * col_tot[j]; + sum_cj2 += pow2 (col_tot[j]); v[3] = (W * sum_fij2_ci - sum_ri2) / (W * W - sum_ri2); v[4] = (W * sum_fij2_ri - sum_cj2) / (W * W - sum_cj2); @@@ -3166,9 -3184,9 +3186,9 @@@ for (sum_Xr = sum_X2r = 0., i = 0; i < n_rows; i++) { sum_Xr += rows[i].f * row_tot[i]; - sum_X2r += rows[i].f * rows[i].f * row_tot[i]; + sum_X2r += pow2 (rows[i].f) * row_tot[i]; } - SX = sum_X2r - sum_Xr * sum_Xr / W; + SX = sum_X2r - pow2 (sum_Xr) / W; for (SXW = 0., j = 0; j < n_cols; j++) { @@@ -3176,7 -3194,7 +3196,7 @@@ for (cum = 0., i = 0; i < n_rows; i++) { - SXW += rows[i].f * rows[i].f * mat[j + i * n_cols]; + SXW += pow2 (rows[i].f) * mat[j + i * n_cols]; cum += rows[i].f * mat[j + i * n_cols]; } @@@ -3193,7 -3211,7 +3213,7 @@@ for (sum_Yc = sum_Y2c = 0., i = 0; i < n_cols; i++) { sum_Yc += cols[i].f * col_tot[i]; - sum_Y2c += cols[i].f * cols[i].f * col_tot[i]; + sum_Y2c += pow2 (cols[i].f) * col_tot[i]; } SY = sum_Y2c - sum_Yc * sum_Yc 
/ W; @@@ -3203,7 -3221,7 +3223,7 @@@ for (cum = 0., j = 0; j < n_cols; j++) { - SYW += cols[j].f * cols[j].f * mat[j + i * n_cols]; + SYW += pow2 (cols[j].f) * mat[j + i * n_cols]; cum += cols[j].f * mat[j + i * n_cols]; } diff --combined src/language/stats/descriptives.c index 68b19300,e26eadf9..23bb1aa8 --- a/src/language/stats/descriptives.c +++ b/src/language/stats/descriptives.c @@@ -1,5 -1,5 +1,5 @@@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@@ -583,7 -583,7 +583,7 @@@ dump_z_table (struct dsc_proc *dsc (either system or user-missing values that weren't included). */ static int -descriptives_trns_proc (void *trns_, struct ccase * c, +descriptives_trns_proc (void *trns_, struct ccase **c, casenumber case_idx UNUSED) { struct dsc_trns *t = trns_; @@@ -596,7 -596,7 +596,7 @@@ assert(t->vars); for (vars = t->vars; vars < t->vars + t->var_cnt; vars++) { - double score = case_num (c, *vars); + double score = case_num (*c, *vars); if (var_is_num_missing (*vars, score, t->exclude)) { all_sysmis = 1; @@@ -605,11 -605,10 +605,11 @@@ } } + *c = case_unshare (*c); for (z = t->z_scores; z < t->z_scores + t->z_score_cnt; z++) { - double input = case_num (c, z->src_var); - double *output = &case_data_rw (c, z->z_var)->f; + double input = case_num (*c, z->src_var); + double *output = &case_data_rw (*c, z->z_var)->f; if (z->mean == SYSMIS || z->std_dev == SYSMIS || all_sysmis || var_is_num_missing (z->src_var, input, t->exclude)) @@@ -696,17 -695,16 +696,17 @@@ calc_descriptives (struct dsc_proc *dsc struct dataset *ds) { struct casereader *pass1, *pass2; - struct ccase c; + struct ccase *c; size_t i; - if (!casereader_peek (group, 0, &c)) + c = casereader_peek (group, 0); + if (c == NULL) { casereader_destroy (group); return; } - output_split_file_values (ds, &c); - case_destroy (&c); + output_split_file_values (ds, c); + case_unref (c); group = casereader_create_filter_weight (group, dataset_dict (ds), NULL, NULL); @@@ -728,12 -726,12 +728,12 @@@ dsc->valid = 0.; /* First pass to handle most of the work. */ - for (; casereader_read (pass1, &c); case_destroy (&c)) + for (; (c = casereader_read (pass1)) != NULL; case_unref (c)) { - double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL); + double weight = dict_get_case_weight (dataset_dict (ds), c, NULL); /* Check for missing values. */ - if (listwise_missing (dsc, &c)) + if (listwise_missing (dsc, c)) { dsc->missing_listwise += weight; if (dsc->missing_type == DSC_LISTWISE) @@@ -744,7 -742,7 +744,7 @@@ for (i = 0; i < dsc->var_cnt; i++) { struct dsc_var *dv = &dsc->vars[i]; - double x = case_num (&c, dv->v); + double x = case_num (c, dv->v); if (var_is_num_missing (dv->v, x, dsc->exclude)) { @@@ -770,18 -768,18 +770,18 @@@ /* Second pass for higher-order moments. */ if (dsc->max_moment > MOMENT_MEAN) { - for (; casereader_read (pass2, &c); case_destroy (&c)) + for (; (c = casereader_read (pass2)) != NULL; case_unref (c)) { - double weight = dict_get_case_weight (dataset_dict (ds), &c, NULL); + double weight = dict_get_case_weight (dataset_dict (ds), c, NULL); /* Check for missing values. 
*/ - if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, &c)) + if (dsc->missing_type == DSC_LISTWISE && listwise_missing (dsc, c)) continue; for (i = 0; i < dsc->var_cnt; i++) { struct dsc_var *dv = &dsc->vars[i]; - double x = case_num (&c, dv->v); + double x = case_num (c, dv->v); if (var_is_num_missing (dv->v, x, dsc->exclude)) continue; @@@ -908,9 -906,10 +908,10 @@@ display (struct dsc_proc *dsc tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->valid); if (dsc->format == DSC_SERIAL) tab_text (t, nc++, i + 1, TAT_PRINTF, "%g", dv->missing); + for (j = 0; j < DSC_N_STATS; j++) if (dsc->show_stats & (1ul << j)) - tab_float (t, nc++, i + 1, TAB_NONE, dv->stats[j], 10, 3); + tab_double (t, nc++, i + 1, TAB_NONE, dv->stats[j], NULL); } tab_title (t, _("Valid cases = %g; cases with missing value(s) = %g."), diff --combined src/language/stats/examine.q index 2649968b,febb60fe..51f28320 --- a/src/language/stats/examine.q +++ b/src/language/stats/examine.q @@@ -1,5 -1,5 +1,5 @@@ /* PSPP - a program for statistical analysis. - Copyright (C) 2004, 2009 Free Software Foundation, Inc. + Copyright (C) 2004, 2008, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@@ -22,23 -22,14 +22,23 @@@ #include #include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include +#include #include #include +#include #include #include -#include #include #include #include @@@ -47,7 -38,9 +47,7 @@@ #include #include #include -#include #include -#include #include #include #include @@@ -64,7 -57,6 +64,7 @@@ #include #include #include +#include /* (specification) "EXAMINE" (xmn_): @@@ -72,8 -64,8 +72,8 @@@ +total=custom; +nototal=custom; missing=miss:pairwise/!listwise, - rep:report/!noreport, - incl:include/!exclude; + rep:report/!noreport, + incl:include/!exclude; +compare=cmp:variables/!groups; +percentiles=custom; +id=var; @@@ -87,143 -79,74 +87,144 @@@ /* (functions) */ - static struct cmd_examine cmd; static const struct variable **dependent_vars; - static size_t n_dependent_vars; +/* PERCENTILES */ + +static subc_list_double percentile_list; +static enum pc_alg percentile_algorithm; -struct factor +struct factor_metrics { - /* The independent variable */ - struct variable *indep_var[2]; + struct moments1 *moments; + + struct percentile **ptl; + size_t n_ptiles; + + struct statistic *tukey_hinges; + struct statistic *box_whisker; + struct statistic *trimmed_mean; + struct statistic *histogram; + struct order_stats *np; + + /* Three quartiles indexing into PTL */ + struct percentile **quartiles; + + /* A reader sorted in ASCENDING order */ + struct casereader *up_reader; + + /* The minimum value of all the weights */ + double cmin; + + /* Sum of all weights, including those for missing values */ + double n; + + /* Sum of weights of non_missing values */ + double n_valid; + double mean; - /* Hash table of factor stats indexed by 2 values */ - struct hsh_table *fstats; + double variance; - /* The hash table after it has been crunched */ - struct factor_statistics **fs; + double skewness; - struct factor *next; + double kurtosis; + double se_mean; + + struct extrema *minima; + struct extrema *maxima; }; -/* Linked list of factors */ -static struct factor *factors = 0; +struct factor_result +{ + struct ll ll; -static struct metrics *totals = 0; + union value *value[2]; -/* Parse the clause specifying the factors */ -static int 
examine_parse_independent_vars (struct lexer *lexer, const struct dictionary *dict, struct cmd_examine *cmd); + /* An array of factor metrics, one for each variable */ + struct factor_metrics *metrics; +}; +struct xfactor +{ + /* We need to make a list of this structure */ + struct ll ll; + /* The independent variable */ + const struct variable const* indep_var[2]; -/* Output functions */ -static void show_summary (const struct variable **dependent_var, int n_dep_var, - const struct dictionary *dict, - const struct factor *f); + /* A list of results for this factor */ + struct ll_list result_list ; +}; -static void show_extremes (const struct variable **dependent_var, - int n_dep_var, - const struct factor *factor, - int n_extremities); -static void show_descriptives (const struct variable **dependent_var, - int n_dep_var, - struct factor *factor); +static void +factor_destroy (struct xfactor *fctr) +{ + struct ll *ll = ll_head (&fctr->result_list); + while (ll != ll_null (&fctr->result_list)) + { + int v; + struct factor_result *result = + ll_data (ll, struct factor_result, ll); -static void show_percentiles (const struct variable **dependent_var, - int n_dep_var, - struct factor *factor); + for (v = 0; v < n_dependent_vars; ++v) + { + int i; + moments1_destroy (result->metrics[v].moments); + extrema_destroy (result->metrics[v].minima); + extrema_destroy (result->metrics[v].maxima); + statistic_destroy (result->metrics[v].trimmed_mean); + statistic_destroy (result->metrics[v].tukey_hinges); + statistic_destroy (result->metrics[v].box_whisker); + statistic_destroy (result->metrics[v].histogram); + for (i = 0 ; i < result->metrics[v].n_ptiles; ++i) + statistic_destroy ((struct statistic *) result->metrics[v].ptl[i]); + free (result->metrics[v].ptl); + free (result->metrics[v].quartiles); + casereader_destroy (result->metrics[v].up_reader); + } + + free (result->value[0]); + free (result->value[1]); + free (result->metrics); + ll = ll_next (ll); + free (result); + } +} +static struct xfactor level0_factor; +static struct ll_list factor_list; + +/* Parse the clause specifying the factors */ +static int examine_parse_independent_vars (struct lexer *lexer, + const struct dictionary *dict, + struct cmd_examine *cmd); +/* Output functions */ +static void show_summary (const struct variable **dependent_var, int n_dep_var, ++ const struct dictionary *dict, + const struct xfactor *f); -void np_plot (const struct metrics *m, const char *factorname); +static void show_descriptives (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *f); -void box_plot_group (const struct factor *fctr, - const struct variable **vars, int n_vars, - const struct variable *id - ) ; +static void show_percentiles (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *f); -void box_plot_variables (const struct factor *fctr, - const struct variable **vars, int n_vars, - const struct variable *id - ); +static void show_extremes (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *f); + @@@ -231,7 -154,7 +232,7 @@@ static void run_examine (struct cmd_examine *, struct casereader *, struct dataset *); - static void output_examine (void); + static void output_examine (const struct dictionary *dict); void factor_calc (const struct ccase *c, int case_no, @@@ -240,24 -163,34 +241,24 @@@ /* Represent a factor as a string, so it can be printed in a human readable fashion */ -static void factor_to_string (const struct factor *fctr, - const struct 
factor_statistics *fs, - const struct variable *var, - struct string *str - ); +static void factor_to_string (const struct xfactor *fctr, + const struct factor_result *result, + struct string *str); /* Represent a factor as a string, so it can be printed in a human readable fashion, but sacrificing some readablility for the sake of brevity */ -static void factor_to_string_concise (const struct factor *fctr, - const struct factor_statistics *fs, - struct string *); - +static void +factor_to_string_concise (const struct xfactor *fctr, + const struct factor_result *result, + struct string *str + ); /* Categories of missing values to exclude. */ static enum mv_class exclude_values; -/* PERCENTILES */ - -static subc_list_double percentile_list; - -static enum pc_alg percentile_algorithm; - -static short sbc_percentile; - - int cmd_examine (struct lexer *lexer, struct dataset *ds) { @@@ -268,8 -201,6 +269,8 @@@ subc_list_double_create (&percentile_list); percentile_algorithm = PC_HAVERAGE; + ll_init (&factor_list); + if ( !parse_examine (lexer, ds, &cmd, NULL) ) { subc_list_double_destroy (&percentile_list); @@@ -295,404 -226,225 +296,404 @@@ } grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); + while (casegrouper_get_next_group (grouper, &group)) - run_examine (&cmd, group, ds); + { + struct casereader *reader = + casereader_create_arithmetic_sequence (group, 1, 1); + + run_examine (&cmd, reader, ds); + } + ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; - if ( totals ) + if ( dependent_vars ) + free (dependent_vars); + + subc_list_double_destroy (&percentile_list); + + return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; +}; + + +/* Plot the normal and detrended normal plots for RESULT. + Label the plots with LABEL */ +static void +np_plot (struct np *np, const char *label) +{ + double yfirst = 0, ylast = 0; + + double x_lower; + double x_upper; + double slack; + + /* Normal Plot */ + struct chart *np_chart; + + /* Detrended Normal Plot */ + struct chart *dnp_chart; + + /* The slope and intercept of the ideal normal probability line */ + const double slope = 1.0 / np->stddev; + const double intercept = -np->mean / np->stddev; + + if ( np->n < 1.0 ) { - free ( totals ); + msg (MW, _("Not creating plot because data set is empty.")); + return ; } - if ( dependent_vars ) - free (dependent_vars); + np_chart = chart_create (); + dnp_chart = chart_create (); + + if ( !np_chart || ! 
dnp_chart ) + return ; + + chart_write_title (np_chart, _("Normal Q-Q Plot of %s"), label); + chart_write_xlabel (np_chart, _("Observed Value")); + chart_write_ylabel (np_chart, _("Expected Normal")); + + chart_write_title (dnp_chart, _("Detrended Normal Q-Q Plot of %s"), + label); + chart_write_xlabel (dnp_chart, _("Observed Value")); + chart_write_ylabel (dnp_chart, _("Dev from Normal")); + + yfirst = gsl_cdf_ugaussian_Pinv (1 / (np->n + 1)); + ylast = gsl_cdf_ugaussian_Pinv (np->n / (np->n + 1)); + + /* Need to make sure that both the scatter plot and the ideal fit into the + plot */ + x_lower = MIN (np->y_min, (yfirst - intercept) / slope) ; + x_upper = MAX (np->y_max, (ylast - intercept) / slope) ; + slack = (x_upper - x_lower) * 0.05 ; + + chart_write_xscale (np_chart, x_lower - slack, x_upper + slack, 5); + chart_write_xscale (dnp_chart, np->y_min, np->y_max, 5); + + chart_write_yscale (np_chart, yfirst, ylast, 5); + chart_write_yscale (dnp_chart, np->dns_min, np->dns_max, 5); { - struct factor *f = factors ; - while ( f ) + struct casereader *reader = casewriter_make_reader (np->writer); + struct ccase *c; + while ((c = casereader_read (reader)) != NULL) { - struct factor *ff = f; + chart_datum (np_chart, 0, case_data_idx (c, NP_IDX_Y)->f, case_data_idx (c, NP_IDX_NS)->f); + chart_datum (dnp_chart, 0, case_data_idx (c, NP_IDX_Y)->f, case_data_idx (c, NP_IDX_DNS)->f); - f = f->next; - free ( ff->fs ); - hsh_destroy ( ff->fstats ) ; - free ( ff ) ; + case_unref (c); } - factors = 0; + casereader_destroy (reader); } - subc_list_double_destroy (&percentile_list); - - return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; -}; + chart_line (dnp_chart, 0, 0, np->y_min, np->y_max , CHART_DIM_X); + chart_line (np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y); + chart_submit (np_chart); + chart_submit (dnp_chart); +} -/* Show all the appropriate tables */ static void -output_examine (const struct dictionary *dict) +show_npplot (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr) { - struct factor *fctr; + int v; - /* Show totals if appropriate */ - if ( ! 
cmd.sbc_nototal || factors == 0 ) + for (v = 0; v < n_dep_var; ++v) { - show_summary (dependent_vars, n_dependent_vars, dict, 0); + struct ll *ll; + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); + ll = ll_next (ll)) + { + struct string str; + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); + + ds_init_empty (&str); + ds_put_format (&str, "%s ", var_get_name (dependent_var[v])); + + factor_to_string (fctr, result, &str); + + np_plot ((struct np*) result->metrics[v].np, ds_cstr(&str)); + + statistic_destroy ((struct statistic *)result->metrics[v].np); + + ds_destroy (&str); + } + } +} + + +static void +show_histogram (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr) +{ + int v; - if ( cmd.sbc_statistics ) + for (v = 0; v < n_dep_var; ++v) + { + struct ll *ll; + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); + ll = ll_next (ll)) { - if ( cmd.a_statistics[XMN_ST_EXTREME]) - show_extremes (dependent_vars, n_dependent_vars, 0, cmd.st_n); + struct string str; + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); - if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES]) - show_descriptives (dependent_vars, n_dependent_vars, 0); + ds_init_empty (&str); + ds_put_format (&str, "%s ", var_get_name (dependent_var[v])); + factor_to_string (fctr, result, &str); + + histogram_plot ((struct histogram *) result->metrics[v].histogram, + ds_cstr (&str), + (struct moments1 *) result->metrics[v].moments); + + ds_destroy (&str); } - if ( sbc_percentile ) - show_percentiles (dependent_vars, n_dependent_vars, 0); + } +} + + + +static void +show_boxplot_groups (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr) +{ + int v; + + for (v = 0; v < n_dep_var; ++v) + { + struct ll *ll; + int f = 0; + struct chart *ch = chart_create (); + double y_min = DBL_MAX; + double y_max = -DBL_MAX; - if ( cmd.sbc_plot) + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); + ll = ll_next (ll)) { - int v; - if ( cmd.a_plot[XMN_PLT_STEMLEAF] ) - msg (SW, _ ("%s is not currently supported."), "STEMLEAF"); + const struct extremum *max, *min; + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); - if ( cmd.a_plot[XMN_PLT_SPREADLEVEL] ) - msg (SW, _ ("%s is not currently supported."), "SPREADLEVEL"); + const struct ll_list *max_list = + extrema_list (result->metrics[v].maxima); - if ( cmd.a_plot[XMN_PLT_NPPLOT] ) - { - for ( v = 0 ; v < n_dependent_vars; ++v ) - np_plot (&totals[v], var_to_string (dependent_vars[v])); - } + const struct ll_list *min_list = + extrema_list (result->metrics[v].minima); - if ( cmd.a_plot[XMN_PLT_BOXPLOT] ) + if ( ll_is_empty (max_list)) { - if ( cmd.cmp == XMN_GROUPS ) - { - box_plot_group (0, (const struct variable **) dependent_vars, - n_dependent_vars, cmd.v_id); - } - else - box_plot_variables (0, - (const struct variable **) dependent_vars, - n_dependent_vars, cmd.v_id); + msg (MW, _("Not creating plot because data set is empty.")); + continue; } - if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) - { - for ( v = 0 ; v < n_dependent_vars; ++v ) - { - struct normal_curve normal; + max = (const struct extremum *) + ll_data (ll_head(max_list), struct extremum, ll); - normal.N = totals[v].n; - normal.mean = totals[v].mean; - normal.stddev = totals[v].stddev; + min = (const struct extremum *) + ll_data (ll_head (min_list), struct extremum, ll); - histogram_plot (totals[v].histogram, - 
var_to_string (dependent_vars[v]), - &normal, 0); - } - } + y_max = MAX (y_max, max->value); + y_min = MIN (y_min, min->value); + } + + boxplot_draw_yscale (ch, y_max, y_min); + + if ( fctr->indep_var[0]) + chart_write_title (ch, _("Boxplot of %s vs. %s"), + var_to_string (dependent_var[v]), + var_to_string (fctr->indep_var[0]) ); + else + chart_write_title (ch, _("Boxplot of %s"), + var_to_string (dependent_var[v])); + + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); + ll = ll_next (ll)) + { + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); + + struct string str; + const double box_width = (ch->data_right - ch->data_left) + / (ll_count (&fctr->result_list) * 2.0 ) ; + const double box_centre = (f++ * 2 + 1) * box_width + ch->data_left; + + ds_init_empty (&str); + factor_to_string_concise (fctr, result, &str); + + boxplot_draw_boxplot (ch, + box_centre, box_width, + (const struct box_whisker *) + result->metrics[v].box_whisker, + ds_cstr (&str)); + + ds_destroy (&str); } + chart_submit (ch); } +} - /* Show grouped statistics as appropriate */ - fctr = factors; - while ( fctr ) - { - show_summary (dependent_vars, n_dependent_vars, dict, fctr); - if ( cmd.sbc_statistics ) - { - if ( cmd.a_statistics[XMN_ST_EXTREME]) - show_extremes (dependent_vars, n_dependent_vars, fctr, cmd.st_n); +static void +show_boxplot_variables (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr + ) - if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES]) - show_descriptives (dependent_vars, n_dependent_vars, fctr); - } +{ + int v; + struct ll *ll; + const struct ll_list *result_list = &fctr->result_list; + + for (ll = ll_head (result_list); + ll != ll_null (result_list); + ll = ll_next (ll)) + + { + struct string title; + struct chart *ch = chart_create (); + double y_min = DBL_MAX; + double y_max = -DBL_MAX; - if ( sbc_percentile ) - show_percentiles (dependent_vars, n_dependent_vars, fctr); + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); + const double box_width = (ch->data_right - ch->data_left) + / (n_dep_var * 2.0 ) ; - if ( cmd.sbc_plot) + for (v = 0; v < n_dep_var; ++v) { - size_t v; + const struct ll *max_ll = + ll_head (extrema_list (result->metrics[v].maxima)); + const struct ll *min_ll = + ll_head (extrema_list (result->metrics[v].minima)); - struct factor_statistics **fs = fctr->fs ; + const struct extremum *max = + (const struct extremum *) ll_data (max_ll, struct extremum, ll); - if ( cmd.a_plot[XMN_PLT_BOXPLOT] ) - { - if ( cmd.cmp == XMN_VARIABLES ) - box_plot_variables (fctr, - (const struct variable **) dependent_vars, - n_dependent_vars, cmd.v_id); - else - box_plot_group (fctr, - (const struct variable **) dependent_vars, - n_dependent_vars, cmd.v_id); - } + const struct extremum *min = + (const struct extremum *) ll_data (min_ll, struct extremum, ll); - for ( v = 0 ; v < n_dependent_vars; ++v ) - { + y_max = MAX (y_max, max->value); + y_min = MIN (y_min, min->value); + } - for ( fs = fctr->fs ; *fs ; ++fs ) - { - struct string str; - ds_init_empty (&str); - factor_to_string (fctr, *fs, dependent_vars[v], &str); - if ( cmd.a_plot[XMN_PLT_NPPLOT] ) - np_plot (& (*fs)->m[v], ds_cstr (&str)); + boxplot_draw_yscale (ch, y_max, y_min); - if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) - { - struct normal_curve normal; + ds_init_empty (&title); + factor_to_string (fctr, result, &title); - normal.N = (*fs)->m[v].n; - normal.mean = (*fs)->m[v].mean; - normal.stddev = (*fs)->m[v].stddev; +#if 0 + 
ds_put_format (&title, "%s = ", var_get_name (fctr->indep_var[0])); + var_append_value_name (fctr->indep_var[0], result->value[0], &title); +#endif - histogram_plot ((*fs)->m[v].histogram, - ds_cstr (&str) , &normal, 0); - } + chart_write_title (ch, ds_cstr (&title)); + ds_destroy (&title); - ds_destroy (&str); + for (v = 0; v < n_dep_var; ++v) + { + struct string str; + const double box_centre = (v * 2 + 1) * box_width + ch->data_left; - } /* for ( fs .... */ + ds_init_empty (&str); + ds_init_cstr (&str, var_get_name (dependent_var[v])); - } /* for ( v = 0 ..... */ + boxplot_draw_boxplot (ch, + box_centre, box_width, + (const struct box_whisker *) result->metrics[v].box_whisker, + ds_cstr (&str)); + ds_destroy (&str); } - fctr = fctr->next; + chart_submit (ch); } - } -/* Create a hash table of percentiles and their values from the list of - percentiles */ -static struct hsh_table * -list_to_ptile_hash (const subc_list_double *l) +/* Show all the appropriate tables */ +static void - output_examine (void) ++output_examine (const struct dictionary *dict) { - int i; + struct ll *ll; + - show_summary (dependent_vars, n_dependent_vars, &level0_factor); ++ show_summary (dependent_vars, n_dependent_vars, dict, &level0_factor); - struct hsh_table *h ; + if ( cmd.a_statistics[XMN_ST_EXTREME] ) + show_extremes (dependent_vars, n_dependent_vars, &level0_factor); - h = hsh_create (subc_list_double_count (l), - (hsh_compare_func *) ptile_compare, - (hsh_hash_func *) ptile_hash, - (hsh_free_func *) free, - 0); + if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] ) + show_descriptives (dependent_vars, n_dependent_vars, &level0_factor); + if ( cmd.sbc_percentiles) + show_percentiles (dependent_vars, n_dependent_vars, &level0_factor); - for ( i = 0 ; i < subc_list_double_count (l) ; ++i ) + if ( cmd.sbc_plot) { - struct percentile *p = xmalloc (sizeof *p); - - p->p = subc_list_double_at (l,i); - p->v = SYSMIS; + if (cmd.a_plot[XMN_PLT_BOXPLOT]) + show_boxplot_groups (dependent_vars, n_dependent_vars, &level0_factor); - hsh_insert (h, p); + if (cmd.a_plot[XMN_PLT_HISTOGRAM]) + show_histogram (dependent_vars, n_dependent_vars, &level0_factor); + if (cmd.a_plot[XMN_PLT_NPPLOT]) + show_npplot (dependent_vars, n_dependent_vars, &level0_factor); } - return h; + for (ll = ll_head (&factor_list); + ll != ll_null (&factor_list); ll = ll_next (ll)) + { + struct xfactor *factor = ll_data (ll, struct xfactor, ll); - show_summary (dependent_vars, n_dependent_vars, factor); ++ show_summary (dependent_vars, n_dependent_vars, dict, factor); + + if ( cmd.a_statistics[XMN_ST_EXTREME] ) + show_extremes (dependent_vars, n_dependent_vars, factor); + if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] ) + show_descriptives (dependent_vars, n_dependent_vars, factor); + + if ( cmd.sbc_percentiles) + show_percentiles (dependent_vars, n_dependent_vars, factor); + + if (cmd.a_plot[XMN_PLT_BOXPLOT] && + cmd.cmp == XMN_GROUPS) + show_boxplot_groups (dependent_vars, n_dependent_vars, factor); + + + if (cmd.a_plot[XMN_PLT_BOXPLOT] && + cmd.cmp == XMN_VARIABLES) + show_boxplot_variables (dependent_vars, n_dependent_vars, + factor); + + if (cmd.a_plot[XMN_PLT_HISTOGRAM]) + show_histogram (dependent_vars, n_dependent_vars, factor); + + if (cmd.a_plot[XMN_PLT_NPPLOT]) + show_npplot (dependent_vars, n_dependent_vars, factor); + } } /* Parse the PERCENTILES subcommand */ static int xmn_custom_percentiles (struct lexer *lexer, struct dataset *ds UNUSED, - struct cmd_examine *p UNUSED, void *aux UNUSED) + struct cmd_examine *p UNUSED, void *aux UNUSED) { - 
sbc_percentile = 1; - lex_match (lexer, '='); lex_match (lexer, '('); @@@ -744,12 -496,11 +745,12 @@@ /* TOTAL and NOTOTAL are simple, mutually exclusive flags */ static int -xmn_custom_total (struct lexer *lexer UNUSED, struct dataset *ds UNUSED, struct cmd_examine *p, void *aux UNUSED) +xmn_custom_total (struct lexer *lexer UNUSED, struct dataset *ds UNUSED, + struct cmd_examine *p, void *aux UNUSED) { if ( p->sbc_nototal ) { - msg (SE, _ ("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL"); + msg (SE, _("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL"); return 0; } @@@ -762,7 -513,7 +763,7 @@@ xmn_custom_nototal (struct lexer *lexe { if ( p->sbc_total ) { - msg (SE, _ ("%s and %s are mutually exclusive"),"TOTAL","NOTOTAL"); + msg (SE, _("%s and %s are mutually exclusive"), "TOTAL", "NOTOTAL"); return 0; } @@@ -774,21 -525,19 +775,21 @@@ /* Parser for the variables sub command Returns 1 on success */ static int -xmn_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_examine *cmd, void *aux UNUSED) +xmn_custom_variables (struct lexer *lexer, struct dataset *ds, + struct cmd_examine *cmd, + void *aux UNUSED) { const struct dictionary *dict = dataset_dict (ds); lex_match (lexer, '='); if ( (lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokid (lexer)) == NULL) - && lex_token (lexer) != T_ALL) + && lex_token (lexer) != T_ALL) { return 2; } if (!parse_variables_const (lexer, dict, &dependent_vars, &n_dependent_vars, - PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) ) + PV_NO_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH) ) { free (dependent_vars); return 0; @@@ -796,15 -545,16 +797,15 @@@ assert (n_dependent_vars); - totals = xnmalloc (n_dependent_vars, sizeof *totals); if ( lex_match (lexer, T_BY)) { int success ; success = examine_parse_independent_vars (lexer, dict, cmd); - if ( success != 1 ) { - free (dependent_vars); - free (totals) ; - } + if ( success != 1 ) + { + free (dependent_vars); + } return success; } @@@ -815,44 -565,47 +816,44 @@@ /* Parse the clause specifying the factors */ static int -examine_parse_independent_vars (struct lexer *lexer, const struct dictionary *dict, struct cmd_examine *cmd) +examine_parse_independent_vars (struct lexer *lexer, + const struct dictionary *dict, + struct cmd_examine *cmd) { int success; - struct factor *sf = xmalloc (sizeof *sf); + struct xfactor *sf = xmalloc (sizeof *sf); - if ( (lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokid (lexer)) == NULL) - && lex_token (lexer) != T_ALL) + ll_init (&sf->result_list); + + if ( (lex_token (lexer) != T_ID || + dict_lookup_var (dict, lex_tokid (lexer)) == NULL) + && lex_token (lexer) != T_ALL) { free ( sf ) ; return 2; } - sf->indep_var[0] = parse_variable (lexer, dict); - sf->indep_var[1] = 0; + sf->indep_var[1] = NULL; if ( lex_token (lexer) == T_BY ) { - lex_match (lexer, T_BY); - if ( (lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokid (lexer)) == NULL) - && lex_token (lexer) != T_ALL) + if ( (lex_token (lexer) != T_ID || + dict_lookup_var (dict, lex_tokid (lexer)) == NULL) + && lex_token (lexer) != T_ALL) { - free ( sf ) ; + free (sf); return 2; } sf->indep_var[1] = parse_variable (lexer, dict); + ll_push_tail (&factor_list, &sf->ll); } - - - sf->fstats = hsh_create (4, - (hsh_compare_func *) factor_statistics_compare, - (hsh_hash_func *) factor_statistics_hash, - (hsh_free_func *) factor_statistics_free, - 0); - - sf->next = factors; - factors = sf; + else + ll_push_tail (&factor_list, &sf->ll); lex_match (lexer, ','); @@@ -867,375 -620,339 
+868,379 @@@ return success; } +static void +examine_group (struct cmd_examine *cmd, struct casereader *reader, int level, + const struct dictionary *dict, struct xfactor *factor) +{ + struct ccase *c; + const struct variable *wv = dict_get_weight (dict); + int v; + int n_extrema = 1; + struct factor_result *result = xzalloc (sizeof (*result)); + + result->metrics = xcalloc (n_dependent_vars, sizeof (*result->metrics)); + if ( cmd->a_statistics[XMN_ST_EXTREME] ) + n_extrema = cmd->st_n; -static void populate_percentiles (struct tab_table *tbl, int col, int row, - const struct metrics *m); + c = casereader_peek (reader, 0); + if (c != NULL) + { + if ( level > 0) + { + result->value[0] = + value_dup (case_data (c, factor->indep_var[0]), + var_get_width (factor->indep_var[0])); + + if ( level > 1) + result->value[1] = + value_dup (case_data (c, factor->indep_var[1]), + var_get_width (factor->indep_var[1])); + } + case_unref (c); + } -static void populate_descriptives (struct tab_table *t, int col, int row, - const struct variable *, - const struct metrics *fs); + for (v = 0; v < n_dependent_vars; ++v) + { + struct casewriter *writer; + struct casereader *input = casereader_clone (reader); + + result->metrics[v].moments = moments1_create (MOMENT_KURTOSIS); + result->metrics[v].minima = extrema_create (n_extrema, EXTREME_MINIMA); + result->metrics[v].maxima = extrema_create (n_extrema, EXTREME_MAXIMA); + result->metrics[v].cmin = DBL_MAX; + + if (cmd->a_statistics[XMN_ST_DESCRIPTIVES] || + cmd->a_plot[XMN_PLT_BOXPLOT] || + cmd->a_plot[XMN_PLT_NPPLOT] || + cmd->sbc_percentiles) + { + /* In this case, we need to sort the data, so we create a sorting + casewriter */ + struct subcase up_ordering; + subcase_init_var (&up_ordering, dependent_vars[v], SC_ASCEND); + writer = sort_create_writer (&up_ordering, + casereader_get_value_cnt (reader)); + subcase_destroy (&up_ordering); + } + else + { + /* but in this case, sorting is unnecessary, so an ordinary + casewriter is sufficient */ + writer = + autopaging_writer_create (casereader_get_value_cnt (reader)); + } -static void populate_extremes (struct tab_table *t, int col, int row, int n, - const struct variable *var, - const struct metrics *m); -static void populate_summary (struct tab_table *t, int col, int row, - const struct dictionary *dict, - const struct metrics *m); + /* Sort or just iterate, whilst calculating moments etc */ + while ((c = casereader_read (input)) != NULL) + { + const casenumber loc = + case_data_idx (c, casereader_get_value_cnt (reader) - 1)->f; + const double weight = wv ? case_data (c, wv)->f : 1.0; + const union value *value = case_data (c, dependent_vars[v]); + if (weight != SYSMIS) + minimize (&result->metrics[v].cmin, weight); + moments1_add (result->metrics[v].moments, + value->f, + weight); -/* Perform calculations for the sub factors */ -void -factor_calc (const struct ccase *c, int case_no, double weight, - bool case_missing) -{ - size_t v; - struct factor *fctr = factors; + result->metrics[v].n += weight; - while ( fctr) - { - struct factor_statistics **foo ; - union value *indep_vals[2] ; + if ( ! 
var_is_value_missing (dependent_vars[v], value, MV_ANY) ) + result->metrics[v].n_valid += weight; - indep_vals[0] = value_dup ( - case_data (c, fctr->indep_var[0]), - var_get_width (fctr->indep_var[0]) - ); + extrema_add (result->metrics[v].maxima, + value->f, + weight, + loc); - if ( fctr->indep_var[1] ) - indep_vals[1] = value_dup ( - case_data (c, fctr->indep_var[1]), - var_get_width (fctr->indep_var[1]) - ); - else - { - const union value sm = {SYSMIS}; - indep_vals[1] = value_dup (&sm, 0); + extrema_add (result->metrics[v].minima, + value->f, + weight, + loc); + + casewriter_write (writer, c); } + casereader_destroy (input); + result->metrics[v].up_reader = casewriter_make_reader (writer); + } - assert (fctr->fstats); + /* If percentiles or descriptives have been requested, then a + second pass through the data (which has now been sorted) + is necessary */ + if ( cmd->a_statistics[XMN_ST_DESCRIPTIVES] || + cmd->a_plot[XMN_PLT_BOXPLOT] || + cmd->a_plot[XMN_PLT_NPPLOT] || + cmd->sbc_percentiles) + { + for (v = 0; v < n_dependent_vars; ++v) + { + int i; + int n_os; + struct order_stats **os ; + struct factor_metrics *metric = &result->metrics[v]; - foo = ( struct factor_statistics ** ) - hsh_probe (fctr->fstats, (void *) &indep_vals); + metric->n_ptiles = percentile_list.n_data; - if ( !*foo ) - { + metric->ptl = xcalloc (metric->n_ptiles, + sizeof (struct percentile *)); - *foo = create_factor_statistics (n_dependent_vars, - indep_vals[0], - indep_vals[1]); + metric->quartiles = xcalloc (3, sizeof (*metric->quartiles)); - for ( v = 0 ; v < n_dependent_vars ; ++v ) + for (i = 0 ; i < metric->n_ptiles; ++i) { - metrics_precalc ( & (*foo)->m[v] ); + metric->ptl[i] = (struct percentile *) + percentile_create (percentile_list.data[i] / 100.0, metric->n_valid); + + if ( percentile_list.data[i] == 25) + metric->quartiles[0] = metric->ptl[i]; + else if ( percentile_list.data[i] == 50) + metric->quartiles[1] = metric->ptl[i]; + else if ( percentile_list.data[i] == 75) + metric->quartiles[2] = metric->ptl[i]; } - } - else - { - free (indep_vals[0]); - free (indep_vals[1]); - } + metric->tukey_hinges = tukey_hinges_create (metric->n, metric->cmin); + metric->trimmed_mean = trimmed_mean_create (metric->n, 0.05); - for ( v = 0 ; v < n_dependent_vars ; ++v ) - { - const struct variable *var = dependent_vars[v]; - union value *val = value_dup ( - case_data (c, var), - var_get_width (var) - ); + n_os = metric->n_ptiles + 2; - if (case_missing || var_is_value_missing (var, val, exclude_values)) + if ( cmd->a_plot[XMN_PLT_NPPLOT] ) { - free (val); - val = NULL; + metric->np = np_create (metric->moments); + n_os ++; } - metrics_calc ( & (*foo)->m[v], val, weight, case_no); + os = xcalloc (sizeof (struct order_stats *), n_os); - free (val); - } + for (i = 0 ; i < metric->n_ptiles ; ++i ) + { + os[i] = (struct order_stats *) metric->ptl[i]; + } - fctr = fctr->next; - } -} + os[i] = (struct order_stats *) metric->tukey_hinges; + os[i+1] = (struct order_stats *) metric->trimmed_mean; -static void -run_examine (struct cmd_examine *cmd, struct casereader *input, - struct dataset *ds) -{ - struct dictionary *dict = dataset_dict (ds); - casenumber case_no; - struct ccase c; - int v; - bool ok; + if (cmd->a_plot[XMN_PLT_NPPLOT]) + os[i+2] = metric->np; - struct factor *fctr; - - if (!casereader_peek (input, 0, &c)) - { - casereader_destroy (input); - return; + order_stats_accumulate (os, n_os, + casereader_clone (metric->up_reader), + wv, dependent_vars[v], MV_ANY); + free (os); + } } - output_split_file_values 
(ds, &c); - case_destroy (&c); - - input = casereader_create_filter_weight (input, dict, NULL, NULL); - input = casereader_create_counter (input, &case_no, 0); - /* Make sure we haven't got rubbish left over from a - previous split. */ - fctr = factors; - while (fctr) + /* FIXME: Do this in the above loop */ + if ( cmd->a_plot[XMN_PLT_HISTOGRAM] ) { - struct factor *next = fctr->next; + struct ccase *c; + struct casereader *input = casereader_clone (reader); - hsh_clear (fctr->fstats); + for (v = 0; v < n_dependent_vars; ++v) + { + const struct extremum *max, *min; + struct factor_metrics *metric = &result->metrics[v]; - fctr->fs = 0; + const struct ll_list *max_list = + extrema_list (result->metrics[v].maxima); - fctr = next; - } + const struct ll_list *min_list = + extrema_list (result->metrics[v].minima); - for ( v = 0 ; v < n_dependent_vars ; ++v ) - metrics_precalc (&totals[v]); + if ( ll_is_empty (max_list)) + { + msg (MW, _("Not creating plot because data set is empty.")); + continue; + } - for (; casereader_read (input, &c); case_destroy (&c)) - { - bool case_missing = false; - const double weight = dict_get_case_weight (dict, &c, NULL); + assert (! ll_is_empty (min_list)); - if ( cmd->miss == XMN_LISTWISE ) - { - for ( v = 0 ; v < n_dependent_vars ; ++v ) - { - const struct variable *var = dependent_vars[v]; - union value *val = value_dup ( - case_data (&c, var), - var_get_width (var) - ); + max = (const struct extremum *) + ll_data (ll_head(max_list), struct extremum, ll); - if ( var_is_value_missing (var, val, exclude_values)) - case_missing = true; + min = (const struct extremum *) + ll_data (ll_head (min_list), struct extremum, ll); - free (val); - } + metric->histogram = histogram_create (10, min->value, max->value); } - for ( v = 0 ; v < n_dependent_vars ; ++v ) + while ((c = casereader_read (input)) != NULL) { - const struct variable *var = dependent_vars[v]; - union value *val = value_dup ( - case_data (&c, var), - var_get_width (var) - ); - - if ( var_is_value_missing (var, val, exclude_values) - || case_missing ) + const double weight = wv ? 
case_data (c, wv)->f : 1.0; + + for (v = 0; v < n_dependent_vars; ++v) { - free (val) ; - val = NULL; + struct factor_metrics *metric = &result->metrics[v]; + if ( metric->histogram) + histogram_add ((struct histogram *) metric->histogram, + case_data (c, dependent_vars[v])->f, weight); } - - metrics_calc (&totals[v], val, weight, case_no); - - free (val); + case_unref (c); } - - factor_calc (&c, case_no, weight, case_missing); + casereader_destroy (input); } - ok = casereader_destroy (input); - for ( v = 0 ; v < n_dependent_vars ; ++v) + /* In this case, a third iteration is required */ + if (cmd->a_plot[XMN_PLT_BOXPLOT]) { - fctr = factors; - while ( fctr ) + for (v = 0; v < n_dependent_vars; ++v) { - struct hsh_iterator hi; - struct factor_statistics *fs; + struct factor_metrics *metric = &result->metrics[v]; + + metric->box_whisker = + box_whisker_create ((struct tukey_hinges *) metric->tukey_hinges, + cmd->v_id, + casereader_get_value_cnt (metric->up_reader) + - 1); + + order_stats_accumulate ((struct order_stats **) &metric->box_whisker, + 1, + casereader_clone (metric->up_reader), + wv, dependent_vars[v], MV_ANY); + } + } - for ( fs = hsh_first (fctr->fstats, &hi); - fs != 0 ; - fs = hsh_next (fctr->fstats, &hi)) - { + ll_push_tail (&factor->result_list, &result->ll); + casereader_destroy (reader); +} - fs->m[v].ptile_hash = list_to_ptile_hash (&percentile_list); - fs->m[v].ptile_alg = percentile_algorithm; - metrics_postcalc (&fs->m[v]); - } - fctr = fctr->next; - } +static void +run_examine (struct cmd_examine *cmd, struct casereader *input, + struct dataset *ds) +{ + struct ll *ll; + const struct dictionary *dict = dataset_dict (ds); + struct ccase *c; + struct casereader *level0 = casereader_clone (input); - totals[v].ptile_hash = list_to_ptile_hash (&percentile_list); - totals[v].ptile_alg = percentile_algorithm; - metrics_postcalc (&totals[v]); + c = casereader_peek (input, 0); + if (c == NULL) + { + casereader_destroy (input); + return; } + output_split_file_values (ds, c); + case_unref (c); - /* Make sure that the combination of factors are complete */ - - fctr = factors; - while ( fctr ) - { - struct hsh_iterator hi; - struct hsh_iterator hi0; - struct hsh_iterator hi1; - struct factor_statistics *fs; + ll_init (&level0_factor.result_list); - struct hsh_table *idh0 = NULL; - struct hsh_table *idh1 = NULL; - union value **val0; - union value **val1; + examine_group (cmd, level0, 0, dict, &level0_factor); - idh0 = hsh_create (4, (hsh_compare_func *) compare_ptr_values, - (hsh_hash_func *) hash_ptr_value, - 0,0); + for (ll = ll_head (&factor_list); + ll != ll_null (&factor_list); + ll = ll_next (ll)) + { + struct xfactor *factor = ll_data (ll, struct xfactor, ll); - idh1 = hsh_create (4, (hsh_compare_func *) compare_ptr_values, - (hsh_hash_func *) hash_ptr_value, - 0,0); + struct casereader *group = NULL; + struct casereader *level1; + struct casegrouper *grouper1 = NULL; + level1 = casereader_clone (input); + level1 = sort_execute_1var (level1, factor->indep_var[0]); + grouper1 = casegrouper_create_vars (level1, &factor->indep_var[0], 1); - for ( fs = hsh_first (fctr->fstats, &hi); - fs != 0 ; - fs = hsh_next (fctr->fstats, &hi)) + while (casegrouper_get_next_group (grouper1, &group)) { - hsh_insert (idh0, &fs->id[0]); - hsh_insert (idh1, &fs->id[1]); - } + struct casereader *group_copy = casereader_clone (group); - /* Ensure that the factors combination is complete */ - for ( val0 = hsh_first (idh0, &hi0); - val0 != 0 ; - val0 = hsh_next (idh0, &hi0)) - { - for ( val1 = 
hsh_first (idh1, &hi1); - val1 != 0 ; - val1 = hsh_next (idh1, &hi1)) + if ( !factor->indep_var[1]) + examine_group (cmd, group_copy, 1, dict, factor); + else { - struct factor_statistics **ffs; - union value *key[2]; - key[0] = *val0; - key[1] = *val1; - - ffs = (struct factor_statistics **) - hsh_probe (fctr->fstats, &key ); - - if ( !*ffs ) { - size_t i; - (*ffs) = create_factor_statistics (n_dependent_vars, - key[0], key[1]); - for ( i = 0 ; i < n_dependent_vars ; ++i ) - metrics_precalc ( & (*ffs)->m[i]); - } - } - } + int n_groups = 0; + struct casereader *group2 = NULL; + struct casegrouper *grouper2 = NULL; - hsh_destroy (idh0); - hsh_destroy (idh1); + group_copy = sort_execute_1var (group_copy, + factor->indep_var[1]); - fctr->fs = (struct factor_statistics **) hsh_sort_copy (fctr->fstats); + grouper2 = casegrouper_create_vars (group_copy, + &factor->indep_var[1], 1); + + while (casegrouper_get_next_group (grouper2, &group2)) + { + examine_group (cmd, group2, 2, dict, factor); + n_groups++; + } + casegrouper_destroy (grouper2); + } - fctr = fctr->next; + casereader_destroy (group); + } + casegrouper_destroy (grouper1); } - if (ok) - output_examine (dict); + casereader_destroy (input); - output_examine (); ++ output_examine (dict); + + factor_destroy (&level0_factor); + + { + struct ll *ll; + for (ll = ll_head (&factor_list); + ll != ll_null (&factor_list); + ll = ll_next (ll)) + { + struct xfactor *f = ll_data (ll, struct xfactor, ll); + factor_destroy (f); + } + } - if ( totals ) - { - size_t i; - for ( i = 0 ; i < n_dependent_vars ; ++i ) - { - metrics_destroy (&totals[i]); - } - } } static void show_summary (const struct variable **dependent_var, int n_dep_var, + const struct dictionary *dict, - const struct factor *fctr) + const struct xfactor *fctr) { ++ const struct variable *wv = dict_get_weight (dict); ++ const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0; ++ static const char *subtitle[]= { - N_ ("Valid"), - N_ ("Missing"), - N_ ("Total") + N_("Valid"), + N_("Missing"), + N_("Total") }; - int i; - int heading_columns ; + int v, j; + int heading_columns = 1; int n_cols; const int heading_rows = 3; struct tab_table *tbl; int n_rows ; - int n_factors = 1; + n_rows = n_dep_var; + + assert (fctr); - if ( fctr ) + if ( fctr->indep_var[0] ) { heading_columns = 2; - n_factors = hsh_count (fctr->fstats); - n_rows = n_dep_var * n_factors ; if ( fctr->indep_var[1] ) - heading_columns = 3; - } - else - { - heading_columns = 1; - n_rows = n_dep_var; + { + heading_columns = 3; + } } + n_rows *= ll_count (&fctr->result_list); n_rows += heading_rows; n_cols = heading_columns + 6; - tbl = tab_create (n_cols,n_rows,0); + tbl = tab_create (n_cols, n_rows, 0); tab_headers (tbl, heading_columns, 0, heading_rows, 0); tab_dim (tbl, tab_natural_dimensions); @@@ -1262,12 -979,12 +1267,12 @@@ tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1); - tab_title (tbl, _ ("Case Processing Summary")); + tab_title (tbl, _("Case Processing Summary")); tab_joint_text (tbl, heading_columns, 0, - n_cols -1, 0, - TAB_CENTER | TAT_TITLE, - _ ("Cases")); + n_cols -1, 0, + TAB_CENTER | TAT_TITLE, + _("Cases")); /* Remove lines ... 
*/ tab_box (tbl, @@@ -1276,28 -993,28 +1281,28 @@@ heading_columns, 0, n_cols - 1, 0); - for ( i = 0 ; i < 3 ; ++i ) + for (j = 0 ; j < 3 ; ++j) { - tab_text (tbl, heading_columns + i * 2 , 2, TAB_CENTER | TAT_TITLE, - _ ("N")); + tab_text (tbl, heading_columns + j * 2 , 2, TAB_CENTER | TAT_TITLE, + _("N")); - tab_text (tbl, heading_columns + i * 2 + 1, 2, TAB_CENTER | TAT_TITLE, - _ ("Percent")); + tab_text (tbl, heading_columns + j * 2 + 1, 2, TAB_CENTER | TAT_TITLE, + _("Percent")); - tab_joint_text (tbl, heading_columns + i*2 , 1, - heading_columns + i * 2 + 1, 1, - TAB_CENTER | TAT_TITLE, - subtitle[i]); + tab_joint_text (tbl, heading_columns + j * 2 , 1, + heading_columns + j * 2 + 1, 1, + TAB_CENTER | TAT_TITLE, + subtitle[j]); tab_box (tbl, -1, -1, TAL_0, TAL_0, - heading_columns + i * 2, 1, - heading_columns + i * 2 + 1, 1); + heading_columns + j * 2, 1, + heading_columns + j * 2 + 1, 1); } /* Titles for the independent variables */ - if ( fctr ) + if ( fctr->indep_var[0] ) { tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, var_to_string (fctr->indep_var[0])); @@@ -1309,883 -1026,1283 +1314,886 @@@ } } - - for ( i = 0 ; i < n_dep_var ; ++i ) + for (v = 0 ; v < n_dep_var ; ++v) { - int n_factors = 1; - if ( fctr ) - n_factors = hsh_count (fctr->fstats); + int j = 0; + struct ll *ll; + union value *last_value = NULL; - if ( i > 0 ) - tab_hline (tbl, TAL_1, 0, n_cols -1 , i * n_factors + heading_rows); + if ( v > 0 ) + tab_hline (tbl, TAL_1, 0, n_cols -1 , + v * ll_count (&fctr->result_list) + + heading_rows); tab_text (tbl, - 0, i * n_factors + heading_rows, + 0, + v * ll_count (&fctr->result_list) + heading_rows, TAB_LEFT | TAT_TITLE, - var_to_string (dependent_var[i]) + var_to_string (dependent_var[v]) ); - if ( !fctr ) - populate_summary (tbl, heading_columns, - (i * n_factors) + heading_rows, - dict, - &totals[i]); - else + + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); ll = ll_next (ll)) { - struct factor_statistics **fs = fctr->fs; - int count = 0 ; - const union value *prev = NULL; + double n; + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); - while (*fs) + if ( fctr->indep_var[0] ) { - if ( !prev || - 0 != compare_values (prev, (*fs)->id[0], - var_get_width (fctr->indep_var[0]))) + + if ( last_value == NULL || + compare_values_short (last_value, result->value[0], + fctr->indep_var[0])) { - struct string vstr; - ds_init_empty (&vstr); - var_append_value_name (fctr->indep_var[0], - (*fs)->id[0], &vstr); - - tab_text (tbl, - 1, - (i * n_factors ) + count + - heading_rows, + struct string str; + + last_value = result->value[0]; + ds_init_empty (&str); + + var_append_value_name (fctr->indep_var[0], result->value[0], + &str); + + tab_text (tbl, 1, + heading_rows + j + + v * ll_count (&fctr->result_list), TAB_LEFT | TAT_TITLE, - ds_cstr (&vstr) - ); + ds_cstr (&str)); - ds_destroy (&vstr); + ds_destroy (&str); - if (fctr->indep_var[1] && count > 0 ) + if ( fctr->indep_var[1] && j > 0) tab_hline (tbl, TAL_1, 1, n_cols - 1, - (i * n_factors ) + count + heading_rows); + heading_rows + j + + v * ll_count (&fctr->result_list)); } - prev = (*fs)->id[0]; - if ( fctr->indep_var[1]) { - struct string vstr; - ds_init_empty (&vstr); + struct string str; + + ds_init_empty (&str); + var_append_value_name (fctr->indep_var[1], - (*fs)->id[1], &vstr); - tab_text (tbl, - 2, - (i * n_factors ) + count + - heading_rows, + result->value[1], &str); + + tab_text (tbl, 2, + heading_rows + j + + v * ll_count (&fctr->result_list), 
TAB_LEFT | TAT_TITLE, - ds_cstr (&vstr) - ); - ds_destroy (&vstr); + ds_cstr (&str)); + + ds_destroy (&str); } + } - populate_summary (tbl, heading_columns, - (i * n_factors) + count - + heading_rows, - dict, - & (*fs)->m[i]); - count++ ; - fs++; - } + moments1_calculate (result->metrics[v].moments, + &n, &result->metrics[v].mean, + &result->metrics[v].variance, + &result->metrics[v].skewness, + &result->metrics[v].kurtosis); + + result->metrics[v].se_mean = sqrt (result->metrics[v].variance / n) ; + + /* Total Valid */ - tab_float (tbl, heading_columns, ++ tab_double (tbl, heading_columns, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_LEFT, - n, 8, 0); ++ n, wfmt); + + tab_text (tbl, heading_columns + 1, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_RIGHT | TAT_PRINTF, + "%g%%", n * 100.0 / result->metrics[v].n); + + /* Total Missing */ - tab_float (tbl, heading_columns + 2, ++ tab_double (tbl, heading_columns + 2, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_LEFT, + result->metrics[v].n - n, - 8, 0); ++ wfmt); + + tab_text (tbl, heading_columns + 3, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_RIGHT | TAT_PRINTF, + "%g%%", + (result->metrics[v].n - n) * 100.0 / result->metrics[v].n + ); + + /* Total Valid + Missing */ - tab_float (tbl, heading_columns + 4, ++ tab_double (tbl, heading_columns + 4, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_LEFT, + result->metrics[v].n, - 8, 0); ++ wfmt); + + tab_text (tbl, heading_columns + 5, + heading_rows + j + v * ll_count (&fctr->result_list), + TAB_RIGHT | TAT_PRINTF, + "%g%%", + (result->metrics[v].n) * 100.0 / result->metrics[v].n + ); + + ++j; } } - tab_submit (tbl); -} - -static void -populate_summary (struct tab_table *t, int col, int row, - const struct dictionary *dict, - const struct metrics *m) - -{ - const double total = m->n + m->n_missing ; - - const struct variable *wv = dict_get_weight (dict); - const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0; - - tab_double (t, col + 0, row + 0, TAB_RIGHT, m->n, wfmt); - - tab_double (t, col + 2, row + 0, TAB_RIGHT, m->n_missing, wfmt); - - tab_double (t, col + 4, row + 0, TAB_RIGHT, total, wfmt); - - - if ( total > 0 ) { - tab_text (t, col + 1, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", - 100.0 * m->n / total ); - - tab_text (t, col + 3, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", - 100.0 * m->n_missing / total ); - - /* This seems a bit pointless !!! 
*/ - tab_text (t, col + 5, row + 0, TAB_RIGHT | TAT_PRINTF, "%2.0f%%", - 100.0 * total / total ); - } + tab_submit (tbl); } - +#define DESCRIPTIVE_ROWS 13 static void -show_extremes (const struct variable **dependent_var, int n_dep_var, - const struct factor *fctr, - int n_extremities) +show_descriptives (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr) { - int i; - int heading_columns ; + int v; + int heading_columns = 3; int n_cols; const int heading_rows = 1; struct tab_table *tbl; - - - int n_factors = 1; int n_rows ; + n_rows = n_dep_var; - if ( fctr ) - { - heading_columns = 2; - n_factors = hsh_count (fctr->fstats); + assert (fctr); - n_rows = n_dep_var * 2 * n_extremities * n_factors; + if ( fctr->indep_var[0] ) + { + heading_columns = 4; if ( fctr->indep_var[1] ) - heading_columns = 3; - } - else - { - heading_columns = 1; - n_rows = n_dep_var * 2 * n_extremities; + { + heading_columns = 5; + } } + n_rows *= ll_count (&fctr->result_list) * DESCRIPTIVE_ROWS; n_rows += heading_rows; - heading_columns += 2; n_cols = heading_columns + 2; - tbl = tab_create (n_cols,n_rows,0); + tbl = tab_create (n_cols, n_rows, 0); tab_headers (tbl, heading_columns, 0, heading_rows, 0); tab_dim (tbl, tab_natural_dimensions); - /* Outline the box, No internal lines*/ + /* Outline the box */ tab_box (tbl, TAL_2, TAL_2, -1, -1, 0, 0, n_cols - 1, n_rows - 1); - tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); - tab_title (tbl, _ ("Extreme Values")); + tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); + tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows ); - tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows -1); - tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows -1); + tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1); - if ( fctr ) - { - tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string (fctr->indep_var[0])); - if ( fctr->indep_var[1] ) - tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string (fctr->indep_var[1])); - } + if ( fctr->indep_var[0]) + tab_text (tbl, 1, 0, TAT_TITLE, var_to_string (fctr->indep_var[0])); - tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _ ("Value")); - tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _ ("Case Number")); + if ( fctr->indep_var[1]) + tab_text (tbl, 2, 0, TAT_TITLE, var_to_string (fctr->indep_var[1])); - for ( i = 0 ; i < n_dep_var ; ++i ) + for (v = 0 ; v < n_dep_var ; ++v ) { + struct ll *ll; + int i = 0; - if ( i > 0 ) - tab_hline (tbl, TAL_1, 0, n_cols -1 , - i * 2 * n_extremities * n_factors + heading_rows); + const int row_var_start = + v * DESCRIPTIVE_ROWS * ll_count(&fctr->result_list); - tab_text (tbl, 0, - i * 2 * n_extremities * n_factors + heading_rows, + tab_text (tbl, + 0, + heading_rows + row_var_start, TAB_LEFT | TAT_TITLE, - var_to_string (dependent_var[i]) + var_to_string (dependent_var[v]) ); - - if ( !fctr ) - populate_extremes (tbl, heading_columns - 2, - i * 2 * n_extremities * n_factors + heading_rows, - n_extremities, - dependent_var[i], - &totals[i]); - else + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll)) { - struct factor_statistics **fs = fctr->fs; - int count = 0 ; - const union value *prev = NULL; - - while (*fs) - { - const int row = heading_rows + ( 2 * n_extremities ) * - ( ( i * n_factors ) + count ); - - - if ( !prev || 0 != compare_values (prev, (*fs)->id[0], - var_get_width (fctr->indep_var[0]))) - { - struct string vstr; - ds_init_empty (&vstr); - var_append_value_name 
(fctr->indep_var[0], - (*fs)->id[0], &vstr); - - if ( count > 0 ) - tab_hline (tbl, TAL_1, 1, n_cols - 1, row); - - tab_text (tbl, - 1, row, - TAB_LEFT | TAT_TITLE, - ds_cstr (&vstr) - ); - - ds_destroy (&vstr); - } - - prev = (*fs)->id[0]; - - if (fctr->indep_var[1] && count > 0 ) - tab_hline (tbl, TAL_1, 2, n_cols - 1, row); - - if ( fctr->indep_var[1]) - { - struct string vstr; - ds_init_empty (&vstr); - var_append_value_name (fctr->indep_var[1], (*fs)->id[1], &vstr); - - tab_text (tbl, 2, row, - TAB_LEFT | TAT_TITLE, - ds_cstr (&vstr) - ); - - ds_destroy (&vstr); - } + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); - populate_extremes (tbl, heading_columns - 2, - row, n_extremities, - dependent_var[i], - & (*fs)->m[i]); + const double t = + gsl_cdf_tdist_Qinv ((1 - cmd.n_cinterval[0] / 100.0) / 2.0, + result->metrics[v].n - 1); - count++ ; - fs++; + if ( i > 0 || v > 0 ) + { + const int left_col = (i == 0) ? 0 : 1; + tab_hline (tbl, TAL_1, left_col, n_cols - 1, + heading_rows + row_var_start + i * DESCRIPTIVE_ROWS); } - } - } - - tab_submit (tbl); -} - - -/* Fill in the extremities table */ -static void -populate_extremes (struct tab_table *t, - int col, int row, int n, - const struct variable *var, - const struct metrics *m) -{ - int extremity; - int idx=0; - - tab_text (t, col, row, - TAB_RIGHT | TAT_TITLE , - _ ("Highest") - ); - - tab_text (t, col, row + n , - TAB_RIGHT | TAT_TITLE , - _ ("Lowest") - ); - - - tab_hline (t, TAL_1, col, col + 3, row + n ); - - for (extremity = 0; extremity < n ; ++extremity ) - { - /* Highest */ - tab_fixed (t, col + 1, row + extremity, - TAB_RIGHT, - extremity + 1, 8, 0); - - - /* Lowest */ - tab_fixed (t, col + 1, row + extremity + n, - TAB_RIGHT, - extremity + 1, 8, 0); - - } - - - /* Lowest */ - for (idx = 0, extremity = 0; extremity < n && idx < m->n_data ; ++idx ) - { - int j; - const struct weighted_value *wv = m->wvp[idx]; - struct case_node *cn = wv->case_nos; - - - for (j = 0 ; j < wv->w ; ++j ) - { - if ( extremity + j >= n ) - break ; - - tab_value (t, col + 3, row + extremity + j + n, - TAB_RIGHT, - &wv->v, var_get_print_format (var)); + if ( fctr->indep_var[0]) + { + struct string vstr; + ds_init_empty (&vstr); + var_append_value_name (fctr->indep_var[0], + result->value[0], &vstr); + + tab_text (tbl, 1, + heading_rows + row_var_start + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + ds_cstr (&vstr) + ); - tab_fixed (t, col + 2, row + extremity + j + n, - TAB_RIGHT, - cn->num, 10, 0); + ds_destroy (&vstr); + } - if ( cn->next ) - cn = cn->next; + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Mean")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS, + TAB_LEFT | TAT_PRINTF, + _("%g%% Confidence Interval for Mean"), + cmd.n_cinterval[0]); + + tab_text (tbl, n_cols - 3, + heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Lower Bound")); + + tab_text (tbl, n_cols - 3, + heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Upper Bound")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS, + TAB_LEFT | TAT_PRINTF, + _("5%% Trimmed Mean")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Median")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Variance")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 6 + i * 
DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Std. Deviation")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Minimum")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Maximum")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Range")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Interquartile Range")); + + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Skewness")); + + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS, + TAB_LEFT, + _("Kurtosis")); + + + /* Now the statistics ... */ + - tab_float (tbl, n_cols - 2, ++ tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].mean, - 8, 2); ++ NULL); + - tab_float (tbl, n_cols - 1, ++ tab_double (tbl, n_cols - 1, + heading_rows + row_var_start + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].se_mean, - 8, 3); ++ NULL); + + - tab_float (tbl, n_cols - 2, ++ tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 1 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].mean - t * + result->metrics[v].se_mean, - 8, 3); ++ NULL); + - tab_float (tbl, n_cols - 2, ++ tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 2 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].mean + t * + result->metrics[v].se_mean, - 8, 3); ++ NULL); + + - tab_float (tbl, n_cols - 2, ++ tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 3 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + trimmed_mean_calculate ((struct trimmed_mean *) result->metrics[v].trimmed_mean), - 8, 2); ++ NULL); + + - tab_float (tbl, n_cols - 2, ++ tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 4 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + percentile_calculate (result->metrics[v].quartiles[1], percentile_algorithm), - 8, 2); ++ NULL); + + - tab_float (tbl, n_cols - 2, ++ tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 5 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].variance, - 8, 3); ++ NULL); + - tab_float (tbl, n_cols - 2, ++ tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 6 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + sqrt (result->metrics[v].variance), - 8, 3); ++ NULL); + - tab_float (tbl, n_cols - 2, ++ tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 10 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + percentile_calculate (result->metrics[v].quartiles[2], + percentile_algorithm) - + percentile_calculate (result->metrics[v].quartiles[0], + percentile_algorithm), - 8, 2); ++ NULL); + + - tab_float (tbl, n_cols - 2, ++ tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].skewness, - 8, 3); ++ NULL); + - tab_float (tbl, n_cols - 2, ++ tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + result->metrics[v].kurtosis, - 8, 3); ++ NULL); + - tab_float (tbl, n_cols - 1, ++ tab_double (tbl, n_cols - 1, + heading_rows + row_var_start + 11 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + calc_seskew (result->metrics[v].n), - 8, 3); ++ NULL); + - tab_float (tbl, n_cols - 1, ++ tab_double (tbl, n_cols - 1, + heading_rows + row_var_start + 12 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + calc_sekurt (result->metrics[v].n), - 
8, 3); ++ NULL); + + { + struct extremum *minimum, *maximum ; + + struct ll *max_ll = ll_head (extrema_list (result->metrics[v].maxima)); + struct ll *min_ll = ll_head (extrema_list (result->metrics[v].minima)); + + maximum = ll_data (max_ll, struct extremum, ll); + minimum = ll_data (min_ll, struct extremum, ll); + - tab_float (tbl, n_cols - 2, ++ tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 7 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + minimum->value, - 8, 3); ++ NULL); + - tab_float (tbl, n_cols - 2, ++ tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 8 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + maximum->value, - 8, 3); ++ NULL); + - tab_float (tbl, n_cols - 2, ++ tab_double (tbl, n_cols - 2, + heading_rows + row_var_start + 9 + i * DESCRIPTIVE_ROWS, + TAB_CENTER, + maximum->value - minimum->value, - 8, 3); ++ NULL); + } } - - extremity += wv->w ; } + tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1); - /* Highest */ - for (idx = m->n_data - 1, extremity = 0; extremity < n && idx >= 0; --idx ) - { - int j; - const struct weighted_value *wv = m->wvp[idx]; - struct case_node *cn = wv->case_nos; - - for (j = 0 ; j < wv->w ; ++j ) - { - if ( extremity + j >= n ) - break ; - - tab_value (t, col + 3, row + extremity + j, - TAB_RIGHT, - &wv->v, var_get_print_format (var)); - - tab_fixed (t, col + 2, row + extremity + j, - TAB_RIGHT, - cn->num, 10, 0); + tab_title (tbl, _("Descriptives")); - if ( cn->next ) - cn = cn->next; + tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, + _("Statistic")); - } + tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, + _("Std. Error")); - extremity += wv->w ; - } + tab_submit (tbl); } -/* Show the descriptives table */ -void -show_descriptives (const struct variable **dependent_var, - int n_dep_var, - struct factor *fctr) + +static void +show_extremes (const struct variable **dependent_var, + int n_dep_var, + const struct xfactor *fctr) { - int i; - int heading_columns ; + int v; + int heading_columns = 3; int n_cols; - const int n_stat_rows = 13; - const int heading_rows = 1; - struct tab_table *tbl; - int n_factors = 1; int n_rows ; + n_rows = n_dep_var; - if ( fctr ) + assert (fctr); + + if ( fctr->indep_var[0] ) { heading_columns = 4; - n_factors = hsh_count (fctr->fstats); - - n_rows = n_dep_var * n_stat_rows * n_factors; if ( fctr->indep_var[1] ) - heading_columns = 5; - } - else - { - heading_columns = 3; - n_rows = n_dep_var * n_stat_rows; + { + heading_columns = 5; + } } + n_rows *= ll_count (&fctr->result_list) * cmd.st_n * 2; n_rows += heading_rows; n_cols = heading_columns + 2; - tbl = tab_create (n_cols, n_rows, 0); - - tab_headers (tbl, heading_columns + 1, 0, heading_rows, 0); + tab_headers (tbl, heading_columns, 0, heading_rows, 0); tab_dim (tbl, tab_natural_dimensions); - /* Outline the box and have no internal lines*/ + /* Outline the box */ tab_box (tbl, TAL_2, TAL_2, -1, -1, 0, 0, n_cols - 1, n_rows - 1); - tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); - tab_vline (tbl, TAL_1, heading_columns, 0, n_rows - 1); - tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows - 1); + tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); + tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows ); tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows - 1); - tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _ ("Statistic")); - tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _ ("Std. 
Error")); + if ( fctr->indep_var[0]) + tab_text (tbl, 1, 0, TAT_TITLE, var_to_string (fctr->indep_var[0])); - tab_title (tbl, _ ("Descriptives")); + if ( fctr->indep_var[1]) + tab_text (tbl, 2, 0, TAT_TITLE, var_to_string (fctr->indep_var[1])); - - for ( i = 0 ; i < n_dep_var ; ++i ) + for (v = 0 ; v < n_dep_var ; ++v ) { - const int row = heading_rows + i * n_stat_rows * n_factors ; - - if ( i > 0 ) - tab_hline (tbl, TAL_1, 0, n_cols - 1, row ); + struct ll *ll; + int i = 0; + const int row_var_start = v * cmd.st_n * 2 * ll_count(&fctr->result_list); - tab_text (tbl, 0, - i * n_stat_rows * n_factors + heading_rows, + tab_text (tbl, + 0, + heading_rows + row_var_start, TAB_LEFT | TAT_TITLE, - var_to_string (dependent_var[i]) + var_to_string (dependent_var[v]) ); - - if ( fctr ) + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll)) { - const union value *prev = NULL; + int e ; + struct ll *min_ll; + struct ll *max_ll; + const int row_result_start = i * cmd.st_n * 2; - struct factor_statistics **fs = fctr->fs; - int count = 0; + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); - tab_text (tbl, 1, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string (fctr->indep_var[0])); + if (i > 0 || v > 0) + tab_hline (tbl, TAL_1, 1, n_cols - 1, + heading_rows + row_var_start + row_result_start); + tab_hline (tbl, TAL_1, heading_columns - 2, n_cols - 1, + heading_rows + row_var_start + row_result_start + cmd.st_n); - if ( fctr->indep_var[1]) - tab_text (tbl, 2, heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string (fctr->indep_var[1])); - - while ( *fs ) + for ( e = 1; e <= cmd.st_n; ++e ) { - const int row = heading_rows + n_stat_rows * - ( ( i * n_factors ) + count ); - - - if ( !prev || 0 != compare_values (prev, (*fs)->id[0], - var_get_width (fctr->indep_var[0]))) - { - struct string vstr; - ds_init_empty (&vstr); - var_append_value_name (fctr->indep_var[0], - (*fs)->id[0], &vstr); - - if ( count > 0 ) - tab_hline (tbl, TAL_1, 1, n_cols - 1, row); - - tab_text (tbl, - 1, row, - TAB_LEFT | TAT_TITLE, - ds_cstr (&vstr) - ); - - ds_destroy (&vstr); - } + tab_text (tbl, n_cols - 3, + heading_rows + row_var_start + row_result_start + e - 1, + TAB_RIGHT | TAT_PRINTF, + _("%d"), e); + + tab_text (tbl, n_cols - 3, + heading_rows + row_var_start + row_result_start + cmd.st_n + e - 1, + TAB_RIGHT | TAT_PRINTF, + _("%d"), e); + } - prev = (*fs)->id[0]; - if (fctr->indep_var[1] && count > 0 ) - tab_hline (tbl, TAL_1, 2, n_cols - 1, row); + min_ll = ll_head (extrema_list (result->metrics[v].minima)); + for (e = 0; e < cmd.st_n;) + { + struct extremum *minimum = ll_data (min_ll, struct extremum, ll); + double weight = minimum->weight; - if ( fctr->indep_var[1]) + while (weight-- > 0 && e < cmd.st_n) { - tab_float (tbl, n_cols - 1, - struct string vstr; - ds_init_empty (&vstr); - var_append_value_name (fctr->indep_var[1], (*fs)->id[1], &vstr); - - tab_text (tbl, 2, row, - TAB_LEFT | TAT_TITLE, - ds_cstr (&vstr) - ); - - ds_destroy (&vstr); ++ tab_double (tbl, n_cols - 1, + heading_rows + row_var_start + row_result_start + cmd.st_n + e, + TAB_RIGHT, + minimum->value, - 8, 2); ++ NULL); + + - tab_float (tbl, n_cols - 2, - heading_rows + row_var_start + row_result_start + cmd.st_n + e, ++ tab_fixed (tbl, n_cols - 2, ++ heading_rows + row_var_start + ++ row_result_start + cmd.st_n + e, + TAB_RIGHT, + minimum->location, - 8, 0); ++ 10, 0); + ++e; } - populate_descriptives (tbl, heading_columns - 2, - row, - dependent_var[i], - & 
(*fs)->m[i]); - - count++ ; - fs++; + min_ll = ll_next (min_ll); } - } - - else - { - - populate_descriptives (tbl, heading_columns - 2, - i * n_stat_rows * n_factors + heading_rows, - dependent_var[i], - &totals[i]); - } - } - - tab_submit (tbl); -} - -/* Fill in the descriptives data */ -static void -populate_descriptives (struct tab_table *tbl, int col, int row, - const struct variable *var, - const struct metrics *m) -{ - const double t = gsl_cdf_tdist_Qinv ((1 - cmd.n_cinterval[0] / 100.0)/2.0, - m->n -1); - - tab_text (tbl, col, - row, - TAB_LEFT | TAT_TITLE, - _ ("Mean")); - - tab_double (tbl, col + 2, - row, - TAB_CENTER, - m->mean, - NULL); - - tab_double (tbl, col + 3, - row, - TAB_CENTER, - m->se_mean, - NULL); - - - tab_text (tbl, col, - row + 1, - TAB_LEFT | TAT_TITLE | TAT_PRINTF, - _ ("%g%% Confidence Interval for Mean"), cmd.n_cinterval[0]); - - - tab_text (tbl, col + 1, - row + 1, - TAB_LEFT | TAT_TITLE, - _ ("Lower Bound")); - - tab_double (tbl, col + 2, - row + 1, - TAB_CENTER, - m->mean - t * m->se_mean, - NULL); - - tab_text (tbl, col + 1, - row + 2, - TAB_LEFT | TAT_TITLE, - _ ("Upper Bound")); - - - tab_double (tbl, col + 2, - row + 2, - TAB_CENTER, - m->mean + t * m->se_mean, - NULL); - - tab_text (tbl, col, - row + 3, - TAB_LEFT | TAT_TITLE | TAT_PRINTF, - _ ("5%% Trimmed Mean")); - - tab_double (tbl, col + 2, - row + 3, - TAB_CENTER, - m->trimmed_mean, - NULL); - - tab_text (tbl, col, - row + 4, - TAB_LEFT | TAT_TITLE, - _ ("Median")); - - { - struct percentile *p; - double d = 50; - - p = hsh_find (m->ptile_hash, &d); - - assert (p); - - - tab_double (tbl, col + 2, - row + 4, - TAB_CENTER, - p->v, - NULL); - } - - - tab_text (tbl, col, - row + 5, - TAB_LEFT | TAT_TITLE, - _ ("Variance")); - - tab_double (tbl, col + 2, - row + 5, - TAB_CENTER, - m->var, - NULL); - - - tab_text (tbl, col, - row + 6, - TAB_LEFT | TAT_TITLE, - _ ("Std. 
Deviation")); - - - tab_double (tbl, col + 2, - row + 6, - TAB_CENTER, - m->stddev, - NULL); - - - tab_text (tbl, col, - row + 7, - TAB_LEFT | TAT_TITLE, - _ ("Minimum")); - - tab_double (tbl, col + 2, - row + 7, - TAB_CENTER, - m->min, var_get_print_format (var)); - - tab_text (tbl, col, - row + 8, - TAB_LEFT | TAT_TITLE, - _ ("Maximum")); - - tab_double (tbl, col + 2, - row + 8, - TAB_CENTER, - m->max, var_get_print_format (var)); - - tab_text (tbl, col, - row + 9, - TAB_LEFT | TAT_TITLE, - _ ("Range")); - - - tab_double (tbl, col + 2, - row + 9, - TAB_CENTER, - m->max - m->min, - NULL); - - tab_text (tbl, col, - row + 10, - TAB_LEFT | TAT_TITLE, - _ ("Interquartile Range")); - - { - struct percentile *p1; - struct percentile *p2; - - double d = 75; - p1 = hsh_find (m->ptile_hash, &d); - - d = 25; - p2 = hsh_find (m->ptile_hash, &d); - - assert (p1); - assert (p2); - - tab_double (tbl, col + 2, - row + 10, - TAB_CENTER, - p1->v - p2->v, - NULL); - } - - tab_text (tbl, col, - row + 11, - TAB_LEFT | TAT_TITLE, - _ ("Skewness")); - - - tab_double (tbl, col + 2, - row + 11, - TAB_CENTER, - m->skewness, - NULL); - - /* stderr of skewness */ - tab_double (tbl, col + 3, - row + 11, - TAB_CENTER, - calc_seskew (m->n), - NULL); - - tab_text (tbl, col, - row + 12, - TAB_LEFT | TAT_TITLE, - _ ("Kurtosis")); - - - tab_double (tbl, col + 2, - row + 12, - TAB_CENTER, - m->kurtosis, - NULL); - - /* stderr of kurtosis */ - tab_double (tbl, col + 3, - row + 12, - TAB_CENTER, - calc_sekurt (m->n), - NULL); -} - - - -void -box_plot_variables (const struct factor *fctr, - const struct variable **vars, int n_vars, - const struct variable *id) -{ - - int i; - struct factor_statistics **fs ; - - if ( ! fctr ) - { - box_plot_group (fctr, vars, n_vars, id); - return; - } - - for ( fs = fctr->fs ; *fs ; ++fs ) - { - struct string str; - double y_min = DBL_MAX; - double y_max = -DBL_MAX; - struct chart *ch = chart_create (); - ds_init_empty (&str); - factor_to_string (fctr, *fs, 0, &str ); - - chart_write_title (ch, ds_cstr (&str)); - - for ( i = 0 ; i < n_vars ; ++i ) - { - y_max = MAX (y_max, (*fs)->m[i].max); - y_min = MIN (y_min, (*fs)->m[i].min); - } - - boxplot_draw_yscale (ch, y_max, y_min); - - for ( i = 0 ; i < n_vars ; ++i ) - { - - const double box_width = (ch->data_right - ch->data_left) - / (n_vars * 2.0 ) ; - - const double box_centre = ( i * 2 + 1) * box_width - + ch->data_left; - - boxplot_draw_boxplot (ch, - box_centre, box_width, - & (*fs)->m[i], - var_to_string (vars[i])); - - - } - - chart_submit (ch); - ds_destroy (&str); - } -} - - - -/* Do a box plot, grouping all factors into one plot ; - each dependent variable has its own plot. 
-*/ -void -box_plot_group (const struct factor *fctr, - const struct variable **vars, - int n_vars, - const struct variable *id UNUSED) -{ - - int i; - - for ( i = 0 ; i < n_vars ; ++i ) - { - struct factor_statistics **fs ; - struct chart *ch; - - ch = chart_create (); + max_ll = ll_head (extrema_list (result->metrics[v].maxima)); + for (e = 0; e < cmd.st_n;) + { + struct extremum *maximum = ll_data (max_ll, struct extremum, ll); + double weight = maximum->weight; - boxplot_draw_yscale (ch, totals[i].max, totals[i].min); + while (weight-- > 0 && e < cmd.st_n) + { - tab_float (tbl, n_cols - 1, - heading_rows + row_var_start + row_result_start + e, ++ tab_double (tbl, n_cols - 1, ++ heading_rows + row_var_start + ++ row_result_start + e, + TAB_RIGHT, + maximum->value, - 8, 2); ++ NULL); + + - tab_float (tbl, n_cols - 2, - heading_rows + row_var_start + row_result_start + e, ++ tab_fixed (tbl, n_cols - 2, ++ heading_rows + row_var_start + ++ row_result_start + e, + TAB_RIGHT, + maximum->location, - 8, 0); ++ 10, 0); + ++e; + } - if ( fctr ) - { - int n_factors = 0; - int f=0; - for ( fs = fctr->fs ; *fs ; ++fs ) - ++n_factors; + max_ll = ll_next (max_ll); + } - chart_write_title (ch, _ ("Boxplot of %s vs. %s"), - var_to_string (vars[i]), var_to_string (fctr->indep_var[0]) ); - for ( fs = fctr->fs ; *fs ; ++fs ) + if ( fctr->indep_var[0]) { - struct string str; - const double box_width = (ch->data_right - ch->data_left) - / (n_factors * 2.0 ) ; - - const double box_centre = ( f++ * 2 + 1) * box_width - + ch->data_left; - - ds_init_empty (&str); - factor_to_string_concise (fctr, *fs, &str); + struct string vstr; + ds_init_empty (&vstr); + var_append_value_name (fctr->indep_var[0], + result->value[0], &vstr); + + tab_text (tbl, 1, + heading_rows + row_var_start + row_result_start, + TAB_LEFT, + ds_cstr (&vstr) + ); - boxplot_draw_boxplot (ch, - box_centre, box_width, - & (*fs)->m[i], - ds_cstr (&str)); - ds_destroy (&str); + ds_destroy (&vstr); } - } - else if ( ch ) - { - const double box_width = (ch->data_right - ch->data_left) / 3.0; - const double box_centre = (ch->data_right + ch->data_left) / 2.0; - chart_write_title (ch, _ ("Boxplot")); - boxplot_draw_boxplot (ch, - box_centre, box_width, - &totals[i], - var_to_string (vars[i]) ); + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + row_result_start, + TAB_RIGHT, + _("Highest")); + tab_text (tbl, n_cols - 4, + heading_rows + row_var_start + row_result_start + cmd.st_n, + TAB_RIGHT, + _("Lowest")); } - - chart_submit (ch); } -} - -/* Plot the normal and detrended normal plots for m - Label the plots with factorname */ -void -np_plot (const struct metrics *m, const char *factorname) -{ - int i; - double yfirst=0, ylast=0; - - /* Normal Plot */ - struct chart *np_chart; - - /* Detrended Normal Plot */ - struct chart *dnp_chart; - - /* The slope and intercept of the ideal normal probability line */ - const double slope = 1.0 / m->stddev; - const double intercept = - m->mean / m->stddev; - - /* Cowardly refuse to plot an empty data set */ - if ( m->n_data == 0 ) - return ; - - np_chart = chart_create (); - dnp_chart = chart_create (); - - if ( !np_chart || ! 
dnp_chart ) - return ; - - chart_write_title (np_chart, _ ("Normal Q-Q Plot of %s"), factorname); - chart_write_xlabel (np_chart, _ ("Observed Value")); - chart_write_ylabel (np_chart, _ ("Expected Normal")); - - - chart_write_title (dnp_chart, _ ("Detrended Normal Q-Q Plot of %s"), - factorname); - chart_write_xlabel (dnp_chart, _ ("Observed Value")); - chart_write_ylabel (dnp_chart, _ ("Dev from Normal")); - - yfirst = gsl_cdf_ugaussian_Pinv (m->wvp[0]->rank / ( m->n + 1)); - ylast = gsl_cdf_ugaussian_Pinv (m->wvp[m->n_data-1]->rank / ( m->n + 1)); - - - { - /* Need to make sure that both the scatter plot and the ideal fit into the - plot */ - double x_lower = MIN (m->min, (yfirst - intercept) / slope) ; - double x_upper = MAX (m->max, (ylast - intercept) / slope) ; - double slack = (x_upper - x_lower) * 0.05 ; - - chart_write_xscale (np_chart, x_lower - slack, x_upper + slack, 5); - - chart_write_xscale (dnp_chart, m->min, m->max, 5); - - } - - chart_write_yscale (np_chart, yfirst, ylast, 5); - - { - /* We have to cache the detrended data, beacause we need to - find its limits before we can plot it */ - double *d_data = xnmalloc (m->n_data, sizeof *d_data); - double d_max = -DBL_MAX; - double d_min = DBL_MAX; - for ( i = 0 ; i < m->n_data; ++i ) - { - const double ns = gsl_cdf_ugaussian_Pinv (m->wvp[i]->rank / ( m->n + 1)); + tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1); - chart_datum (np_chart, 0, m->wvp[i]->v.f, ns); - d_data[i] = (m->wvp[i]->v.f - m->mean) / m->stddev - ns; + tab_title (tbl, _("Extreme Values")); - if ( d_data[i] < d_min ) d_min = d_data[i]; - if ( d_data[i] > d_max ) d_max = d_data[i]; - } - chart_write_yscale (dnp_chart, d_min, d_max, 5); - for ( i = 0 ; i < m->n_data; ++i ) - chart_datum (dnp_chart, 0, m->wvp[i]->v.f, d_data[i]); + tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, + _("Case Number")); - free (d_data); - } - chart_line (np_chart, slope, intercept, yfirst, ylast , CHART_DIM_Y); - chart_line (dnp_chart, 0, 0, m->min, m->max , CHART_DIM_X); + tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, + _("Value")); - chart_submit (np_chart); - chart_submit (dnp_chart); + tab_submit (tbl); } +#define PERCENTILE_ROWS 2 - - -/* Show the percentiles */ -void +static void show_percentiles (const struct variable **dependent_var, - int n_dep_var, - struct factor *fctr) + int n_dep_var, + const struct xfactor *fctr) { - struct tab_table *tbl; int i; + int v; + int heading_columns = 2; + int n_cols; + const int n_percentiles = subc_list_double_count (&percentile_list); + const int heading_rows = 2; + struct tab_table *tbl; - int n_cols, n_rows; - int n_factors; - - struct hsh_table *ptiles ; - - int n_heading_columns; - const int n_heading_rows = 2; - const int n_stat_rows = 2; + int n_rows ; + n_rows = n_dep_var; - int n_ptiles ; + assert (fctr); - if ( fctr ) + if ( fctr->indep_var[0] ) { - struct factor_statistics **fs = fctr->fs ; - n_heading_columns = 3; - n_factors = hsh_count (fctr->fstats); - - ptiles = (*fs)->m[0].ptile_hash; + heading_columns = 3; if ( fctr->indep_var[1] ) - n_heading_columns = 4; - } - else - { - n_factors = 1; - n_heading_columns = 2; - - ptiles = totals[0].ptile_hash; + { + heading_columns = 4; + } } - n_ptiles = hsh_count (ptiles); - - n_rows = n_heading_rows + n_dep_var * n_stat_rows * n_factors; + n_rows *= ll_count (&fctr->result_list) * PERCENTILE_ROWS; + n_rows += heading_rows; - n_cols = n_heading_columns + n_ptiles ; + n_cols = heading_columns + n_percentiles; tbl = tab_create (n_cols, n_rows, 0); - - tab_headers 
(tbl, n_heading_columns + 1, 0, n_heading_rows, 0); + tab_headers (tbl, heading_columns, 0, heading_rows, 0); tab_dim (tbl, tab_natural_dimensions); - /* Outline the box and have no internal lines*/ + /* Outline the box */ tab_box (tbl, TAL_2, TAL_2, -1, -1, 0, 0, n_cols - 1, n_rows - 1); - tab_hline (tbl, TAL_2, 0, n_cols - 1, n_heading_rows ); - - tab_vline (tbl, TAL_2, n_heading_columns, 0, n_rows - 1); - - - tab_title (tbl, _ ("Percentiles")); - - - tab_hline (tbl, TAL_1, n_heading_columns, n_cols - 1, 1 ); - - tab_box (tbl, - -1, -1, - -1, TAL_1, - 0, n_heading_rows, - n_heading_columns - 1, n_rows - 1); - - - tab_box (tbl, - -1, -1, - -1, TAL_1, - n_heading_columns, n_heading_rows - 1, - n_cols - 1, n_rows - 1); - - tab_joint_text (tbl, n_heading_columns + 1, 0, - n_cols - 1 , 0, - TAB_CENTER | TAT_TITLE , - _ ("Percentiles")); - - - { - /* Put in the percentile break points as headings */ + tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows ); + tab_hline (tbl, TAL_2, 1, n_cols - 1, heading_rows ); - struct percentile **p = (struct percentile **) hsh_sort (ptiles); + if ( fctr->indep_var[0]) + tab_text (tbl, 1, 1, TAT_TITLE, var_to_string (fctr->indep_var[0])); - i = 0; - while ( (*p) ) - { - tab_fixed (tbl, n_heading_columns + i++ , 1, - TAB_CENTER, - (*p)->p, - 8, 0); - p++; - } + if ( fctr->indep_var[1]) + tab_text (tbl, 2, 1, TAT_TITLE, var_to_string (fctr->indep_var[1])); - } - - for ( i = 0 ; i < n_dep_var ; ++i ) + for (v = 0 ; v < n_dep_var ; ++v ) { - const int n_stat_rows = 2; - const int row = n_heading_rows + i * n_stat_rows * n_factors ; + double hinges[3]; + struct ll *ll; + int i = 0; - if ( i > 0 ) - tab_hline (tbl, TAL_1, 0, n_cols - 1, row ); + const int row_var_start = + v * PERCENTILE_ROWS * ll_count(&fctr->result_list); - tab_text (tbl, 0, - i * n_stat_rows * n_factors + n_heading_rows, + tab_text (tbl, + 0, + heading_rows + row_var_start, TAB_LEFT | TAT_TITLE, - var_to_string (dependent_var[i]) + var_to_string (dependent_var[v]) ); - if ( fctr ) + for (ll = ll_head (&fctr->result_list); + ll != ll_null (&fctr->result_list); i++, ll = ll_next (ll)) { - const union value *prev = NULL ; - struct factor_statistics **fs = fctr->fs; - int count = 0; + int j; + const struct factor_result *result = + ll_data (ll, struct factor_result, ll); - tab_text (tbl, 1, n_heading_rows - 1, - TAB_CENTER | TAT_TITLE, - var_to_string (fctr->indep_var[0])); + if ( i > 0 || v > 0 ) + { + const int left_col = (i == 0) ? 
0 : 1; + tab_hline (tbl, TAL_1, left_col, n_cols - 1, + heading_rows + row_var_start + i * PERCENTILE_ROWS); + } + if ( fctr->indep_var[0]) + { + struct string vstr; + ds_init_empty (&vstr); + var_append_value_name (fctr->indep_var[0], + result->value[0], &vstr); + + tab_text (tbl, 1, + heading_rows + row_var_start + i * PERCENTILE_ROWS, + TAB_LEFT, + ds_cstr (&vstr) + ); - if ( fctr->indep_var[1]) - tab_text (tbl, 2, n_heading_rows - 1, TAB_CENTER | TAT_TITLE, - var_to_string (fctr->indep_var[1])); + ds_destroy (&vstr); + } - while ( *fs ) - { - const int row = n_heading_rows + n_stat_rows * - ( ( i * n_factors ) + count ); + tab_text (tbl, n_cols - n_percentiles - 1, + heading_rows + row_var_start + i * PERCENTILE_ROWS, + TAB_LEFT, + ptile_alg_desc [percentile_algorithm]); - if ( !prev || 0 != compare_values (prev, (*fs)->id[0], - var_get_width (fctr->indep_var[0]))) - { - struct string vstr; - ds_init_empty (&vstr); - var_append_value_name (fctr->indep_var[0], - (*fs)->id[0], &vstr); + tab_text (tbl, n_cols - n_percentiles - 1, + heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS, + TAB_LEFT, + _("Tukey's Hinges")); - if ( count > 0 ) - tab_hline (tbl, TAL_1, 1, n_cols - 1, row); - tab_text (tbl, - 1, row, - TAB_LEFT | TAT_TITLE, - ds_cstr (&vstr) - ); + tab_vline (tbl, TAL_1, n_cols - n_percentiles -1, heading_rows, n_rows - 1); - ds_destroy (&vstr); - } + tukey_hinges_calculate ((struct tukey_hinges *) result->metrics[v].tukey_hinges, + hinges); - prev = (*fs)->id[0]; + for (j = 0; j < n_percentiles; ++j) + { + double hinge = SYSMIS; - tab_float (tbl, n_cols - n_percentiles + j, ++ tab_double (tbl, n_cols - n_percentiles + j, + heading_rows + row_var_start + i * PERCENTILE_ROWS, + TAB_CENTER, + percentile_calculate (result->metrics[v].ptl[j], + percentile_algorithm), - 8, 2 ++ NULL + ); + + if ( result->metrics[v].ptl[j]->ptile == 0.5) + hinge = hinges[1]; + else if ( result->metrics[v].ptl[j]->ptile == 0.25) + hinge = hinges[0]; + else if ( result->metrics[v].ptl[j]->ptile == 0.75) + hinge = hinges[2]; + + if ( hinge != SYSMIS) - tab_float (tbl, n_cols - n_percentiles + j, ++ tab_double (tbl, n_cols - n_percentiles + j, + heading_rows + row_var_start + 1 + i * PERCENTILE_ROWS, + TAB_CENTER, + hinge, - 8, 2 ++ NULL + ); - if (fctr->indep_var[1] && count > 0 ) - tab_hline (tbl, TAL_1, 2, n_cols - 1, row); + } + } + } - if ( fctr->indep_var[1]) - { - struct string vstr; - ds_init_empty (&vstr); - var_append_value_name (fctr->indep_var[1], (*fs)->id[1], &vstr); + tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1); - tab_text (tbl, 2, row, - TAB_LEFT | TAT_TITLE, - ds_cstr (&vstr) - ); + tab_title (tbl, _("Percentiles")); - ds_destroy (&vstr); - } + for (i = 0 ; i < n_percentiles; ++i ) + { + tab_text (tbl, n_cols - n_percentiles + i, 1, + TAB_CENTER | TAT_TITLE | TAT_PRINTF, + _("%g"), + subc_list_double_at (&percentile_list, i) + ); - populate_percentiles (tbl, n_heading_columns - 1, - row, - & (*fs)->m[i]); + } - count++ ; - fs++; - } + tab_joint_text (tbl, + n_cols - n_percentiles, 0, + n_cols - 1, 0, + TAB_CENTER | TAT_TITLE, + _("Percentiles")); + /* Vertical lines for the data only */ + tab_box (tbl, + -1, -1, + -1, TAL_1, + n_cols - n_percentiles, 1, + n_cols - 1, n_rows - 1); - } - else - { - populate_percentiles (tbl, n_heading_columns - 1, - i * n_stat_rows * n_factors + n_heading_rows, - &totals[i]); - } - } + tab_hline (tbl, TAL_1, n_cols - n_percentiles, n_cols - 1, 1); tab_submit (tbl); } - - static void -populate_percentiles (struct tab_table *tbl, int col, int 
row, - const struct metrics *m) +factor_to_string_concise (const struct xfactor *fctr, + const struct factor_result *result, + struct string *str + ) { - int i; - - struct percentile **p = (struct percentile **) hsh_sort (m->ptile_hash); - - tab_text (tbl, - col, row + 1, - TAB_LEFT | TAT_TITLE, - _ ("Tukey\'s Hinges") - ); + if (fctr->indep_var[0]) + { + var_append_value_name (fctr->indep_var[0], result->value[0], str); - tab_text (tbl, - col, row, - TAB_LEFT | TAT_TITLE, - ptile_alg_desc[m->ptile_alg] - ); + if ( fctr->indep_var[1] ) + { + ds_put_cstr (str, ","); + var_append_value_name (fctr->indep_var[1], result->value[1], str); - i = 0; - while ( (*p) ) - { - tab_double (tbl, col + i + 1 , row, - TAB_CENTER, - (*p)->v, - NULL); - - if ( (*p)->p == 25 ) - tab_double (tbl, col + i + 1 , row + 1, - TAB_CENTER, - m->hinge[0], - NULL); - - if ( (*p)->p == 50 ) - tab_double (tbl, col + i + 1 , row + 1, - TAB_CENTER, - m->hinge[1], - NULL); - - - if ( (*p)->p == 75 ) - tab_double (tbl, col + i + 1 , row + 1, - TAB_CENTER, - m->hinge[2], - NULL); - i++; - p++; + ds_put_cstr (str, ")"); + } } } + static void -factor_to_string (const struct factor *fctr, - const struct factor_statistics *fs, - const struct variable *var, +factor_to_string (const struct xfactor *fctr, + const struct factor_result *result, struct string *str ) { - if (var) - ds_put_format (str, "%s (",var_to_string (var) ); - - - ds_put_format (str, "%s = ", - var_to_string (fctr->indep_var[0])); + if (fctr->indep_var[0]) + { + ds_put_format (str, "(%s = ", var_get_name (fctr->indep_var[0])); - var_append_value_name (fctr->indep_var[0], fs->id[0], str); + var_append_value_name (fctr->indep_var[0], result->value[0], str); - if ( fctr->indep_var[1] ) - { - ds_put_format (str, "; %s = )", - var_to_string (fctr->indep_var[1])); + if ( fctr->indep_var[1] ) + { + ds_put_cstr (str, ","); + ds_put_format (str, "%s = ", var_get_name (fctr->indep_var[1])); - var_append_value_name (fctr->indep_var[1], fs->id[1], str); - } - else - { - if ( var ) - ds_put_cstr (str, ")"); + var_append_value_name (fctr->indep_var[1], result->value[1], str); + } + ds_put_cstr (str, ")"); } } -static void -factor_to_string_concise (const struct factor *fctr, - const struct factor_statistics *fs, - struct string *str - ) -{ - var_append_value_name (fctr->indep_var[0], fs->id[0], str); - - if ( fctr->indep_var[1] ) - { - ds_put_cstr (str, ","); - - var_append_value_name (fctr->indep_var[1],fs->id[1], str); - - ds_put_cstr (str, ")"); - } -} /* Local Variables: diff --combined src/language/stats/frequencies.q index cd370be8,a09ecc10..25866a41 --- a/src/language/stats/frequencies.q +++ b/src/language/stats/frequencies.q @@@ -1,5 -1,5 +1,5 @@@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2007 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2007, 2009 Free Software Foundation, Inc. 
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@@ -251,12 -251,12 +251,12 @@@ static void calc_stats (const struct va static void precalc (struct casereader *, struct dataset *); static void calc (const struct ccase *, const struct dataset *); - static void postcalc (void); + static void postcalc (const struct dataset *); static void postprocess_freq_tab (const struct variable *); - static void dump_full (const struct variable *); - static void dump_condensed (const struct variable *); - static void dump_statistics (const struct variable *, int show_varname); + static void dump_full ( const struct variable *, const struct variable *); + static void dump_condensed (const struct variable *, const struct variable *); + static void dump_statistics (const struct variable *, bool show_varname, const struct variable *); static void cleanup_freq_tab (const struct variable *); static hsh_compare_func compare_value_numeric_a, compare_value_alpha_a; @@@ -268,7 -268,7 +268,7 @@@ static hsh_compare_func compare_freq_nu static void do_piechart(const struct variable *var, const struct freq_tab *frq_tab); -gsl_histogram * +struct histogram * freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var); @@@ -376,12 -376,12 +376,12 @@@ internal_cmd_frequencies (struct lexer for (; casegrouper_get_next_group (grouper, &group); casereader_destroy (group)) { - struct ccase c; + struct ccase *c; precalc (group, ds); - for (; casereader_read (group, &c); case_destroy (&c)) - calc (&c, ds); + for (; (c = casereader_read (group)) != NULL; case_unref (c)) + calc (c, ds); - postcalc (); + postcalc (ds); } ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; @@@ -535,14 -535,13 +535,14 @@@ calc (const struct ccase *c, const stru static void precalc (struct casereader *input, struct dataset *ds) { - struct ccase c; + struct ccase *c; size_t i; - if (casereader_peek (input, 0, &c)) + c = casereader_peek (input, 0); + if (c != NULL) { - output_split_file_values (ds, &c); - case_destroy (&c); + output_split_file_values (ds, c); + case_unref (c); } pool_destroy (data_pool); @@@ -560,8 -559,10 +560,10 @@@ /* Finishes up with the variables after frequencies have been calculated. Displays statistics, percentiles, ... */ static void - postcalc (void) + postcalc (const struct dataset *ds) { + const struct dictionary *dict = dataset_dict (ds); + const struct variable *wv = dict_get_weight (dict); size_t i; for (i = 0; i < n_variables; i++) @@@ -581,16 -582,16 +583,16 @@@ switch (cmd.cond) { case FRQ_CONDENSE: - dump_condensed (v); + dump_condensed (v, wv); break; case FRQ_STANDARD: - dump_full (v); + dump_full (v, wv); break; case FRQ_ONEPAGE: if (n_categories > cmd.onepage_limit) - dump_condensed (v); + dump_condensed (v, wv); else - dump_full (v); + dump_full (v, wv); break; default: NOT_REACHED (); @@@ -600,33 -601,38 +602,33 @@@ /* Statistics. 
*/ if (n_stats) - dump_statistics (v, !dumped_freq_tab); + dump_statistics (v, !dumped_freq_tab, wv); if ( chart == GFT_HIST) { double d[frq_n_stats]; - struct normal_curve norm; - gsl_histogram *hist ; - - - norm.N = vf->tab.valid_cases; + struct histogram *hist ; calc_stats (v, d); - norm.mean = d[frq_mean]; - norm.stddev = d[frq_stddev]; - hist = freq_tab_to_hist(ft,v); + hist = freq_tab_to_hist (ft,v); - histogram_plot(hist, var_to_string(v), &norm, normal); + histogram_plot_n (hist, var_to_string(v), + vf->tab.valid_cases, + d[frq_mean], + d[frq_stddev], + normal); - gsl_histogram_free(hist); + statistic_destroy ((struct statistic *)hist); } - if ( chart == GFT_PIE) { do_piechart(v_variables[i], ft); } - - cleanup_freq_tab (v); } @@@ -1030,8 -1036,9 +1032,9 @@@ full_dim (struct tab_table *t, struct o /* Displays a full frequency table for variable V. */ static void - dump_full (const struct variable *v) + dump_full (const struct variable *v, const struct variable *wv) { + const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; int n_categories; struct var_freqs *vf; struct freq_tab *ft; @@@ -1100,10 -1107,10 +1103,10 @@@ } tab_value (t, 0 + lab, r, TAB_NONE, f->value, &vf->print); - tab_float (t, 1 + lab, r, TAB_NONE, f->count, 8, 0); - tab_float (t, 2 + lab, r, TAB_NONE, percent, 5, 1); - tab_float (t, 3 + lab, r, TAB_NONE, valid_percent, 5, 1); - tab_float (t, 4 + lab, r, TAB_NONE, cum_total, 5, 1); + tab_double (t, 1 + lab, r, TAB_NONE, f->count, wfmt); + tab_double (t, 2 + lab, r, TAB_NONE, percent, NULL); + tab_double (t, 3 + lab, r, TAB_NONE, valid_percent, NULL); + tab_double (t, 4 + lab, r, TAB_NONE, cum_total, NULL); r++; } for (; f < &ft->valid[n_categories]; f++) @@@ -1118,9 -1125,9 +1121,9 @@@ } tab_value (t, 0 + lab, r, TAB_NONE, f->value, &vf->print); - tab_float (t, 1 + lab, r, TAB_NONE, f->count, 8, 0); - tab_float (t, 2 + lab, r, TAB_NONE, - f->count / ft->total_cases * 100.0, 5, 1); + tab_double (t, 1 + lab, r, TAB_NONE, f->count, wfmt); + tab_double (t, 2 + lab, r, TAB_NONE, + f->count / ft->total_cases * 100.0, NULL); tab_text (t, 3 + lab, r, TAB_NONE, _("Missing")); r++; } @@@ -1132,9 -1139,9 +1135,9 @@@ tab_hline (t, TAL_2, 0, 4 + lab, r); tab_joint_text (t, 0, r, 0 + lab, r, TAB_RIGHT | TAT_TITLE, _("Total")); tab_vline (t, TAL_0, 1, r, r); - tab_float (t, 1 + lab, r, TAB_NONE, cum_freq, 8, 0); - tab_float (t, 2 + lab, r, TAB_NONE, 100.0, 5, 1); - tab_float (t, 3 + lab, r, TAB_NONE, 100.0, 5, 1); + tab_double (t, 1 + lab, r, TAB_NONE, cum_freq, wfmt); + tab_fixed (t, 2 + lab, r, TAB_NONE, 100.0, 5, 1); + tab_fixed (t, 3 + lab, r, TAB_NONE, 100.0, 5, 1); tab_title (t, "%s", var_to_string (v)); tab_submit (t); @@@ -1161,8 -1168,9 +1164,9 @@@ condensed_dim (struct tab_table *t, str /* Display condensed frequency table for variable V. */ static void - dump_condensed (const struct variable *v) + dump_condensed (const struct variable *v, const struct variable *wv) { + const struct fmt_spec *wfmt = wv ? 
var_get_print_format (wv) : &F_8_0; int n_categories; struct var_freqs *vf; struct freq_tab *ft; @@@ -1193,17 -1201,17 +1197,17 @@@ cum_total += f->count / ft->valid_cases * 100.0; tab_value (t, 0, r, TAB_NONE, f->value, &vf->print); - tab_float (t, 1, r, TAB_NONE, f->count, 8, 0); - tab_float (t, 2, r, TAB_NONE, percent, 3, 0); - tab_float (t, 3, r, TAB_NONE, cum_total, 3, 0); + tab_double (t, 1, r, TAB_NONE, f->count, wfmt); + tab_double (t, 2, r, TAB_NONE, percent, NULL); + tab_double (t, 3, r, TAB_NONE, cum_total, NULL); r++; } for (; f < &ft->valid[n_categories]; f++) { tab_value (t, 0, r, TAB_NONE, f->value, &vf->print); - tab_float (t, 1, r, TAB_NONE, f->count, 8, 0); - tab_float (t, 2, r, TAB_NONE, - f->count / ft->total_cases * 100.0, 3, 0); + tab_double (t, 1, r, TAB_NONE, f->count, wfmt); + tab_double (t, 2, r, TAB_NONE, + f->count / ft->total_cases * 100.0, NULL); r++; } @@@ -1360,8 -1368,10 +1364,10 @@@ calc_stats (const struct variable *v, d /* Displays a table of all the statistics requested for variable V. */ static void - dump_statistics (const struct variable *v, int show_varname) + dump_statistics (const struct variable *v, bool show_varname, + const struct variable *wv) { + const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; struct freq_tab *ft; double stat_value[frq_n_stats]; struct tab_table *t; @@@ -1394,7 -1404,7 +1400,7 @@@ { tab_text (t, 0, r, TAB_LEFT | TAT_TITLE, gettext (st_name[i].s10)); - tab_float (t, 2, r, TAB_NONE, stat_value[i], 11, 3); + tab_double (t, 2, r, TAB_NONE, stat_value[i], NULL); r++; } @@@ -1402,9 -1412,8 +1408,8 @@@ tab_text (t, 1, 0, TAB_LEFT | TAT_TITLE, _("Valid")); tab_text (t, 1, 1, TAB_LEFT | TAT_TITLE, _("Missing")); - tab_float(t, 2, 0, TAB_NONE, ft->valid_cases, 11, 0); - tab_float(t, 2, 1, TAB_NONE, ft->total_cases - ft->valid_cases, 11, 0); - + tab_double (t, 2, 0, TAB_NONE, ft->valid_cases, wfmt); + tab_double (t, 2, 1, TAB_NONE, ft->total_cases - ft->valid_cases, wfmt); for (i = 0; i < n_percentiles; i++, r++) { @@@ -1416,9 -1425,9 +1421,9 @@@ if (percentiles[i].p == 0.5) tab_text (t, 1, r, TAB_LEFT, _("50 (Median)")); else - tab_float (t, 1, r, TAB_LEFT, percentiles[i].p * 100, 3, 0); - tab_float (t, 2, r, TAB_NONE, percentiles[i].value, 11, 3); - + tab_fixed (t, 1, r, TAB_LEFT, percentiles[i].p * 100, 3, 0); + tab_double (t, 2, r, TAB_NONE, percentiles[i].value, + var_get_print_format (v)); } tab_columns (t, SOM_COL_DOWN, 1); @@@ -1433,14 -1442,14 +1438,14 @@@ /* Create a gsl_histogram from a freq_tab */ -gsl_histogram * -freq_tab_to_hist(const struct freq_tab *ft, const struct variable *var) +struct histogram * +freq_tab_to_hist (const struct freq_tab *ft, const struct variable *var) { int i; double x_min = DBL_MAX; double x_max = -DBL_MAX; - gsl_histogram *hist; + struct statistic *hist; const double bins = 11; struct hsh_iterator hi; @@@ -1457,15 -1466,15 +1462,15 @@@ if ( frq->value[0].f > x_max ) x_max = frq->value[0].f ; } - hist = histogram_create(bins, x_min, x_max); + hist = histogram_create (bins, x_min, x_max); for( i = 0 ; i < ft->n_valid ; ++i ) { frq = &ft->valid[i]; - gsl_histogram_accumulate(hist, frq->value[0].f, frq->count); + histogram_add ((struct histogram *)hist, frq->value[0].f, frq->count); } - return hist; + return (struct histogram *)hist; } diff --combined src/language/stats/npar-summary.c index 04c83e1a,db40dfc2..3ad3a5bb --- a/src/language/stats/npar-summary.c +++ b/src/language/stats/npar-summary.c @@@ -1,5 -1,5 +1,5 @@@ /* PSPP - a program for statistical analysis. 
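Throughout these table routines, tab_float (value, width, decimals) gives way to tab_double (value, format): weighted counts are printed with the weighting variable's print format (falling back to F 8.0 when the dictionary is unweighted), derived statistics pass NULL to get the default format, and constant totals such as 100.0 use tab_fixed. Roughly, the pattern used above in dump_full and again in the files below is:

    /* Sketch only; dict, t, r, lab, percent and f are the surrounding locals. */
    const struct variable *wv = dict_get_weight (dict);
    const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0;

    tab_double (t, 1 + lab, r, TAB_NONE, f->count, wfmt);  /* weighted count */
    tab_double (t, 2 + lab, r, TAB_NONE, percent, NULL);   /* default format */
    tab_fixed (t, 2 + lab, r, TAB_NONE, 100.0, 5, 1);      /* fixed constant */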
- Copyright (C) 2006 Free Software Foundation, Inc. + Copyright (C) 2006, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@@ -15,6 -15,8 +15,8 @@@ along with this program. If not, see . */ #include + + #include #include #include #include @@@ -45,23 -47,23 +47,23 @@@ npar_summary_calc_descriptives (struct double maximum = -DBL_MAX; double var; struct moments1 *moments = moments1_create (MOMENT_VARIANCE); - struct ccase c; + struct ccase *c; const struct variable *v = *vv++; struct casereader *pass; pass = casereader_clone (input); pass = casereader_create_filter_missing (pass, &v, 1, - filter, NULL); + filter, NULL, NULL); pass = casereader_create_filter_weight (pass, dict, NULL, NULL); - while (casereader_read(pass, &c)) + while ((c = casereader_read (pass)) != NULL) { - double val = case_num (&c, v); - double w = dict_get_case_weight (dict, &c, NULL); + double val = case_num (c, v); + double w = dict_get_case_weight (dict, c, NULL); minimum = MIN (minimum, val); maximum = MAX (maximum, val); moments1_add (moments, val, w); - case_destroy (&c); + case_unref (c); } casereader_destroy (pass); @@@ -84,6 -86,11 +86,11 @@@ } + void + do_summary_box (const struct descriptives *desc, + const struct variable *const *vv, + int n_vars); + void do_summary_box (const struct descriptives *desc, @@@ -97,7 -104,6 +104,6 @@@ int columns = 1 ; struct tab_table *table ; - if ( desc ) columns += 5; if ( quartiles ) columns += 3; @@@ -152,15 -158,19 +158,19 @@@ col++; } + for ( v = 0 ; v < n_vars ; ++v ) { - tab_text (table, 0, 2 + v, TAT_NONE, var_to_string (vv[v])); + const struct variable *var = vv[v]; + const struct fmt_spec *fmt = var_get_print_format (var); + + tab_text (table, 0, 2 + v, TAT_NONE, var_to_string (var)); - tab_float (table, 1, 2 + v, TAT_NONE, desc[v].n, 8, 0); - tab_float (table, 2, 2 + v, TAT_NONE, desc[v].mean, 8, 3); - tab_float (table, 3, 2 + v, TAT_NONE, desc[v].std_dev, 8, 3); - tab_float (table, 4, 2 + v, TAT_NONE, desc[v].min, 8, 3); - tab_float (table, 5, 2 + v, TAT_NONE, desc[v].max, 8, 3); + tab_double (table, 1, 2 + v, TAT_NONE, desc[v].n, fmt); + tab_double (table, 2, 2 + v, TAT_NONE, desc[v].mean, fmt); + tab_double (table, 3, 2 + v, TAT_NONE, desc[v].std_dev, fmt); + tab_double (table, 4, 2 + v, TAT_NONE, desc[v].min, fmt); + tab_double (table, 5, 2 + v, TAT_NONE, desc[v].max, fmt); } diff --combined src/language/stats/oneway.q index be57eb24,34f2a216..40107f77 --- a/src/language/stats/oneway.q +++ b/src/language/stats/oneway.q @@@ -1,5 -1,5 +1,5 @@@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2007 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2007, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@@ -43,6 -43,7 +43,7 @@@ #include #include #include "sort-criteria.h" + #include #include "xalloc.h" @@@ -55,7 -56,7 +56,7 @@@ "ONEWAY" (oneway_): *^variables=custom; missing=miss:!analysis/listwise, - incl:include/!exclude; + incl:include/!exclude; +contrast= double list; +statistics[st_]=descriptives,homogeneity. 
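For reference, show_anova_table below accumulates the sums of squares in the usual computational form, now written with the pow2 helper:

    SS_{between} = \sum_g \frac{(\sum_i x_{gi})^2}{n_g} - \frac{(\sum x)^2}{n}, \qquad
    SS_{total} = \sum x^2 - \frac{(\sum x)^2}{n}

with df1 = k - 1 and df2 = n - k for k groups, F = (SS_between / df1) / ((SS_total - SS_between) / df2), and the significance taken from the upper tail of the F distribution via gsl_cdf_fdist_Q (F, df1, df2).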
*/ @@@ -76,7 -77,7 +77,7 @@@ static const struct variable **vars /* A hash table containing all the distinct values of the independent variables */ -static struct hsh_table *global_group_hash ; +static struct hsh_table *global_group_hash; /* The number of distinct values of the independent variable, when all missing values are disregarded */ @@@ -88,19 -89,19 +89,19 @@@ static void run_oneway (struct cmd_onew /* Routines to show the output tables */ - static void show_anova_table (void); - static void show_descriptives (void); - static void show_homogeneity (void); + static void show_anova_table(void); + static void show_descriptives (const struct dictionary *dict); + static void show_homogeneity(void); -static void show_contrast_coeffs(short *); -static void show_contrast_tests(short *); +static void show_contrast_coeffs (short *); +static void show_contrast_tests (short *); enum stat_table_t {STAT_DESC = 1, STAT_HOMO = 2}; -static enum stat_table_t stat_tables ; +static enum stat_table_t stat_tables; - void output_oneway (void); + static void output_oneway (const struct dictionary *dict); int @@@ -111,26 -112,25 +112,26 @@@ cmd_oneway (struct lexer *lexer, struc int i; bool ok; - if ( !parse_oneway (lexer, ds, &cmd, NULL) ) + if ( !parse_oneway (lexer, ds, &cmd, NULL)) return CMD_FAILURE; /* What statistics were requested */ - if ( cmd.sbc_statistics ) + if ( cmd.sbc_statistics) { - for (i = 0 ; i < ONEWAY_ST_count ; ++i ) + for (i = 0; i < ONEWAY_ST_count; ++i) { - if ( ! cmd.a_statistics[i] ) continue; - - switch (i) { - case ONEWAY_ST_DESCRIPTIVES: - stat_tables |= STAT_DESC; - break; - case ONEWAY_ST_HOMOGENEITY: - stat_tables |= STAT_HOMO; - break; - } + if (! cmd.a_statistics[i]) continue; + + switch (i) + { + case ONEWAY_ST_DESCRIPTIVES: + stat_tables |= STAT_DESC; + break; + case ONEWAY_ST_HOMOGENEITY: + stat_tables |= STAT_HOMO; + break; + } } } @@@ -148,89 -148,92 +149,89 @@@ } - void - output_oneway (void) + static void + output_oneway (const struct dictionary *dict) { size_t i; - short *bad_contrast ; + short *bad_contrast; bad_contrast = xnmalloc (cmd.sbc_contrast, sizeof *bad_contrast); /* Check the sanity of the given contrast values */ - for (i = 0 ; i < cmd.sbc_contrast ; ++i ) + for (i = 0; i < cmd.sbc_contrast; ++i) { int j; double sum = 0; bad_contrast[i] = 0; - if ( subc_list_double_count(&cmd.dl_contrast[i]) != - ostensible_number_of_groups ) + if (subc_list_double_count (&cmd.dl_contrast[i]) != + ostensible_number_of_groups) { - msg(SW, - _("Number of contrast coefficients must equal the number of groups")); + msg (SW, + _("Number of contrast coefficients must equal the number of groups")); bad_contrast[i] = 1; continue; } - for (j=0; j < ostensible_number_of_groups ; ++j ) - sum += subc_list_double_at(&cmd.dl_contrast[i],j); + for (j = 0; j < ostensible_number_of_groups; ++j) + sum += subc_list_double_at (&cmd.dl_contrast[i], j); if ( sum != 0.0 ) - msg(SW,_("Coefficients for contrast %zu do not total zero"), i + 1); + msg (SW, _("Coefficients for contrast %zu do not total zero"), i + 1); } if ( stat_tables & STAT_DESC ) - show_descriptives (); + show_descriptives (dict); if ( stat_tables & STAT_HOMO ) - show_homogeneity(); + show_homogeneity (); - show_anova_table(); + show_anova_table (); if (cmd.sbc_contrast ) { - show_contrast_coeffs(bad_contrast); - show_contrast_tests(bad_contrast); + show_contrast_coeffs (bad_contrast); + show_contrast_tests (bad_contrast); } - - free(bad_contrast); + free (bad_contrast); /* Clean up */ - for (i = 0 ; i < n_vars ; ++i ) + 
for (i = 0; i < n_vars; ++i ) { struct hsh_table *group_hash = group_proc_get (vars[i])->group_hash; - hsh_destroy(group_hash); + hsh_destroy (group_hash); } - hsh_destroy(global_group_hash); - + hsh_destroy (global_group_hash); } - - /* Parser for the variables sub command */ static int oneway_custom_variables (struct lexer *lexer, - struct dataset *ds, struct cmd_oneway *cmd UNUSED, - void *aux UNUSED) + struct dataset *ds, struct cmd_oneway *cmd UNUSED, + void *aux UNUSED) { struct dictionary *dict = dataset_dict (ds); lex_match (lexer, '='); - if ((lex_token (lexer) != T_ID || dict_lookup_var (dict, lex_tokid (lexer)) == NULL) + if ((lex_token (lexer) != T_ID || + dict_lookup_var (dict, lex_tokid (lexer)) == NULL) && lex_token (lexer) != T_ALL) return 2; if (!parse_variables_const (lexer, dict, &vars, &n_vars, - PV_DUPLICATE - | PV_NUMERIC | PV_NO_SCRATCH) ) + PV_DUPLICATE + | PV_NUMERIC | PV_NO_SCRATCH) ) { free (vars); return 0; } - assert(n_vars); + assert (n_vars); if ( ! lex_match (lexer, T_BY)) return 2; @@@ -239,7 -242,7 +240,7 @@@ if ( !indep_var ) { - msg(SE,_("`%s' is not a variable name"),lex_tokid (lexer)); + msg (SE, _("`%s' is not a variable name"), lex_tokid (lexer)); return 0; } @@@ -249,7 -252,7 +250,7 @@@ /* Show the ANOVA table */ static void -show_anova_table(void) +show_anova_table (void) { size_t i; int n_cols =7; @@@ -258,7 -261,7 +259,7 @@@ struct tab_table *t; - t = tab_create (n_cols,n_rows,0); + t = tab_create (n_cols, n_rows, 0); tab_headers (t, 2, 0, 1, 0); tab_dim (t, tab_natural_dimensions); @@@ -280,23 -283,23 +281,23 @@@ tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Significance")); - for ( i=0 ; i < n_vars ; ++i ) + for (i = 0; i < n_vars; ++i) { struct group_statistics *totals = &group_proc_get (vars[i])->ugs; struct hsh_table *group_hash = group_proc_get (vars[i])->group_hash; struct hsh_iterator g; struct group_statistics *gs; - double ssa=0; - const char *s = var_to_string(vars[i]); + double ssa = 0; + const char *s = var_to_string (vars[i]); - for (gs = hsh_first (group_hash,&g); + for (gs = hsh_first (group_hash, &g); gs != 0; - gs = hsh_next(group_hash,&g)) + gs = hsh_next (group_hash, &g)) { - ssa += (gs->sum * gs->sum)/gs->n; + ssa += pow2 (gs->sum) / gs->n; } - ssa -= ( totals->sum * totals->sum ) / totals->n ; + ssa -= pow2 (totals->sum) / totals->n; tab_text (t, 0, i * 3 + 1, TAB_LEFT | TAT_TITLE, s); tab_text (t, 1, i * 3 + 1, TAB_LEFT | TAT_TITLE, _("Between Groups")); @@@ -304,43 -307,46 +305,43 @@@ tab_text (t, 1, i * 3 + 3, TAB_LEFT | TAT_TITLE, _("Total")); if (i > 0) - tab_hline(t, TAL_1, 0, n_cols - 1 , i * 3 + 1); + tab_hline (t, TAL_1, 0, n_cols - 1, i * 3 + 1); { struct group_proc *gp = group_proc_get (vars[i]); - const double sst = totals->ssq - ( totals->sum * totals->sum) / totals->n ; + const double sst = totals->ssq - pow2 (totals->sum) / totals->n; const double df1 = gp->n_groups - 1; - const double df2 = totals->n - gp->n_groups ; + const double df2 = totals->n - gp->n_groups; const double msa = ssa / df1; gp->mse = (sst - ssa) / df2; /* Sums of Squares */ - tab_float (t, 2, i * 3 + 1, 0, ssa, 10, 2); - tab_float (t, 2, i * 3 + 3, 0, sst, 10, 2); - tab_float (t, 2, i * 3 + 2, 0, sst - ssa, 10, 2); + tab_double (t, 2, i * 3 + 1, 0, ssa, NULL); + tab_double (t, 2, i * 3 + 3, 0, sst, NULL); + tab_double (t, 2, i * 3 + 2, 0, sst - ssa, NULL); /* Degrees of freedom */ - tab_float (t, 3, i * 3 + 1, 0, df1, 4, 0); - tab_float (t, 3, i * 3 + 2, 0, df2, 4, 0); - tab_float (t, 3, i * 3 + 3, 0, totals->n - 1, 4, 0); + tab_fixed (t, 
3, i * 3 + 1, 0, df1, 4, 0); + tab_fixed (t, 3, i * 3 + 2, 0, df2, 4, 0); + tab_fixed (t, 3, i * 3 + 3, 0, totals->n - 1, 4, 0); /* Mean Squares */ - tab_float (t, 4, i * 3 + 1, TAB_RIGHT, msa, 8, 3); - tab_float (t, 4, i * 3 + 2, TAB_RIGHT, gp->mse, 8, 3); + tab_double (t, 4, i * 3 + 1, TAB_RIGHT, msa, NULL); + tab_double (t, 4, i * 3 + 2, TAB_RIGHT, gp->mse, NULL); - { - const double F = msa/gp->mse; + const double F = msa / gp->mse ; /* The F value */ - tab_float (t, 5, i * 3 + 1, 0, F, 8, 3); + tab_double (t, 5, i * 3 + 1, 0, F, NULL); /* The significance */ - tab_float (t, 6, i * 3 + 1, 0, gsl_cdf_fdist_Q (F, df1, df2), 8, 3); - tab_double (t, 6, i * 3 + 1, 0, gsl_cdf_fdist_Q (F, df1,df2), NULL); ++ tab_double (t, 6, i * 3 + 1, 0, gsl_cdf_fdist_Q (F, df1, df2), NULL); } - } - } @@@ -351,23 -357,25 +352,25 @@@ /* Show the descriptives table */ static void - show_descriptives (void) + show_descriptives (const struct dictionary *dict) { size_t v; - int n_cols =10; + int n_cols = 10; struct tab_table *t; int row; const double confidence = 0.95; const double q = (1.0 - confidence) / 2.0; + const struct variable *wv = dict_get_weight (dict); + const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0; - int n_rows = 2 ; + int n_rows = 2; - for ( v = 0 ; v < n_vars ; ++v ) + for ( v = 0; v < n_vars; ++v ) n_rows += group_proc_get (vars[v])->n_groups + 1; - t = tab_create (n_cols,n_rows,0); + t = tab_create (n_cols, n_rows, 0); tab_headers (t, 2, 0, 2, 0); tab_dim (t, tab_natural_dimensions); @@@ -380,7 -388,7 +383,7 @@@ n_cols - 1, n_rows - 1); /* Underline headers */ - tab_hline (t, TAL_2, 0, n_cols - 1, 2 ); + tab_hline (t, TAL_2, 0, n_cols - 1, 2); tab_vline (t, TAL_2, 2, 0, n_rows - 1); tab_text (t, 2, 1, TAB_CENTER | TAT_TITLE, _("N")); @@@ -389,10 -397,9 +392,10 @@@ tab_text (t, 5, 1, TAB_CENTER | TAT_TITLE, _("Std. Error")); - tab_vline(t, TAL_0, 7, 0, 0); - tab_hline(t, TAL_1, 6, 7, 1); - tab_joint_text (t, 6, 0, 7, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF, _("%g%% Confidence Interval for Mean"),confidence*100.0); + tab_vline (t, TAL_0, 7, 0, 0); + tab_hline (t, TAL_1, 6, 7, 1); + tab_joint_text (t, 6, 0, 7, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF, + _("%g%% Confidence Interval for Mean"), confidence*100.0); tab_text (t, 6, 1, TAB_CENTER | TAT_TITLE, _("Lower Bound")); tab_text (t, 7, 1, TAB_CENTER | TAT_TITLE, _("Upper Bound")); @@@ -405,7 -412,7 +408,7 @@@ row = 2; - for ( v=0 ; v < n_vars ; ++v ) + for (v = 0; v < n_vars; ++v) { double T; double std_error; @@@ -416,14 -423,15 +419,15 @@@ struct group_statistics *totals = &gp->ugs; const char *s = var_to_string (vars[v]); + const struct fmt_spec *fmt = var_get_print_format (vars[v]); struct group_statistics *const *gs_array = - (struct group_statistics *const *) hsh_sort(gp->group_hash); + (struct group_statistics *const *) hsh_sort (gp->group_hash); int count = 0; tab_text (t, 0, row, TAB_LEFT | TAT_TITLE, s); if ( v > 0) - tab_hline(t, TAL_1, 0, n_cols - 1 , row); + tab_hline (t, TAL_1, 0, n_cols - 1, row); for (count = 0; count < hsh_count (gp->group_hash); ++count) { @@@ -441,67 -449,70 +445,69 @@@ /* Now fill in the numbers ... 
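Each group row gets N, the mean, the standard deviation, the standard error s / \sqrt{n}, and a confidence interval from the t distribution; with the hard-coded confidence of 0.95 the bounds computed below are

    \bar{x} \pm t_{1 - \alpha/2,\, n-1} \cdot s / \sqrt{n}, \qquad \alpha = 1 - 0.95,

where the quantile is obtained as gsl_cdf_tdist_Qinv (q, n - 1) with q = (1 - confidence) / 2.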
*/ - tab_float (t, 2, row + count, 0, gs->n, 8, 0); + tab_fixed (t, 2, row + count, 0, gs->n, 8, 0); - tab_float (t, 3, row + count, 0, gs->mean, 8, 2); + tab_double (t, 3, row + count, 0, gs->mean, NULL); - tab_float (t, 4, row + count, 0, gs->std_dev, 8, 2); + tab_double (t, 4, row + count, 0, gs->std_dev, NULL); - std_error = gs->std_dev/sqrt (gs->n); - tab_float (t, 5, row + count, 0, - std_error, 8, 2); + std_error = gs->std_dev / sqrt (gs->n) ; + tab_double (t, 5, row + count, 0, + std_error, NULL); /* Now the confidence interval */ T = gsl_cdf_tdist_Qinv (q, gs->n - 1); - tab_float (t, 6, row + count, 0, - gs->mean - T * std_error, 8, 2); + tab_double (t, 6, row + count, 0, + gs->mean - T * std_error, NULL); - tab_float (t, 7, row + count, 0, - gs->mean + T * std_error, 8, 2); + tab_double (t, 7, row + count, 0, + gs->mean + T * std_error, NULL); /* Min and Max */ - tab_float (t, 8, row + count, 0, gs->minimum, 8, 2); - tab_float (t, 9, row + count, 0, gs->maximum, 8, 2); + + tab_double (t, 8, row + count, 0, gs->minimum, fmt); + tab_double (t, 9, row + count, 0, gs->maximum, fmt); } tab_text (t, 1, row + count, - TAB_LEFT | TAT_TITLE ,_("Total")); + TAB_LEFT | TAT_TITLE, _("Total")); - tab_float (t, 2, row + count, 0, totals->n, 8, 0); + tab_double (t, 2, row + count, 0, totals->n, wfmt); - tab_float (t, 3, row + count, 0, totals->mean, 8, 2); + tab_double (t, 3, row + count, 0, totals->mean, NULL); - tab_float (t, 4, row + count, 0, totals->std_dev, 8, 2); + tab_double (t, 4, row + count, 0, totals->std_dev, NULL); - std_error = totals->std_dev/sqrt (totals->n); + std_error = totals->std_dev / sqrt (totals->n) ; - tab_float (t, 5, row + count, 0, std_error, 8, 2); + tab_double (t, 5, row + count, 0, std_error, NULL); /* Now the confidence interval */ T = gsl_cdf_tdist_Qinv (q, totals->n - 1); - tab_float (t, 6, row + count, 0, - totals->mean - T * std_error, 8, 2); + tab_double (t, 6, row + count, 0, + totals->mean - T * std_error, NULL); - tab_float (t, 7, row + count, 0, - totals->mean + T * std_error, 8, 2); + tab_double (t, 7, row + count, 0, + totals->mean + T * std_error, NULL); /* Min and Max */ - tab_float (t, 8, row + count, 0, totals->minimum, 8, 2); - tab_float (t, 9, row + count, 0, totals->maximum, 8, 2); + + tab_double (t, 8, row + count, 0, totals->minimum, fmt); + tab_double (t, 9, row + count, 0, totals->maximum, fmt); row += gp->n_groups + 1; } - tab_submit (t); } /* Show the homogeneity table */ static void -show_homogeneity(void) +show_homogeneity (void) { size_t v; int n_cols = 5; @@@ -510,7 -521,7 +516,7 @@@ struct tab_table *t; - t = tab_create (n_cols,n_rows,0); + t = tab_create (n_cols, n_rows, 0); tab_headers (t, 1, 0, 1, 0); tab_dim (t, tab_natural_dimensions); @@@ -522,37 -533,38 +528,37 @@@ n_cols - 1, n_rows - 1); - tab_hline(t, TAL_2, 0, n_cols - 1, 1); - tab_vline(t, TAL_2, 1, 0, n_rows - 1); - + tab_hline (t, TAL_2, 0, n_cols - 1, 1); + tab_vline (t, TAL_2, 1, 0, n_rows - 1); - tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Levene Statistic")); - tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("df1")); - tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df2")); - tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Significance")); + tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Levene Statistic")); + tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("df1")); + tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("df2")); + tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Significance")); tab_title (t, _("Test of Homogeneity of Variances")); - for ( v=0 ; v < n_vars ; ++v ) + 
for (v = 0; v < n_vars; ++v) { double F; const struct variable *var = vars[v]; const struct group_proc *gp = group_proc_get (vars[v]); - const char *s = var_to_string(var); + const char *s = var_to_string (var); const struct group_statistics *totals = &gp->ugs; const double df1 = gp->n_groups - 1; - const double df2 = totals->n - gp->n_groups ; + const double df2 = totals->n - gp->n_groups; tab_text (t, 0, v + 1, TAB_LEFT | TAT_TITLE, s); F = gp->levene; - tab_float (t, 1, v + 1, TAB_RIGHT, F, 8, 3); - tab_float (t, 2, v + 1, TAB_RIGHT, df1, 8, 0); - tab_float (t, 3, v + 1, TAB_RIGHT, df2, 8, 0); + tab_double (t, 1, v + 1, TAB_RIGHT, F, NULL); + tab_fixed (t, 2, v + 1, TAB_RIGHT, df1, 8, 0); + tab_fixed (t, 3, v + 1, TAB_RIGHT, df2, 8, 0); /* Now the significance */ - tab_float (t, 4, v + 1, TAB_RIGHT, gsl_cdf_fdist_Q (F, df1, df2), 8, 3); + tab_double (t, 4, v + 1, TAB_RIGHT,gsl_cdf_fdist_Q (F, df1, df2), NULL); } tab_submit (t); @@@ -566,12 -578,12 +572,12 @@@ show_contrast_coeffs (short *bad_contra int n_cols = 2 + ostensible_number_of_groups; int n_rows = 2 + cmd.sbc_contrast; union value *group_value; - int count = 0 ; - void *const *group_values ; + int count = 0; + void *const *group_values; struct tab_table *t; - t = tab_create (n_cols,n_rows,0); + t = tab_create (n_cols, n_rows, 0); tab_headers (t, 2, 0, 2, 0); tab_dim (t, tab_natural_dimensions); @@@ -583,21 -595,21 +589,21 @@@ n_cols - 1, n_rows - 1); tab_box (t, - -1,-1, + -1, -1, TAL_0, TAL_0, 2, 0, n_cols - 1, 0); tab_box (t, - -1,-1, + -1, -1, TAL_0, TAL_0, - 0,0, - 1,1); + 0, 0, + 1, 1); - tab_hline(t, TAL_1, 2, n_cols - 1, 1); - tab_hline(t, TAL_2, 0, n_cols - 1, 2); + tab_hline (t, TAL_1, 2, n_cols - 1, 1); + tab_hline (t, TAL_2, 0, n_cols - 1, 2); - tab_vline(t, TAL_2, 2, 0, n_rows - 1); + tab_vline (t, TAL_2, 2, 0, n_rows - 1); tab_title (t, _("Contrast Coefficients")); @@@ -605,11 -617,11 +611,11 @@@ tab_joint_text (t, 2, 0, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, - var_to_string(indep_var)); + var_to_string (indep_var)); - group_values = hsh_sort(global_group_hash); - for (count = 0 ; - count < hsh_count(global_group_hash) ; + group_values = hsh_sort (global_group_hash); + for (count = 0; + count < hsh_count (global_group_hash); ++count) { int i; @@@ -626,16 -638,16 +632,16 @@@ ds_destroy (&vstr); - for (i = 0 ; i < cmd.sbc_contrast ; ++i ) + for (i = 0; i < cmd.sbc_contrast; ++i ) { - tab_text(t, 1, i + 2, TAB_CENTER | TAT_PRINTF, "%d", i + 1); + tab_text (t, 1, i + 2, TAB_CENTER | TAT_PRINTF, "%d", i + 1); if ( bad_contrast[i] ) - tab_text(t, count + 2, i + 2, TAB_RIGHT, "?" ); + tab_text (t, count + 2, i + 2, TAB_RIGHT, "?" 
); else - tab_text(t, count + 2, i + 2, TAB_RIGHT | TAT_PRINTF, "%g", - subc_list_double_at(&cmd.dl_contrast[i], count) - ); + tab_text (t, count + 2, i + 2, TAB_RIGHT | TAT_PRINTF, "%g", + subc_list_double_at (&cmd.dl_contrast[i], count) + ); } } @@@ -645,7 -657,7 +651,7 @@@ /* Show the results of the contrast tests */ static void -show_contrast_tests(short *bad_contrast) +show_contrast_tests (short *bad_contrast) { size_t v; int n_cols = 8; @@@ -653,7 -665,7 +659,7 @@@ struct tab_table *t; - t = tab_create (n_cols,n_rows,0); + t = tab_create (n_cols, n_rows, 0); tab_headers (t, 3, 0, 1, 0); tab_dim (t, tab_natural_dimensions); @@@ -665,34 -677,34 +671,34 @@@ n_cols - 1, n_rows - 1); tab_box (t, - -1,-1, + -1, -1, TAL_0, TAL_0, 0, 0, 2, 0); - tab_hline(t, TAL_2, 0, n_cols - 1, 1); - tab_vline(t, TAL_2, 3, 0, n_rows - 1); + tab_hline (t, TAL_2, 0, n_cols - 1, 1); + tab_vline (t, TAL_2, 3, 0, n_rows - 1); tab_title (t, _("Contrast Tests")); - tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Contrast")); - tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Value of Contrast")); + tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("Contrast")); + tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Value of Contrast")); tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Error")); tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("t")); tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("df")); tab_text (t, 7, 0, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)")); - for ( v = 0 ; v < n_vars ; ++v ) + for (v = 0; v < n_vars; ++v) { int i; int lines_per_variable = 2 * cmd.sbc_contrast; tab_text (t, 0, (v * lines_per_variable) + 1, TAB_LEFT | TAT_TITLE, - var_to_string(vars[v])); + var_to_string (vars[v])); - for ( i = 0 ; i < cmd.sbc_contrast ; ++i ) + for (i = 0; i < cmd.sbc_contrast; ++i) { int ci; double contrast_value = 0.0; @@@ -703,18 -715,18 +709,18 @@@ void *const *group_stat_array; double T; - double std_error_contrast ; + double std_error_contrast; double df; - double sec_vneq=0.0; + double sec_vneq = 0.0; /* Note: The calculation of the degrees of freedom in the "variances not equal" case is painfull!! 
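(In clean notation this is the Welch-Satterthwaite approximation, computed a few lines below as df_numerator / df_denominator:

    \nu \approx \frac{\left( \sum_{i=1}^k c_i^2 s_i^2 / n_i \right)^2}
                     {\sum_{i=1}^k \frac{\left( c_i^2 s_i^2 / n_i \right)^2}{n_i - 1}}

where the c_i are the contrast coefficients, the s_i^2 the group variances and the n_i the group sizes.)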
The following formula may help to understand it: - \frac{\left(\sum_{i=1}^k{c_i^2\frac{s_i^2}{n_i}}\right)^2} + \frac{\left (\sum_{i=1}^k{c_i^2\frac{s_i^2}{n_i}}\right)^2} { - \sum_{i=1}^k\left( - \frac{\left(c_i^2\frac{s_i^2}{n_i}\right)^2} {n_i-1} + \sum_{i=1}^k\left ( + \frac{\left (c_i^2\frac{s_i^2}{n_i}\right)^2} {n_i-1} \right) } */ @@@ -733,130 -745,129 +739,124 @@@ } tab_text (t, 2, (v * lines_per_variable) + i + 1, - TAB_CENTER | TAT_TITLE | TAT_PRINTF, "%d",i+1); + TAB_CENTER | TAT_TITLE | TAT_PRINTF, "%d", i + 1); tab_text (t, 2, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, - TAB_CENTER | TAT_TITLE | TAT_PRINTF, "%d",i+1); + TAB_CENTER | TAT_TITLE | TAT_PRINTF, "%d", i + 1); if ( bad_contrast[i]) continue; - group_stat_array = hsh_sort(group_hash); + group_stat_array = hsh_sort (group_hash); - for (ci = 0 ; ci < hsh_count(group_hash) ; ++ci) + for (ci = 0; ci < hsh_count (group_hash); ++ci) { - const double coef = subc_list_double_at(&cmd.dl_contrast[i], ci); + const double coef = subc_list_double_at (&cmd.dl_contrast[i], ci); struct group_statistics *gs = group_stat_array[ci]; - const double winv = (gs->std_dev * gs->std_dev) / gs->n; + const double winv = pow2 (gs->std_dev) / gs->n; contrast_value += coef * gs->mean; - coef_msq += (coef * coef) / gs->n ; + coef_msq += (coef * coef) / gs->n; - sec_vneq += (coef * coef) * (gs->std_dev * gs->std_dev ) /gs->n ; + sec_vneq += (coef * coef) * pow2 (gs->std_dev) /gs->n; df_numerator += (coef * coef) * winv; df_denominator += pow2((coef * coef) * winv) / (gs->n - 1); } - sec_vneq = sqrt(sec_vneq); + sec_vneq = sqrt (sec_vneq); - df_numerator = pow2(df_numerator); + df_numerator = pow2 (df_numerator); - tab_float (t, 3, (v * lines_per_variable) + i + 1, - TAB_RIGHT, contrast_value, 8, 2); + tab_double (t, 3, (v * lines_per_variable) + i + 1, + TAB_RIGHT, contrast_value, NULL); - tab_float (t, 3, (v * lines_per_variable) + i + 1 + + tab_double (t, 3, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, - TAB_RIGHT, contrast_value, 8, 2); + TAB_RIGHT, contrast_value, NULL); std_error_contrast = sqrt (grp_data->mse * coef_msq); /* Std. Error */ - tab_float (t, 4, (v * lines_per_variable) + i + 1, + tab_double (t, 4, (v * lines_per_variable) + i + 1, TAB_RIGHT, std_error_contrast, - 8, 3); + NULL); - T = fabs(contrast_value / std_error_contrast) ; + T = fabs (contrast_value / std_error_contrast); /* T Statistic */ - tab_float (t, 5, (v * lines_per_variable) + i + 1, + tab_double (t, 5, (v * lines_per_variable) + i + 1, TAB_RIGHT, T, - 8, 3); + NULL); df = grp_data->ugs.n - grp_data->n_groups; /* Degrees of Freedom */ - tab_float (t, 6, (v * lines_per_variable) + i + 1, + tab_fixed (t, 6, (v * lines_per_variable) + i + 1, TAB_RIGHT, df, 8, 0); /* Significance TWO TAILED !!*/ - tab_float (t, 7, (v * lines_per_variable) + i + 1, + tab_double (t, 7, (v * lines_per_variable) + i + 1, TAB_RIGHT, 2 * gsl_cdf_tdist_Q (T, df), - 8, 3); - + NULL); - /* Now for the Variances NOT Equal case */ /* Std. 
Error */ - tab_float (t, 4, + tab_double (t, 4, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, TAB_RIGHT, sec_vneq, - 8, 3); - + NULL); - T = contrast_value / sec_vneq; - tab_float (t, 5, + tab_double (t, 5, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, TAB_RIGHT, T, - 8, 3); - + NULL); - df = df_numerator / df_denominator; - tab_float (t, 6, + tab_double (t, 6, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, TAB_RIGHT, df, - 8, 3); + NULL); /* The Significance */ - tab_float (t, 7, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, - TAB_RIGHT, 2 * gsl_cdf_tdist_Q (T, df), - 8, 3); - - + tab_double (t, 7, (v * lines_per_variable) + i + 1 + cmd.sbc_contrast, + TAB_RIGHT, 2 * gsl_cdf_tdist_Q (T,df), + NULL); - - } if ( v > 0 ) - tab_hline(t, TAL_1, 0, n_cols - 1, (v * lines_per_variable) + 1); + tab_hline (t, TAL_1, 0, n_cols - 1, (v * lines_per_variable) + 1); } tab_submit (t); - } /* ONEWAY ANOVA Calculations */ -static void postcalc ( struct cmd_oneway *cmd UNUSED ); +static void postcalc (struct cmd_oneway *cmd UNUSED); -static void precalc ( struct cmd_oneway *cmd UNUSED ); +static void precalc (struct cmd_oneway *cmd UNUSED); /* Pre calculations */ static void -precalc ( struct cmd_oneway *cmd UNUSED ) +precalc (struct cmd_oneway *cmd UNUSED) { - size_t i=0; + size_t i = 0; - for(i=0; i< n_vars ; ++i) + for (i = 0; i < n_vars; ++i) { struct group_proc *gp = group_proc_get (vars[i]); struct group_statistics *totals = &gp->ugs; @@@ -865,15 -876,19 +865,15 @@@ The hash contains a group_statistics structure, and is keyed by value of the independent variable */ - gp->group_hash = - hsh_create(4, - (hsh_compare_func *) compare_group, - (hsh_hash_func *) hash_group, - (hsh_free_func *) free_group, - (void *) var_get_width (indep_var) ); + gp->group_hash = hsh_create (4, compare_group, hash_group, + (hsh_free_func *) free_group, + indep_var); - - totals->sum=0; - totals->n=0; - totals->ssq=0; - totals->sum_diff=0; - totals->maximum = - DBL_MAX; + totals->sum = 0; + totals->n = 0; + totals->ssq = 0; + totals->sum_diff = 0; + totals->maximum = -DBL_MAX; totals->minimum = DBL_MAX; } } @@@ -894,81 -909,80 +894,81 @@@ run_oneway (struct cmd_oneway *cmd struct dictionary *dict = dataset_dict (ds); enum mv_class exclude; struct casereader *reader; - struct ccase c; + struct ccase *c; - if (!casereader_peek (input, 0, &c)) + c = casereader_peek (input, 0); + if (c == NULL) { casereader_destroy (input); return; } - output_split_file_values (ds, &c); - case_destroy (&c); + output_split_file_values (ds, c); + case_unref (c); taint = taint_clone (casereader_get_taint (input)); - global_group_hash = hsh_create(4, - (hsh_compare_func *) compare_values, - (hsh_hash_func *) hash_value, - free_value, - (void *) var_get_width (indep_var) ); + global_group_hash = hsh_create (4, + compare_values_short, + hash_value_short, + free_value, + indep_var); - precalc(cmd); + precalc (cmd); exclude = cmd->incl != ONEWAY_INCLUDE ? 
MV_ANY : MV_SYSTEM; input = casereader_create_filter_missing (input, &indep_var, 1, - exclude, NULL); + exclude, NULL, NULL); if (cmd->miss == ONEWAY_LISTWISE) input = casereader_create_filter_missing (input, vars, n_vars, - exclude, NULL); + exclude, NULL, NULL); input = casereader_create_filter_weight (input, dict, NULL, NULL); reader = casereader_clone (input); - for (; casereader_read (reader, &c); case_destroy (&c)) + for (; (c = casereader_read (reader)) != NULL; case_unref (c)) { size_t i; - const double weight = dict_get_case_weight (dict, &c, NULL); + const double weight = dict_get_case_weight (dict, c, NULL); - const union value *indep_val = case_data (&c, indep_var); + const union value *indep_val = case_data (c, indep_var); void **p = hsh_probe (global_group_hash, indep_val); if (*p == NULL) *p = value_dup (indep_val, var_get_width (indep_var)); - for ( i = 0 ; i < n_vars ; ++i ) + for (i = 0; i < n_vars; ++i) { const struct variable *v = vars[i]; - const union value *val = case_data (&c, v); + const union value *val = case_data (c, v); struct group_proc *gp = group_proc_get (vars[i]); struct hsh_table *group_hash = gp->group_hash; struct group_statistics *gs; - gs = hsh_find(group_hash, (void *) indep_val ); + gs = hsh_find (group_hash, indep_val ); if ( ! gs ) { gs = xmalloc (sizeof *gs); gs->id = *indep_val; - gs->sum=0; - gs->n=0; - gs->ssq=0; - gs->sum_diff=0; + gs->sum = 0; + gs->n = 0; + gs->ssq = 0; + gs->sum_diff = 0; gs->minimum = DBL_MAX; gs->maximum = -DBL_MAX; - hsh_insert ( group_hash, (void *) gs ); + hsh_insert ( group_hash, gs ); } if (!var_is_value_missing (v, val, exclude)) { struct group_statistics *totals = &gp->ugs; - totals->n+=weight; - totals->sum+=weight * val->f; - totals->ssq+=weight * val->f * val->f; + totals->n += weight; + totals->sum += weight * val->f; + totals->ssq += weight * pow2 (val->f); if ( val->f * weight < totals->minimum ) totals->minimum = val->f * weight; @@@ -976,9 -990,9 +976,9 @@@ if ( val->f * weight > totals->maximum ) totals->maximum = val->f * weight; - gs->n+=weight; - gs->sum+=weight * val->f; - gs->ssq+=weight * val->f * val->f; + gs->n += weight; + gs->sum += weight * val->f; + gs->ssq += weight * pow2 (val->f); if ( val->f * weight < gs->minimum ) gs->minimum = val->f * weight; @@@ -993,7 -1007,7 +993,7 @@@ } casereader_destroy (reader); - postcalc(cmd); + postcalc (cmd); if ( stat_tables & STAT_HOMO ) @@@ -1004,7 -1018,8 +1004,8 @@@ ostensible_number_of_groups = hsh_count (global_group_hash); if (!taint_has_tainted_successor (taint)) - output_oneway (); + output_oneway (dict); + taint_destroy (taint); } @@@ -1013,9 -1028,10 +1014,9 @@@ void postcalc ( struct cmd_oneway *cmd UNUSED ) { - size_t i=0; - + size_t i = 0; - for(i = 0; i < n_vars ; ++i) + for (i = 0; i < n_vars; ++i) { struct group_proc *gp = group_proc_get (vars[i]); struct hsh_table *group_hash = gp->group_hash; @@@ -1024,29 -1040,35 +1025,29 @@@ struct hsh_iterator g; struct group_statistics *gs; - for (gs = hsh_first (group_hash,&g); + for (gs = hsh_first (group_hash, &g); gs != 0; - gs = hsh_next(group_hash,&g)) + gs = hsh_next (group_hash, &g)) { - gs->mean=gs->sum / gs->n; - gs->s_std_dev= sqrt( - ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) - ) ; - - gs->std_dev= sqrt( - gs->n/(gs->n-1) * - ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) - ) ; + gs->mean = gs->sum / gs->n; + gs->s_std_dev = sqrt (gs->ssq / gs->n - pow2 (gs->mean)); - gs->se_mean = gs->std_dev / sqrt(gs->n); - gs->mean_diff= gs->sum_diff / gs->n; + gs->std_dev = sqrt ( + gs->n / (gs->n - 
1) * + ( gs->ssq / gs->n - pow2 (gs->mean)) + ); + gs->se_mean = gs->std_dev / sqrt (gs->n); + gs->mean_diff = gs->sum_diff / gs->n; } - - totals->mean = totals->sum / totals->n; - totals->std_dev= sqrt( - totals->n/(totals->n-1) * - ( (totals->ssq / totals->n ) - totals->mean * totals->mean ) - ) ; - - totals->se_mean = totals->std_dev / sqrt(totals->n); + totals->std_dev = sqrt ( + totals->n / (totals->n - 1) * + (totals->ssq / totals->n - pow2 (totals->mean)) + ); + totals->se_mean = totals->std_dev / sqrt (totals->n); } } diff --combined src/language/stats/regression.q index 68db454d,a067b3af..13cc4f69 --- a/src/language/stats/regression.q +++ b/src/language/stats/regression.q @@@ -1,5 -1,5 +1,5 @@@ /* PSPP - a program for statistical analysis. - Copyright (C) 2005 Free Software Foundation, Inc. + Copyright (C) 2005, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@@ -160,10 -160,10 +160,10 @@@ reg_stats_r (pspp_linreg_cache * c tab_text (t, 2, 0, TAB_CENTER | TAT_TITLE, _("R Square")); tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Adjusted R Square")); tab_text (t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Error of the Estimate")); - tab_float (t, 1, 1, TAB_RIGHT, sqrt (rsq), 10, 2); - tab_float (t, 2, 1, TAB_RIGHT, rsq, 10, 2); - tab_float (t, 3, 1, TAB_RIGHT, adjrsq, 10, 2); - tab_float (t, 4, 1, TAB_RIGHT, std_error, 10, 2); + tab_double (t, 1, 1, TAB_RIGHT, sqrt (rsq), NULL); + tab_double (t, 2, 1, TAB_RIGHT, rsq, NULL); + tab_double (t, 3, 1, TAB_RIGHT, adjrsq, NULL); + tab_double (t, 4, 1, TAB_RIGHT, std_error, NULL); tab_title (t, _("Model Summary")); tab_submit (t); } @@@ -205,14 -205,14 +205,14 @@@ reg_stats_coeff (pspp_linreg_cache * c tab_text (t, 5, 0, TAB_CENTER | TAT_TITLE, _("t")); tab_text (t, 6, 0, TAB_CENTER | TAT_TITLE, _("Significance")); tab_text (t, 1, 1, TAB_LEFT | TAT_TITLE, _("(Constant)")); - tab_float (t, 2, 1, 0, c->intercept, 10, 2); + tab_double (t, 2, 1, 0, c->intercept, NULL); std_err = sqrt (gsl_matrix_get (c->cov, 0, 0)); - tab_float (t, 3, 1, 0, std_err, 10, 2); - tab_float (t, 4, 1, 0, 0.0, 10, 2); + tab_double (t, 3, 1, 0, std_err, NULL); + tab_double (t, 4, 1, 0, 0.0, NULL); t_stat = c->intercept / std_err; - tab_float (t, 5, 1, 0, t_stat, 10, 2); + tab_double (t, 5, 1, 0, t_stat, NULL); pval = 2 * gsl_cdf_tdist_Q (fabs (t_stat), 1.0); - tab_float (t, 6, 1, 0, pval, 10, 2); + tab_double (t, 6, 1, 0, pval, NULL); for (j = 0; j < c->n_coeffs; j++) { struct string tstr; @@@ -240,32 -240,32 +240,32 @@@ /* Regression coefficients. */ - tab_float (t, 2, this_row, 0, c->coeff[j]->estimate, 10, 2); + tab_double (t, 2, this_row, 0, c->coeff[j]->estimate, NULL); /* Standard error of the coefficients. */ std_err = sqrt (gsl_matrix_get (c->cov, j + 1, j + 1)); - tab_float (t, 3, this_row, 0, std_err, 10, 2); + tab_double (t, 3, this_row, 0, std_err, NULL); /* Standardized coefficient, i.e., regression coefficient if all variables had unit variance. */ beta = pspp_coeff_get_sd (c->coeff[j]); beta *= c->coeff[j]->estimate / c->depvar_std; - tab_float (t, 4, this_row, 0, beta, 10, 2); + tab_double (t, 4, this_row, 0, beta, NULL); /* Test statistic for H0: coefficient is 0. */ t_stat = c->coeff[j]->estimate / std_err; - tab_float (t, 5, this_row, 0, t_stat, 10, 2); + tab_double (t, 5, this_row, 0, t_stat, NULL); /* P values for the test statistic above. 
*/ pval = 2 * gsl_cdf_tdist_Q (fabs (t_stat), (double) (c->n_obs - c->n_coeffs)); - tab_float (t, 6, this_row, 0, pval, 10, 2); + tab_double (t, 6, this_row, 0, pval, NULL); ds_destroy (&tstr); } tab_title (t, _("Coefficients")); @@@ -309,9 -309,9 +309,9 @@@ reg_stats_anova (pspp_linreg_cache * c tab_text (t, 1, 3, TAB_LEFT | TAT_TITLE, _("Total")); /* Sums of Squares */ - tab_float (t, 2, 1, 0, c->ssm, 10, 2); - tab_float (t, 2, 3, 0, c->sst, 10, 2); - tab_float (t, 2, 2, 0, c->sse, 10, 2); + tab_double (t, 2, 1, 0, c->ssm, NULL); + tab_double (t, 2, 3, 0, c->sst, NULL); + tab_double (t, 2, 2, 0, c->sse, NULL); /* Degrees of freedom */ @@@ -320,12 -320,12 +320,12 @@@ tab_text (t, 3, 3, TAB_RIGHT | TAT_PRINTF, "%g", c->dft); /* Mean Squares */ - tab_float (t, 4, 1, TAB_RIGHT, msm, 8, 3); - tab_float (t, 4, 2, TAB_RIGHT, mse, 8, 3); + tab_double (t, 4, 1, TAB_RIGHT, msm, NULL); + tab_double (t, 4, 2, TAB_RIGHT, mse, NULL); - tab_float (t, 5, 1, 0, F, 8, 3); + tab_double (t, 5, 1, 0, F, NULL); - tab_float (t, 6, 1, 0, pval, 8, 3); + tab_double (t, 6, 1, 0, pval, NULL); tab_title (t, _("ANOVA")); tab_submit (t); @@@ -398,8 -398,8 +398,8 @@@ reg_stats_bcov (pspp_linreg_cache * c { col = (i <= k) ? k : i; row = (i <= k) ? i : k; - tab_float (t, k + 2, i, TAB_CENTER, - gsl_matrix_get (c->cov, row, col), 8, 3); + tab_double (t, k + 2, i, TAB_CENTER, + gsl_matrix_get (c->cov, row, col), NULL); } } tab_title (t, _("Coefficient Correlations")); @@@ -542,7 -542,7 +542,7 @@@ regression_trns_free (void *t_ Gets the predicted values. */ static int -regression_trns_pred_proc (void *t_, struct ccase *c, +regression_trns_pred_proc (void *t_, struct ccase **c, casenumber case_idx UNUSED) { size_t i; @@@ -563,12 -563,12 +563,12 @@@ n_vals = (*model->get_vars) (model, vars); vals = xnmalloc (n_vals, sizeof (*vals)); - output = case_data_rw (c, model->pred); - assert (output != NULL); + *c = case_unshare (*c); + output = case_data_rw (*c, model->pred); for (i = 0; i < n_vals; i++) { - vals[i] = case_data (c, vars[i]); + vals[i] = case_data (*c, vars[i]); } output->f = (*model->predict) ((const struct variable **) vars, vals, model, n_vals); @@@ -581,7 -581,7 +581,7 @@@ Gets the residuals. 
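Like the predicted-value transformation just above, the residual transformation now receives struct ccase ** and must unshare the case before writing to it, since cases are reference-counted and may be shared. The copy-on-write step, sketched with a placeholder for the computed value:

    /* Sketch of the write path now required in transformation procs. */
    *c = case_unshare (*c);                   /* get a private copy if shared */
    output = case_data_rw (*c, model->resid);
    output->f = computed_value;               /* computed_value is a placeholder */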
*/ static int -regression_trns_resid_proc (void *t_, struct ccase *c, +regression_trns_resid_proc (void *t_, struct ccase **c, casenumber case_idx UNUSED) { size_t i; @@@ -603,15 -603,14 +603,15 @@@ n_vals = (*model->get_vars) (model, vars); vals = xnmalloc (n_vals, sizeof (*vals)); - output = case_data_rw (c, model->resid); + *c = case_unshare (*c); + output = case_data_rw (*c, model->resid); assert (output != NULL); for (i = 0; i < n_vals; i++) { - vals[i] = case_data (c, vars[i]); + vals[i] = case_data (*c, vars[i]); } - obs = case_data (c, model->depvar); + obs = case_data (*c, model->depvar); output->f = (*model->residual) ((const struct variable **) vars, vals, obs, model, n_vals); free (vals); @@@ -689,21 -688,17 +689,21 @@@ subcommand_save (struct dataset *ds, in for (lc = models; lc < models + cmd.n_dependent; lc++) { - assert (*lc != NULL); - assert ((*lc)->depvar != NULL); - if (cmd.a_save[REGRESSION_SV_RESID]) - { - reg_save_var (ds, "RES", regression_trns_resid_proc, *lc, - &(*lc)->resid, n_trns); - } - if (cmd.a_save[REGRESSION_SV_PRED]) + if (*lc != NULL) { - reg_save_var (ds, "PRED", regression_trns_pred_proc, *lc, - &(*lc)->pred, n_trns); + if ((*lc)->depvar != NULL) + { + if (cmd.a_save[REGRESSION_SV_RESID]) + { + reg_save_var (ds, "RES", regression_trns_resid_proc, *lc, + &(*lc)->resid, n_trns); + } + if (cmd.a_save[REGRESSION_SV_PRED]) + { + reg_save_var (ds, "PRED", regression_trns_pred_proc, *lc, + &(*lc)->pred, n_trns); + } + } } } } @@@ -826,7 -821,7 +826,7 @@@ prepare_categories (struct casereader * struct moments_var *mom) { int n_data; - struct ccase c; + struct ccase *c; size_t i; assert (vars != NULL); @@@ -837,7 -832,7 +837,7 @@@ cat_stored_values_create (vars[i]); n_data = 0; - for (; casereader_read (input, &c); case_destroy (&c)) + for (; (c = casereader_read (input)) != NULL; case_unref (c)) { /* The second condition ensures the program will run even if @@@ -846,7 -841,7 +846,7 @@@ */ for (i = 0; i < n_vars; i++) { - const union value *val = case_data (&c, vars[i]); + const union value *val = case_data (c, vars[i]); if (var_is_alpha (vars[i])) cat_value_update (vars[i], val); else @@@ -866,6 -861,39 +866,6 @@@ coeff_init (pspp_linreg_cache * c, stru pspp_coeff_init (c->coeff, dm); } -/* - Put the moments in the linreg cache. - */ -static void -compute_moments (pspp_linreg_cache * c, struct moments_var *mom, - struct design_matrix *dm, size_t n) -{ - size_t i; - size_t j; - double weight; - double mean; - double variance; - double skewness; - double kurtosis; - /* - Scan the variable names in the columns of the design matrix. - When we find the variable we need, insert its mean in the cache. 
- */ - for (i = 0; i < dm->m->size2; i++) - { - for (j = 0; j < n; j++) - { - if (design_matrix_col_to_var (dm, i) == (mom + j)->v) - { - moments1_calculate ((mom + j)->m, &weight, &mean, &variance, - &skewness, &kurtosis); - pspp_linreg_set_indep_variable_mean (c, (mom + j)->v, mean); - pspp_linreg_set_indep_variable_sd (c, (mom + j)->v, sqrt (variance)); - } - } - } -} - static bool run_regression (struct casereader *input, struct cmd_regression *cmd, struct dataset *ds, pspp_linreg_cache **models) @@@ -873,7 -901,7 +873,7 @@@ size_t i; int n_indep = 0; int k; - struct ccase c; + struct ccase *c; const struct variable **indep_vars; struct design_matrix *X; struct moments_var *mom; @@@ -883,14 -911,13 +883,14 @@@ assert (models != NULL); - if (!casereader_peek (input, 0, &c)) + c = casereader_peek (input, 0); + if (c == NULL) { casereader_destroy (input); return true; } - output_split_file_values (ds, &c); - case_destroy (&c); + output_split_file_values (ds, c); + case_unref (c); if (!v_variables) { @@@ -922,16 -949,16 +922,16 @@@ const struct variable *dep_var; struct casereader *reader; casenumber row; - struct ccase c; + struct ccase *c; size_t n_data; /* Number of valid cases. */ dep_var = cmd->v_dependent[k]; n_indep = identify_indep_vars (indep_vars, dep_var); reader = casereader_clone (input); reader = casereader_create_filter_missing (reader, indep_vars, n_indep, - MV_ANY, NULL); + MV_ANY, NULL, NULL); reader = casereader_create_filter_missing (reader, &dep_var, 1, - MV_ANY, NULL); + MV_ANY, NULL, NULL); n_data = prepare_categories (casereader_clone (reader), indep_vars, n_indep, mom); @@@ -946,8 -973,7 +946,8 @@@ { lopts.get_indep_mean_std[i] = 1; } - models[k] = pspp_linreg_cache_alloc (X->m->size1, X->m->size2); + models[k] = pspp_linreg_cache_alloc (dep_var, (const struct variable **) indep_vars, + X->m->size1, X->m->size2); models[k]->depvar = dep_var; /* For large data sets, use QR decomposition. @@@ -961,18 -987,18 +961,18 @@@ The second pass fills the design matrix. */ reader = casereader_create_counter (reader, &row, -1); - for (; casereader_read (reader, &c); case_destroy (&c)) + for (; (c = casereader_read (reader)) != NULL; case_unref (c)) { for (i = 0; i < n_indep; ++i) { const struct variable *v = indep_vars[i]; - const union value *val = case_data (&c, v); + const union value *val = case_data (c, v); if (var_is_alpha (v)) design_matrix_set_categorical (X, row, v, val); else design_matrix_set_numeric (X, row, v, val); } - gsl_vector_set (Y, row, case_num (&c, dep_var)); + gsl_vector_set (Y, row, case_num (c, dep_var)); } /* Now that we know the number of coefficients, allocate space diff --combined src/language/stats/reliability.q index 25aff211,00000000..0e7f91a0 mode 100644,000000..100644 --- a/src/language/stats/reliability.q +++ b/src/language/stats/reliability.q @@@ -1,812 -1,0 +1,824 @@@ +/* PSPP - a program for statistical analysis. + Copyright (C) 2008, 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +#include + +#include "xalloc.h" +#include "xmalloca.h" + +#include "gettext.h" +#define _(msgid) gettext (msgid) +#define N_(msgid) msgid + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +/* (headers) */ + +/* (specification) + reliability (rel_): + *^variables=varlist("PV_NO_SCRATCH | PV_NUMERIC"); + scale=custom; + missing=miss:!exclude/include; + model=custom; + method=covariance; + +summary[sum_]=total. +*/ +/* (declarations) */ +/* (functions) */ + + +static int rel_custom_scale (struct lexer *lexer, struct dataset *ds, + struct cmd_reliability *p, void *aux); + +static int rel_custom_model (struct lexer *, struct dataset *, + struct cmd_reliability *, void *); + +int cmd_reliability (struct lexer *lexer, struct dataset *ds); + +struct cronbach +{ + const struct variable **items; + size_t n_items; + double alpha; + double sum_of_variances; + double variance_of_sums; + int totals_idx; /* Casereader index into the totals */ + + struct moments1 **m ; /* Moments of the items */ + struct moments1 *total ; /* Moments of the totals */ +}; + +#if 0 +static void +dump_cronbach (const struct cronbach *s) +{ + int i; + printf ("N items %d\n", s->n_items); + for (i = 0 ; i < s->n_items; ++i) + { + printf ("%s\n", var_get_name (s->items[i])); + } + + printf ("Totals idx %d\n", s->totals_idx); + + printf ("scale variance %g\n", s->variance_of_sums); + printf ("alpha %g\n", s->alpha); + putchar ('\n'); +} +#endif + +enum model + { + MODEL_ALPHA, + MODEL_SPLIT + }; + + +struct reliability +{ ++ const struct dictionary *dict; + const struct variable **variables; + int n_variables; + enum mv_class exclude; + + struct cronbach *sc; + int n_sc; + + int total_start; + + struct string scale_name; + + enum model model; + int split_point; +}; + + +static double +alpha (int k, double sum_of_variances, double variance_of_sums) +{ + return k / ( k - 1.0) * ( 1 - sum_of_variances / variance_of_sums); +} + +static void reliability_summary_total (const struct reliability *rel); + +static void reliability_statistics (const struct reliability *rel); + + + +static void +run_reliability (struct casereader *group, struct dataset *ds, + struct reliability *rel); + + +int +cmd_reliability (struct lexer *lexer, struct dataset *ds) +{ + int i; + bool ok = false; + struct casegrouper *grouper; + struct casereader *group; + struct cmd_reliability cmd; + - struct reliability rel = { ++ struct reliability rel = {NULL, + NULL, 0, MV_ANY, NULL, 0, -1, + DS_EMPTY_INITIALIZER, + MODEL_ALPHA, 0}; + + cmd.v_variables = NULL; + + if ( ! 
parse_reliability (lexer, ds, &cmd, &rel) ) + { + goto done; + } + ++ rel.dict = dataset_dict (ds); + rel.variables = cmd.v_variables; + rel.n_variables = cmd.n_variables; + rel.exclude = MV_ANY; + + + if (NULL == rel.sc) + { + struct cronbach *c; + /* Create a default Scale */ + + rel.n_sc = 1; + rel.sc = xzalloc (sizeof (struct cronbach) * rel.n_sc); + + ds_init_cstr (&rel.scale_name, "ANY"); + + c = &rel.sc[0]; + c->n_items = cmd.n_variables; + c->items = xzalloc (sizeof (struct variable*) * c->n_items); + + for (i = 0 ; i < c->n_items ; ++i) + c->items[i] = cmd.v_variables[i]; + } + + if ( cmd.miss == REL_INCLUDE) + rel.exclude = MV_SYSTEM; + + if ( rel.model == MODEL_SPLIT) + { + int i; + const struct cronbach *s; + + rel.n_sc += 2 ; + rel.sc = xrealloc (rel.sc, sizeof (struct cronbach) * rel.n_sc); + + s = &rel.sc[0]; + + rel.sc[1].n_items = + (rel.split_point == -1) ? s->n_items / 2 : rel.split_point; + + rel.sc[2].n_items = s->n_items - rel.sc[1].n_items; + rel.sc[1].items = xzalloc (sizeof (struct variable *) + * rel.sc[1].n_items); + + rel.sc[2].items = xzalloc (sizeof (struct variable *) * + rel.sc[2].n_items); + + for (i = 0; i < rel.sc[1].n_items ; ++i) + rel.sc[1].items[i] = s->items[i]; + + while (i < s->n_items) + { + rel.sc[2].items[i - rel.sc[1].n_items] = s->items[i]; + i++; + } + } + + if (cmd.a_summary[REL_SUM_TOTAL]) + { + int i; + const int base_sc = rel.n_sc; + + rel.total_start = base_sc; + + rel.n_sc += rel.sc[0].n_items ; + rel.sc = xrealloc (rel.sc, sizeof (struct cronbach) * rel.n_sc); + + for (i = 0 ; i < rel.sc[0].n_items; ++i ) + { + int v_src; + int v_dest = 0; + struct cronbach *s = &rel.sc[i + base_sc]; + + s->n_items = rel.sc[0].n_items - 1; + s->items = xzalloc (sizeof (struct variable *) * s->n_items); + for (v_src = 0 ; v_src < rel.sc[0].n_items ; ++v_src) + { + if ( v_src != i) + s->items[v_dest++] = rel.sc[0].items[v_src]; + } + } + } + + /* Data pass. 
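The alpha () helper above computes Cronbach's alpha directly from the per-item variances and the variance of the case-wise item totals:

    \alpha = \frac{k}{k - 1} \left( 1 - \frac{\sum_{i=1}^k s_i^2}{s_T^2} \right)

where k is the number of items, the s_i^2 are accumulated by the per-item moments in run_reliability (), and s_T^2 is the variance of the totals appended by append_sum ().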
*/ + grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); + while (casegrouper_get_next_group (grouper, &group)) + { + run_reliability (group, ds, &rel); + + reliability_statistics (&rel); + + if (cmd.a_summary[REL_SUM_TOTAL]) + reliability_summary_total (&rel); + } + ok = casegrouper_destroy (grouper); + ok = proc_commit (ds) && ok; + + free_reliability (&cmd); + + done: + + /* Free all the stuff */ + for (i = 0 ; i < rel.n_sc; ++i) + { + int x; + struct cronbach *c = &rel.sc[i]; + free (c->items); + + moments1_destroy (c->total); + + if ( c->m) + for (x = 0 ; x < c->n_items; ++x) + moments1_destroy (c->m[x]); + + free (c->m); + } + + ds_destroy (&rel.scale_name); + free (rel.sc); + + if (ok) + return CMD_SUCCESS; + + return CMD_FAILURE; +} + +/* Return the sum of all the item variables in S */ +static double +append_sum (const struct ccase *c, casenumber n UNUSED, void *aux) +{ + double sum = 0; + const struct cronbach *s = aux; + + int v; + for (v = 0 ; v < s->n_items; ++v) + { + sum += case_data (c, s->items[v])->f; + } + + return sum; +}; + + - static void case_processing_summary (casenumber n_valid, casenumber n_missing); ++static void case_processing_summary (casenumber n_valid, casenumber n_missing, ++ const struct dictionary *dict); + +static void - run_reliability (struct casereader *input, struct dataset *ds UNUSED, ++run_reliability (struct casereader *input, struct dataset *ds, + struct reliability *rel) +{ + int i; + int si; + struct ccase *c; + casenumber n_missing ; + casenumber n_valid = 0; + + + for (si = 0 ; si < rel->n_sc; ++si) + { + struct cronbach *s = &rel->sc[si]; + + s->m = xzalloc (sizeof (s->m) * s->n_items); + s->total = moments1_create (MOMENT_VARIANCE); + + for (i = 0 ; i < s->n_items ; ++i ) + s->m[i] = moments1_create (MOMENT_VARIANCE); + } + + input = casereader_create_filter_missing (input, + rel->variables, + rel->n_variables, + rel->exclude, + &n_missing, + NULL); + + for (si = 0 ; si < rel->n_sc; ++si) + { + struct cronbach *s = &rel->sc[si]; + + + s->totals_idx = casereader_get_value_cnt (input); + input = + casereader_create_append_numeric (input, append_sum, + s, NULL); + } + + for (; (c = casereader_read (input)) != NULL; case_unref (c)) + { + double weight = 1.0; + n_valid ++; + + for (si = 0; si < rel->n_sc; ++si) + { + struct cronbach *s = &rel->sc[si]; + + for (i = 0 ; i < s->n_items ; ++i ) + moments1_add (s->m[i], case_data (c, s->items[i])->f, weight); + + moments1_add (s->total, case_data_idx (c, s->totals_idx)->f, weight); + } + } + casereader_destroy (input); + + for (si = 0; si < rel->n_sc; ++si) + { + struct cronbach *s = &rel->sc[si]; + + s->sum_of_variances = 0; + for (i = 0 ; i < s->n_items ; ++i ) + { + double weight, mean, variance; + moments1_calculate (s->m[i], &weight, &mean, &variance, NULL, NULL); + + s->sum_of_variances += variance; + } + + moments1_calculate (s->total, NULL, NULL, &s->variance_of_sums, + NULL, NULL); + + s->alpha = + alpha (s->n_items, s->sum_of_variances, s->variance_of_sums); + } + + + { + struct tab_table *tab = tab_create(1, 1, 0); + + tab_dim (tab, tab_natural_dimensions); + tab_flags (tab, SOMF_NO_TITLE ); + + tab_text(tab, 0, 0, TAT_PRINTF, "Scale: %s", ds_cstr (&rel->scale_name)); + + tab_submit(tab); + } + + - case_processing_summary (n_valid, n_missing); ++ case_processing_summary (n_valid, n_missing, dataset_dict (ds)); +} + + +static void reliability_statistics_model_alpha (struct tab_table *tbl, + const struct reliability *rel); + +static void 
reliability_statistics_model_split (struct tab_table *tbl, + const struct reliability *rel); + +struct reliability_output_table +{ + int n_cols; + int n_rows; + int heading_cols; + int heading_rows; - void (*populate)(struct tab_table *, const struct reliability *); ++ void (*populate) (struct tab_table *, const struct reliability *); +}; + +static struct reliability_output_table rol[2] = + { + { 2, 2, 1, 1, reliability_statistics_model_alpha}, + { 4, 9, 3, 0, reliability_statistics_model_split} + }; + +static void +reliability_statistics (const struct reliability *rel) +{ + int n_cols = rol[rel->model].n_cols; + int n_rows = rol[rel->model].n_rows; + int heading_columns = rol[rel->model].heading_cols; + int heading_rows = rol[rel->model].heading_rows; + + struct tab_table *tbl = tab_create (n_cols, n_rows, 0); + tab_headers (tbl, heading_columns, 0, heading_rows, 0); + + tab_dim (tbl, tab_natural_dimensions); + + tab_title (tbl, _("Reliability Statistics")); + + /* Vertical lines for the data only */ + tab_box (tbl, + -1, -1, + -1, TAL_1, + heading_columns, 0, + n_cols - 1, n_rows - 1); + + /* Box around table */ + tab_box (tbl, + TAL_2, TAL_2, + -1, -1, + 0, 0, + n_cols - 1, n_rows - 1); + + + tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows); + + tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1); + + if ( rel->model == MODEL_ALPHA ) + reliability_statistics_model_alpha (tbl, rel); + else if (rel->model == MODEL_SPLIT ) + reliability_statistics_model_split (tbl, rel); + + tab_submit (tbl); +} + +static void +reliability_summary_total (const struct reliability *rel) +{ + int i; + const int n_cols = 5; + const int heading_columns = 1; + const int heading_rows = 1; + const int n_rows = rel->sc[0].n_items + heading_rows ; + + struct tab_table *tbl = tab_create (n_cols, n_rows, 0); + tab_headers (tbl, heading_columns, 0, heading_rows, 0); + + tab_dim (tbl, tab_natural_dimensions); + + tab_title (tbl, _("Item-Total Statistics")); + + /* Vertical lines for the data only */ + tab_box (tbl, + -1, -1, + -1, TAL_1, + heading_columns, 0, + n_cols - 1, n_rows - 1); + + /* Box around table */ + tab_box (tbl, + TAL_2, TAL_2, + -1, -1, + 0, 0, + n_cols - 1, n_rows - 1); + + + tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows); + + tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1); + + tab_text (tbl, 1, 0, TAB_CENTER | TAT_TITLE, + _("Scale Mean if Item Deleted")); + + tab_text (tbl, 2, 0, TAB_CENTER | TAT_TITLE, + _("Scale Variance if Item Deleted")); + + tab_text (tbl, 3, 0, TAB_CENTER | TAT_TITLE, + _("Corrected Item-Total Correlation")); + + tab_text (tbl, 4, 0, TAB_CENTER | TAT_TITLE, + _("Cronbach's Alpha if Item Deleted")); + + + for (i = 0 ; i < rel->sc[0].n_items; ++i) + { + double cov, item_to_total_r; + double mean, weight, var; + + const struct cronbach *s = &rel->sc[rel->total_start + i]; + tab_text (tbl, 0, heading_rows + i, TAB_LEFT| TAT_TITLE, + var_to_string (rel->sc[0].items[i])); + + moments1_calculate (s->total, &weight, &mean, &var, 0, 0); + - tab_float (tbl, 1, heading_rows + i, TAB_RIGHT, - mean, 8, 3); ++ tab_double (tbl, 1, heading_rows + i, TAB_RIGHT, ++ mean, NULL); + - tab_float (tbl, 2, heading_rows + i, TAB_RIGHT, - s->variance_of_sums, 8, 3); ++ tab_double (tbl, 2, heading_rows + i, TAB_RIGHT, ++ s->variance_of_sums, NULL); + - tab_float (tbl, 4, heading_rows + i, TAB_RIGHT, - s->alpha, 8, 3); ++ tab_double (tbl, 4, heading_rows + i, TAB_RIGHT, ++ s->alpha, NULL); + + + moments1_calculate (rel->sc[0].m[i], &weight, &mean, &var, 0,0); + cov = 
rel->sc[0].variance_of_sums + var - s->variance_of_sums; + cov /= 2.0; + + item_to_total_r = (cov - var) / (sqrt(var) * sqrt (s->variance_of_sums)); + + - tab_float (tbl, 3, heading_rows + i, TAB_RIGHT, - item_to_total_r, 8, 3); ++ tab_double (tbl, 3, heading_rows + i, TAB_RIGHT, ++ item_to_total_r, NULL); + } + + + tab_submit (tbl); +} + + +static void +reliability_statistics_model_alpha (struct tab_table *tbl, + const struct reliability *rel) +{ ++ const struct variable *wv = dict_get_weight (rel->dict); ++ const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0; ++ + const struct cronbach *s = &rel->sc[0]; + + tab_text (tbl, 0, 0, TAB_CENTER | TAT_TITLE, + _("Cronbach's Alpha")); + + tab_text (tbl, 1, 0, TAB_CENTER | TAT_TITLE, + _("N of items")); + - tab_float (tbl, 0, 1, TAB_RIGHT, s->alpha, 8, 3); ++ tab_double (tbl, 0, 1, TAB_RIGHT, s->alpha, NULL); + - tab_float (tbl, 1, 1, TAB_RIGHT, s->n_items, 8, 0); ++ tab_double (tbl, 1, 1, TAB_RIGHT, s->n_items, wfmt); +} + + +static void +reliability_statistics_model_split (struct tab_table *tbl, + const struct reliability *rel) +{ ++ const struct variable *wv = dict_get_weight (rel->dict); ++ const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0; ++ + tab_text (tbl, 0, 0, TAB_LEFT, + _("Cronbach's Alpha")); + + tab_text (tbl, 1, 0, TAB_LEFT, + _("Part 1")); + + tab_text (tbl, 2, 0, TAB_LEFT, + _("Value")); + + tab_text (tbl, 2, 1, TAB_LEFT, + _("N of Items")); + + + + tab_text (tbl, 1, 2, TAB_LEFT, + _("Part 2")); + + tab_text (tbl, 2, 2, TAB_LEFT, + _("Value")); + + tab_text (tbl, 2, 3, TAB_LEFT, + _("N of Items")); + + + + tab_text (tbl, 1, 4, TAB_LEFT, + _("Total N of Items")); + + tab_text (tbl, 0, 5, TAB_LEFT, + _("Correlation Between Forms")); + + + tab_text (tbl, 0, 6, TAB_LEFT, + _("Spearman-Brown Coefficient")); + + tab_text (tbl, 1, 6, TAB_LEFT, + _("Equal Length")); + + tab_text (tbl, 1, 7, TAB_LEFT, + _("Unequal Length")); + + + tab_text (tbl, 0, 8, TAB_LEFT, + _("Guttman Split-Half Coefficient")); + + + - tab_float (tbl, 3, 0, TAB_RIGHT, rel->sc[1].alpha, 8, 3); - tab_float (tbl, 3, 2, TAB_RIGHT, rel->sc[2].alpha, 8, 3); ++ tab_double (tbl, 3, 0, TAB_RIGHT, rel->sc[1].alpha, NULL); ++ tab_double (tbl, 3, 2, TAB_RIGHT, rel->sc[2].alpha, NULL); + - tab_float (tbl, 3, 1, TAB_RIGHT, rel->sc[1].n_items, 8, 0); - tab_float (tbl, 3, 3, TAB_RIGHT, rel->sc[2].n_items, 8, 0); ++ tab_double (tbl, 3, 1, TAB_RIGHT, rel->sc[1].n_items, wfmt); ++ tab_double (tbl, 3, 3, TAB_RIGHT, rel->sc[2].n_items, wfmt); + - tab_float (tbl, 3, 4, TAB_RIGHT, - rel->sc[1].n_items + rel->sc[2].n_items, 8, 0); ++ tab_double (tbl, 3, 4, TAB_RIGHT, ++ rel->sc[1].n_items + rel->sc[2].n_items, wfmt); + + { + /* R is the correlation between the two parts */ + double r = rel->sc[0].variance_of_sums - + rel->sc[1].variance_of_sums - + rel->sc[2].variance_of_sums ; + + /* Guttman Split Half Coefficient */ + double g = 2 * r / rel->sc[0].variance_of_sums; + + /* Unequal Length Spearman Brown Coefficient, and + intermediate value used in the computation thereof */ + double uly, tmp; + + r /= sqrt (rel->sc[1].variance_of_sums); + r /= sqrt (rel->sc[2].variance_of_sums); + r /= 2.0; + - tab_float (tbl, 3, 5, TAB_RIGHT, r, 8, 3); ++ tab_double (tbl, 3, 5, TAB_RIGHT, r, NULL); + + /* Equal length Spearman-Brown Coefficient */ - tab_float (tbl, 3, 6, TAB_RIGHT, 2 * r / (1.0 + r), 8, 3); ++ tab_double (tbl, 3, 6, TAB_RIGHT, 2 * r / (1.0 + r), NULL); + - tab_float (tbl, 3, 8, TAB_RIGHT, g, 8, 3); ++ tab_double (tbl, 3, 8, TAB_RIGHT, g, NULL); + + 
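The values written into the split-half table appear to follow the standard definitions: with Var(X), Var(X_1) and Var(X_2) the variances of the whole scale and of its two parts, the statements above form the correlation between forms r, the Guttman coefficient g, and the equal-length Spearman-Brown value, while the lines just below solve for the unequal-length Spearman-Brown coefficient. A minimal restatement of that algebra, assuming those are indeed the intended quantities:

    r = [Var(X) - Var(X_1) - Var(X_2)] / (2 \sqrt{Var(X_1) Var(X_2)})
    g = 2 [Var(X) - Var(X_1) - Var(X_2)] / Var(X)
    \rho_{equal} = 2 r / (1 + r)

and the unequal-length value is the positive root y of

    t y^2 + r^2 y - r^2 = 0,    with    t = (1 - r^2) n_1 n_2 / N^2,

where n_1 and n_2 are the numbers of items in the two parts and N is the total number of items in the scale.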
tmp = (1.0 - r*r) * rel->sc[1].n_items * rel->sc[2].n_items / + pow2 (rel->sc[0].n_items); + + uly = sqrt( pow4 (r) + 4 * pow2 (r) * tmp); + uly -= pow2 (r); + uly /= 2 * tmp; + - tab_float (tbl, 3, 7, TAB_RIGHT, uly, 8, 3); - ++ tab_double (tbl, 3, 7, TAB_RIGHT, uly, NULL); + } +} + + + +static void - case_processing_summary (casenumber n_valid, casenumber n_missing) ++case_processing_summary (casenumber n_valid, casenumber n_missing, ++ const struct dictionary *dict) +{ ++ const struct variable *wv = dict_get_weight (dict); ++ const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : & F_8_0; ++ + casenumber total; + int n_cols = 4; + int n_rows = 4; + int heading_columns = 2; + int heading_rows = 1; + struct tab_table *tbl; + tbl = tab_create (n_cols, n_rows, 0); + tab_headers (tbl, heading_columns, 0, heading_rows, 0); + + tab_dim (tbl, tab_natural_dimensions); + + tab_title (tbl, _("Case Processing Summary")); + + /* Vertical lines for the data only */ + tab_box (tbl, + -1, -1, + -1, TAL_1, + heading_columns, 0, + n_cols - 1, n_rows - 1); + + /* Box around table */ + tab_box (tbl, + TAL_2, TAL_2, + -1, -1, + 0, 0, + n_cols - 1, n_rows - 1); + + + tab_hline (tbl, TAL_2, 0, n_cols - 1, heading_rows); + + tab_vline (tbl, TAL_2, heading_columns, 0, n_rows - 1); + + + tab_text (tbl, 0, heading_rows, TAB_LEFT | TAT_TITLE, + _("Cases")); + + tab_text (tbl, 1, heading_rows, TAB_LEFT | TAT_TITLE, + _("Valid")); + + tab_text (tbl, 1, heading_rows + 1, TAB_LEFT | TAT_TITLE, + _("Excluded")); + + tab_text (tbl, 1, heading_rows + 2, TAB_LEFT | TAT_TITLE, + _("Total")); + + tab_text (tbl, heading_columns, 0, TAB_CENTER | TAT_TITLE, + _("N")); + + tab_text (tbl, heading_columns + 1, 0, TAB_CENTER | TAT_TITLE | TAT_PRINTF, + _("%%")); + + total = n_missing + n_valid; + - tab_float (tbl, 2, heading_rows, TAB_RIGHT, - n_valid, 8, 0); ++ tab_double (tbl, 2, heading_rows, TAB_RIGHT, ++ n_valid, wfmt); + + - tab_float (tbl, 2, heading_rows + 1, TAB_RIGHT, - n_missing, 8, 0); ++ tab_double (tbl, 2, heading_rows + 1, TAB_RIGHT, ++ n_missing, wfmt); + + - tab_float (tbl, 2, heading_rows + 2, TAB_RIGHT, - total, 8, 0); ++ tab_double (tbl, 2, heading_rows + 2, TAB_RIGHT, ++ total, wfmt); + + - tab_float (tbl, 3, heading_rows, TAB_RIGHT, - 100 * n_valid / (double) total, 8, 1); ++ tab_double (tbl, 3, heading_rows, TAB_RIGHT, ++ 100 * n_valid / (double) total, NULL); + + - tab_float (tbl, 3, heading_rows + 1, TAB_RIGHT, - 100 * n_missing / (double) total, 8, 1); ++ tab_double (tbl, 3, heading_rows + 1, TAB_RIGHT, ++ 100 * n_missing / (double) total, NULL); + + - tab_float (tbl, 3, heading_rows + 2, TAB_RIGHT, - 100 * total / (double) total, 8, 1); ++ tab_double (tbl, 3, heading_rows + 2, TAB_RIGHT, ++ 100 * total / (double) total, NULL); + + + tab_submit (tbl); +} + +static int +rel_custom_model (struct lexer *lexer, struct dataset *ds UNUSED, + struct cmd_reliability *cmd UNUSED, void *aux) +{ + struct reliability *rel = aux; + + if (lex_match_id (lexer, "ALPHA")) + { + rel->model = MODEL_ALPHA; + } + else if (lex_match_id (lexer, "SPLIT")) + { + rel->model = MODEL_SPLIT; + rel->split_point = -1; + if ( lex_match (lexer, '(')) + { + lex_force_num (lexer); + rel->split_point = lex_number (lexer); + lex_get (lexer); + lex_force_match (lexer, ')'); + } + } + else + return 0; + + return 1; +} + + + +static int +rel_custom_scale (struct lexer *lexer, struct dataset *ds UNUSED, + struct cmd_reliability *p, void *aux) +{ + struct const_var_set *vs; + struct reliability *rel = aux; + struct cronbach *scale; + + 
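Taken together, rel_custom_model above and rel_custom_scale, whose body continues below, parse the MODEL and SCALE subcommands of the new RELIABILITY command. A purely illustrative example of the syntax they appear to accept (the variable names and scale label here are hypothetical):

    RELIABILITY
            /VARIABLES=item1 item2 item3 item4
            /SCALE('mytest') = item1 item2 item3
            /MODEL=SPLIT(2)
            /SUMMARY=TOTAL.

which should request a split-half analysis with two items in the first part, plus the item-total output produced by reliability_summary_total.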
rel->n_sc = 1; + rel->sc = xzalloc (sizeof (struct cronbach) * rel->n_sc); + scale = &rel->sc[0]; + + if ( ! lex_force_match (lexer, '(')) return 0; + + if ( ! lex_force_string (lexer) ) return 0; + + ds_init_string (&rel->scale_name, lex_tokstr (lexer)); + + lex_get (lexer); + + if ( ! lex_force_match (lexer, ')')) return 0; + + lex_match (lexer, '='); + + vs = const_var_set_create_from_array (p->v_variables, p->n_variables); + + if (!parse_const_var_set_vars (lexer, vs, &scale->items, &scale->n_items, 0)) + { + const_var_set_destroy (vs); + return 2; + } + + const_var_set_destroy (vs); + return 1; +} + +/* + Local Variables: + mode: c + End: +*/ diff --combined src/language/stats/t-test.q index bc6023c7,40bf9778..5f5fc56f --- a/src/language/stats/t-test.q +++ b/src/language/stats/t-test.q @@@ -1,5 -1,5 +1,5 @@@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@@ -43,6 -43,7 +43,7 @@@ #include #include #include + #include #include "xalloc.h" @@@ -80,8 -81,8 +81,8 @@@ struct group_propertie /* The comparison criterion */ enum comparison criterion; - /* The width of the independent variable */ - int indep_width ; + /* The independent variable */ + struct variable *indep_var; union { /* The value of the independent variable at which groups are determined to @@@ -152,7 -153,8 +153,8 @@@ static int parse_value (struct lexer *l /* Structures and Functions for the Statistics Summary Box */ struct ssbox; typedef void populate_ssbox_func (struct ssbox *ssb, - struct cmd_t_test *cmd); + const struct dictionary *, + struct cmd_t_test *cmd); typedef void finalize_ssbox_func (struct ssbox *ssb); struct ssbox @@@ -168,21 -170,23 +170,23 @@@ void ssbox_create (struct ssbox *ssb, struct cmd_t_test *cmd, int mode); /* Populate a ssbox according to cmd */ - void ssbox_populate (struct ssbox *ssb, struct cmd_t_test *cmd); + void ssbox_populate (struct ssbox *ssb, const struct dictionary *dict, + struct cmd_t_test *cmd); /* Submit and destroy a ssbox */ void ssbox_finalize (struct ssbox *ssb); /* A function to create, populate and submit the Paired Samples Correlation box */ - void pscbox (void); + static void pscbox (const struct dictionary *); /* Structures and Functions for the Test Results Box */ struct trbox; typedef void populate_trbox_func (struct trbox *trb, - struct cmd_t_test *cmd); + const struct dictionary *dict, + struct cmd_t_test *cmd); typedef void finalize_trbox_func (struct trbox *trb); struct trbox { @@@ -195,7 -199,8 +199,8 @@@ void trbox_create (struct trbox *trb, struct cmd_t_test *cmd, int mode); /* Populate a ssbox according to cmd */ - void trbox_populate (struct trbox *trb, struct cmd_t_test *cmd); + static void trbox_populate (struct trbox *trb, const struct dictionary *dict, + struct cmd_t_test *cmd); /* Submit and destroy a ssbox */ void trbox_finalize (struct trbox *trb); @@@ -637,9 -642,10 +642,10 @@@ ssbox_create (struct ssbox *ssb, struc /* Despatcher for the populate method */ void - ssbox_populate (struct ssbox *ssb,struct cmd_t_test *cmd) + ssbox_populate (struct ssbox *ssb, const struct dictionary *dict, + struct cmd_t_test *cmd) { - ssb->populate (ssb,cmd); + ssb->populate (ssb, dict, cmd); } @@@ -675,7 -681,8 +681,8 @@@ ssbox_base_init (struct ssbox *this, in } void ssbox_one_sample_populate (struct ssbox *ssb, - 
struct cmd_t_test *cmd); + const struct dictionary *, + struct cmd_t_test *cmd); /* Initialize the one_sample ssbox */ void @@@ -696,8 -703,9 +703,9 @@@ ssbox_one_sample_init (struct ssbox *th tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _ ("SE. Mean")); } - void ssbox_independent_samples_populate (struct ssbox *ssb, - struct cmd_t_test *cmd); + static void ssbox_independent_samples_populate (struct ssbox *ssb, + const struct dictionary *, + struct cmd_t_test *cmd); /* Initialize the independent samples ssbox */ void @@@ -721,12 -729,16 +729,16 @@@ ssbox_independent_samples_init (struct /* Populate the ssbox for independent samples */ - void + static void ssbox_independent_samples_populate (struct ssbox *ssb, - struct cmd_t_test *cmd) + const struct dictionary *dict, + struct cmd_t_test *cmd) { int i; + const struct variable *wv = dict_get_weight (dict); + const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; + char *val_lab[2] = {NULL, NULL}; double indep_value[2]; @@@ -818,10 -830,10 +830,10 @@@ gs = hsh_find (grp_hash, (void *) &search_val); assert (gs); - tab_float (ssb->t, 2 ,i*2+count+1, TAB_RIGHT, gs->n, 10, 0); - tab_float (ssb->t, 3 ,i*2+count+1, TAB_RIGHT, gs->mean, 8, 2); - tab_float (ssb->t, 4 ,i*2+count+1, TAB_RIGHT, gs->std_dev, 8, 3); - tab_float (ssb->t, 5 ,i*2+count+1, TAB_RIGHT, gs->se_mean, 8, 3); + tab_double (ssb->t, 2, i*2+count+1, TAB_RIGHT, gs->n, wfmt); + tab_double (ssb->t, 3, i*2+count+1, TAB_RIGHT, gs->mean, NULL); + tab_double (ssb->t, 4, i*2+count+1, TAB_RIGHT, gs->std_dev, NULL); + tab_double (ssb->t, 5, i*2+count+1, TAB_RIGHT, gs->se_mean, NULL); } } free (val_lab[0]); @@@ -829,8 -841,9 +841,9 @@@ } - void ssbox_paired_populate (struct ssbox *ssb, - struct cmd_t_test *cmd); + static void ssbox_paired_populate (struct ssbox *ssb, + const struct dictionary *dict, + struct cmd_t_test *cmd); /* Initialize the paired values ssbox */ void @@@ -855,10 -868,14 +868,14 @@@ ssbox_paired_init (struct ssbox *this, /* Populate the ssbox for paired values */ void - ssbox_paired_populate (struct ssbox *ssb,struct cmd_t_test *cmd UNUSED) + ssbox_paired_populate (struct ssbox *ssb, const struct dictionary *dict, + struct cmd_t_test *cmd UNUSED) { int i; + const struct variable *wv = dict_get_weight (dict); + const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; + assert (ssb->t); for (i=0; i < n_pairs; ++i) @@@ -879,10 -896,11 +896,11 @@@ var_get_name (pairs[i].v[j])); /* Values */ - tab_float (ssb->t,2, i*2+j+1, TAB_RIGHT, pairs[i].mean[j], 8, 2); - tab_float (ssb->t,3, i*2+j+1, TAB_RIGHT, pairs[i].n, 10, 0); - tab_float (ssb->t,4, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j], 8, 3); - tab_float (ssb->t,5, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j]/sqrt (pairs[i].n), 8, 3); + tab_double (ssb->t,2, i*2+j+1, TAB_RIGHT, pairs[i].mean[j], NULL); + tab_double (ssb->t,3, i*2+j+1, TAB_RIGHT, pairs[i].n, wfmt); + tab_double (ssb->t,4, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j], NULL); + tab_double (ssb->t,5, i*2+j+1, TAB_RIGHT, + pairs[i].std_dev[j]/sqrt (pairs[i].n), NULL); } } @@@ -890,10 -908,14 +908,14 @@@ /* Populate the one sample ssbox */ void - ssbox_one_sample_populate (struct ssbox *ssb, struct cmd_t_test *cmd) + ssbox_one_sample_populate (struct ssbox *ssb, const struct dictionary *dict, + struct cmd_t_test *cmd) { int i; + const struct variable *wv = dict_get_weight (dict); + const struct fmt_spec *wfmt = wv ? 
var_get_print_format (wv) : &F_8_0; + assert (ssb->t); for (i=0; i < cmd->n_variables; ++i) @@@ -901,12 -923,11 +923,11 @@@ struct group_statistics *gs = &group_proc_get (cmd->v_variables[i])->ugs; tab_text (ssb->t, 0, i+1, TAB_LEFT, var_get_name (cmd->v_variables[i])); - tab_float (ssb->t,1, i+1, TAB_RIGHT, gs->n, 10, 0); - tab_float (ssb->t,2, i+1, TAB_RIGHT, gs->mean, 8, 2); - tab_float (ssb->t,3, i+1, TAB_RIGHT, gs->std_dev, 8, 2); - tab_float (ssb->t,4, i+1, TAB_RIGHT, gs->se_mean, 8, 3); + tab_double (ssb->t,1, i+1, TAB_RIGHT, gs->n, wfmt); + tab_double (ssb->t,2, i+1, TAB_RIGHT, gs->mean, NULL); + tab_double (ssb->t,3, i+1, TAB_RIGHT, gs->std_dev, NULL); + tab_double (ssb->t,4, i+1, TAB_RIGHT, gs->se_mean, NULL); } - } @@@ -919,20 -940,23 +940,23 @@@ void trbox_base_finalize (struct trbox void trbox_independent_samples_init (struct trbox *trb, struct cmd_t_test *cmd ); - void trbox_independent_samples_populate (struct trbox *trb, - struct cmd_t_test *cmd); + static void trbox_independent_samples_populate (struct trbox *trb, + const struct dictionary *dict, + struct cmd_t_test *cmd); void trbox_one_sample_init (struct trbox *self, struct cmd_t_test *cmd ); - void trbox_one_sample_populate (struct trbox *trb, - struct cmd_t_test *cmd); + static void trbox_one_sample_populate (struct trbox *trb, + const struct dictionary *, + struct cmd_t_test *cmd); void trbox_paired_init (struct trbox *self, struct cmd_t_test *cmd ); - void trbox_paired_populate (struct trbox *trb, - struct cmd_t_test *cmd); + static void trbox_paired_populate (struct trbox *trb, + const struct dictionary *, + struct cmd_t_test *cmd); @@@ -958,10 -982,11 +982,11 @@@ trbox_create (struct trbox *trb } /* Populate a trbox according to cmd */ - void - trbox_populate (struct trbox *trb, struct cmd_t_test *cmd) + static void + trbox_populate (struct trbox *trb, const struct dictionary *dict, + struct cmd_t_test *cmd) { - trb->populate (trb,cmd); + trb->populate (trb, dict, cmd); } /* Submit and destroy a trbox */ @@@ -1012,9 -1037,10 +1037,10 @@@ trbox_independent_samples_init (struct } /* Populate the independent samples trbox */ - void + static void trbox_independent_samples_populate (struct trbox *self, - struct cmd_t_test *cmd ) + const struct dictionary *dict UNUSED, + struct cmd_t_test *cmd) { int i; @@@ -1064,16 -1090,16 +1090,16 @@@ tab_text (self->t, 1, i*2+3, TAB_LEFT, _ ("Equal variances assumed")); - tab_float (self->t, 2, i*2+3, TAB_CENTER, grp_data->levene, 8,3); + tab_double (self->t, 2, i*2+3, TAB_CENTER, grp_data->levene, NULL); /* Now work out the significance of the Levene test */ df1 = 1; df2 = grp_data->ugs.n - 2; q = gsl_cdf_fdist_Q (grp_data->levene, df1, df2); - tab_float (self->t, 3, i*2+3, TAB_CENTER, q, 8,3 ); + tab_double (self->t, 3, i*2+3, TAB_CENTER, q, NULL); df = gs0->n + gs1->n - 2.0 ; - tab_float (self->t, 5, i*2+3, TAB_RIGHT, df, 10, 0); + tab_double (self->t, 5, i*2+3, TAB_RIGHT, df, NULL); pooled_variance = ( (gs0->n )*pow2 (gs0->s_std_dev) + @@@ -1083,30 -1109,30 +1109,30 @@@ t = (gs0->mean - gs1->mean) / sqrt (pooled_variance) ; t /= sqrt ((gs0->n + gs1->n)/ (gs0->n*gs1->n)); - tab_float (self->t, 4, i*2+3, TAB_RIGHT, t, 8, 3); + tab_double (self->t, 4, i*2+3, TAB_RIGHT, t, NULL); p = gsl_cdf_tdist_P (t, df); q = gsl_cdf_tdist_Q (t, df); - tab_float (self->t, 6, i*2+3, TAB_RIGHT, 2.0* (t>0?q:p) , 8, 3); + tab_double (self->t, 6, i*2+3, TAB_RIGHT, 2.0* (t>0?q:p), NULL); mean_diff = gs0->mean - gs1->mean; - tab_float (self->t, 7, i*2+3, TAB_RIGHT, mean_diff, 8, 3); + tab_double 
(self->t, 7, i*2+3, TAB_RIGHT, mean_diff, NULL); std_err_diff = sqrt ( pow2 (gs0->se_mean) + pow2 (gs1->se_mean)); - tab_float (self->t, 8, i*2+3, TAB_RIGHT, std_err_diff, 8, 3); + tab_double (self->t, 8, i*2+3, TAB_RIGHT, std_err_diff, NULL); /* Now work out the confidence interval */ q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ t = gsl_cdf_tdist_Qinv (q,df); - tab_float (self->t, 9, i*2+3, TAB_RIGHT, - mean_diff - t * std_err_diff, 8, 3); + tab_double (self->t, 9, i*2+3, TAB_RIGHT, + mean_diff - t * std_err_diff, NULL); - tab_float (self->t, 10, i*2+3, TAB_RIGHT, - mean_diff + t * std_err_diff, 8, 3); + tab_double (self->t, 10, i*2+3, TAB_RIGHT, + mean_diff + t * std_err_diff, NULL); { @@@ -1120,7 -1146,7 +1146,7 @@@ (pow2 (gs1->s_std_dev)/ (gs1->n -1) ); t = mean_diff / sqrt (se2) ; - tab_float (self->t, 4, i*2+3+1, TAB_RIGHT, t, 8, 3); + tab_double (self->t, 4, i*2+3+1, TAB_RIGHT, t, NULL); df = pow2 (se2) / ( (pow2 (pow2 (gs0->s_std_dev)/ (gs0->n - 1 )) @@@ -1131,30 -1157,30 +1157,30 @@@ / (gs1->n -1 ) ) ) ; - tab_float (self->t, 5, i*2+3+1, TAB_RIGHT, df, 8, 3); + + tab_double (self->t, 5, i*2+3+1, TAB_RIGHT, df, NULL); p = gsl_cdf_tdist_P (t, df); q = gsl_cdf_tdist_Q (t, df); - tab_float (self->t, 6, i*2+3+1, TAB_RIGHT, 2.0* (t>0?q:p) , 8, 3); + tab_double (self->t, 6, i*2+3+1, TAB_RIGHT, 2.0* (t>0?q:p), NULL); /* Now work out the confidence interval */ q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ t = gsl_cdf_tdist_Qinv (q, df); - tab_float (self->t, 7, i*2+3+1, TAB_RIGHT, mean_diff, 8, 3); - + tab_double (self->t, 7, i*2+3+1, TAB_RIGHT, mean_diff, NULL); - tab_float (self->t, 8, i*2+3+1, TAB_RIGHT, std_err_diff, 8, 3); + tab_double (self->t, 8, i*2+3+1, TAB_RIGHT, std_err_diff, NULL); - tab_float (self->t, 9, i*2+3+1, TAB_RIGHT, - mean_diff - t * std_err_diff, 8, 3); - tab_float (self->t, 10, i*2+3+1, TAB_RIGHT, - mean_diff + t * std_err_diff, 8, 3); + tab_double (self->t, 9, i*2+3+1, TAB_RIGHT, + mean_diff - t * std_err_diff, NULL); + tab_double (self->t, 10, i*2+3+1, TAB_RIGHT, + mean_diff + t * std_err_diff, NULL); } } } @@@ -1195,12 -1221,16 +1221,16 @@@ trbox_paired_init (struct trbox *self } /* Populate the paired samples trbox */ - void + static void trbox_paired_populate (struct trbox *trb, - struct cmd_t_test *cmd UNUSED) + const struct dictionary *dict, + struct cmd_t_test *cmd UNUSED) { int i; + const struct variable *wv = dict_get_weight (dict); + const struct fmt_spec *wfmt = wv ? 
var_get_print_format (wv) : &F_8_0; + for (i=0; i < n_pairs; ++i) { double p,q; @@@ -1216,42 -1246,42 +1246,42 @@@ var_get_name (pairs[i].v[0]), var_get_name (pairs[i].v[1])); - tab_float (trb->t, 2, i+3, TAB_RIGHT, pairs[i].mean_diff, 8, 4); + tab_double (trb->t, 2, i+3, TAB_RIGHT, pairs[i].mean_diff, NULL); - tab_float (trb->t, 3, i+3, TAB_RIGHT, pairs[i].std_dev_diff, 8, 5); + tab_double (trb->t, 3, i+3, TAB_RIGHT, pairs[i].std_dev_diff, NULL); /* SE Mean */ se_mean = pairs[i].std_dev_diff / sqrt (n) ; - tab_float (trb->t, 4, i+3, TAB_RIGHT, se_mean, 8,5 ); + tab_double (trb->t, 4, i+3, TAB_RIGHT, se_mean, NULL); /* Now work out the confidence interval */ q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ t = gsl_cdf_tdist_Qinv (q, df); - tab_float (trb->t, 5, i+3, TAB_RIGHT, - pairs[i].mean_diff - t * se_mean , 8, 4); + tab_double (trb->t, 5, i+3, TAB_RIGHT, + pairs[i].mean_diff - t * se_mean , NULL); - tab_float (trb->t, 6, i+3, TAB_RIGHT, - pairs[i].mean_diff + t * se_mean , 8, 4); + tab_double (trb->t, 6, i+3, TAB_RIGHT, + pairs[i].mean_diff + t * se_mean , NULL); t = (pairs[i].mean[0] - pairs[i].mean[1]) / sqrt ( - ( pow2 (pairs[i].s_std_dev[0]) + pow2 (pairs[i].s_std_dev[1]) - + ( pow2 (pairs[i].s_std_dev[0]) + pow2 (pairs[i].s_std_dev[1]) - 2 * pairs[i].correlation * pairs[i].s_std_dev[0] * pairs[i].s_std_dev[1] ) / (n - 1) ); - tab_float (trb->t, 7, i+3, TAB_RIGHT, t , 8,3 ); + tab_double (trb->t, 7, i+3, TAB_RIGHT, t, NULL); /* Degrees of freedom */ - tab_float (trb->t, 8, i+3, TAB_RIGHT, df , 10, 0 ); + tab_double (trb->t, 8, i+3, TAB_RIGHT, df, wfmt); p = gsl_cdf_tdist_P (t,df); q = gsl_cdf_tdist_P (t,df); - tab_float (trb->t, 9, i+3, TAB_RIGHT, 2.0* (t>0?q:p) , 8, 3); + tab_double (trb->t, 9, i+3, TAB_RIGHT, 2.0* (t>0?q:p), NULL); } } @@@ -1293,11 -1323,16 +1323,16 @@@ trbox_one_sample_init (struct trbox *se /* Populate the one sample trbox */ - void - trbox_one_sample_populate (struct trbox *trb, struct cmd_t_test *cmd) + static void + trbox_one_sample_populate (struct trbox *trb, + const struct dictionary *dict, + struct cmd_t_test *cmd) { int i; + const struct variable *wv = dict_get_weight (dict); + const struct fmt_spec *wfmt = wv ? 
var_get_print_format (wv) : &F_8_0; + assert (trb->t); for (i=0; i < cmd->n_variables; ++i) @@@ -1312,31 -1347,31 +1347,31 @@@ t = (gs->mean - cmd->n_testval[0] ) * sqrt (gs->n) / gs->std_dev ; - tab_float (trb->t, 1, i+3, TAB_RIGHT, t, 8,3); + tab_double (trb->t, 1, i+3, TAB_RIGHT, t, NULL); /* degrees of freedom */ df = gs->n - 1; - tab_float (trb->t, 2, i+3, TAB_RIGHT, df, 8,0); + tab_double (trb->t, 2, i+3, TAB_RIGHT, df, wfmt); p = gsl_cdf_tdist_P (t, df); q = gsl_cdf_tdist_Q (t, df); /* Multiply by 2 to get 2-tailed significance, makeing sure we've got the correct tail*/ - tab_float (trb->t, 3, i+3, TAB_RIGHT, 2.0* (t>0?q:p), 8,3); + tab_double (trb->t, 3, i+3, TAB_RIGHT, 2.0* (t>0?q:p), NULL); - tab_float (trb->t, 4, i+3, TAB_RIGHT, gs->mean_diff, 8,3); + tab_double (trb->t, 4, i+3, TAB_RIGHT, gs->mean_diff, NULL); q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ t = gsl_cdf_tdist_Qinv (q, df); - tab_float (trb->t, 5, i+3, TAB_RIGHT, - gs->mean_diff - t * gs->se_mean, 8,4); + tab_double (trb->t, 5, i+3, TAB_RIGHT, + gs->mean_diff - t * gs->se_mean, NULL); - tab_float (trb->t, 6, i+3, TAB_RIGHT, - gs->mean_diff + t * gs->se_mean, 8,4); + tab_double (trb->t, 6, i+3, TAB_RIGHT, + gs->mean_diff + t * gs->se_mean, NULL); } } @@@ -1364,11 -1399,14 +1399,14 @@@ trbox_base_finalize (struct trbox *trb /* Create , populate and submit the Paired Samples Correlation box */ - void - pscbox (void) + static void + pscbox (const struct dictionary *dict) { - const int rows=1+n_pairs; - const int cols=5; + const struct variable *wv = dict_get_weight (dict); + const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; + + const int rows = 1 + n_pairs; + const int cols = 5; int i; struct tab_table *table; @@@ -1410,13 -1448,13 +1448,13 @@@ /* row data */ - tab_float (table, 2, i+1, TAB_RIGHT, pairs[i].n, 4, 0); - tab_float (table, 3, i+1, TAB_RIGHT, pairs[i].correlation, 8, 3); + tab_double (table, 2, i+1, TAB_RIGHT, pairs[i].n, wfmt); + tab_double (table, 3, i+1, TAB_RIGHT, pairs[i].correlation, NULL); p = gsl_cdf_tdist_P (correlation_t, df); q = gsl_cdf_tdist_Q (correlation_t, df); - tab_float (table, 4, i+1, TAB_RIGHT, 2.0* (correlation_t>0?q:p), 8, 3); + tab_double (table, 4, i+1, TAB_RIGHT, 2.0* (correlation_t>0?q:p), NULL); } tab_submit (table); @@@ -1459,7 -1497,7 +1497,7 @@@ common_calc (const struct dictionary *d gs->n += weight; gs->sum += weight * val->f; - gs->ssq += weight * val->f * val->f; + gs->ssq += weight * pow2 (val->f); } } return 0; @@@ -1496,12 -1534,12 +1534,12 @@@ common_postcalc (struct cmd_t_test *cmd gs->mean=gs->sum / gs->n; gs->s_std_dev= sqrt ( - ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) + ( (gs->ssq / gs->n ) - pow2 (gs->mean)) ) ; gs->std_dev= sqrt ( gs->n/ (gs->n-1) * - ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) + ( (gs->ssq / gs->n ) - pow2 (gs->mean)) ) ; gs->se_mean = gs->std_dev / sqrt (gs->n); @@@ -1676,7 -1714,7 +1714,7 @@@ group_precalc (struct cmd_t_test *cmd /* There's always 2 groups for a T - TEST */ ttpr->n_groups = 2; - gp.indep_width = var_get_width (indep_var); + gp.indep_var = indep_var; ttpr->group_hash = hsh_create (2, (hsh_compare_func *) compare_group_binary, @@@ -1772,12 -1810,12 +1810,12 @@@ group_postcalc ( struct cmd_t_test *cm gs->mean = gs->sum / gs->n; gs->s_std_dev= sqrt ( - ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) + ( (gs->ssq / gs->n ) - pow2 (gs->mean)) ) ; gs->std_dev= sqrt ( gs->n/ (gs->n-1) * - ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) + ( (gs->ssq / gs->n ) - pow2 (gs->mean)) ) ; gs->se_mean = gs->std_dev / 
sqrt (gs->n); @@@ -1799,24 -1837,23 +1837,24 @@@ calculate (struct cmd_t_test *cmd struct casereader *pass1, *pass2, *pass3; struct taint *taint; - struct ccase c; + struct ccase *c; enum mv_class exclude = cmd->miss != TTS_INCLUDE ? MV_ANY : MV_SYSTEM; - if (!casereader_peek (input, 0, &c)) + c = casereader_peek (input, 0); + if (c == NULL) { casereader_destroy (input); return; } - output_split_file_values (ds, &c); - case_destroy (&c); + output_split_file_values (ds, c); + case_unref (c); if ( cmd->miss == TTS_LISTWISE ) input = casereader_create_filter_missing (input, cmd->v_variables, cmd->n_variables, - exclude, NULL); + exclude, NULL, NULL); input = casereader_create_filter_weight (input, dict, NULL, NULL); @@@ -1824,8 -1861,8 +1862,8 @@@ casereader_split (input, &pass1, &pass2); common_precalc (cmd); - for (; casereader_read (pass1, &c); case_destroy (&c)) - common_calc (dict, &c, cmd, exclude); + for (; (c = casereader_read (pass1)) != NULL; case_unref (c)) + common_calc (dict, c, cmd, exclude); casereader_destroy (pass1); common_postcalc (cmd); @@@ -1833,22 -1870,22 +1871,22 @@@ { case T_1_SAMPLE: one_sample_precalc (cmd); - for (; casereader_read (pass2, &c); case_destroy (&c)) - one_sample_calc (dict, &c, cmd, exclude); + for (; (c = casereader_read (pass2)) != NULL; case_unref (c)) + one_sample_calc (dict, c, cmd, exclude); one_sample_postcalc (cmd); break; case T_PAIRED: paired_precalc (cmd); - for (; casereader_read (pass2, &c); case_destroy (&c)) - paired_calc (dict, &c, cmd, exclude); + for (; (c = casereader_read (pass2)) != NULL; case_unref (c)) + paired_calc (dict, c, cmd, exclude); paired_postcalc (cmd); break; case T_IND_SAMPLES: pass3 = casereader_clone (pass2); group_precalc (cmd); - for (; casereader_read (pass2, &c); case_destroy (&c)) - group_calc (dict, &c, cmd, exclude); + for (; (c = casereader_read (pass2)) != NULL; case_unref (c)) + group_calc (dict, c, cmd, exclude); group_postcalc (cmd); levene (dict, pass3, indep_var, cmd->n_variables, cmd->v_variables, @@@ -1860,14 -1897,14 +1898,14 @@@ if (!taint_has_tainted_successor (taint)) { ssbox_create (&stat_summary_box,cmd,mode); - ssbox_populate (&stat_summary_box,cmd); + ssbox_populate (&stat_summary_box, dict, cmd); ssbox_finalize (&stat_summary_box); if ( mode == T_PAIRED ) - pscbox (); + pscbox (dict); - trbox_create (&test_results_box,cmd,mode); - trbox_populate (&test_results_box,cmd); + trbox_create (&test_results_box, cmd, mode); + trbox_populate (&test_results_box, dict, cmd); trbox_finalize (&test_results_box); } @@@ -1889,6 -1926,10 +1927,6 @@@ compare_group_binary (const struct grou if ( p->criterion == CMP_LE ) { - /* less-than comparision is not meaningfull for - alpha variables, so we shouldn't ever arrive here */ - assert (p->indep_width == 0 ) ; - flag_a = ( a->id.f < p->v.critical_value ) ; flag_b = ( b->id.f < p->v.critical_value ) ; } @@@ -1915,6 -1956,8 +1953,6 @@@ hash_group_binary (const struct group_s if ( p->criterion == CMP_LE ) { - /* Not meaningfull to do a less than compare for alpha values ? 
*/ - assert (p->indep_width == 0 ) ; flag = ( g->id.f < p->v.critical_value ) ; } else if ( p->criterion == CMP_EQ) @@@ -1934,10 -1977,10 +1972,10 @@@ shor which_group (const struct group_statistics *g, const struct group_properties *p) { - if ( 0 == compare_values (&g->id, &p->v.g_value[0], p->indep_width)) + if ( 0 == compare_values_short (&g->id, &p->v.g_value[0], p->indep_var)) return 0; - if ( 0 == compare_values (&g->id, &p->v.g_value[1], p->indep_width)) + if ( 0 == compare_values_short (&g->id, &p->v.g_value[1], p->indep_var)) return 1; return 2; diff --combined src/language/stats/wilcoxon.c index 1bdcc06d,00000000..c0329f6c mode 100644,000000..100644 --- a/src/language/stats/wilcoxon.c +++ b/src/language/stats/wilcoxon.c @@@ -1,361 -1,0 +1,369 @@@ +/* Pspp - a program for statistical analysis. + Copyright (C) 2008, 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + + + +#include +#include "wilcoxon.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include ++#include + +static double +append_difference (const struct ccase *c, casenumber n UNUSED, void *aux) +{ + const variable_pair *vp = aux; + + return case_data (c, (*vp)[0])->f - case_data (c, (*vp)[1])->f; +} + +static void show_ranks_box (const struct wilcoxon_state *, - const struct two_sample_test *); ++ const struct two_sample_test *, ++ const struct dictionary *); + +static void show_tests_box (const struct wilcoxon_state *, + const struct two_sample_test *, + bool exact, double timer); + + + +static void +distinct_callback (double v UNUSED, casenumber n, double w UNUSED, void *aux) +{ + struct wilcoxon_state *ws = aux; + + ws->tiebreaker += pow3 (n) - n; +} + +#define WEIGHT_IDX 2 + +void +wilcoxon_execute (const struct dataset *ds, + struct casereader *input, + enum mv_class exclude, + const struct npar_test *test, + bool exact, + double timer) +{ + int i; + bool warn = true; + const struct dictionary *dict = dataset_dict (ds); + const struct two_sample_test *t2s = (struct two_sample_test *) test; + + struct wilcoxon_state *ws = xcalloc (sizeof (*ws), t2s->n_pairs); + const struct variable *weight = dict_get_weight (dict); + struct variable *weightx = var_create_internal (WEIGHT_IDX); + + input = + casereader_create_filter_weight (input, dict, &warn, NULL); + + for (i = 0 ; i < t2s->n_pairs; ++i ) + { + struct casereader *r = casereader_clone (input); + struct casewriter *writer; + struct ccase *c; + struct subcase ordering; + variable_pair *vp = &t2s->pairs[i]; + + const int reader_width = weight ? 
3 : 2; + + ws[i].sign = var_create_internal (0); + ws[i].absdiff = var_create_internal (1); + + r = casereader_create_filter_missing (r, *vp, 2, + exclude, + NULL, NULL); + + subcase_init_var (&ordering, ws[i].absdiff, SC_ASCEND); + writer = sort_create_writer (&ordering, reader_width); + subcase_destroy (&ordering); + + for (; (c = casereader_read (r)) != NULL; case_unref (c)) + { + struct ccase *output = case_create (reader_width); + double d = append_difference (c, 0, vp); + + if (d > 0) + { + case_data_rw (output, ws[i].sign)->f = 1.0; + + } + else if (d < 0) + { + case_data_rw (output, ws[i].sign)->f = -1.0; + } + else + { + double w = 1.0; + if (weight) + w = case_data (c, weight)->f; + + /* Central point values should be dropped */ + ws[i].n_zeros += w; + case_unref (output); + continue; + } + + case_data_rw (output, ws[i].absdiff)->f = fabs (d); + + if (weight) + case_data_rw (output, weightx)->f = case_data (c, weight)->f; + + casewriter_write (writer, output); + } + casereader_destroy (r); + ws[i].reader = casewriter_make_reader (writer); + } + + for (i = 0 ; i < t2s->n_pairs; ++i ) + { + struct casereader *rr ; + struct ccase *c; + enum rank_error err = 0; + + rr = casereader_create_append_rank (ws[i].reader, ws[i].absdiff, + weight ? weightx : NULL, &err, + distinct_callback, &ws[i] + ); + + for (; (c = casereader_read (rr)) != NULL; case_unref (c)) + { + double sign = case_data (c, ws[i].sign)->f; + double rank = case_data_idx (c, weight ? 3 : 2)->f; + double w = 1.0; + if (weight) + w = case_data (c, weightx)->f; + + if ( sign > 0 ) + { + ws[i].positives.sum += rank * w; + ws[i].positives.n += w; + } + else if (sign < 0) + { + ws[i].negatives.sum += rank * w; + ws[i].negatives.n += w; + } + else + NOT_REACHED (); + } + + casereader_destroy (rr); + } + + casereader_destroy (input); + + var_destroy (weightx); + - show_ranks_box (ws, t2s); ++ show_ranks_box (ws, t2s, dict); + show_tests_box (ws, t2s, exact, timer); + + for (i = 0 ; i < t2s->n_pairs; ++i ) + { + var_destroy (ws[i].sign); + var_destroy (ws[i].absdiff); + } + + free (ws); +} + + + + +#include "gettext.h" +#define _(msgid) gettext (msgid) + +static void - show_ranks_box (const struct wilcoxon_state *ws, const struct two_sample_test *t2s) ++show_ranks_box (const struct wilcoxon_state *ws, ++ const struct two_sample_test *t2s, ++ const struct dictionary *dict) +{ + size_t i; ++ ++ const struct variable *wv = dict_get_weight (dict); ++ const struct fmt_spec *wfmt = wv ? 
var_get_print_format (wv) : & F_8_0; ++ + struct tab_table *table = tab_create (5, 1 + 4 * t2s->n_pairs, 0); + + tab_dim (table, tab_natural_dimensions); + + tab_title (table, _("Ranks")); + + tab_headers (table, 2, 0, 1, 0); + + /* Vertical lines inside the box */ + tab_box (table, 0, 0, -1, TAL_1, + 1, 0, table->nc - 1, tab_nr (table) - 1 ); + + /* Box around entire table */ + tab_box (table, TAL_2, TAL_2, -1, -1, + 0, 0, table->nc - 1, tab_nr (table) - 1 ); + + + tab_text (table, 2, 0, TAB_CENTER, _("N")); + tab_text (table, 3, 0, TAB_CENTER, _("Mean Rank")); + tab_text (table, 4, 0, TAB_CENTER, _("Sum of Ranks")); + + + for (i = 0 ; i < t2s->n_pairs; ++i) + { + variable_pair *vp = &t2s->pairs[i]; + + struct string pair_name; + ds_init_cstr (&pair_name, var_to_string ((*vp)[0])); + ds_put_cstr (&pair_name, " - "); + ds_put_cstr (&pair_name, var_to_string ((*vp)[1])); + + tab_text (table, 1, 1 + i * 4, TAB_LEFT, _("Negative Ranks")); + tab_text (table, 1, 2 + i * 4, TAB_LEFT, _("Positive Ranks")); + tab_text (table, 1, 3 + i * 4, TAB_LEFT, _("Ties")); + tab_text (table, 1, 4 + i * 4, TAB_LEFT, _("Total")); + + tab_hline (table, TAL_1, 0, table->nc - 1, 1 + i * 4); + + + tab_text (table, 0, 1 + i * 4, TAB_LEFT, ds_cstr (&pair_name)); + ds_destroy (&pair_name); + + + /* N */ - tab_float (table, 2, 1 + i * 4, TAB_RIGHT, ws[i].negatives.n, 8, 0); - tab_float (table, 2, 2 + i * 4, TAB_RIGHT, ws[i].positives.n, 8, 0); - tab_float (table, 2, 3 + i * 4, TAB_RIGHT, ws[i].n_zeros, 8, 0); ++ tab_double (table, 2, 1 + i * 4, TAB_RIGHT, ws[i].negatives.n, wfmt); ++ tab_double (table, 2, 2 + i * 4, TAB_RIGHT, ws[i].positives.n, wfmt); ++ tab_double (table, 2, 3 + i * 4, TAB_RIGHT, ws[i].n_zeros, wfmt); + - tab_float (table, 2, 4 + i * 4, TAB_RIGHT, - ws[i].n_zeros + ws[i].positives.n + ws[i].negatives.n, 8, 0); ++ tab_double (table, 2, 4 + i * 4, TAB_RIGHT, ++ ws[i].n_zeros + ws[i].positives.n + ws[i].negatives.n, wfmt); + + /* Sums */ - tab_float (table, 4, 1 + i * 4, TAB_RIGHT, ws[i].negatives.sum, 8, 2); - tab_float (table, 4, 2 + i * 4, TAB_RIGHT, ws[i].positives.sum, 8, 2); ++ tab_double (table, 4, 1 + i * 4, TAB_RIGHT, ws[i].negatives.sum, NULL); ++ tab_double (table, 4, 2 + i * 4, TAB_RIGHT, ws[i].positives.sum, NULL); + + + /* Means */ - tab_float (table, 3, 1 + i * 4, TAB_RIGHT, - ws[i].negatives.sum / (double) ws[i].negatives.n, 8, 2); ++ tab_double (table, 3, 1 + i * 4, TAB_RIGHT, ++ ws[i].negatives.sum / (double) ws[i].negatives.n, NULL); + - tab_float (table, 3, 2 + i * 4, TAB_RIGHT, - ws[i].positives.sum / (double) ws[i].positives.n, 8, 2); ++ tab_double (table, 3, 2 + i * 4, TAB_RIGHT, ++ ws[i].positives.sum / (double) ws[i].positives.n, NULL); + + } + + tab_hline (table, TAL_2, 0, table->nc - 1, 1); + tab_vline (table, TAL_2, 2, 0, table->nr - 1); + + + tab_submit (table); +} + + +static void +show_tests_box (const struct wilcoxon_state *ws, + const struct two_sample_test *t2s, + bool exact, + double timer UNUSED + ) +{ + size_t i; + struct tab_table *table = tab_create (1 + t2s->n_pairs, exact ? 5 : 3, 0); + + tab_dim (table, tab_natural_dimensions); + + tab_title (table, _("Test Statistics")); + + tab_headers (table, 1, 0, 1, 0); + + /* Vertical lines inside the box */ + tab_box (table, 0, 0, -1, TAL_1, + 0, 0, table->nc - 1, tab_nr (table) - 1 ); + + /* Box around entire table */ + tab_box (table, TAL_2, TAL_2, -1, -1, + 0, 0, table->nc - 1, tab_nr (table) - 1 ); + + + tab_text (table, 0, 1, TAB_LEFT, _("Z")); + tab_text (table, 0, 2, TAB_LEFT, _("Asymp. 
Sig (2-tailed)")); + + if ( exact ) + { + tab_text (table, 0, 3, TAB_LEFT, _("Exact Sig (2-tailed)")); + tab_text (table, 0, 4, TAB_LEFT, _("Exact Sig (1-tailed)")); + +#if 0 + tab_text (table, 0, 5, TAB_LEFT, _("Point Probability")); +#endif + } + + for (i = 0 ; i < t2s->n_pairs; ++i) + { + double z; + double n = ws[i].positives.n + ws[i].negatives.n; + variable_pair *vp = &t2s->pairs[i]; + + struct string pair_name; + ds_init_cstr (&pair_name, var_to_string ((*vp)[0])); + ds_put_cstr (&pair_name, " - "); + ds_put_cstr (&pair_name, var_to_string ((*vp)[1])); + + + tab_text (table, 1 + i, 0, TAB_CENTER, ds_cstr (&pair_name)); + ds_destroy (&pair_name); + + z = MIN (ws[i].positives.sum, ws[i].negatives.sum); + z -= n * (n + 1)/ 4.0; + + z /= sqrt (n * (n + 1) * (2*n + 1)/24.0 - ws[i].tiebreaker / 48.0); + - tab_float (table, 1 + i, 1, TAB_RIGHT, z, 8, 3); ++ tab_double (table, 1 + i, 1, TAB_RIGHT, z, NULL); + - tab_float (table, 1 + i, 2, TAB_RIGHT, ++ tab_double (table, 1 + i, 2, TAB_RIGHT, + 2.0 * gsl_cdf_ugaussian_P (z), - 8, 3); ++ NULL); + + if (exact) + { + double p = LevelOfSignificanceWXMPSR (ws[i].positives.sum, n); + if (p < 0) + { + msg (MW, ("Too many pairs to calculate exact significance.")); + } + else + { - tab_float (table, 1 + i, 3, TAB_RIGHT, p, 8, 3); - tab_float (table, 1 + i, 4, TAB_RIGHT, p / 2.0, 8, 3); ++ tab_double (table, 1 + i, 3, TAB_RIGHT, p, NULL); ++ tab_double (table, 1 + i, 4, TAB_RIGHT, p / 2.0, NULL); + } + } + } + + tab_hline (table, TAL_2, 0, table->nc - 1, 1); + tab_vline (table, TAL_2, 1, 0, table->nr - 1); + + + tab_submit (table); +} diff --combined tests/automake.mk index 5c84354f,b0039df4..6db16ebc --- a/tests/automake.mk +++ b/tests/automake.mk @@@ -6,12 -6,8 +6,12 @@@ TESTS_ENVIRONMENT += PERL='@PERL@' PG_C # Allow locale_charset to find charset.alias before running "make install". 
TESTS_ENVIRONMENT += CHARSETALIASDIR='$(abs_top_builddir)/gl' +TESTS_ENVIRONMENT += LC_ALL=C + dist_TESTS = \ + tests/command/add-files.sh \ tests/command/aggregate.sh \ + tests/command/attributes.sh \ tests/command/autorecod.sh \ tests/command/beg-data.sh \ tests/command/bignum.sh \ @@@ -35,7 -31,6 +35,7 @@@ tests/command/input-program.sh \ tests/command/insert.sh \ tests/command/lag.sh \ + tests/command/line-ends.sh \ tests/command/list.sh \ tests/command/loop.sh \ tests/command/longvars.sh \ @@@ -45,7 -40,6 +45,7 @@@ tests/command/n_of_cases.sh \ tests/command/npar-binomial.sh \ tests/command/npar-chisquare.sh \ + tests/command/npar-wilcoxon.sh \ tests/command/oneway.sh \ tests/command/oneway-missing.sh \ tests/command/oneway-with-splits.sh \ @@@ -56,7 -50,6 +56,7 @@@ tests/command/rename.sh \ tests/command/regression.sh \ tests/command/regression-qr.sh \ + tests/command/reliability.sh \ tests/command/sample.sh \ tests/command/sort.sh \ tests/command/sysfiles.sh \ @@@ -75,7 -68,6 +75,7 @@@ tests/command/t-test-pairs.sh \ tests/command/trimmed-mean.sh \ tests/command/tabs.sh \ + tests/command/update.sh \ tests/command/use.sh \ tests/command/variable-display.sh \ tests/command/vector.sh \ @@@ -115,8 -107,6 +115,8 @@@ tests/bugs/double-frequency.sh \ tests/bugs/empty-do-repeat.sh \ tests/bugs/get.sh \ + tests/bugs/examine-crash.sh \ + tests/bugs/examine-crash2.sh \ tests/bugs/examine-1sample.sh \ tests/bugs/examine-missing.sh \ tests/bugs/examine-missing2.sh \ @@@ -131,6 -121,7 +131,7 @@@ tests/bugs/multipass.sh \ tests/bugs/overwrite-input-file.sh \ tests/bugs/overwrite-special-file.sh \ + tests/bugs/piechart.sh \ tests/bugs/random.sh \ tests/bugs/signals.sh \ tests/bugs/t-test-with-temp.sh \ @@@ -177,8 -168,6 +178,8 @@@ nodist_TESTS = tests/libpspp/abt-test \ tests/libpspp/bt-test \ tests/libpspp/heap-test \ + tests/libpspp/hmap-test \ + tests/libpspp/hmapx-test \ tests/libpspp/ll-test \ tests/libpspp/llx-test \ tests/libpspp/range-map-test \ @@@ -198,7 -187,6 +199,7 @@@ tests_libpspp_ll_test_SOURCES = src/libpspp/ll.h \ tests/libpspp/ll-test.c tests_libpspp_ll_test_LDADD = gl/libgl.la @LIBINTL@ +tests_libpspp_ll_test_CFLAGS = $(AM_CFLAGS) tests_libpspp_llx_test_SOURCES = \ src/libpspp/ll.c \ @@@ -207,7 -195,6 +208,7 @@@ src/libpspp/llx.h \ tests/libpspp/llx-test.c tests_libpspp_llx_test_LDADD = gl/libgl.la @LIBINTL@ +tests_libpspp_llx_test_CFLAGS = $(AM_CFLAGS) tests_libpspp_heap_test_SOURCES = \ src/libpspp/heap.c \ @@@ -215,25 -202,9 +216,25 @@@ src/libpspp/pool.c \ src/libpspp/pool.h \ tests/libpspp/heap-test.c -tests_libpspp_heap_test_LDADD = gl/libgl.la @LIBINTL@ +tests_libpspp_heap_test_LDADD = gl/libgl.la @LIBINTL@ tests_libpspp_heap_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 +tests_libpspp_hmap_test_SOURCES = \ + src/libpspp/hmap.c \ + src/libpspp/hmap.h \ + tests/libpspp/hmap-test.c +tests_libpspp_hmap_test_LDADD = gl/libgl.la @LIBINTL@ +tests_libpspp_hmap_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 + +tests_libpspp_hmapx_test_SOURCES = \ + src/libpspp/hmap.c \ + src/libpspp/hmap.h \ + src/libpspp/hmapx.c \ + src/libpspp/hmapx.h \ + tests/libpspp/hmapx-test.c +tests_libpspp_hmapx_test_LDADD = gl/libgl.la @LIBINTL@ +tests_libpspp_hmapx_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 + tests_libpspp_abt_test_SOURCES = \ src/libpspp/abt.c \ src/libpspp/abt.h \ @@@ -265,12 -236,12 +266,12 @@@ tests_libpspp_range_set_test_SOURCES = src/libpspp/range-set.c \ src/libpspp/range-set.h \ tests/libpspp/range-set-test.c -tests_libpspp_range_set_test_LDADD = gl/libgl.la 
@LIBINTL@ +tests_libpspp_range_set_test_LDADD = gl/libgl.la @LIBINTL@ tests_libpspp_range_set_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 tests_libpspp_str_test_SOURCES = \ tests/libpspp/str-test.c -tests_libpspp_str_test_LDADD = src/libpspp/libpspp.a gl/libgl.la @LIBINTL@ +tests_libpspp_str_test_LDADD = src/libpspp/libpspp.la gl/libgl.la @LIBINTL@ tests_libpspp_tower_test_SOURCES = \ src/libpspp/abt.c \ @@@ -280,7 -251,7 +281,7 @@@ src/libpspp/tower.c \ src/libpspp/tower.h \ tests/libpspp/tower-test.c -tests_libpspp_tower_test_LDADD = gl/libgl.la @LIBINTL@ +tests_libpspp_tower_test_LDADD = gl/libgl.la @LIBINTL@ tests_libpspp_tower_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 tests_libpspp_sparse_array_test_SOURCES = \ @@@ -289,7 -260,7 +290,7 @@@ src/libpspp/pool.c \ src/libpspp/pool.h \ tests/libpspp/sparse-array-test.c -tests_libpspp_sparse_array_test_LDADD = gl/libgl.la @LIBINTL@ +tests_libpspp_sparse_array_test_LDADD = gl/libgl.la @LIBINTL@ tests_libpspp_sparse_array_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 tests_formats_inexactify_SOURCES = tests/formats/inexactify.c @@@ -299,7 -270,7 +300,7 @@@ tests_dissect_sysfile_SOURCES = src/libpspp/integer-format.c \ src/libpspp/float-format.c \ tests/dissect-sysfile.c -tests_dissect_sysfile_LDADD = gl/libgl.la @LIBINTL@ +tests_dissect_sysfile_LDADD = gl/libgl.la @LIBINTL@ tests_dissect_sysfile_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=\"$(bindir)\" EXTRA_DIST += \ @@@ -342,12 -313,11 +343,12 @@@ CLEANFILES += *.save pspp.* foo* -DIST_HOOKS += check-for-export-var-val check-for-export-var-val: @if grep -q 'export .*=' $(dist_TESTS) ; then \ echo 'One or more tests contain non-portable "export VAR=val" syntax' ; \ false ; \ fi +DIST_HOOKS += check-for-export-var-val + EXTRA_DIST += tests/OChangeLog diff --combined tests/command/examine-extremes.sh index 8524bf5e,6e789c7b..67dfed0a --- a/tests/command/examine-extremes.sh +++ b/tests/command/examine-extremes.sh @@@ -102,33 -102,33 +102,33 @@@ activity="compare results perl -pi -e 's/^\s*$//g' $TEMPDIR/pspp.list diff -b $TEMPDIR/pspp.list - << EOF 1.1 EXAMINE. Case Processing Summary - #==#===============================# - # # Cases # - # #----------+---------+----------# - # # Valid | Missing | Total # - # #--+-------+-+-------+--+-------# - # # N|Percent|N|Percent| N|Percent# - #==#==#=======#=#=======#==#=======# - #V1#23| 100%|0| 0%|23| 100%# - #==#==#=======#=#=======#==#=======# + #==#=======================================# + # # Cases # + # #-------------+-----------+-------------# + # # Valid | Missing | Total # + # #-----+-------+---+-------+-----+-------# + # # N |Percent| N |Percent| N |Percent# + #==#=====#=======#===#=======#=====#=======# + #V1#23.00| 100%|.00| 0%|23.00| 100%# + #==#=====#=======#===#=======#=====#=======# 1.2 EXAMINE. 
Extreme Values -#============#===========#========# -# #Case Number| Value # -#============#===========#========# -#V1 Highest 1# 21| 20.00# -# 2# 20| 19.00# -# 3# 19| 18.00# -# 4# 19| 18.00# -# 5# 18| 17.00# -# 6# 17| 16.00# -# ----------#-----------+--------# -# Lowest 1# 1| 1.00# -# 2# 2| 2.00# -# 3# 4| 3.00# -# 4# 3| 3.00# -# 5# 3| 3.00# -# 6# 5| 4.00# -#============#===========#========# +#============#===========#=====# +# #Case Number|Value# +#============#===========#=====# +#V1 Highest 1# 21|20.00# +# 2# 20|19.00# +# 3# 19|18.00# +# 4# 19|18.00# +# 5# 18|17.00# +# 6# 17|16.00# +# ----------#-----------+-----# +# Lowest 1# 1| 1.00# +# 2# 2| 2.00# +# 3# 3| 3.00# +# 4# 3| 3.00# +# 5# 4| 3.00# +# 6# 5| 4.00# +#============#===========#=====# EOF if [ $? -ne 0 ] ; then fail ; fi diff --combined tests/command/examine.sh index 095ee0fb,6468e814..11534385 --- a/tests/command/examine.sh +++ b/tests/command/examine.sh @@@ -116,147 -116,147 +116,147 @@@ diff -b $TEMPDIR/pspp.list - << EO +--------+------+ Case# QUALITY W BRAND ----- -------- -------- -------- - 1 3.00 1.00 1.00 - 2 2.00 2.00 1.00 - 3 1.00 2.00 1.00 - 4 1.00 1.00 1.00 - 5 4.00 1.00 1.00 - 6 4.00 1.00 1.00 - 7 5.00 1.00 2.00 - 8 2.00 1.00 2.00 - 9 4.00 4.00 2.00 - 10 2.00 1.00 2.00 - 11 3.00 1.00 2.00 - 12 7.00 1.00 3.00 - 13 4.00 2.00 3.00 - 14 5.00 3.00 3.00 - 15 3.00 1.00 3.00 - 16 6.00 1.00 3.00 + 1 3.00 1.00 1.00 + 2 2.00 2.00 1.00 + 3 1.00 2.00 1.00 + 4 1.00 1.00 1.00 + 5 4.00 1.00 1.00 + 6 4.00 1.00 1.00 + 7 5.00 1.00 2.00 + 8 2.00 1.00 2.00 + 9 4.00 4.00 2.00 + 10 2.00 1.00 2.00 + 11 3.00 1.00 2.00 + 12 7.00 1.00 3.00 + 13 4.00 2.00 3.00 + 14 5.00 3.00 3.00 + 15 3.00 1.00 3.00 + 16 6.00 1.00 3.00 2.1 EXAMINE. Case Processing Summary - #===============#===============================# - # # Cases # - # #----------+---------+----------# - # # Valid | Missing | Total # - # #--+-------+-+-------+--+-------# - # # N|Percent|N|Percent| N|Percent# - #===============#==#=======#=#=======#==#=======# - #Breaking Strain#24| 100%|0| 0%|24| 100%# - #===============#==#=======#=#=======#==#=======# + #===============#=======================================# + # # Cases # + # #-------------+-----------+-------------# + # # Valid | Missing | Total # + # #-----+-------+---+-------+-----+-------# + # # N |Percent| N |Percent| N |Percent# + #===============#=====#=======#===#=======#=====#=======# + #Breaking Strain#24.00| 100%|.00| 0%|24.00| 100%# + #===============#=====#=======#===#=======#=====#=======# 2.2 EXAMINE. Extreme Values -#=========================#===========#========# -# #Case Number| Value # -#=========================#===========#========# -#Breaking Strain Highest 1# 12| 7.00# -# 2# 16| 6.00# -# 3# 14| 5.00# -# ----------#-----------+--------# -# Lowest 1# 4| 1.00# -# 2# 3| 1.00# -# 3# 3| 1.00# -#=========================#===========#========# +#=========================#===========#=====# +# #Case Number|Value# +#=========================#===========#=====# +#Breaking Strain Highest 1# 12| 7.00# +# 2# 16| 6.00# +# 3# 7| 5.00# +# ----------#-----------+-----# +# Lowest 1# 3| 1.00# +# 2# 3| 1.00# +# 3# 4| 1.00# +#=========================#===========#=====# 2.3 EXAMINE. Descriptives #============================================================#=========#==========# # #Statistic|Std. 
Error# #============================================================#=========#==========# - #Breaking Strain Mean # 3.54 | .324 # - # 95% Confidence Interval for Mean Lower Bound# 2.871 | # - # Upper Bound# 4.212 | # + #Breaking Strain Mean # 3.54 | .32 # + # 95% Confidence Interval for Mean Lower Bound# 2.87 | # + # Upper Bound# 4.21 | # # 5% Trimmed Mean # 3.50 | # # Median # 4.00 | # - # Variance # 2.520 | # - # Std. Deviation # 1.587 | # - # Minimum # 1.000 | # - # Maximum # 7.000 | # - # Range # 6.000 | # + # Variance # 2.52 | # + # Std. Deviation # 1.59 | # + # Minimum # 1.00 | # + # Maximum # 7.00 | # + # Range # 6.00 | # # Interquartile Range # 2.75 | # - # Skewness # .059 | .472 # - # Kurtosis # -.358 | .918 # + # Skewness # .06 | .47 # + # Kurtosis # -.36 | .92 # #============================================================#=========#==========# 2.4 EXAMINE. Case Processing Summary - #============================#=============================# - # # Cases # - # #---------+---------+---------# - # # Valid | Missing | Total # - # #-+-------+-+-------+-+-------# - # Manufacturer#N|Percent|N|Percent|N|Percent# - #============================#=#=======#=#=======#=#=======# - #Breaking Strain Aspeger #8| 100%|0| 0%|8| 100%# - # Bloggs #8| 100%|0| 0%|8| 100%# - # Charlies #8| 100%|0| 0%|8| 100%# - #============================#=#=======#=#=======#=#=======# + #============================#=====================================# + # # Cases # + # #------------+-----------+------------# + # # Valid | Missing | Total # + # #----+-------+---+-------+----+-------# + # Manufacturer# N |Percent| N |Percent| N |Percent# + #============================#====#=======#===#=======#====#=======# + #Breaking Strain Aspeger #8.00| 100%|.00| 0%|8.00| 100%# + # Bloggs #8.00| 100%|.00| 0%|8.00| 100%# + # Charlies #8.00| 100%|.00| 0%|8.00| 100%# + #============================#====#=======#===#=======#====#=======# 2.5 EXAMINE. Extreme Values -#======================================#===========#========# -# Manufacturer #Case Number| Value # -#======================================#===========#========# -#Breaking Strain Aspeger Highest 1# 6| 4.00# -# 2# 5| 4.00# -# 3# 1| 3.00# -# ----------#-----------+--------# -# Lowest 1# 4| 1.00# -# 2# 3| 1.00# -# 3# 3| 1.00# -# -----------------------#-----------+--------# -# Bloggs Highest 1# 7| 5.00# -# 2# 9| 4.00# -# 3# 9| 4.00# -# ----------#-----------+--------# -# Lowest 1# 10| 2.00# -# 2# 8| 2.00# -# 3# 11| 3.00# -# -----------------------#-----------+--------# -# Charlies Highest 1# 12| 7.00# -# 2# 16| 6.00# -# 3# 14| 5.00# -# ----------#-----------+--------# -# Lowest 1# 15| 3.00# -# 2# 13| 4.00# -# 3# 13| 4.00# -#======================================#===========#========# +#======================================#===========#=====# +# Manufacturer #Case Number|Value# +#======================================#===========#=====# +#Breaking Strain Aspeger Highest 1# 5| 4.00# +# 2# 6| 4.00# +# 3# 1| 3.00# +# ----------#-----------+-----# +# Lowest 1# 3| 1.00# +# 2# 3| 1.00# +# 3# 4| 1.00# +# -----------------------#-----------+-----# +# Bloggs Highest 1# 7| 5.00# +# 2# 9| 4.00# +# 3# 9| 4.00# +# ----------#-----------+-----# +# Lowest 1# 8| 2.00# +# 2# 10| 2.00# +# 3# 11| 3.00# +# -----------------------#-----------+-----# +# Charlies Highest 1# 12| 7.00# +# 2# 16| 6.00# +# 3# 14| 5.00# +# ----------#-----------+-----# +# Lowest 1# 15| 3.00# +# 2# 13| 4.00# +# 3# 13| 4.00# +#======================================#===========#=====# 2.6 EXAMINE. 
Descriptives #=========================================================================#=========#==========# - # Manufacturer #Statistic|Std. Error# + # Manufacturer #Statistic|Std. Error# #=========================================================================#=========#==========# - #Breaking Strain Aspeger Mean # 2.25 | .453 # - # 95% Confidence Interval for Mean Lower Bound# 1.178 | # - # Upper Bound# 3.322 | # + #Breaking Strain Aspeger Mean # 2.25 | .45 # + # 95% Confidence Interval for Mean Lower Bound# 1.18 | # + # Upper Bound# 3.32 | # # 5% Trimmed Mean # 2.22 | # # Median # 2.00 | # - # Variance # 1.643 | # - # Std. Deviation # 1.282 | # - # Minimum # 1.000 | # - # Maximum # 4.000 | # - # Range # 3.000 | # + # Variance # 1.64 | # + # Std. Deviation # 1.28 | # + # Minimum # 1.00 | # + # Maximum # 4.00 | # + # Range # 3.00 | # # Interquartile Range # 2.75 | # - # Skewness # .475 | .752 # - # Kurtosis # -1.546 | 1.481 # + # Skewness # .47 | .75 # + # Kurtosis # -1.55 | 1.48 # # ----------------------------------------------------------#---------+----------# - # Bloggs Mean # 3.50 | .378 # - # 95% Confidence Interval for Mean Lower Bound# 2.606 | # - # Upper Bound# 4.394 | # + # Bloggs Mean # 3.50 | .38 # + # 95% Confidence Interval for Mean Lower Bound# 2.61 | # + # Upper Bound# 4.39 | # # 5% Trimmed Mean # 3.50 | # # Median # 4.00 | # - # Variance # 1.143 | # - # Std. Deviation # 1.069 | # - # Minimum # 2.000 | # - # Maximum # 5.000 | # - # Range # 3.000 | # + # Variance # 1.14 | # + # Std. Deviation # 1.07 | # + # Minimum # 2.00 | # + # Maximum # 5.00 | # + # Range # 3.00 | # # Interquartile Range # 1.75 | # - # Skewness # -.468 | .752 # - # Kurtosis # -.831 | 1.481 # + # Skewness # -.47 | .75 # + # Kurtosis # -.83 | 1.48 # # ----------------------------------------------------------#---------+----------# - # Charlies Mean # 4.88 | .441 # - # 95% Confidence Interval for Mean Lower Bound# 3.833 | # - # Upper Bound# 5.917 | # + # Charlies Mean # 4.88 | .44 # + # 95% Confidence Interval for Mean Lower Bound# 3.83 | # + # Upper Bound# 5.92 | # # 5% Trimmed Mean # 4.86 | # # Median # 5.00 | # - # Variance # 1.554 | # - # Std. Deviation # 1.246 | # - # Minimum # 3.000 | # - # Maximum # 7.000 | # - # Range # 4.000 | # + # Variance # 1.55 | # + # Std. Deviation # 1.25 | # + # Minimum # 3.00 | # + # Maximum # 7.00 | # + # Range # 4.00 | # # Interquartile Range # 1.75 | # - # Skewness # .304 | .752 # - # Kurtosis # .146 | 1.481 # + # Skewness # .30 | .75 # + # Kurtosis # .15 | 1.48 # #=========================================================================#=========#==========# EOF if [ $? -ne 0 ] ; then fail ; fi diff --combined tests/command/npar-binomial.sh index 29ad2fab,439398e2..0db048dd --- a/tests/command/npar-binomial.sh +++ b/tests/command/npar-binomial.sh @@@ -74,6 -74,8 +74,8 @@@ END DATA WEIGHT BY w. + SET FORMAT F8.3. + NPAR TESTS /BINOMIAL(0.3) = x . @@@ -83,7 -85,9 +85,9 @@@ ECHO 'P < 0.5; N1/N2 > 1' NEW FILE. - DATA LIST LIST NOTABLE /x * w *. + + + DATA LIST LIST NOTABLE /x (F8.0) w (F8.0). BEGIN DATA. 1 7 2 6 @@@ -101,7 -105,7 +105,7 @@@ ECHO 'P < 0.5; N1/N2 = 1' NEW FILE. - DATA LIST LIST NOTABLE /x * w *. + DATA LIST LIST NOTABLE /x (F8.0) w (F8.0). BEGIN DATA. 1 8 2 8 @@@ -117,7 -121,7 +121,7 @@@ ECHO 'P > 0.5; N1/N2 < 1' NEW FILE. - DATA LIST LIST NOTABLE /x * w *. + DATA LIST LIST NOTABLE /x (F8.0) w (F8.0). BEGIN DATA. 1 11 2 12 @@@ -134,7 -138,7 +138,7 @@@ ECHO 'P > 0.5; N1/N2 > 1' NEW FILE. - DATA LIST LIST NOTABLE /x * w *. 
+ DATA LIST LIST NOTABLE /x (F8.0) w (F8.0). BEGIN DATA. 1 11 2 9 @@@ -149,7 -153,7 +153,7 @@@ ECHO 'P > 0.5; N1/N2 == 1' NEW FILE. - DATA LIST LIST NOTABLE /x * w *. + DATA LIST LIST NOTABLE /x (F8.0) w (F8.0). BEGIN DATA. 1 11 2 11 @@@ -165,7 -169,7 +169,7 @@@ ECHO 'P == 0.5; N1/N2 < 1' NEW FILE. - DATA LIST LIST NOTABLE /x * w *. + DATA LIST LIST NOTABLE /x (F8.0) w (F8.0). BEGIN DATA. 1 8 2 15 @@@ -182,7 -186,7 +186,7 @@@ ECHO 'P == 0.5; N1/N2 > 1' NEW FILE. - DATA LIST LIST NOTABLE /x * w *. + DATA LIST LIST NOTABLE /x (F8.0) w (F8.0). BEGIN DATA. 1 12 2 6 @@@ -197,7 -201,7 +201,7 @@@ ECHO 'P == 0.5; N1/N2 == 1' NEW FILE. - DATA LIST LIST NOTABLE /x * w *. + DATA LIST LIST NOTABLE /x (F8.0) w (F8.0). BEGIN DATA. 1 10 2 10 @@@ -208,36 -212,6 +212,36 @@@ WEIGHT BY w NPAR TESTS /BINOMIAL(0.5) = x . + +ECHO 'P == 0.5; N1/N2 == 1 Cutpoint' . + +DATA LIST LIST NOTABLE /x * w *. +BEGIN DATA. +9 3 +10 7 +11 16 +END DATA. + +WEIGHT BY w. + +NPAR TESTS + /BINOMIAL(0.5) = x (10) + . + +ECHO 'P == 0.5; N1/N2 == 1 Named values' . + +DATA LIST LIST NOTABLE /x * w *. +BEGIN DATA. +10 10 +15 45 +20 13 +END DATA. + +WEIGHT BY w. + +NPAR TESTS + /BINOMIAL(0.5) = x (10, 20) + . EOF if [ $? -ne 0 ] ; then no_result ; fi @@@ -251,103 -225,85 +255,103 @@@ perl -pi -e 's/^\s*$//g' $TEMPDIR/pspp. diff -b $TEMPDIR/pspp.list - << EOF P < 0.5; N1/N2 < 1 1.1 NPAR TESTS. Binomial Test - +-+------#--------+--+--------------+----------+---------------------+ - | | #Category| N|Observed Prop.|Test Prop.|Exact Sig. (1-tailed)| - +-+------#--------+--+--------------+----------+---------------------+ - |x|Group1# 1.00| 6| .286| .300| .551| - | |Group2# 2.00|15| .714| | | - | |Total # |21| 1.00| | | - +-+------#--------+--+--------------+----------+---------------------+ + +-+------#--------+-----+--------------+----------+---------------------+ + | | #Category| N |Observed Prop.|Test Prop.|Exact Sig. (1-tailed)| + +-+------#--------+-----+--------------+----------+---------------------+ + |x|Group1# 1.00| 6.00| .286| .300| .551| + | |Group2# 2.00|15.00| .714| | | -| |Total # |21.00| 1.00| | | ++| |Total # |21.00| 1.000| | | + +-+------#--------+-----+--------------+----------+---------------------+ P < 0.5; N1/N2 > 1 2.1 NPAR TESTS. Binomial Test +-+------#--------+--+--------------+----------+---------------------+ | | #Category| N|Observed Prop.|Test Prop.|Exact Sig. (1-tailed)| +-+------#--------+--+--------------+----------+---------------------+ - |x|Group1# 1.00| 7| .538| .400| .229| - | |Group2# 2.00| 6| .462| | | - | |Total # |13| 1.00| | | + |x|Group1# 1| 7| .538| .400| .229| + | |Group2# 2| 6| .462| | | -| |Total # |13| 1| | | ++| |Total # |13| 1.000| | | +-+------#--------+--+--------------+----------+---------------------+ P < 0.5; N1/N2 = 1 3.1 NPAR TESTS. Binomial Test +-+------#--------+--+--------------+----------+---------------------+ | | #Category| N|Observed Prop.|Test Prop.|Exact Sig. (1-tailed)| +-+------#--------+--+--------------+----------+---------------------+ - |x|Group1# 1.00| 8| .500| .400| .284| - | |Group2# 2.00| 8| .500| | | - | |Total # |16| 1.00| | | + |x|Group1# 1| 8| .500| .400| .284| + | |Group2# 2| 8| .500| | | -| |Total # |16| 1| | | ++| |Total # |16| 1.000| | | +-+------#--------+--+--------------+----------+---------------------+ P > 0.5; N1/N2 < 1 4.1 NPAR TESTS. Binomial Test +-+------#--------+--+--------------+----------+---------------------+ | | #Category| N|Observed Prop.|Test Prop.|Exact Sig. 
(1-tailed)| +-+------#--------+--+--------------+----------+---------------------+ - |x|Group1# 1.00|11| .478| .600| .164| - | |Group2# 2.00|12| .522| | | - | |Total # |23| 1.00| | | + |x|Group1# 1|11| .478| .600| .164| + | |Group2# 2|12| .522| | | -| |Total # |23| 1| | | ++| |Total # |23| 1.000| | | +-+------#--------+--+--------------+----------+---------------------+ P > 0.5; N1/N2 > 1 5.1 NPAR TESTS. Binomial Test +-+------#--------+--+--------------+----------+---------------------+ | | #Category| N|Observed Prop.|Test Prop.|Exact Sig. (1-tailed)| +-+------#--------+--+--------------+----------+---------------------+ - |x|Group1# 1.00|11| .550| .600| .404| - | |Group2# 2.00| 9| .450| | | - | |Total # |20| 1.00| | | + |x|Group1# 1|11| .550| .600| .404| + | |Group2# 2| 9| .450| | | -| |Total # |20| 1| | | ++| |Total # |20| 1.000| | | +-+------#--------+--+--------------+----------+---------------------+ P > 0.5; N1/N2 == 1 6.1 NPAR TESTS. Binomial Test +-+------#--------+--+--------------+----------+---------------------+ | | #Category| N|Observed Prop.|Test Prop.|Exact Sig. (1-tailed)| +-+------#--------+--+--------------+----------+---------------------+ - |x|Group1# 1.00|11| .500| .600| .228| - | |Group2# 2.00|11| .500| | | - | |Total # |22| 1.00| | | + |x|Group1# 1|11| .500| .600| .228| + | |Group2# 2|11| .500| | | -| |Total # |22| 1| | | ++| |Total # |22| 1.000| | | +-+------#--------+--+--------------+----------+---------------------+ P == 0.5; N1/N2 < 1 7.1 NPAR TESTS. Binomial Test +-+------#--------+--+--------------+----------+---------------------+ | | #Category| N|Observed Prop.|Test Prop.|Exact Sig. (2-tailed)| +-+------#--------+--+--------------+----------+---------------------+ - |x|Group1# 1.00| 8| .348| .500| .210| - | |Group2# 2.00|15| .652| | | - | |Total # |23| 1.00| | | + |x|Group1# 1| 8| .348| .500| .210| + | |Group2# 2|15| .652| | | -| |Total # |23| 1| | | ++| |Total # |23| 1.000| | | +-+------#--------+--+--------------+----------+---------------------+ P == 0.5; N1/N2 > 1 8.1 NPAR TESTS. Binomial Test +-+------#--------+--+--------------+----------+---------------------+ | | #Category| N|Observed Prop.|Test Prop.|Exact Sig. (2-tailed)| +-+------#--------+--+--------------+----------+---------------------+ - |x|Group1# 1.00|12| .667| .500| .238| - | |Group2# 2.00| 6| .333| | | - | |Total # |18| 1.00| | | + |x|Group1# 1|12| .667| .500| .238| + | |Group2# 2| 6| .333| | | -| |Total # |18| 1| | | ++| |Total # |18| 1.000| | | +-+------#--------+--+--------------+----------+---------------------+ P == 0.5; N1/N2 == 1 9.1 NPAR TESTS. Binomial Test +-+------#--------+--+--------------+----------+---------------------+ | | #Category| N|Observed Prop.|Test Prop.|Exact Sig. (2-tailed)| +-+------#--------+--+--------------+----------+---------------------+ - |x|Group1# 1.00|10| .500| .500| 1.000| - | |Group2# 2.00|10| .500| | | - | |Total # |20| 1.00| | | + |x|Group1# 1|10| .500| .500| 1.000| + | |Group2# 2|10| .500| | | -| |Total # |20| 1| | | ++| |Total # |20| 1.000| | | +-+------#--------+--+--------------+----------+---------------------+ +P == 0.5; N1/N2 == 1 Cutpoint +10.1 NPAR TESTS. Binomial Test - +-+------#--------+--+--------------+----------+---------------------+ - | | #Category| N|Observed Prop.|Test Prop.|Exact Sig. 
(2-tailed)| - +-+------#--------+--+--------------+----------+---------------------+ - |x|Group1# <= 10|10| .385| .500| .327| - | |Group2# |16| .615| | | - | |Total # |26| 1.00| | | - +-+------#--------+--+--------------+----------+---------------------+ +++-+------#--------+------+--------------+----------+---------------------+ ++| | #Category| N |Observed Prop.|Test Prop.|Exact Sig. (2-tailed)| +++-+------#--------+------+--------------+----------+---------------------+ ++|x|Group1# <= 10|10.000| .385| .500| .327| ++| |Group2# |16.000| .615| | | ++| |Total # |26.000| 1.000| | | +++-+------#--------+------+--------------+----------+---------------------+ +P == 0.5; N1/N2 == 1 Named values +11.1 NPAR TESTS. Binomial Test - +-+------#--------+--+--------------+----------+---------------------+ - | | #Category| N|Observed Prop.|Test Prop.|Exact Sig. (2-tailed)| - +-+------#--------+--+--------------+----------+---------------------+ - |x|Group1# 10.00|10| .435| .500| .678| - | |Group2# 20.00|13| .565| | | - | |Total # |23| 1.00| | | - +-+------#--------+--+--------------+----------+---------------------+ +++-+------#--------+------+--------------+----------+---------------------+ ++| | #Category| N |Observed Prop.|Test Prop.|Exact Sig. (2-tailed)| +++-+------#--------+------+--------------+----------+---------------------+ ++|x|Group1# 10.000|10.000| .435| .500| .678| ++| |Group2# 20.000|13.000| .565| | | ++| |Total # |23.000| 1.000| | | +++-+------#--------+------+--------------+----------+---------------------+ EOF if [ $? -ne 0 ] ; then fail ; fi diff --combined tests/command/npar-wilcoxon.sh index c4a5d825,00000000..ae0d39fc mode 100755,000000..100755 --- a/tests/command/npar-wilcoxon.sh +++ b/tests/command/npar-wilcoxon.sh @@@ -1,174 -1,0 +1,173 @@@ +#!/bin/sh + +# This program tests the wilcoxon subcommand of npar tests + +TEMPDIR=/tmp/pspp-tst-$$ +TESTFILE=$TEMPDIR/`basename $0`.sps + +# ensure that top_srcdir and top_builddir are absolute +if [ -z "$top_srcdir" ] ; then top_srcdir=. ; fi +if [ -z "$top_builddir" ] ; then top_builddir=. ; fi +top_srcdir=`cd $top_srcdir; pwd` +top_builddir=`cd $top_builddir; pwd` + +PSPP=$top_builddir/src/ui/terminal/pspp + +STAT_CONFIG_PATH=$top_srcdir/config +export STAT_CONFIG_PATH + +LANG=C +export LANG + + +cleanup() +{ + if [ x"$PSPP_TEST_NO_CLEANUP" != x ] ; then + echo "NOT cleaning $TEMPDIR" + return ; + fi + rm -rf $TEMPDIR +} + + +fail() +{ + echo $activity + echo FAILED + cleanup; + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + cleanup; + exit 2; +} + +pass() +{ + cleanup; + exit 0; +} + +mkdir -p $TEMPDIR + +cd $TEMPDIR + +activity="create program 1" +cat > $TESTFILE << EOF - data list notable list /foo * bar * w *. ++data list notable list /foo * bar * w (f8.0). +begin data. +1.00 1.00 1 +1.00 2.00 1 +2.00 1.00 1 +1.00 4.00 1 +2.00 5.00 1 +1.00 19.00 1 +2.00 7.00 1 +4.00 5.00 1 +1.00 12.00 1 +2.00 13.00 1 +2.00 2.00 1 +12.00 .00 2 +12.00 1.00 1 +13.00 1.00 1 +end data + +variable labels foo "first" bar "second". + +weight by w. + +npar test + /wilcoxon=foo with bar (paired) + /missing analysis + /method=exact. + +EOF +if [ $? -ne 0 ] ; then no_result ; fi + + +activity="run program 1" +$SUPERVISOR $PSPP --testing-mode -o raw-ascii $TESTFILE +if [ $? -ne 0 ] ; then no_result ; fi + - +activity="generate results" +cat > $TEMPDIR/results.txt < $TESTFILE << EOF +data list notable list /foo * bar * dummy *. +begin data. +1.00 1.00 1 +1.00 2.00 1 +2.00 1.00 1 +1.00 4.00 . +2.00 5.00 . +1.00 19.00 . 
+2.00 7.00 1 +4.00 5.00 1 +1.00 12.00 1 +2.00 13.00 1 +2.00 2.00 1 +12.00 .00 1 +12.00 .00 1 +34.2 . 1 +12.00 1.00 1 +13.00 1.00 1 +end data + +variable labels foo "first" bar "second". + +npar test + /wilcoxon=foo with bar (paired) + /missing analysis + /method=exact. + +EOF +if [ $? -ne 0 ] ; then no_result ; fi + + +activity="run program 2" +$SUPERVISOR $PSPP --testing-mode -o raw-ascii $TESTFILE +if [ $? -ne 0 ] ; then no_result ; fi + - +activity="compare output 2" +diff pspp.list $TEMPDIR/results.txt +if [ $? -ne 0 ] ; then fail ; fi + + + +pass; diff --combined tests/command/reliability.sh index 0bfa7336,00000000..ed4b4b85 mode 100755,000000..100755 --- a/tests/command/reliability.sh +++ b/tests/command/reliability.sh @@@ -1,345 -1,0 +1,345 @@@ +#!/bin/sh + +# This program tests the reliability command. + +TEMPDIR=/tmp/pspp-tst-$$ +TESTFILE=$TEMPDIR/`basename $0`.sps + +# ensure that top_srcdir and top_builddir are absolute +if [ -z "$top_srcdir" ] ; then top_srcdir=. ; fi +if [ -z "$top_builddir" ] ; then top_builddir=. ; fi +top_srcdir=`cd $top_srcdir; pwd` +top_builddir=`cd $top_builddir; pwd` + +PSPP=$top_builddir/src/ui/terminal/pspp + +STAT_CONFIG_PATH=$top_srcdir/config +export STAT_CONFIG_PATH + +LANG=C +export LANG + + +cleanup() +{ + if [ x"$PSPP_TEST_NO_CLEANUP" != x ] ; then + echo "NOT cleaning $TEMPDIR" + return ; + fi + rm -rf $TEMPDIR +} + + +fail() +{ + echo $activity + echo FAILED + cleanup; + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + cleanup; + exit 2; +} + +pass() +{ + cleanup; + exit 0; +} + +mkdir -p $TEMPDIR + +cd $TEMPDIR + + +activity="create program" +cat > $TESTFILE <
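Returning to the npar-wilcoxon.sh test added above: the statistic behind /wilcoxon=foo with bar (paired) is the signed-rank sum. In the textbook formulation, pairs with a zero difference are set aside, the absolute differences are ranked with tied values sharing their average rank, and the ranks are summed separately over negative and positive differences; the smaller of the two sums drives the significance. A minimal, unweighted sketch of those two sums is shown here (wilcoxon_rank_sums is a hypothetical helper for illustration; the script above additionally applies WEIGHT BY w and requests /method=exact, which this sketch does not attempt).

/* Sketch: signed-rank sums for a paired Wilcoxon test.  Zero
   differences are discarded, |differences| are ranked with average
   ranks for ties, and the ranks are summed by sign of the difference.
   Hypothetical helper for illustration; not PSPP's implementation. */
#include <math.h>
#include <stddef.h>

static void
wilcoxon_rank_sums (const double *a, const double *b, size_t n,
                    double *w_neg, double *w_pos)
{
  size_t i, j;

  *w_neg = *w_pos = 0.0;
  for (i = 0; i < n; i++)
    {
      double di = b[i] - a[i];
      double rank;
      size_t less = 0, equal = 0;

      if (di == 0.0)
        continue;                   /* zero differences are set aside */

      /* Rank of |di| among all nonzero |d|, averaging over ties
         (the tie group occupies ranks less+1 .. less+equal). */
      for (j = 0; j < n; j++)
        {
          double dj = b[j] - a[j];
          if (dj == 0.0)
            continue;
          if (fabs (dj) < fabs (di))
            less++;
          else if (fabs (dj) == fabs (di))
            equal++;
        }
      rank = less + (equal + 1) / 2.0;

      if (di < 0)
        *w_neg += rank;
      else
        *w_pos += rank;
    }
}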
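The reliability.sh script whose preamble appears above exercises the RELIABILITY command, whose default statistic is Cronbach's alpha: alpha = k/(k-1) * (1 - sum of the item variances / variance of the summed score) for k items. The sketch below is only a rough illustration of that formula; variance and cronbach_alpha are hypothetical helpers, not PSPP code, and they ignore case weights and missing values.

/* Sketch: Cronbach's alpha for k items over n cases, using the
   n-1 variance denominator.  Hypothetical helpers for illustration;
   not the code exercised by this test. */
#include <stddef.h>

static double
variance (const double *x, size_t n)
{
  double sum = 0.0, ss = 0.0, mean;
  size_t i;

  for (i = 0; i < n; i++)
    sum += x[i];
  mean = sum / n;
  for (i = 0; i < n; i++)
    ss += (x[i] - mean) * (x[i] - mean);
  return ss / (n - 1);
}

/* items[i][c] is item i's value for case c; scratch holds n doubles. */
static double
cronbach_alpha (const double *const *items, size_t k, size_t n,
                double *scratch)
{
  double item_var_sum = 0.0, total_var;
  size_t i, c;

  for (c = 0; c < n; c++)
    {
      scratch[c] = 0.0;
      for (i = 0; i < k; i++)
        scratch[c] += items[i][c];   /* total score for case c */
    }
  for (i = 0; i < k; i++)
    item_var_sum += variance (items[i], n);
  total_var = variance (scratch, n);

  return (double) k / (k - 1) * (1.0 - item_var_sum / total_var);
}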