X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fcrosstabs.q;h=95f3dc3346862a783ccc8588916fefcec5eb17e0;hb=d87a2963d1fe93510e92aa3fdf64a7a412ec60e2;hp=c3a17aafac3c0ca014852e0d25418b15f72b72b2;hpb=97d6c6f6b1922621ca013668eba9a9a9f71d60fe;p=pspp-builds.git diff --git a/src/crosstabs.q b/src/crosstabs.q index c3a17aaf..95f3dc33 100644 --- a/src/crosstabs.q +++ b/src/crosstabs.q @@ -14,8 +14,8 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. */ /* FIXME: @@ -34,8 +34,11 @@ #include #include #include +#include #include "algorithm.h" #include "alloc.h" +#include "case.h" +#include "dictionary.h" #include "hash.h" #include "pool.h" #include "command.h" @@ -43,18 +46,20 @@ #include "error.h" #include "magic.h" #include "misc.h" -#include "stats.h" #include "output.h" +#include "str.h" #include "tab.h" #include "value-labels.h" #include "var.h" #include "vfm.h" +/* (headers) */ + #include "debug-print.h" /* (specification) crosstabs (crs_): - *tables=custom; + *^tables=custom; +variables=custom; +missing=miss:!table/include/report; +write[wr_]=none,cells,all; @@ -104,6 +109,22 @@ struct crosstab larger indices first. */ }; +/* Integer mode variable info. */ +struct var_range + { + int min; /* Minimum value. */ + int max; /* Maximum value + 1. */ + int count; /* max - min. */ + }; + +static inline struct var_range * +get_var_range (struct variable *v) +{ + assert (v != NULL); + assert (v->aux != NULL); + return v->aux; +} + /* Indexes into crosstab.v. */ enum { @@ -156,11 +177,6 @@ static void submit (struct tab_table *); static void format_short (char *s, const struct fmt_spec *fp, const union value *v); -#if DEBUGGING -static void debug_print (void); -static void print_table_entries (struct table_entry **tab); -#endif - /* Parse and execute CROSSTABS, then clean up. */ int cmd_crosstabs (void) @@ -190,11 +206,6 @@ internal_cmd_crosstabs (void) if (!parse_crosstabs (&cmd)) return CMD_FAILURE; -#if DEBUGGING - /* Needs variables. */ - debug_print (); -#endif - mode = variables ? INTEGER : GENERAL; /* CELLS. */ @@ -435,11 +446,13 @@ crs_custom_variables (struct cmd_crosstabs *cmd UNUSED) } lex_get (); - for (i = orig_nv; i < variables_cnt; i++) - { - variables[i]->p.crs.min = min; - variables[i]->p.crs.max = max + 1.; - variables[i]->p.crs.count = max - min + 1; + for (i = orig_nv; i < variables_cnt; i++) + { + struct var_range *vr = xmalloc (sizeof *vr); + vr->min = min; + vr->max = max + 1.; + vr->count = max - min + 1; + var_attach_aux (variables[i], vr, var_dtor_free); } if (token == '/') @@ -453,58 +466,6 @@ crs_custom_variables (struct cmd_crosstabs *cmd UNUSED) variables = NULL; return 0; } - -#if DEBUGGING -static void -debug_print (void) -{ - printf ("CROSSTABS\n"); - - if (variables != NULL) - { - int i; - - printf ("\t/VARIABLES="); - for (i = 0; i < variables_cnt; i++) - { - struct variable *v = variables[i]; - - printf ("%s ", v->name); - if (i < variables_cnt - 1) - { - struct variable *nv = variables[i + 1]; - - if (v->p.crs.min == nv->p.crs.min - && v->p.crs.max == nv->p.crs.max) - continue; - } - printf ("(%d,%d) ", v->p.crs.min, v->p.crs.max - 1); - } - printf ("\n"); - } - - { - int i; - - printf ("\t/TABLES="); - for (i = 0; i < nxtab; i++) - { - struct crosstab *x = xtab[i]; - int j; - - if (i) - printf("\t\t"); - for (j = 0; j < x->nvar; j++) - { - if (j) - printf (" BY "); - printf ("%s", x->v[j]->name); - } - printf ("\n"); - } - } -} -#endif /* DEBUGGING */ /* Data file processing. */ @@ -536,14 +497,14 @@ precalc (void *aux UNUSED) x->ofs = n_sorted_tab; - for (j = 2; j < x->nvar; j++) - count *= x->vars[j - 2]->p.crs.count; - + for (j = 2; j < x->nvar; j++) + count *= get_var_range (x->vars[j - 2])->count; + sorted_tab = xrealloc (sorted_tab, sizeof *sorted_tab * (n_sorted_tab + count)); v = local_alloc (sizeof *v * x->nvar); - for (j = 2; j < x->nvar; j++) - v[j] = x->vars[j]->p.crs.min; + for (j = 2; j < x->nvar; j++) + v[j] = get_var_range (x->vars[j])->min; for (j = 0; j < count; j++) { struct table_entry *te; @@ -554,8 +515,9 @@ precalc (void *aux UNUSED) te->table = i; { - const int mat_size = (x->vars[0]->p.crs.count - * x->vars[1]->p.crs.count); + int row_cnt = get_var_range (x->vars[0])->count; + int col_cnt = get_var_range (x->vars[1])->count; + const int mat_size = row_cnt * col_cnt; int m; te->u.data = xmalloc (sizeof *te->u.data * mat_size); @@ -565,11 +527,14 @@ precalc (void *aux UNUSED) for (k = 2; k < x->nvar; k++) te->values[k].f = v[k]; - for (k = 2; k < x->nvar; k++) - if (++v[k] >= x->vars[k]->p.crs.max) - v[k] = x->vars[k]->p.crs.min; - else - break; + for (k = 2; k < x->nvar; k++) + { + struct var_range *vr = get_var_range (x->vars[k]); + if (++v[k] >= vr->max) + v[k] = vr->min; + else + break; + } } local_free (v); } @@ -584,8 +549,10 @@ precalc (void *aux UNUSED) static int calc_general (struct ccase *c, void *aux UNUSED) { + int bad_warn = 1; + /* Case weight. */ - double weight = dict_get_case_weight (default_dict, c); + double weight = dict_get_case_weight (default_dict, c, &bad_warn); /* Flattened current table index. */ int t; @@ -606,9 +573,9 @@ calc_general (struct ccase *c, void *aux UNUSED) for (j = 0; j < x->nvar; j++) { if ((cmd.miss == CRS_TABLE - && is_missing (&c->data[x->vars[j]->fv], x->vars[j])) + && is_missing (case_data (c, x->vars[j]->fv), x->vars[j])) || (cmd.miss == CRS_INCLUDE - && is_system_missing (&c->data[x->vars[j]->fv], + && is_system_missing (case_data (c, x->vars[j]->fv), x->vars[j]))) { x->missing += weight; @@ -616,10 +583,10 @@ calc_general (struct ccase *c, void *aux UNUSED) } if (x->vars[j]->type == NUMERIC) - te->values[j].f = c->data[x->vars[j]->fv].f; + te->values[j].f = case_num (c, x->vars[j]->fv); else { - memcpy (te->values[j].s, c->data[x->vars[j]->fv].s, + memcpy (te->values[j].s, case_str (c, x->vars[j]->fv), x->vars[j]->width); /* Necessary in order to simplify comparisons. */ @@ -656,8 +623,10 @@ calc_general (struct ccase *c, void *aux UNUSED) static int calc_integer (struct ccase *c, void *aux UNUSED) { + int bad_warn = 1; + /* Case weight. */ - double weight = dict_get_case_weight (default_dict, c); + double weight = dict_get_case_weight (default_dict, c, &bad_warn); /* Flattened current table index. */ int t; @@ -672,10 +641,11 @@ calc_integer (struct ccase *c, void *aux UNUSED) for (i = 0; i < x->nvar; i++) { struct variable *const v = x->vars[i]; - double value = c->data[v->fv].f; + struct var_range *vr = get_var_range (v); + double value = case_num (c, v->fv); /* Note that the first test also rules out SYSMIS. */ - if ((value < v->p.crs.min || value >= v->p.crs.max) + if ((value < vr->min || value >= vr->max) || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v))) { x->missing += weight; @@ -684,15 +654,19 @@ calc_integer (struct ccase *c, void *aux UNUSED) if (i > 1) { - ofs += fact * ((int) value - v->p.crs.min); - fact *= v->p.crs.count; + ofs += fact * ((int) value - vr->min); + fact *= vr->count; } } { - const int row = c->data[x->vars[ROW_VAR]->fv].f - x->vars[ROW_VAR]->p.crs.min; - const int col = c->data[x->vars[COL_VAR]->fv].f - x->vars[COL_VAR]->p.crs.min; - const int col_dim = x->vars[COL_VAR]->p.crs.count; + struct variable *row_var = x->vars[ROW_VAR]; + const int row = case_num (c, row_var->fv) - get_var_range (row_var)->min; + + struct variable *col_var = x->vars[COL_VAR]; + const int col = case_num (c, col_var->fv) - get_var_range (col_var)->min; + + const int col_dim = get_var_range (col_var)->count; sorted_tab[ofs]->u.data[col + row * col_dim] += weight; } @@ -703,36 +677,6 @@ calc_integer (struct ccase *c, void *aux UNUSED) return 1; } -#if DEBUGGING -/* Print out all table entries in NULL-terminated TAB for use by a - debugger (a person, not a program). */ -static void -print_table_entries (struct table_entry **tab) -{ - printf ("raw crosstabulation data:\n"); - for (; *tab; tab++) - { - const struct crosstab *x = xtab[(*tab)->table]; - int i; - - printf ("(%g) table:%d ", (*tab)->u.freq, (*tab)->table); - for (i = 0; i < x->nvar; i++) - { - if (i) - printf (", "); - printf ("%s:", x->v[i]->name); - - if (x->v[i]->type == NUMERIC) - printf ("%g", (*tab)->v[i].f); - else - printf ("%.*s", x->v[i]->width, (*tab)->v[i].s); - } - printf ("\n"); - } - fflush (stdout); -} -#endif - /* Compare the table_entry's at A and B and return a strcmp()-type result. */ static int @@ -808,9 +752,6 @@ postcalc (void *aux UNUSED) { n_sorted_tab = hsh_count (gen_tab); sorted_tab = (struct table_entry **) hsh_sort (gen_tab); -#if DEBUGGING - print_table_entries (sorted_tab); -#endif } make_summary_table (); @@ -894,8 +835,8 @@ make_summary_table (void) else { const struct crosstab *const x = xtab[(*pb)->table]; - const int n_cols = x->vars[COL_VAR]->p.crs.count; - const int n_rows = x->vars[ROW_VAR]->p.crs.count; + const int n_cols = get_var_range (x->vars[COL_VAR])->count; + const int n_rows = get_var_range (x->vars[ROW_VAR])->count; const int count = n_cols * n_rows; for (valid = 0.; pb < pe; pb++) @@ -1394,35 +1335,6 @@ output_pivot_table (struct table_entry **pb, struct table_entry **pe, W = cum; } -#if DEBUGGING - /* Print the matrix. */ - { - int i, r, c; - - printf ("%s by %s for", x->v[0]->name, x->v[1]->name); - for (i = 2; i < nvar; i++) - printf (" %s=%g", x->v[i]->name, tb[0]->v[i].f); - printf ("\n"); - printf (" "); - for (c = 0; c < n_cols; c++) - printf ("%4g", cols[c].f); - printf ("\n"); - for (r = 0; r < n_rows; r++) - { - printf ("%4g:", rows[r].f); - for (c = 0; c < n_cols; c++) - printf ("%4g", mat[c + r * n_cols]); - printf ("%4g", row_tot[r]); - printf ("\n"); - } - printf (" "); - for (c = 0; c < n_cols; c++) - printf ("%4g", col_tot[c]); - printf ("%4g", W); - printf ("\n\n"); - } -#endif - /* Find the first variable that differs from the last subtable, then display the values of the dimensioning variables for each table that needs it. */ @@ -1688,7 +1600,7 @@ compare_value (const void *a_, const void *b_, void *width_) /* Given an array of ENTRY_CNT table_entry structures starting at ENTRIES, creates a sorted list of the values that the variable - with index VAR_INDEX takes on. The values are returned as a + with index VAR_IDX takes on. The values are returned as a malloc()'darray stored in *VALUES, with the number of values stored in *VALUE_CNT. */ @@ -1696,9 +1608,11 @@ static void enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx, union value **values, int *value_cnt) { + struct variable *v = xtab[(*entries)->table]->vars[var_idx]; + if (mode == GENERAL) { - int width = xtab[(*entries)->table]->vars[var_idx]->width; + int width = v->width; int i; *values = xmalloc (sizeof **values * entry_cnt); @@ -1709,15 +1623,14 @@ enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx, } else { - struct crosstab_proc *crs - = &xtab[(*entries)->table]->vars[var_idx]->p.crs; + struct var_range *vr = get_var_range (v); int i; assert (mode == INTEGER); - *values = xmalloc (sizeof **values * crs->count); - for (i = 0; i < crs->count; i++) - (*values)[i].f = i + crs->min; - *value_cnt = crs->count; + *values = xmalloc (sizeof **values * vr->count); + for (i = 0; i < vr->count; i++) + (*values)[i].f = i + vr->min; + *value_cnt = vr->count; } } @@ -1728,7 +1641,7 @@ static void table_value_missing (struct tab_table *table, int c, int r, unsigned char opt, const union value *v, const struct variable *var) { - struct len_string s; + struct fixed_string s; const char *label = val_labs_find (var->val_labs, *v); if (label) @@ -1775,7 +1688,7 @@ format_cell_entry (struct tab_table *table, int c, int r, double value, { const struct fmt_spec f = {FMT_F, 10, 1}; union value v; - struct len_string s; + struct fixed_string s; s.length = 10; s.string = tab_alloc (table, 16); @@ -2030,7 +1943,7 @@ display_chisq (void) tab_float (chisq, 1, 0, TAB_RIGHT, chisq_v[i], 8, 3); tab_float (chisq, 2, 0, TAB_RIGHT, df[i], 8, 0); tab_float (chisq, 3, 0, TAB_RIGHT, - chisq_sig (chisq_v[i], df[i]), 8, 3); + gsl_cdf_chisq_Q (chisq_v[i], df[i]), 8, 3); } else { @@ -2695,10 +2608,10 @@ calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC], if (cmd.a_statistics[CRS_ST_D]) { - d_yx_cum += fij * sqr (Dr * (Cij - Dij) - - (P - Q) * (W - row_tot[i])); - d_xy_cum += fij * sqr (Dc * (Dij - Cij) - - (Q - P) * (W - col_tot[j])); + d_yx_cum += fij * pow2 (Dr * (Cij - Dij) + - (P - Q) * (W - row_tot[i])); + d_xy_cum += fij * pow2 (Dc * (Dij - Cij) + - (Q - P) * (W - col_tot[j])); } if (++j == n_cols) @@ -2718,8 +2631,8 @@ calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC], } btau_var = ((btau_cum - - (W * sqr (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc)))) - / sqr (Dr * Dc)); + - (W * pow2 (W * (P - Q) / sqrt (Dr * Dc) * (Dr + Dc)))) + / pow2 (Dr * Dc)); if (cmd.a_statistics[CRS_ST_BTAU]) { ase[3] = sqrt (btau_var); @@ -2744,17 +2657,17 @@ calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC], somers_d_ase[0] = 2. * btau_var / (Dr + Dc) * sqrt (Dr * Dc); somers_d_t[0] = (somers_d_v[0] / (4 / (Dc + Dr) - * sqrt (ctau_cum - sqr (P - Q) / W))); + * sqrt (ctau_cum - pow2 (P - Q) / W))); somers_d_v[1] = (P - Q) / Dc; - somers_d_ase[1] = 2. / sqr (Dc) * sqrt (d_xy_cum); + somers_d_ase[1] = 2. / pow2 (Dc) * sqrt (d_xy_cum); somers_d_t[1] = (somers_d_v[1] / (2. / Dc - * sqrt (ctau_cum - sqr (P - Q) / W))); + * sqrt (ctau_cum - pow2 (P - Q) / W))); somers_d_v[2] = (P - Q) / Dr; - somers_d_ase[2] = 2. / sqr (Dr) * sqrt (d_yx_cum); + somers_d_ase[2] = 2. / pow2 (Dr) * sqrt (d_yx_cum); somers_d_t[2] = (somers_d_v[2] / (2. / Dr - * sqrt (ctau_cum - sqr (P - Q) / W))); + * sqrt (ctau_cum - pow2 (P - Q) / W))); } free (cum); @@ -2847,12 +2760,12 @@ calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC], / (W * (W * W - sum_rici) * (W * W - sum_rici))); #if 0 t[8] = v[8] / sqrt (W * (((sum_fii * (W - sum_fii)) - / sqr (W * W - sum_rici)) + / pow2 (W * W - sum_rici)) + ((2. * (W - sum_fii) * (2. * sum_fii * sum_rici - W * sum_fiiri_ci)) / cube (W * W - sum_rici)) - + (sqr (W - sum_fii) + + (pow2 (W - sum_fii) * (W * sum_fijri_ci2 - 4. * sum_rici * sum_rici) / pow4 (W * W - sum_rici)))); @@ -3015,7 +2928,7 @@ calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], { const int deltaj = j == cm_index; accum += (mat[j + i * n_cols] - * sqr ((j == fim_index[i]) + * pow2 ((j == fim_index[i]) - deltaj + v[0] * deltaj)); } @@ -3031,7 +2944,7 @@ calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], if (cm_index != fim_index[i]) accum += (mat[i * n_cols + fim_index[i]] + mat[i * n_cols + cm_index]); - t[2] = v[2] / (sqrt (accum - sqr (sum_fim - cm) / W) / (W - cm)); + t[2] = v[2] / (sqrt (accum - pow2 (sum_fim - cm) / W) / (W - cm)); } /* ASE1 for X given Y. */ @@ -3043,7 +2956,7 @@ calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], { const int deltaj = i == rm_index; accum += (mat[j + i * n_cols] - * sqr ((i == fmj_index[j]) + * pow2 ((i == fmj_index[j]) - deltaj + v[0] * deltaj)); } @@ -3059,7 +2972,7 @@ calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], if (rm_index != fmj_index[j]) accum += (mat[j + n_cols * fmj_index[j]] + mat[j + n_cols * rm_index]); - t[1] = v[1] / (sqrt (accum - sqr (sum_fmj - rm) / W) / (W - rm)); + t[1] = v[1] / (sqrt (accum - pow2 (sum_fmj - rm) / W) / (W - rm)); } /* Symmetric ASE0 and ASE1. */ @@ -3072,12 +2985,12 @@ calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], { int temp0 = (fmj_index[j] == i) + (fim_index[i] == j); int temp1 = (i == rm_index) + (j == cm_index); - accum0 += mat[j + i * n_cols] * sqr (temp0 - temp1); + accum0 += mat[j + i * n_cols] * pow2 (temp0 - temp1); accum1 += (mat[j + i * n_cols] - * sqr (temp0 + (v[0] - 1.) * temp1)); + * pow2 (temp0 + (v[0] - 1.) * temp1)); } ase[0] = sqrt (accum1 - 4. * W * v[0] * v[0]) / (2. * W - rm - cm); - t[0] = v[0] / (sqrt (accum0 - sqr ((sum_fim + sum_fmj - cm - rm) / W)) + t[0] = v[0] / (sqrt (accum0 - pow2 ((sum_fim + sum_fmj - cm - rm) / W)) / (2. * W - rm - cm)); } @@ -3093,7 +3006,7 @@ calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], for (sum_fij2_ri = sum_fij2_ci = 0., i = 0; i < n_rows; i++) for (j = 0; j < n_cols; j++) { - double temp = sqr (mat[j + i * n_cols]); + double temp = pow2 (mat[j + i * n_cols]); sum_fij2_ri += temp / row_tot[i]; sum_fij2_ci += temp / col_tot[j]; } @@ -3131,7 +3044,7 @@ calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], if (entry <= 0.) continue; - P += entry * sqr (log (col_tot[j] * row_tot[i] / (W * entry))); + P += entry * pow2 (log (col_tot[j] * row_tot[i] / (W * entry))); UXY -= entry / W * log (entry / W); } @@ -3143,27 +3056,27 @@ calc_directional (double v[N_DIRECTIONAL], double ase[N_DIRECTIONAL], if (entry <= 0.) continue; - ase1_yx += entry * sqr (UY * log (entry / row_tot[i]) + ase1_yx += entry * pow2 (UY * log (entry / row_tot[i]) + (UX - UXY) * log (col_tot[j] / W)); - ase1_xy += entry * sqr (UX * log (entry / col_tot[j]) + ase1_xy += entry * pow2 (UX * log (entry / col_tot[j]) + (UY - UXY) * log (row_tot[i] / W)); - ase1_sym += entry * sqr ((UXY + ase1_sym += entry * pow2 ((UXY * log (row_tot[i] * col_tot[j] / (W * W))) - (UX + UY) * log (entry / W)); } v[5] = 2. * ((UX + UY - UXY) / (UX + UY)); - ase[5] = (2. / (W * sqr (UX + UY))) * sqrt (ase1_sym); + ase[5] = (2. / (W * pow2 (UX + UY))) * sqrt (ase1_sym); t[5] = v[5] / ((2. / (W * (UX + UY))) - * sqrt (P - sqr (UX + UY - UXY) / W)); + * sqrt (P - pow2 (UX + UY - UXY) / W)); v[6] = (UX + UY - UXY) / UX; ase[6] = sqrt (ase1_xy) / (W * UX * UX); - t[6] = v[6] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UX)); + t[6] = v[6] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UX)); v[7] = (UX + UY - UXY) / UY; ase[7] = sqrt (ase1_yx) / (W * UY * UY); - t[7] = v[7] / (sqrt (P - W * sqr (UX + UY - UXY)) / (W * UY)); + t[7] = v[7] / (sqrt (P - W * pow2 (UX + UY - UXY)) / (W * UY)); } /* Somers' D. */