X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fcrosstabs.q;h=bf89d377cab49efdd3607d9eb9a2738b26b8130b;hb=722d896a394329ad9da31575439e1a082161fa6b;hp=7852ed84996a610a987e8c9d5f6f5dfcb97fc327;hpb=b18e1b9c95a478d434e9fcef9d8579d0b96b9a8d;p=pspp diff --git a/src/crosstabs.q b/src/crosstabs.q index 7852ed8499..bf89d377ca 100644 --- a/src/crosstabs.q +++ b/src/crosstabs.q @@ -51,8 +51,8 @@ char *alloca (); #include #include #include +#include "algorithm.h" #include "alloc.h" -#include "avl.h" #include "hash.h" #include "pool.h" #include "dcdflib/cdflib.h" @@ -64,11 +64,10 @@ char *alloca (); #include "stats.h" #include "output.h" #include "tab.h" +#include "value-labels.h" #include "var.h" #include "vfm.h" -#undef DEBUGGING -/*#define DEBUGGING 1*/ #include "debug-print.h" /* (specification) @@ -135,8 +134,9 @@ static struct hsh_table *gen_tab; /* Hash table. */ static int n_sorted_tab; /* Number of entries in sorted_tab. */ static struct table_entry **sorted_tab; /* Sorted table. */ -/* VARIABLES dictionary. */ -static struct dictionary *var_dict; +/* Variables specifies on VARIABLES. */ +static struct variable **variables; +static size_t variables_cnt; /* TABLES. */ static struct crosstab **xtab; @@ -166,7 +166,6 @@ static struct pool *pl_tc; /* For table cells. */ static struct pool *pl_col; /* For column data. */ static int internal_cmd_crosstabs (void); -static void free_var_dict (void); static void precalc (void); static int calc_general (struct ccase *); static int calc_integer (struct ccase *); @@ -184,7 +183,7 @@ cmd_crosstabs (void) { int result = internal_cmd_crosstabs (); - free_var_dict (); + free (variables); pool_destroy (pl_tc); pool_destroy (pl_col); @@ -195,7 +194,8 @@ cmd_crosstabs (void) static int internal_cmd_crosstabs (void) { - var_dict = NULL; + variables = NULL; + variables_cnt = 0; xtab = NULL; nxtab = 0; pl_tc = pool_create (); @@ -206,12 +206,11 @@ internal_cmd_crosstabs (void) return CMD_FAILURE; #if DEBUGGING - /* Needs var_dict. */ + /* Needs variables. */ debug_print (); #endif - mode = var_dict ? INTEGER : GENERAL; - free_var_dict(); + mode = variables ? INTEGER : GENERAL; /* CELLS. */ expected = 0; @@ -296,45 +295,16 @@ internal_cmd_crosstabs (void) else write = CRS_WR_NONE; - update_weighting (&default_dict); procedure (precalc, mode == GENERAL ? calc_general : calc_integer, postcalc); return CMD_SUCCESS; } -/* Frees var_dict once it's no longer needed. */ -static void -free_var_dict (void) -{ - if (!var_dict) - return; - - { - int i; - - if (var_dict->var_by_name) - { - avl_destroy (var_dict->var_by_name, NULL); - var_dict->var_by_name = NULL; - } - - for (i = 0; i < var_dict->nvar; i++) - free (var_dict->var[i]); - free (var_dict->var); - var_dict->var = NULL; - var_dict->nvar = 0; - - free_dictionary (var_dict); - - var_dict = NULL; - } -} - /* Parses the TABLES subcommand. */ static int crs_custom_tables (struct cmd_crosstabs *cmd unused) { - struct dictionary *dict; + struct var_set *var_set; int n_by; struct variable ***by = NULL; int *by_nvar = NULL; @@ -343,20 +313,24 @@ crs_custom_tables (struct cmd_crosstabs *cmd unused) /* Ensure that this is a TABLES subcommand. */ if (!lex_match_id ("TABLES") - && (token != T_ID || !is_varname (tokid)) + && (token != T_ID || dict_lookup_var (default_dict, tokid) == NULL) && token != T_ALL) return 2; lex_match ('='); - dict = var_dict ? var_dict : &default_dict; + if (variables != NULL) + var_set = var_set_create_from_array (variables, variables_cnt); + else + var_set = var_set_create_from_dict (default_dict); + assert (var_set != NULL); for (n_by = 0; ;) { by = xrealloc (by, sizeof *by * (n_by + 1)); by_nvar = xrealloc (by_nvar, sizeof *by_nvar * (n_by + 1)); - if (!parse_variables (dict, &by[n_by], &by_nvar[n_by], - PV_NO_DUPLICATE | PV_NO_SCRATCH)) - goto lossage; + if (!parse_var_set_vars (var_set, &by[n_by], &by_nvar[n_by], + PV_NO_DUPLICATE | PV_NO_SCRATCH)) + goto done; nx *= by_nvar[n_by]; n_by++; @@ -365,7 +339,7 @@ crs_custom_tables (struct cmd_crosstabs *cmd unused) if (n_by < 1) { lex_error (_("expecting BY")); - goto lossage; + goto done; } else break; @@ -388,12 +362,8 @@ crs_custom_tables (struct cmd_crosstabs *cmd unused) { int i; - if (var_dict == NULL) - for (i = 0; i < n_by; i++) - x->v[i] = by[i][by_iter[i]]; - else - for (i = 0; i < n_by; i++) - x->v[i] = default_dict.var[by[i][by_iter[i]]->foo]; + for (i = 0; i < n_by; i++) + x->v[i] = by[i][by_iter[i]]; } { @@ -411,11 +381,10 @@ crs_custom_tables (struct cmd_crosstabs *cmd unused) } free (by_iter); } - success = 1; - /* Despite the name, we come here whether we're successful or - not. */ - lossage: + + done: + /* All return paths lead here. */ { int i; @@ -425,6 +394,8 @@ crs_custom_tables (struct cmd_crosstabs *cmd unused) free (by_nvar); } + var_set_destroy (var_set); + return success; } @@ -432,9 +403,6 @@ crs_custom_tables (struct cmd_crosstabs *cmd unused) static int crs_custom_variables (struct cmd_crosstabs *cmd unused) { - struct variable **v = NULL; - int nv = 0; - if (nxtab) { msg (SE, _("VARIABLES must be specified before TABLES.")); @@ -445,12 +413,12 @@ crs_custom_variables (struct cmd_crosstabs *cmd unused) for (;;) { - int orig_nv = nv; + int orig_nv = variables_cnt; int i; long min, max; - if (!parse_variables (&default_dict, &v, &nv, + if (!parse_variables (default_dict, &variables, &variables_cnt, (PV_APPEND | PV_NUMERIC | PV_NO_DUPLICATE | PV_NO_SCRATCH))) return 0; @@ -487,40 +455,22 @@ crs_custom_variables (struct cmd_crosstabs *cmd unused) } lex_get (); - for (i = orig_nv; i < nv; i++) + for (i = orig_nv; i < variables_cnt; i++) { - v[i]->p.crs.min = min; - v[i]->p.crs.max = max + 1.; - v[i]->p.crs.count = max - min + 1; + variables[i]->p.crs.min = min; + variables[i]->p.crs.max = max + 1.; + variables[i]->p.crs.count = max - min + 1; } if (token == '/') break; } - { - int i; - - var_dict = new_dictionary (0); - var_dict->var = xmalloc (sizeof *var_dict->var * nv); - var_dict->nvar = nv; - for (i = 0; i < nv; i++) - { - struct variable *var = xmalloc (offsetof (struct variable, width)); - strcpy (var->name, v[i]->name); - var->index = i; - var->type = v[i]->type; - var->foo = v[i]->index; - var_dict->var[i] = var; - avl_force_insert (var_dict->var_by_name, var); - } - - free (v); - return 1; - } + return 1; lossage: - free (v); + free (variables); + variables = NULL; return 0; } @@ -530,27 +480,25 @@ debug_print (void) { printf ("CROSSTABS\n"); - if (var_dict) + if (variables != NULL) { int i; printf ("\t/VARIABLES="); - for (i = 0; i < var_dict->nvar; i++) + for (i = 0; i < variables_cnt; i++) { - struct variable *v = var_dict->var[i]; - struct variable *iv = default_dict.var[v->foo]; + struct variable *v = variables[i]; printf ("%s ", v->name); - if (i < var_dict->nvar - 1) + if (i < variables_cnt - 1) { - struct variable *nv = var_dict->var[i + 1]; - struct variable *niv = default_dict.var[nv->foo]; + struct variable *nv = variables[i + 1]; - if (iv->p.crs.min == niv->p.crs.min - && iv->p.crs.max == niv->p.crs.max) + if (v->p.crs.min == nv->p.crs.min + && v->p.crs.max == nv->p.crs.max) continue; } - printf ("(%d,%d) ", iv->p.crs.min, iv->p.crs.max - 1); + printf ("(%d,%d) ", v->p.crs.min, v->p.crs.max - 1); } printf ("\n"); } @@ -657,9 +605,7 @@ static int calc_general (struct ccase *c) { /* Case weight. */ - double w = (default_dict.weight_index != -1 - ? c->data[default_dict.var[default_dict.weight_index]->fv].f - : 1.0); + double weight = dict_get_case_weight (default_dict, c); /* Flattened current table index. */ int t; @@ -684,7 +630,7 @@ calc_general (struct ccase *c) || (cmd.miss == CRS_INCLUDE && is_system_missing (&c->data[x->v[j]->fv], x->v[j]))) { - x->missing += w; + x->missing += weight; goto next_crosstab; } @@ -703,18 +649,19 @@ calc_general (struct ccase *c) /* Add record to hash table. */ { - struct table_entry **tepp = (struct table_entry **) hsh_probe (gen_tab, te); - if (NULL == *tepp) + struct table_entry **tepp + = (struct table_entry **) hsh_probe (gen_tab, te); + if (*tepp == NULL) { struct table_entry *tep = pool_alloc (pl_tc, entry_size); - te->u.freq = w; + te->u.freq = weight; memcpy (tep, te, entry_size); *tepp = tep; } else - (*tepp)->u.freq += w; + (*tepp)->u.freq += weight; } next_crosstab: @@ -728,9 +675,7 @@ static int calc_integer (struct ccase *c) { /* Case weight. */ - double w = (default_dict.weight_index != -1 - ? c->data[default_dict.var[default_dict.weight_index]->fv].f - : 1.0); + double weight = dict_get_case_weight (default_dict, c); /* Flattened current table index. */ int t; @@ -751,7 +696,7 @@ calc_integer (struct ccase *c) if ((value < v->p.crs.min || value >= v->p.crs.max) || (cmd.miss == CRS_TABLE && is_num_user_missing (value, v))) { - x->missing += w; + x->missing += weight; goto next_crosstab; } @@ -767,7 +712,7 @@ calc_integer (struct ccase *c) const int col = c->data[x->v[COL_VAR]->fv].f - x->v[COL_VAR]->p.crs.min; const int col_dim = x->v[COL_VAR]->p.crs.count; - sorted_tab[ofs]->u.data[col + row * col_dim] += w; + sorted_tab[ofs]->u.data[col + row * col_dim] += weight; } next_crosstab: ; @@ -806,19 +751,18 @@ print_table_entries (struct table_entry **tab) } #endif -/* Compare the table_entry's at PA and PB and return a strcmp()-type +/* Compare the table_entry's at A and B and return a strcmp()-type result. */ static int -compare_table_entry (const void *pa, const void *pb, void *foo unused) +compare_table_entry (const void *a_, const void *b_, void *foo unused) { - const struct table_entry *a = pa; - const struct table_entry *b = pb; - - { - const int difftable = a->table - b->table; - if (difftable) - return difftable; - } + const struct table_entry *a = a_; + const struct table_entry *b = b_; + + if (a->table > b->table) + return 1; + else if (a->table < b->table) + return -1; { const struct crosstab *x = xtab[a->table]; @@ -870,10 +814,11 @@ hash_table_entry (const void *pa, void *foo unused) /* Post-data reading calculations. */ -static struct table_entry **find_pivot_extent (struct table_entry **, int *cnt, int pivot); -static void enum_var_values (struct table_entry **beg, int cnt, - union value **values, int *nvalues, - int var_index); +static struct table_entry **find_pivot_extent (struct table_entry **, + int *cnt, int pivot); +static void enum_var_values (struct table_entry **entries, int entry_cnt, + int var_idx, + union value **values, int *value_cnt); static void output_pivot_table (struct table_entry **, struct table_entry **, double **, double **, double **, int *, int *, int *); @@ -1110,7 +1055,7 @@ output_pivot_table (struct table_entry **pb, struct table_entry **pe, struct table_entry *cmp; x = xtab[(*pb)->table]; - enum_var_values (pb, pe - pb, &cols, &n_cols, COL_VAR); + enum_var_values (pb, pe - pb, COL_VAR, &cols, &n_cols); nvar = cmd.pivot == CRS_PIVOT ? x->nvar : 2; @@ -1316,7 +1261,7 @@ output_pivot_table (struct table_entry **pb, struct table_entry **pe, break; /* Find all the row variable values. */ - enum_var_values (tb, te - tb, &rows, &n_rows, ROW_VAR); + enum_var_values (tb, te - tb, ROW_VAR, &rows, &n_rows); /* Allocate memory space for the column and row totals. */ if (n_rows > *maxrows) @@ -1710,8 +1655,8 @@ find_pivot_extent_general (struct table_entry **tp, int *cnt, int pivot) /* Integer mode correspondent to find_pivot_extent_general(). This could be optimized somewhat, but I just don't give a crap about - CROSSTABS performance in integer mode, which is just a wart on - CROSSTABS' ass as far as I'm concerned. + CROSSTABS performance in integer mode, which is just a + CROSSTABS wart as far as I'm concerned. That said, feel free to send optimization patches to me. */ static struct table_entry ** @@ -1742,67 +1687,54 @@ find_pivot_extent_integer (struct table_entry **tp, int *cnt, int pivot) return tp; } -/* Compare value * A and B, where WIDTH is the string width or 0 for - numerics, and return a strcmp()-type result. */ +/* Compares `union value's A_ and B_ and returns a strcmp()-like + result. WIDTH_ points to an int which is either 0 for a + numeric value or a string width for a string value. */ static int -compare_value (const void *pa, const void *pb, void *pwidth) +compare_value (const void *a_, const void *b_, void *width_) { - const union value *a = pa; - const union value *b = pb; - const int width = (int) pwidth; + const union value *a = a_; + const union value *b = b_; + const int *pwidth = width_; + const int width = *pwidth; - if (width) - return strncmp (a->s, b->s, width); + if (width == 0) + return (a->f < b->f) ? -1 : (a->f > b->f); else - return a->f < b->f ? -1 : (a->f > b->f ? 1 : 0); + return strncmp (a->s, b->s, width); } -/* Given a list of CNT table_entry's starting at BEG, creates a list - of *NVALUES values *VALUES of variable with index VAR_INDEX. */ +/* Given an array of ENTRY_CNT table_entry structures starting at + ENTRIES, creates a sorted list of the values that the variable + with index VAR_INDEX takes on. The values are returned as a + malloc()'darray stored in *VALUES, with the number of values + stored in *VALUE_CNT. + */ static void -enum_var_values (struct table_entry **beg, int cnt, union value **values, int *nvalues, - int var_index) +enum_var_values (struct table_entry **entries, int entry_cnt, int var_idx, + union value **values, int *value_cnt) { if (mode == GENERAL) { - avl_tree *tree; - - tree = avl_create (pl_col, compare_value, - (void *) (xtab[(*beg)->table]->v[var_index]->width)); - - { - int i; - - for (i = 0; i < cnt; i++) - avl_insert (tree, &beg[i]->v[var_index]); - *values = xmalloc (sizeof **values * avl_count (tree)); - } - - { - avl_traverser trav; - union value *v; - int i; - - i = 0; - while (NULL != (v = avl_traverse (tree, &trav))) - (*values)[i++] = *v; - *nvalues = i; - } + int width = xtab[(*entries)->table]->v[var_idx]->width; + int i; - /* Destroy tree. */ - pool_destroy (pl_col); - pl_col = pool_create (); + *values = xmalloc (sizeof **values * entry_cnt); + for (i = 0; i < entry_cnt; i++) + (*values)[i] = entries[i]->v[var_idx]; + *value_cnt = sort_unique (*values, entry_cnt, sizeof **values, + compare_value, &width); } else { - struct crosstab_proc *crs = &xtab[(*beg)->table]->v[var_index]->p.crs; + struct crosstab_proc *crs = &xtab[(*entries)->table]->v[var_idx]->p.crs; int i; assert (mode == INTEGER); *values = xmalloc (sizeof **values * crs->count); for (i = 0; i < crs->count; i++) (*values)[i].f = i + crs->min; - *nvalues = crs->count; + *value_cnt = crs->count; } } @@ -1815,7 +1747,7 @@ table_value_missing (struct tab_table *table, int c, int r, unsigned char opt, { struct len_string s; - char *label = get_val_lab (var, *v, 0); + const char *label = val_labs_find (var->val_labs, *v); if (label) { tab_text (table, c, r, TAB_LEFT, label); @@ -2359,7 +2291,7 @@ display_directional (void) /* Returns the value of the gamma (factorial) function for an integer argument X. */ -double +static double gamma_int (double x) { double r = 1; @@ -2916,7 +2848,7 @@ calc_symmetric (double v[N_SYMMETRIC], double ase[N_SYMMETRIC], + (sqr (W - sum_fii) * (W * sum_fijri_ci2 - 4. * sum_rici * sum_rici) - / hypercube (W * W - sum_rici)))); + / pow4 (W * W - sum_rici)))); #else t[8] = v[8] / ase[8]; #endif