From: John Darrington Date: Thu, 23 Jul 2009 06:15:20 +0000 (+0200) Subject: Merge commit 'origin/stable' X-Git-Tag: build37~48 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp-builds.git;a=commitdiff_plain;h=b5c82cc9aabe7e641011130240ae1b2e84348e23;hp=-c Merge commit 'origin/stable' Conflicts: src/language/stats/t-test.q --- b5c82cc9aabe7e641011130240ae1b2e84348e23 diff --combined src/data/por-file-reader.c index cd8b213e,823361b0..a4631242 --- a/src/data/por-file-reader.c +++ b/src/data/por-file-reader.c @@@ -43,7 -43,6 +43,7 @@@ #include #include +#include "minmax.h" #include "xalloc.h" #include "gettext.h" @@@ -75,7 -74,8 +75,7 @@@ struct pfm_reade char *trans; /* 256-byte character set translation table. */ int var_cnt; /* Number of variables. */ int weight_index; /* 0-based index of weight variable, or -1. */ - int *widths; /* Variable widths, 0 for numeric. */ - size_t value_cnt; /* Number of `value's per case. */ + struct caseproto *proto; /* Format of output cases. */ bool ok; /* Set false on I/O error. */ }; @@@ -256,7 -256,8 +256,7 @@@ pfm_open_reader (struct file_handle *fh r->weight_index = -1; r->trans = NULL; r->var_cnt = 0; - r->widths = NULL; - r->value_cnt = 0; + r->proto = NULL; r->ok = true; if (setjmp (r->bail_out)) goto error; @@@ -295,8 -296,8 +295,8 @@@ if (!match (r, 'F')) error (r, _("Data record expected.")); - r->value_cnt = dict_get_next_value_idx (*dict); - return casereader_create_sequential (NULL, r->value_cnt, CASENUMBER_MAX, + r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool); + return casereader_create_sequential (NULL, r->proto, CASENUMBER_MAX, &por_file_casereader_class, r); error: @@@ -447,28 -448,6 +447,28 @@@ read_string (struct pfm_reader *r, cha *buf = '\0'; } + +/* Reads a string into BUF, which must have room for 256 + characters. + Returns the number of bytes read. +*/ +static size_t +read_bytes (struct pfm_reader *r, uint8_t *buf) +{ + int n = read_int (r); + if (n < 0 || n > 255) + error (r, _("Bad string length %d."), n); + + while (n-- > 0) + { + *buf++ = r->cc; + advance (r); + } + return n; +} + + + /* Reads a string and returns a copy of it allocated from R's pool. */ static char * @@@ -629,7 -608,7 +629,7 @@@ assign_default return fmt_default_for_width (var_get_width (v)); } -static union value parse_value (struct pfm_reader *, struct variable *); +static void parse_value (struct pfm_reader *, int width, union value *); /* Read information on all the variables. */ static void @@@ -644,6 -623,7 +644,6 @@@ read_variables (struct pfm_reader *r, s r->var_cnt = read_int (r); if (r->var_cnt <= 0) error (r, _("Invalid number of variables %d."), r->var_cnt); - r->widths = pool_nalloc (r->pool, r->var_cnt, sizeof *r->widths); /* Purpose of this value is unknown. It is typically 161. */ read_int (r); @@@ -672,6 -652,7 +672,6 @@@ width = read_int (r); if (width < 0) error (r, _("Invalid variable width %d."), width); - r->widths[i] = width; read_string (r, name); for (j = 0; j < 6; j++) @@@ -708,7 -689,7 +708,7 @@@ var_set_write_format (v, &write); /* Range missing values. */ - mv_init (&miss, var_get_width (v)); + mv_init (&miss, width); if (match (r, 'B')) { double x = read_float (r); @@@ -723,17 -704,11 +723,17 @@@ /* Single missing values. */ while (match (r, '8')) { - union value value = parse_value (r, v); + int mv_width = MIN (width, 8); + union value value; + + parse_value (r, mv_width, &value); + value_resize (&value, mv_width, width); mv_add_value (&miss, &value); + value_destroy (&value, width); } var_set_missing_values (v, &miss); + mv_destroy (&miss); if (match (r, 'C')) { @@@ -754,19 -729,22 +754,19 @@@ } } -/* Parse a value for variable VV into value V. */ -static union value -parse_value (struct pfm_reader *r, struct variable *vv) +/* Parse a value of with WIDTH into value V. */ +static void +parse_value (struct pfm_reader *r, int width, union value *v) { - union value v; - - if (var_is_alpha (vv)) + value_init (v, width); + if (width > 0) { - char string[256]; - read_string (r, string); - buf_copy_str_rpad (v.s, 8, string); + uint8_t buf[256]; + size_t n_bytes = read_bytes (r, buf); + value_copy_buf_rpad (v, width, buf, n_bytes, ' '); } else - v.f = read_float (r); - - return v; + v->f = read_float (r); } /* Parse a value label record and return success. */ @@@ -806,14 -784,17 +806,14 @@@ read_value_label (struct pfm_reader *r char label[256]; int j; - val = parse_value (r, v[0]); + parse_value (r, var_get_width (v[0]), &val); read_string (r, label); /* Assign the value label to each variable. */ for (j = 0; j < nv; j++) - { - struct variable *var = v[j]; + var_replace_value_label (v[j], &val, label); - if (!var_is_long_string (var)) - var_replace_value_label (var, &val, label); - } + value_destroy (&val, var_get_width (v[0])); } } @@@ -833,46 -814,50 +833,46 @@@ read_documents (struct pfm_reader *r, s } } -/* Reads one case from portable file R into C. */ -static bool -por_file_casereader_read (struct casereader *reader, void *r_, struct ccase *c) +/* Reads and returns one case from portable file R. Returns a + null pointer on failure. */ +static struct ccase * +por_file_casereader_read (struct casereader *reader, void *r_) { struct pfm_reader *r = r_; + struct ccase *volatile c; size_t i; - size_t idx; - case_create (c, casereader_get_value_cnt (reader)); + c = case_create (r->proto); setjmp (r->bail_out); if (!r->ok) { casereader_force_error (reader); - case_destroy (c); - return false; + case_unref (c); + return NULL; } /* Check for end of file. */ if (r->cc == 'Z') { - case_destroy (c); - return false; + case_unref (c); + return NULL; } - idx = 0; for (i = 0; i < r->var_cnt; i++) { - int width = r->widths[i]; + int width = caseproto_get_width (r->proto, i); if (width == 0) - { - case_data_rw_idx (c, idx)->f = read_float (r); - idx++; - } + case_data_rw_idx (c, i)->f = read_float (r); else { - char string[256]; - read_string (r, string); - buf_copy_str_rpad (case_data_rw_idx (c, idx)->s, width, string); - idx += DIV_RND_UP (width, MAX_SHORT_STRING); + uint8_t buf[256]; + size_t n_bytes = read_bytes (r, buf); + u8_buf_copy_rpad (case_str_rw_idx (c, i), width, buf, n_bytes, ' '); } } - return true; + return c; } /* Returns true if FILE is an SPSS portable file, @@@ -882,17 -867,30 +882,30 @@@ pfm_detect (FILE *file { unsigned char header[464]; char trans[256]; - int cooked_cnt, raw_cnt; + int cooked_cnt, raw_cnt, line_len; int i; cooked_cnt = raw_cnt = 0; + line_len = 0; while (cooked_cnt < sizeof header) { int c = getc (file); if (c == EOF || raw_cnt++ > 512) return false; - else if (c != '\n' && c != '\r') - header[cooked_cnt++] = c; + else if (c == '\n') + { + while (line_len < 80 && cooked_cnt < sizeof header) + { + header[cooked_cnt++] = ' '; + line_len++; + } + line_len = 0; + } + else if (c != '\r') + { + header[cooked_cnt++] = c; + line_len++; + } } memset (trans, 0, 256); diff --combined src/data/sys-file-reader.c index 9978d43a,fe7b5334..b0a41a83 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@@ -25,7 -25,6 +25,7 @@@ #include #include +#include #include #include #include @@@ -35,7 -34,6 +35,7 @@@ #include #include +#include #include #include #include @@@ -72,7 -70,7 +72,7 @@@ struct sfm_reade struct fh_lock *lock; /* Mutual exclusion for file handle. */ FILE *file; /* File stream. */ bool error; /* I/O or corruption error? */ - size_t value_cnt; /* Number of "union value"s in struct case. */ + struct caseproto *proto; /* Format of output cases. */ /* File format. */ enum integer_format integer_format; /* On-disk integer format. */ @@@ -100,11 -98,9 +100,11 @@@ static struct variable *lookup_var_by_v struct variable **, int value_idx); +static void sys_msg (struct sfm_reader *r, int class, + const char *format, va_list args) + PRINTF_FORMAT (3, 0); static void sys_warn (struct sfm_reader *, const char *, ...) PRINTF_FORMAT (2, 3); - static void sys_error (struct sfm_reader *, const char *, ...) PRINTF_FORMAT (2, 3) NO_RETURN; @@@ -116,23 -112,15 +116,23 @@@ static double read_float (struct sfm_re static void read_string (struct sfm_reader *, char *, size_t); static void skip_bytes (struct sfm_reader *, size_t); -static struct variable_to_value_map *open_variable_to_value_map ( - struct sfm_reader *, size_t size); -static void close_variable_to_value_map (struct sfm_reader *r, - struct variable_to_value_map *); -static bool read_variable_to_value_map (struct sfm_reader *, - struct dictionary *, - struct variable_to_value_map *, - struct variable **var, char **value, - int *warning_cnt); +static struct text_record *open_text_record (struct sfm_reader *, size_t size); +static void close_text_record (struct sfm_reader *r, + struct text_record *); +static bool read_variable_to_value_pair (struct sfm_reader *, + struct dictionary *, + struct text_record *, + struct variable **var, char **value); +static void text_warn (struct sfm_reader *r, struct text_record *text, + const char *format, ...) + PRINTF_FORMAT (3, 4); +static char *text_get_token (struct text_record *, + struct substring delimiters); +static bool text_match (struct text_record *, char c); +static bool text_read_short_name (struct sfm_reader *, struct dictionary *, + struct text_record *, + struct substring delimiters, + struct variable **); static bool close_reader (struct sfm_reader *r); @@@ -163,9 -151,7 +163,9 @@@ static void read_extension_record (stru struct sfm_read_info *); static void read_machine_integer_info (struct sfm_reader *, size_t size, size_t count, - struct sfm_read_info *); + struct sfm_read_info *, + struct dictionary * + ); static void read_machine_float_info (struct sfm_reader *, size_t size, size_t count); static void read_display_parameters (struct sfm_reader *, @@@ -177,71 -163,7 +177,71 @@@ static void read_long_var_name_map (str static void read_long_string_map (struct sfm_reader *, size_t size, size_t count, struct dictionary *); +static void read_data_file_attributes (struct sfm_reader *, + size_t size, size_t count, + struct dictionary *); +static void read_variable_attributes (struct sfm_reader *, + size_t size, size_t count, + struct dictionary *); +static void read_long_string_value_labels (struct sfm_reader *, + size_t size, size_t count, + struct dictionary *); + +/* Convert all the strings in DICT from the dict encoding to UTF8 */ +static void +recode_strings (struct dictionary *dict) +{ + int i; + + const char *enc = dict_get_encoding (dict); + + if ( NULL == enc) + enc = get_default_encoding (); + + for (i = 0 ; i < dict_get_var_cnt (dict); ++i) + { + /* Convert the long variable name */ + struct variable *var = dict_get_var (dict, i); + const char *native_name = var_get_name (var); + char *utf8_name = recode_string (UTF8, enc, native_name, -1); + if ( 0 != strcmp (utf8_name, native_name)) + { + if ( NULL == dict_lookup_var (dict, utf8_name)) + dict_rename_var (dict, var, utf8_name); + else + msg (MW, + _("Recoded variable name duplicates an existing `%s' within system file."), utf8_name); + } + + free (utf8_name); + + /* Convert the variable label */ + if (var_has_label (var)) + { + char *utf8_label = recode_string (UTF8, enc, var_get_label (var), -1); + var_set_label (var, utf8_label); + free (utf8_label); + } + + if (var_has_value_labels (var)) + { + const struct val_lab *vl = NULL; + const struct val_labs *vlabs = var_get_value_labels (var); + + for (vl = val_labs_first (vlabs); vl != NULL; vl = val_labs_next (vlabs, vl)) + { + const union value *val = val_lab_get_value (vl); + const char *label = val_lab_get_label (vl); + char *new_label = NULL; + new_label = recode_string (UTF8, enc, label, -1); + + var_replace_value_label (var, val, new_label); + free (new_label); + } + } + } +} /* Opens the system file designated by file handle FH for reading. Reads the system file's dictionary into *DICT. @@@ -360,8 -282,6 +360,8 @@@ sfm_open_reader (struct file_handle *fh r->has_long_var_names = true; } + recode_strings (*dict); + /* Read record 999 data, which is just filler. */ read_int (r); @@@ -381,11 -301,11 +381,11 @@@ dictionary and may destroy or modify its variables. */ sfm_dictionary_to_sfm_vars (*dict, &r->sfm_vars, &r->sfm_var_cnt); pool_register (r->pool, free, r->sfm_vars); + r->proto = caseproto_ref_pool (dict_get_proto (*dict), r->pool); pool_free (r->pool, var_by_value_idx); - r->value_cnt = dict_get_next_value_idx (*dict); return casereader_create_sequential - (NULL, r->value_cnt, + (NULL, r->proto, r->case_cnt == -1 ? CASENUMBER_MAX: r->case_cnt, &sys_file_casereader_class, r); @@@ -505,9 -425,21 +505,21 @@@ read_header (struct sfm_reader *r, stru read_bytes (r, raw_bias, sizeof raw_bias); if (float_identify (100.0, raw_bias, sizeof raw_bias, &r->float_format) == 0) { - sys_warn (r, _("Compression bias is not the usual " - "value of 100, or system file uses unrecognized " - "floating-point format.")); + uint8_t zero_bias[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + + if (memcmp (raw_bias, zero_bias, 8)) + sys_warn (r, _("Compression bias is not the usual " + "value of 100, or system file uses unrecognized " + "floating-point format.")); + else + { + /* Some software is known to write all-zeros to this + field. Such software also writes floating-point + numbers in the format that we expect by default + (it seems that all software most likely does, in + reality), so don't warn in this case. */ + } + if (r->integer_format == INTEGER_MSB_FIRST) r->float_format = FLOAT_IEEE_DOUBLE_BE; else @@@ -577,7 -509,7 +589,7 @@@ read_variable_record (struct sfm_reade /* Create variable. */ if (width < 0 || width > 255) - sys_error (r, _("Bad variable width %d."), width); + sys_error (r, _("Bad width %d for variable %s."), width, name); var = dict_create_var (dict, name, width); if (var == NULL) sys_error (r, @@@ -611,7 -543,7 +623,7 @@@ struct missing_values mv; int i; - mv_init (&mv, var_get_width (var)); + mv_init_pool (r->pool, &mv, var_get_width (var)); if (var_is_numeric (var)) { if (missing_value_code < -3 || missing_value_code > 3 @@@ -630,24 -562,21 +642,24 @@@ } else { + int mv_width = MAX (width, 8); + union value value; + if (missing_value_code < 1 || missing_value_code > 3) sys_error (r, _("String missing value indicator field is not " "0, 1, 2, or 3.")); - if (var_is_long_string (var)) - sys_warn (r, _("Ignoring missing values on long string variable " - "%s, which PSPP does not yet support."), name); + + value_init (&value, mv_width); + value_set_missing (&value, mv_width); for (i = 0; i < missing_value_code; i++) { - char string[9]; - read_string (r, string, sizeof string); - mv_add_str (&mv, string); + uint8_t *s = value_str_rw (&value, mv_width); + read_bytes (r, s, 8); + mv_add_str (&mv, s); } + value_destroy (&value, mv_width); } - if (!var_is_long_string (var)) - var_set_missing_values (var, &mv); + var_set_missing_values (var, &mv); } /* Set formats. */ @@@ -792,7 -721,7 +804,7 @@@ read_extension_record (struct sfm_reade switch (subtype) { case 3: - read_machine_integer_info (r, size, count, info); + read_machine_integer_info (r, size, count, info, dict); return; case 4: @@@ -835,28 -764,21 +847,28 @@@ break; case 17: - /* Text field that defines variable attributes. New in - SPSS 14. */ - break; + read_data_file_attributes (r, size, count, dict); + return; + + case 18: + read_variable_attributes (r, size, count, dict); + return; case 20: /* New in SPSS 16. Contains a single string that describes the character encoding, e.g. "windows-1252". */ - break; + { + char *encoding = pool_calloc (r->pool, size, count + 1); + read_string (r, encoding, count + 1); + dict_set_encoding (dict, encoding); + return; + } case 21: /* New in SPSS 16. Encodes value labels for long string variables. */ - sys_warn (r, _("Ignoring value labels for long string variables, " - "which PSPP does not yet support.")); - break; + read_long_string_value_labels (r, size, count, dict); + return; default: sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"), @@@ -870,8 -792,7 +882,8 @@@ /* Read record type 7, subtype 3. */ static void read_machine_integer_info (struct sfm_reader *r, size_t size, size_t count, - struct sfm_read_info *info) + struct sfm_read_info *info, + struct dictionary *dict) { int version_major = read_int (r); int version_minor = read_int (r); @@@ -880,7 -801,7 +892,7 @@@ int float_representation = read_int (r); int compression_code UNUSED = read_int (r); int integer_representation = read_int (r); - int character_code UNUSED = read_int (r); + int character_code = read_int (r); int expected_float_format; int expected_integer_format; @@@ -925,47 -846,6 +937,47 @@@ gettext (endian[integer_representation == 1]), gettext (endian[expected_integer_format == 1])); } + + + /* + Record 7 (20) provides a much more reliable way of + setting the encoding. + The character_code is used as a fallback only. + */ + if ( NULL == dict_get_encoding (dict)) + { + switch (character_code) + { + case 1: + dict_set_encoding (dict, "EBCDIC-US"); + break; + case 2: + case 3: + /* These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic] + respectively. However, there are known to be many files + in the wild with character code 2, yet have data which are + clearly not ascii. + Therefore we ignore these values. + */ + return; + case 4: + dict_set_encoding (dict, "MS_KANJI"); + break; + case 65000: + dict_set_encoding (dict, "UTF-7"); + break; + case 65001: + dict_set_encoding (dict, "UTF-8"); + break; + default: + { + char enc[100]; + snprintf (enc, 100, "CP%d", character_code); + dict_set_encoding (dict, enc); + } + break; + }; + } } /* Read record type 7, subtype 4. */ @@@ -981,16 -861,11 +993,16 @@@ read_machine_float_info (struct sfm_rea size, count); if (sysmis != SYSMIS) - sys_warn (r, _("File specifies unexpected value %g as SYSMIS."), sysmis); + sys_warn (r, _("File specifies unexpected value %g as %s."), + sysmis, "SYSMIS"); + if (highest != HIGHEST) - sys_warn (r, _("File specifies unexpected value %g as HIGHEST."), highest); + sys_warn (r, _("File specifies unexpected value %g as %s."), + highest, "HIGHEST"); + if (lowest != LOWEST) - sys_warn (r, _("File specifies unexpected value %g as LOWEST."), lowest); + sys_warn (r, _("File specifies unexpected value %g as %s."), + lowest, "LOWEST"); } /* Read record type 7, subtype 11, which specifies how variables @@@ -1069,12 -944,14 +1081,12 @@@ static voi read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count, struct dictionary *dict) { - struct variable_to_value_map *map; + struct text_record *text; struct variable *var; char *long_name; - int warning_cnt = 0; - map = open_variable_to_value_map (r, size * count); - while (read_variable_to_value_map (r, dict, map, &var, &long_name, - &warning_cnt)) + text = open_text_record (r, size * count); + while (read_variable_to_value_pair (r, dict, text, &var, &long_name)) { char **short_names; size_t short_name_cnt; @@@ -1120,7 -997,7 +1132,7 @@@ } free (short_names); } - close_variable_to_value_map (r, map); + close_text_record (r, text); r->has_long_var_names = true; } @@@ -1130,12 -1007,14 +1142,12 @@@ static voi read_long_string_map (struct sfm_reader *r, size_t size, size_t count, struct dictionary *dict) { - struct variable_to_value_map *map; + struct text_record *text; struct variable *var; char *length_s; - int warning_cnt = 0; - map = open_variable_to_value_map (r, size * count); - while (read_variable_to_value_map (r, dict, map, &var, &length_s, - &warning_cnt)) + text = open_text_record (r, size * count); + while (read_variable_to_value_pair (r, dict, text, &var, &length_s)) { size_t idx = var_get_dict_index (var); long int length; @@@ -1183,7 -1062,7 +1195,7 @@@ dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1); var_set_width (var, length); } - close_variable_to_value_map (r, map); + close_text_record (r, text); dict_compact_values (dict); } @@@ -1197,7 -1076,7 +1209,7 @@@ read_value_labels (struct sfm_reader *r struct label { - char raw_value[8]; /* Value as uninterpreted bytes. */ + uint8_t raw_value[8]; /* Value as uninterpreted bytes. */ union value value; /* Value. */ char *label; /* Null-terminated label string. */ }; @@@ -1207,7 -1086,6 +1219,7 @@@ struct variable **var = NULL; /* Associated variables. */ int var_cnt; /* Number of associated variables. */ + int max_width; /* Maximum width of string variables. */ int i; @@@ -1266,15 -1144,12 +1278,15 @@@ /* Read the list of variables. */ var = pool_nalloc (subpool, var_cnt, sizeof *var); + max_width = 0; for (i = 0; i < var_cnt; i++) { var[i] = lookup_var_by_value_idx (r, var_by_value_idx, read_int (r)); - if (var_is_long_string (var[i])) - sys_error (r, _("Value labels are not allowed on long string " - "variables (%s)."), var_get_name (var[i])); + if (var_get_width (var[i]) > 8) + sys_error (r, _("Value labels may not be added to long string " + "variables (e.g. %s) using records types 3 and 4."), + var_get_name (var[i])); + max_width = MAX (max_width, var_get_width (var[i])); } /* Type check the variables. */ @@@ -1293,10 -1168,9 +1305,10 @@@ { struct label *label = labels + i; + value_init_pool (subpool, &label->value, max_width); if (var_is_alpha (var[0])) - buf_copy_rpad (label->value.s, sizeof label->value.s, - label->raw_value, sizeof label->raw_value); + u8_buf_copy_rpad (value_str_rw (&label->value, max_width), max_width, + label->raw_value, sizeof label->raw_value, ' '); else label->value.f = float_get_double (r->float_format, label->raw_value); } @@@ -1318,7 -1192,7 +1330,7 @@@ label->value.f, var_get_name (v)); else sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."), - var_get_width (v), label->value.s, + max_width, value_str (&label->value, max_width), var_get_name (v)); } } @@@ -1326,203 -1200,6 +1338,203 @@@ pool_destroy (subpool); } + +/* Reads a set of custom attributes from TEXT into ATTRS. + ATTRS may be a null pointer, in which case the attributes are + read but discarded. */ +static void +read_attributes (struct sfm_reader *r, struct text_record *text, + struct attrset *attrs) +{ + do + { + struct attribute *attr; + char *key; + int index; + + /* Parse the key. */ + key = text_get_token (text, ss_cstr ("(")); + if (key == NULL) + return; + + attr = attribute_create (key); + for (index = 1; ; index++) + { + /* Parse the value. */ + char *value; + size_t length; + + value = text_get_token (text, ss_cstr ("\n")); + if (value == NULL) + { + text_warn (r, text, _("Error parsing attribute value %s[%d]"), + key, index); + break; + } + + length = strlen (value); + if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'') + { + value[length - 1] = '\0'; + attribute_add_value (attr, value + 1); + } + else + { + text_warn (r, text, + _("Attribute value %s[%d] is not quoted: %s"), + key, index, value); + attribute_add_value (attr, value); + } + + /* Was this the last value for this attribute? */ + if (text_match (text, ')')) + break; + } + if (attrs != NULL) + attrset_add (attrs, attr); + else + attribute_destroy (attr); + } + while (!text_match (text, '/')); +} + +/* Reads record type 7, subtype 17, which lists custom + attributes on the data file. */ +static void +read_data_file_attributes (struct sfm_reader *r, + size_t size, size_t count, + struct dictionary *dict) +{ + struct text_record *text = open_text_record (r, size * count); + read_attributes (r, text, dict_get_attributes (dict)); + close_text_record (r, text); +} + +static void +skip_long_string_value_labels (struct sfm_reader *r, size_t n_labels) +{ + size_t i; + + for (i = 0; i < n_labels; i++) + { + size_t value_length, label_length; + + value_length = read_int (r); + skip_bytes (r, value_length); + label_length = read_int (r); + skip_bytes (r, label_length); + } +} + +static void +read_long_string_value_labels (struct sfm_reader *r, + size_t size, size_t count, + struct dictionary *d) +{ + const off_t start = ftello (r->file); + while (ftello (r->file) - start < size * count) + { + char var_name[VAR_NAME_LEN + 1]; + size_t n_labels, i; + struct variable *v; + union value value; + int var_name_len; + int width; + + /* Read header. */ + var_name_len = read_int (r); + if (var_name_len > VAR_NAME_LEN) + sys_error (r, _("Variable name length in long string value label " + "record (%d) exceeds %d-byte limit."), + var_name_len, VAR_NAME_LEN); + read_string (r, var_name, var_name_len + 1); + width = read_int (r); + n_labels = read_int (r); + + v = dict_lookup_var (d, var_name); + if (v == NULL) + { + sys_warn (r, _("Ignoring long string value record for " + "unknown variable %s."), var_name); + skip_long_string_value_labels (r, n_labels); + continue; + } + if (var_is_numeric (v)) + { + sys_warn (r, _("Ignoring long string value record for " + "numeric variable %s."), var_name); + skip_long_string_value_labels (r, n_labels); + continue; + } + if (width != var_get_width (v)) + { + sys_warn (r, _("Ignoring long string value record for variable %s " + "because the record's width (%d) does not match the " + "variable's width (%d)"), + var_name, width, var_get_width (v)); + skip_long_string_value_labels (r, n_labels); + continue; + } + + /* Read values. */ + value_init_pool (r->pool, &value, width); + for (i = 0; i < n_labels; i++) + { + size_t value_length, label_length; + char label[256]; + bool skip = false; + + /* Read value. */ + value_length = read_int (r); + if (value_length == width) + read_bytes (r, value_str_rw (&value, width), width); + else + { + sys_warn (r, _("Ignoring long string value %zu for variable %s, " + "with width %d, that has bad value width %zu."), + i, var_get_name (v), width, value_length); + skip_bytes (r, value_length); + skip = true; + } + + /* Read label. */ + label_length = read_int (r); + read_string (r, label, MIN (sizeof label, label_length + 1)); + if (label_length >= sizeof label) + { + /* Skip and silently ignore label text after the + first 255 bytes. The maximum documented length + of a label is 120 bytes so this is more than + generous. */ + skip_bytes (r, sizeof label - (label_length + 1)); + } + + if (!skip && !var_add_value_label (v, &value, label)) + sys_warn (r, _("Duplicate value label for \"%.*s\" on %s."), + width, value_str (&value, width), var_get_name (v)); + } + } +} + + +/* Reads record type 7, subtype 18, which lists custom + attributes on individual variables. */ +static void +read_variable_attributes (struct sfm_reader *r, + size_t size, size_t count, + struct dictionary *dict) +{ + struct text_record *text = open_text_record (r, size * count); + for (;;) + { + struct variable *var; + if (!text_read_short_name (r, dict, text, ss_cstr (":"), &var)) + break; + read_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL); + } + close_text_record (r, text); +} + /* Case reader. */ @@@ -1532,31 -1209,31 +1544,31 @@@ static void partial_record (struct sfm_ static void read_error (struct casereader *, const struct sfm_reader *); static bool read_case_number (struct sfm_reader *, double *); -static bool read_case_string (struct sfm_reader *, char *, size_t); +static bool read_case_string (struct sfm_reader *, uint8_t *, size_t); static int read_opcode (struct sfm_reader *); static bool read_compressed_number (struct sfm_reader *, double *); -static bool read_compressed_string (struct sfm_reader *, char *); -static bool read_whole_strings (struct sfm_reader *, char *, size_t); +static bool read_compressed_string (struct sfm_reader *, uint8_t *); +static bool read_whole_strings (struct sfm_reader *, uint8_t *, size_t); static bool skip_whole_strings (struct sfm_reader *, size_t); -/* Reads one case from READER's file into C. Returns true only - if successful. */ -static bool -sys_file_casereader_read (struct casereader *reader, void *r_, - struct ccase *c) +/* Reads and returns one case from READER's file. Returns a null + pointer if not successful. */ +static struct ccase * +sys_file_casereader_read (struct casereader *reader, void *r_) { struct sfm_reader *r = r_; + struct ccase *volatile c; int i; if (r->error) - return false; + return NULL; - case_create (c, r->value_cnt); + c = case_create (r->proto); if (setjmp (r->bail_out)) { casereader_force_error (reader); - case_destroy (c); - return false; + case_unref (c); + return NULL; } for (i = 0; i < r->sfm_var_cnt; i++) @@@ -1564,29 -1241,28 +1576,29 @@@ struct sfm_var *sv = &r->sfm_vars[i]; union value *v = case_data_rw_idx (c, sv->case_index); - if (sv->width == 0) + if (sv->var_width == 0) { if (!read_case_number (r, &v->f)) goto eof; } else { - if (!read_case_string (r, v->s + sv->offset, sv->width)) + uint8_t *s = value_str_rw (v, sv->var_width); + if (!read_case_string (r, s + sv->offset, sv->segment_width)) goto eof; if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8))) partial_record (r); } } - return true; + return c; eof: - case_destroy (c); + case_unref (c); if (i != 0) partial_record (r); if (r->case_cnt != -1) read_error (reader, r); - return false; + return NULL; } /* Issues an error that R ends in a partial record. */ @@@ -1633,7 -1309,7 +1645,7 @@@ read_case_number (struct sfm_reader *r Returns true if successful, false if end of file is reached immediately. */ static bool -read_case_string (struct sfm_reader *r, char *s, size_t length) +read_case_string (struct sfm_reader *r, uint8_t *s, size_t length) { size_t whole = ROUND_DOWN (length, 8); size_t partial = length % 8; @@@ -1646,7 -1322,7 +1658,7 @@@ if (partial) { - char bounce[8]; + uint8_t bounce[8]; if (!read_whole_strings (r, bounce, sizeof bounce)) { if (whole) @@@ -1717,7 -1393,7 +1729,7 @@@ read_compressed_number (struct sfm_read Returns true if successful, false if end of file is reached immediately. */ static bool -read_compressed_string (struct sfm_reader *r, char *dst) +read_compressed_string (struct sfm_reader *r, uint8_t *dst) { switch (read_opcode (r)) { @@@ -1746,7 -1422,7 +1758,7 @@@ Returns true if successful, false if end of file is reached immediately. */ static bool -read_whole_strings (struct sfm_reader *r, char *s, size_t length) +read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length) { assert (length % 8 == 0); if (!r->compressed) @@@ -1774,7 -1450,7 +1786,7 @@@ static bool skip_whole_strings (struct sfm_reader *r, size_t length) { - char buffer[1024]; + uint8_t buffer[1024]; assert (length < sizeof buffer); return read_whole_strings (r, buffer, length); } @@@ -1859,124 -1535,82 +1871,124 @@@ lookup_var_by_short_name (struct dictio return NULL; } -/* Helpers for reading records that contain "variable=value" - pairs. */ +/* Helpers for reading records that contain structured text + strings. */ + +/* Maximum number of warnings to issue for a single text + record. */ +#define MAX_TEXT_WARNINGS 5 /* State. */ -struct variable_to_value_map +struct text_record { struct substring buffer; /* Record contents. */ size_t pos; /* Current position in buffer. */ + int n_warnings; /* Number of warnings issued or suppressed. */ }; -/* Reads SIZE bytes into a "variable=value" map for R, - and returns the map. */ -static struct variable_to_value_map * -open_variable_to_value_map (struct sfm_reader *r, size_t size) +/* Reads SIZE bytes into a text record for R, + and returns the new text record. */ +static struct text_record * +open_text_record (struct sfm_reader *r, size_t size) { - struct variable_to_value_map *map = pool_alloc (r->pool, sizeof *map); + struct text_record *text = pool_alloc (r->pool, sizeof *text); char *buffer = pool_malloc (r->pool, size + 1); read_bytes (r, buffer, size); - map->buffer = ss_buffer (buffer, size); - map->pos = 0; - return map; + text->buffer = ss_buffer (buffer, size); + text->pos = 0; + text->n_warnings = 0; + return text; } -/* Closes MAP and frees its storage. - Not really needed, because the pool will free the map anyway, - but can be used to free it earlier. */ +/* Closes TEXT, frees its storage, and issues a final warning + about suppressed warnings if necesary. */ static void -close_variable_to_value_map (struct sfm_reader *r, - struct variable_to_value_map *map) +close_text_record (struct sfm_reader *r, struct text_record *text) { - pool_free (r->pool, ss_data (map->buffer)); + if (text->n_warnings > MAX_TEXT_WARNINGS) + sys_warn (r, _("Suppressed %d additional related warnings."), + text->n_warnings - MAX_TEXT_WARNINGS); + pool_free (r->pool, ss_data (text->buffer)); } -/* Reads the next variable=value pair from MAP. +/* Reads a variable=value pair from TEXT. Looks up the variable in DICT and stores it into *VAR. Stores a null-terminated value into *VALUE. */ static bool -read_variable_to_value_map (struct sfm_reader *r, struct dictionary *dict, - struct variable_to_value_map *map, - struct variable **var, char **value, - int *warning_cnt) +read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict, + struct text_record *text, + struct variable **var, char **value) { - int max_warnings = 5; - for (;;) { - struct substring short_name_ss, value_ss; + if (!text_read_short_name (r, dict, text, ss_cstr ("="), var)) + return false; + + *value = text_get_token (text, ss_buffer ("\t\0", 2)); + if (*value == NULL) + return false; - if (!ss_tokenize (map->buffer, ss_cstr ("="), &map->pos, &short_name_ss) - || !ss_tokenize (map->buffer, ss_buffer ("\t\0", 2), &map->pos, - &value_ss)) - { - if (*warning_cnt > max_warnings) - sys_warn (r, _("Suppressed %d additional variable map warnings."), - *warning_cnt - max_warnings); - return false; - } + text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX), + ss_buffer ("\t\0", 2)); - map->pos += ss_span (ss_substr (map->buffer, map->pos, SIZE_MAX), - ss_buffer ("\t\0", 2)); + if (*var != NULL) + return true; + } +} - ss_data (short_name_ss)[ss_length (short_name_ss)] = '\0'; - *var = lookup_var_by_short_name (dict, ss_data (short_name_ss)); - if (*var == NULL) - { - if (++*warning_cnt <= max_warnings) - sys_warn (r, _("Variable map refers to unknown variable %s."), - ss_data (short_name_ss)); - continue; - } +static bool +text_read_short_name (struct sfm_reader *r, struct dictionary *dict, + struct text_record *text, struct substring delimiters, + struct variable **var) +{ + char *short_name = text_get_token (text, delimiters); + if (short_name == NULL) + return false; - ss_data (value_ss)[ss_length (value_ss)] = '\0'; - *value = ss_data (value_ss); + *var = lookup_var_by_short_name (dict, short_name); + if (*var == NULL) + text_warn (r, text, _("Variable map refers to unknown variable %s."), + short_name); + return true; +} + +/* Displays a warning for the current file position, limiting the + number to MAX_TEXT_WARNINGS for TEXT. */ +static void +text_warn (struct sfm_reader *r, struct text_record *text, + const char *format, ...) +{ + if (text->n_warnings++ < MAX_TEXT_WARNINGS) + { + va_list args; + va_start (args, format); + sys_msg (r, MW, format, args); + va_end (args); + } +} + +static char * +text_get_token (struct text_record *text, struct substring delimiters) +{ + struct substring token; + + if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token)) + return NULL; + ss_data (token)[ss_length (token)] = '\0'; + return ss_data (token); +} + +static bool +text_match (struct text_record *text, char c) +{ + if (text->buffer.string[text->pos] == c) + { + text->pos++; return true; } + else + return false; } /* Messages. */ diff --combined src/language/data-io/data-parser.c index 6ec3d0d5,a3a438b7..020f8e4c --- a/src/language/data-io/data-parser.c +++ b/src/language/data-io/data-parser.c @@@ -1,5 -1,5 +1,5 @@@ /* PSPP - a program for statistical analysis. - Copyright (C) 2007 Free Software Foundation, Inc. + Copyright (C) 2007, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@@ -41,7 -41,6 +41,7 @@@ /* Data parser for textual data like that read by DATA LIST. */ struct data_parser { + const struct dictionary *dict; /*Dictionary of destination */ enum data_parser_type type; /* Type of data to parse. */ int skip_records; /* Records to skip before first real data. */ casenumber max_cases; /* Max number of cases to read. */ @@@ -80,7 -79,7 +80,7 @@@ static void set_any_sep (struct data_pa /* Creates and returns a new data parser. */ struct data_parser * -data_parser_create (void) +data_parser_create (const struct dictionary *dict) { struct data_parser *parser = xmalloc (sizeof *parser); @@@ -92,7 -91,6 +92,7 @@@ parser->fields = NULL; parser->field_cnt = 0; parser->field_allocated = 0; + parser->dict = dict; parser->span = true; parser->empty_line_has_field = false; @@@ -366,17 -364,14 +366,17 @@@ static bool parse_delimited_no_span (co static bool parse_fixed (const struct data_parser *, struct dfm_reader *, struct ccase *); -/* Reads a case from DFM into C, parsing it with PARSER. - Returns true if successful, false at end of file or on I/O error. */ +/* Reads a case from DFM into C, parsing it with PARSER. Returns + true if successful, false at end of file or on I/O error. + + Case C must not be shared. */ bool data_parser_parse (struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { bool retval; + assert (!case_is_shared (c)); assert (data_parser_any_fields (parser)); /* Skip the requested number of records before reading the @@@ -485,7 -480,8 +485,8 @@@ cut_field (const struct data_parser *pa /* Regular field. */ ss_get_chars (&p, ss_cspan (p, ds_ss (&parser->any_sep)), field); *last_column = dfm_column_start (reader); - if (!ss_ltrim (&p, parser->soft_seps) || ss_is_empty (p)) + if (!ss_ltrim (&p, parser->soft_seps) || ss_is_empty (p) + || ss_find_char (parser->hard_seps, p.string[0]) != SIZE_MAX) { /* Advance past a trailing hard separator, regardless of whether one actually existed. If @@@ -507,7 -503,7 +508,7 @@@ static boo parse_fixed (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (reader); + const char *encoding = dfm_reader_get_legacy_encoding (reader); struct field *f; int row; @@@ -533,7 -529,6 +534,7 @@@ f->format.w), encoding, f->format.type, f->format.d, f->first_column, f->first_column + f->format.w, + parser->dict, case_data_rw_idx (c, f->case_idx), fmt_var_width (&f->format)); @@@ -550,7 -545,7 +551,7 @@@ static boo parse_delimited_span (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (reader); + const char *encoding = dfm_reader_get_legacy_encoding (reader); struct string tmp = DS_EMPTY_INITIALIZER; struct field *f; @@@ -577,7 -572,6 +578,7 @@@ data_in (s, encoding, f->format.type, 0, first_column, last_column, + parser->dict, case_data_rw_idx (c, f->case_idx), fmt_var_width (&f->format)); } @@@ -592,7 -586,7 +593,7 @@@ static boo parse_delimited_no_span (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (reader); + const char *encoding = dfm_reader_get_legacy_encoding (reader); struct string tmp = DS_EMPTY_INITIALIZER; struct substring s; struct field *f; @@@ -618,7 -612,6 +619,7 @@@ data_in (s, encoding, f->format.type, 0, first_column, last_column, + parser->dict, case_data_rw_idx (c, f->case_idx), fmt_var_width (&f->format)); } @@@ -652,7 -645,7 +653,7 @@@ dump_fixed_table (const struct data_par tab_text (t, 3, 0, TAB_CENTER | TAT_TITLE, _("Format")); tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 3, parser->field_cnt); tab_hline (t, TAL_2, 0, 3, 1); - tab_dim (t, tab_natural_dimensions); + tab_dim (t, tab_natural_dimensions, NULL); for (i = 0; i < parser->field_cnt; i++) { @@@ -661,9 -654,9 +662,9 @@@ int row = i + 1; tab_text (t, 0, row, TAB_LEFT, f->name); - tab_text (t, 1, row, TAT_PRINTF, "%d", f->record); - tab_text (t, 2, row, TAT_PRINTF, "%3d-%3d", - f->first_column, f->first_column + f->format.w - 1); + tab_text_format (t, 1, row, 0, "%d", f->record); + tab_text_format (t, 2, row, 0, "%3d-%3d", + f->first_column, f->first_column + f->format.w - 1); tab_text (t, 3, row, TAB_LEFT | TAB_FIX, fmt_to_string (&f->format, fmt_string)); } @@@ -691,7 -684,7 +692,7 @@@ dump_delimited_table (const struct data tab_text (t, 1, 0, TAB_CENTER | TAT_TITLE, _("Format")); tab_box (t, TAL_1, TAL_1, TAL_0, TAL_1, 0, 0, 1, parser->field_cnt); tab_hline (t, TAL_2, 0, 1, 1); - tab_dim (t, tab_natural_dimensions); + tab_dim (t, tab_natural_dimensions, NULL); for (i = 0; i < parser->field_cnt; i++) { @@@ -726,7 -719,7 +727,7 @@@ struct data_parser_casereade { struct data_parser *parser; /* Parser. */ struct dfm_reader *reader; /* Data file reader. */ - size_t value_cnt; /* Number of `union value's in case. */ + struct caseproto *proto; /* Format of cases. */ }; static const struct casereader_class data_parser_casereader_class; @@@ -747,25 -740,25 +748,25 @@@ data_parser_make_active_file (struct da r = xmalloc (sizeof *r); r->parser = parser; r->reader = reader; - r->value_cnt = dict_get_next_value_idx (dict); - casereader = casereader_create_sequential (NULL, r->value_cnt, + r->proto = caseproto_ref (dict_get_proto (dict)); + casereader = casereader_create_sequential (NULL, r->proto, CASENUMBER_MAX, &data_parser_casereader_class, r); proc_set_active_file (ds, casereader, dict); } -static bool -data_parser_casereader_read (struct casereader *reader UNUSED, void *r_, - struct ccase *c) +static struct ccase * +data_parser_casereader_read (struct casereader *reader UNUSED, void *r_) { struct data_parser_casereader *r = r_; - bool ok; - - case_create (c, r->value_cnt); - ok = data_parser_parse (r->parser, r->reader, c); - if (!ok) - case_destroy (c); - return ok; + struct ccase *c = case_create (r->proto); + if (data_parser_parse (r->parser, r->reader, c)) + return c; + else + { + case_unref (c); + return NULL; + } } static void @@@ -776,7 -769,6 +777,7 @@@ data_parser_casereader_destroy (struct casereader_force_error (reader); data_parser_destroy (r->parser); dfm_close_reader (r->reader); + caseproto_unref (r->proto); free (r); } diff --combined src/language/stats/t-test.q index 09a96cfc,c69a6cdb..d02cdb28 --- a/src/language/stats/t-test.q +++ b/src/language/stats/t-test.q @@@ -1,5 -1,5 +1,5 @@@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@@ -32,8 -32,6 +32,8 @@@ #include #include #include +#include +#include #include #include #include @@@ -48,7 -46,6 +48,7 @@@ #include #include "xalloc.h" +#include "xmemdup0.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@@ -57,721 -54,1000 +57,721 @@@ /* (specification) "T-TEST" (tts_): - +groups=custom; - testval=double; - +variables=varlist("PV_NO_SCRATCH | PV_NUMERIC"); - +pairs=custom; - missing=miss:!analysis/listwise, - incl:include/!exclude; - +format=fmt:!labels/nolabels; - criteria=:cin(d:criteria,"%s > 0. && %s < 1."). + +groups=custom; + testval=double; + +variables=varlist("PV_NO_SCRATCH | PV_NUMERIC"); + +pairs=custom; + missing=miss:!analysis/listwise, + incl:include/!exclude; + +format=fmt:!labels/nolabels; + criteria=:cin(d:criteria,"%s > 0. && %s < 1."). */ /* (declarations) */ /* (functions) */ - -/* Variable for the GROUPS subcommand, if given. */ -static struct variable *indep_var; - enum comparison { - CMP_LE = -2, - CMP_EQ = 0, + CMP_LE, + CMP_EQ, }; -struct group_properties -{ - /* The comparison criterion */ - enum comparison criterion; - - /* The width of the independent variable */ - int indep_width ; - - union { - /* The value of the independent variable at which groups are determined to - belong to one group or the other */ - double critical_value; - - - /* The values of the independent variable for each group */ - union value g_value[2]; - } v ; - -}; - - -static struct group_properties gp ; - - - -/* PAIRS: Number of pairs to be compared ; each pair. */ -static int n_pairs = 0 ; +/* A pair of variables to be compared. */ struct pair -{ - /* The variables comprising the pair */ - const struct variable *v[2]; - - /* The number of valid variable pairs */ - double n; - - /* The sum of the members */ - double sum[2]; - - /* sum of squares of the members */ - double ssq[2]; - - /* Std deviation of the members */ - double std_dev[2]; - - - /* Sample Std deviation of the members */ - double s_std_dev[2]; - - /* The means of the members */ - double mean[2]; - - /* The correlation coefficient between the variables */ - double correlation; - - /* The sum of the differences */ - double sum_of_diffs; - - /* The sum of the products */ - double sum_of_prod; - - /* The mean of the differences */ - double mean_diff; - - /* The sum of the squares of the differences */ - double ssq_diffs; + { + const struct variable *v[2]; /* The paired variables. */ + double n; /* The number of valid variable pairs */ + double sum[2]; /* The sum of the members */ + double ssq[2]; /* sum of squares of the members */ + double std_dev[2]; /* Std deviation of the members */ + double s_std_dev[2]; /* Sample Std deviation of the members */ + double mean[2]; /* The means of the members */ + double correlation; /* Correlation coefficient between the variables. */ + double sum_of_diffs; /* The sum of the differences */ + double sum_of_prod; /* The sum of the products */ + double mean_diff; /* The mean of the differences */ + double ssq_diffs; /* The sum of the squares of the differences */ + double std_dev_diff; /* The std deviation of the differences */ + }; - /* The std deviation of the differences */ - double std_dev_diff; +/* Which mode was T-TEST invoked */ +enum t_test_mode { + T_1_SAMPLE, /* One-sample tests. */ + T_IND_SAMPLES, /* Independent-sample tests. */ + T_PAIRED /* Paired-sample tests. */ }; -static struct pair *pairs=0; - -static int parse_value (struct lexer *lexer, union value * v, enum val_type); - -/* Structures and Functions for the Statistics Summary Box */ -struct ssbox; -typedef void populate_ssbox_func (struct ssbox *ssb, - const struct dictionary *, - struct cmd_t_test *cmd); -typedef void finalize_ssbox_func (struct ssbox *ssb); +/* Total state of a T-TEST procedure. */ +struct t_test_proc + { + enum t_test_mode mode; /* Mode that T-TEST was invoked in. */ + double criteria; /* Confidence interval in (0, 1). */ + enum mv_class exclude; /* Classes of missing values to exclude. */ + bool listwise_missing; /* Drop whole case if one missing var? */ + struct fmt_spec weight_format; /* Format of weight variable. */ + + /* Dependent variables. */ + const struct variable **vars; + size_t n_vars; + + /* For mode == T_1_SAMPLE. */ + double testval; + + /* For mode == T_PAIRED only. */ + struct pair *pairs; + size_t n_pairs; + + /* For mode == T_IND_SAMPLES only. */ + struct variable *indep_var; /* Independent variable. */ + enum comparison criterion; /* Type of comparison. */ + double critical_value; /* CMP_LE only: Grouping threshold value. */ + union value g_value[2]; /* CMP_EQ only: Per-group indep var values. */ + }; +/* Statistics Summary Box */ struct ssbox -{ - struct tab_table *t; - - populate_ssbox_func *populate; - finalize_ssbox_func *finalize; - -}; - -/* Create a ssbox */ -void ssbox_create (struct ssbox *ssb, struct cmd_t_test *cmd, int mode); - -/* Populate a ssbox according to cmd */ -void ssbox_populate (struct ssbox *ssb, const struct dictionary *dict, - struct cmd_t_test *cmd); - -/* Submit and destroy a ssbox */ -void ssbox_finalize (struct ssbox *ssb); - -/* A function to create, populate and submit the Paired Samples Correlation - box */ -static void pscbox (const struct dictionary *); + { + struct tab_table *t; + void (*populate) (struct ssbox *, struct t_test_proc *); + void (*finalize) (struct ssbox *); + }; +static void ssbox_create (struct ssbox *, struct t_test_proc *); +static void ssbox_populate (struct ssbox *, struct t_test_proc *); +static void ssbox_finalize (struct ssbox *); -/* Structures and Functions for the Test Results Box */ -struct trbox; +/* Paired Samples Correlation box */ +static void pscbox (struct t_test_proc *); -typedef void populate_trbox_func (struct trbox *trb, - const struct dictionary *dict, - struct cmd_t_test *cmd); -typedef void finalize_trbox_func (struct trbox *trb); +/* Test Results Box. */ struct trbox { struct tab_table *t; - populate_trbox_func *populate; - finalize_trbox_func *finalize; -}; - -/* Create a trbox */ -void trbox_create (struct trbox *trb, struct cmd_t_test *cmd, int mode); - -/* Populate a ssbox according to cmd */ -static void trbox_populate (struct trbox *trb, const struct dictionary *dict, - struct cmd_t_test *cmd); - -/* Submit and destroy a ssbox */ -void trbox_finalize (struct trbox *trb); - -/* Which mode was T-TEST invoked */ -enum { - T_1_SAMPLE = 0 , - T_IND_SAMPLES, - T_PAIRED -}; - - -static int common_calc (const struct dictionary *dict, - const struct ccase *, void *, - enum mv_class); -static void common_precalc (struct cmd_t_test *); -static void common_postcalc (struct cmd_t_test *); - -static int one_sample_calc (const struct dictionary *dict, const struct ccase *, void *, enum mv_class); -static void one_sample_precalc (struct cmd_t_test *); -static void one_sample_postcalc (struct cmd_t_test *); - -static int paired_calc (const struct dictionary *dict, const struct ccase *, - struct cmd_t_test*, enum mv_class); -static void paired_precalc (struct cmd_t_test *); -static void paired_postcalc (struct cmd_t_test *); - -static void group_precalc (struct cmd_t_test *); -static int group_calc (const struct dictionary *dict, const struct ccase *, - struct cmd_t_test *, enum mv_class); -static void group_postcalc (struct cmd_t_test *); - - -static void calculate (struct cmd_t_test *, - struct casereader *, - const struct dataset *); - -static int mode; - -static struct cmd_t_test cmd; + void (*populate) (struct trbox *, struct t_test_proc *); + void (*finalize) (struct trbox *); + }; -static bool bad_weight_warn = false; +static void trbox_create (struct trbox *, struct t_test_proc *); +static void trbox_populate (struct trbox *, struct t_test_proc *); +static void trbox_finalize (struct trbox *); +static void calculate (struct t_test_proc *, struct casereader *, + const struct dataset *); static int compare_group_binary (const struct group_statistics *a, - const struct group_statistics *b, - const struct group_properties *p); - - -static unsigned hash_group_binary (const struct group_statistics *g, - const struct group_properties *p); - - + const struct group_statistics *b, + const struct t_test_proc *); +static unsigned hash_group_binary (const struct group_statistics *g, + const struct t_test_proc *p); int cmd_t_test (struct lexer *lexer, struct dataset *ds) { + struct cmd_t_test cmd; + struct t_test_proc proc; struct casegrouper *grouper; struct casereader *group; - bool ok; + struct variable *wv; + bool ok = false; - if ( !parse_t_test (lexer, ds, &cmd, NULL) ) - return CMD_FAILURE; + proc.pairs = NULL; + proc.n_pairs = 0; + proc.vars = NULL; + proc.indep_var = NULL; + if (!parse_t_test (lexer, ds, &cmd, &proc)) + goto parse_failed; - if (! cmd.sbc_criteria) - cmd.criteria=0.95; + wv = dict_get_weight (dataset_dict (ds)); + proc.weight_format = wv ? *var_get_print_format (wv) : F_8_0; - { - int m=0; - if (cmd.sbc_testval) ++m; - if (cmd.sbc_groups) ++m; - if (cmd.sbc_pairs) ++m; - - if ( m != 1) - { - msg (SE, - _ ("TESTVAL, GROUPS and PAIRS subcommands are mutually exclusive.") - ); - free_t_test (&cmd); - return CMD_FAILURE; - } - } - - if (cmd.sbc_testval) - mode=T_1_SAMPLE; - else if (cmd.sbc_groups) - mode=T_IND_SAMPLES; - else - mode=T_PAIRED; - - if ( mode == T_PAIRED) + if ((cmd.sbc_testval != 0) + (cmd.sbc_groups != 0) + (cmd.sbc_pairs != 0) + != 1) { - if (cmd.sbc_variables) - { - msg (SE, _ ("VARIABLES subcommand is not appropriate with PAIRS")); - free_t_test (&cmd); - return CMD_FAILURE; - } - else - { - /* Iterate through the pairs and put each variable that is a - member of a pair into cmd.v_variables */ + msg (SE, _("Exactly one of TESTVAL, GROUPS and PAIRS subcommands " + "must be specified.")); + goto done; + } - int i; - struct hsh_iterator hi; - struct const_hsh_table *hash; - const struct variable *v; + proc.mode = (cmd.sbc_testval ? T_1_SAMPLE + : cmd.sbc_groups ? T_IND_SAMPLES + : T_PAIRED); + proc.criteria = cmd.sbc_criteria ? cmd.criteria : 0.95; + proc.exclude = cmd.incl != TTS_INCLUDE ? MV_ANY : MV_SYSTEM; + proc.listwise_missing = cmd.miss == TTS_LISTWISE; - hash = const_hsh_create (n_pairs, compare_vars_by_name, hash_var_by_name, - 0, 0); + if (proc.mode == T_1_SAMPLE) + proc.testval = cmd.n_testval[0]; - for (i=0; i < n_pairs; ++i) - { - const_hsh_insert (hash, pairs[i].v[0]); - const_hsh_insert (hash, pairs[i].v[1]); - } + if (proc.mode == T_PAIRED) + { + size_t i, j; - assert (cmd.n_variables == 0); - cmd.n_variables = const_hsh_count (hash); - - cmd.v_variables = xnrealloc (cmd.v_variables, cmd.n_variables, - sizeof *cmd.v_variables); - /* Iterate through the hash */ - for (i=0,v = const_hsh_first (hash, &hi); - v != 0; - v = const_hsh_next (hash, &hi) ) - cmd.v_variables[i++]=v; - const_hsh_destroy (hash); + if (cmd.sbc_variables) + { + msg (SE, _("VARIABLES subcommand may not be used with PAIRS.")); + goto done; } + + /* Fill proc.vars with the unique variables from pairs. */ + proc.n_vars = proc.n_pairs * 2; + proc.vars = xmalloc (sizeof *proc.vars * proc.n_vars); + for (i = j = 0; i < proc.n_pairs; i++) + { + proc.vars[j++] = proc.pairs[i].v[0]; + proc.vars[j++] = proc.pairs[i].v[1]; + } + proc.n_vars = sort_unique (proc.vars, proc.n_vars, sizeof *proc.vars, + compare_var_ptrs_by_name, NULL); } - else if ( !cmd.sbc_variables) + else { - msg (SE, _ ("One or more VARIABLES must be specified.")); - free_t_test (&cmd); - return CMD_FAILURE; + if (!cmd.n_variables) + { + msg (SE, _("One or more VARIABLES must be specified.")); + goto done; + } + proc.n_vars = cmd.n_variables; + proc.vars = cmd.v_variables; + cmd.v_variables = NULL; } - bad_weight_warn = true; - /* Data pass. */ grouper = casegrouper_create_splits (proc_open (ds), dataset_dict (ds)); while (casegrouper_get_next_group (grouper, &group)) - calculate (&cmd, group, ds); + calculate (&proc, group, ds); ok = casegrouper_destroy (grouper); ok = proc_commit (ds) && ok; - n_pairs=0; - free (pairs); - pairs=0; - - if ( mode == T_IND_SAMPLES) + if (proc.mode == T_IND_SAMPLES) { int v; /* Destroy any group statistics we created */ - for (v = 0 ; v < cmd.n_variables ; ++v ) + for (v = 0; v < proc.n_vars; v++) { - struct group_proc *grpp = group_proc_get (cmd.v_variables[v]); + struct group_proc *grpp = group_proc_get (proc.vars[v]); hsh_destroy (grpp->group_hash); } } +done: free_t_test (&cmd); - return ok ? CMD_SUCCESS : CMD_CASCADING_FAILURE; +parse_failed: + if (proc.indep_var != NULL) + { + int width = var_get_width (proc.indep_var); + value_destroy (&proc.g_value[0], width); + value_destroy (&proc.g_value[1], width); + } + free (proc.vars); + free (proc.pairs); + return ok ? CMD_SUCCESS : CMD_FAILURE; } static int -tts_custom_groups (struct lexer *lexer, struct dataset *ds, struct cmd_t_test *cmd UNUSED, - void *aux UNUSED) +tts_custom_groups (struct lexer *lexer, struct dataset *ds, + struct cmd_t_test *cmd UNUSED, void *proc_) { - int n_group_values=0; + struct t_test_proc *proc = proc_; + int n_values; + int width; lex_match (lexer, '='); - indep_var = parse_variable (lexer, dataset_dict (ds)); - if (!indep_var) + proc->indep_var = parse_variable (lexer, dataset_dict (ds)); + if (proc->indep_var == NULL) { lex_error (lexer, "expecting variable name in GROUPS subcommand"); return 0; } - - if (var_is_long_string (indep_var)) - { - msg (SE, _ ("Long string variable %s is not valid here."), - var_get_name (indep_var)); - return 0; - } + width = var_get_width (proc->indep_var); + value_init (&proc->g_value[0], width); + value_init (&proc->g_value[1], width); if (!lex_match (lexer, '(')) + n_values = 0; + else { - if (var_is_numeric (indep_var)) - { - gp.v.g_value[0].f = 1; - gp.v.g_value[1].f = 2; - - gp.criterion = CMP_EQ; - - n_group_values = 2; - - return 1; - } + if (!parse_value (lexer, &proc->g_value[0], width)) + return 0; + lex_match (lexer, ','); + if (lex_match (lexer, ')')) + n_values = 1; else - { - msg (SE, _ ("When applying GROUPS to a string variable, two " - "values must be specified.")); - return 0; - } + { + if (!parse_value (lexer, &proc->g_value[1], width) + || !lex_force_match (lexer, ')')) + return 0; + n_values = 2; + } } - if (!parse_value (lexer, &gp.v.g_value[0], var_get_type (indep_var))) - return 0; - - lex_match (lexer, ','); - if (lex_match (lexer, ')')) + if (var_is_numeric (proc->indep_var)) { - if (var_is_alpha (indep_var)) + proc->criterion = n_values == 1 ? CMP_LE : CMP_EQ; + if (n_values == 1) + proc->critical_value = proc->g_value[0].f; + else if (n_values == 0) { - msg (SE, _ ("When applying GROUPS to a string variable, two " - "values must be specified.")); - return 0; + proc->g_value[0].f = 1; + proc->g_value[1].f = 2; } - gp.criterion = CMP_LE; - gp.v.critical_value = gp.v.g_value[0].f; - - n_group_values = 1; - return 1; } - - if (!parse_value (lexer, &gp.v.g_value[1], var_get_type (indep_var))) - return 0; - - n_group_values = 2; - if (!lex_force_match (lexer, ')')) - return 0; - - if ( n_group_values == 2 ) - gp.criterion = CMP_EQ ; else - gp.criterion = CMP_LE ; - - - if ( var_is_alpha (indep_var)) { - buf_copy_rpad (gp.v.g_value [0].s, var_get_width (indep_var), - gp.v.g_value [0].s, strlen (gp.v.g_value[0].s)); - - buf_copy_rpad (gp.v.g_value [1].s, var_get_width (indep_var), - gp.v.g_value [1].s, strlen (gp.v.g_value[1].s)); + proc->criterion = CMP_EQ; + if (n_values != 2) + { + msg (SE, _("When applying GROUPS to a string variable, two " + "values must be specified.")); + return 0; + } } - return 1; } +static void +add_pair (struct t_test_proc *proc, + const struct variable *v0, const struct variable *v1) +{ + struct pair *p = &proc->pairs[proc->n_pairs++]; + p->v[0] = v0; + p->v[1] = v1; +} static int -tts_custom_pairs (struct lexer *lexer, struct dataset *ds, struct cmd_t_test *cmd UNUSED, void *aux UNUSED) +tts_custom_pairs (struct lexer *lexer, struct dataset *ds, + struct cmd_t_test *cmd UNUSED, void *proc_) { - const struct variable **vars; - size_t n_vars; - size_t n_pairs_local; - - size_t n_before_WITH; - size_t n_after_WITH = SIZE_MAX; - int paired ; /* Was the PAIRED keyword given ? */ - - lex_match (lexer, '='); + struct t_test_proc *proc = proc_; - n_vars=0; - if (!parse_variables_const (lexer, dataset_dict (ds), &vars, &n_vars, - PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH)) - { - free (vars); - return 0; - } - assert (n_vars); - - n_before_WITH = 0; - if (lex_match (lexer, T_WITH)) - { - n_before_WITH = n_vars; - if (!parse_variables_const (lexer, dataset_dict (ds), &vars, &n_vars, - PV_DUPLICATE | PV_APPEND - | PV_NUMERIC | PV_NO_SCRATCH)) - { - free (vars); - return 0; - } - n_after_WITH = n_vars - n_before_WITH; - } + const struct variable **vars1 = NULL; + size_t n_vars1 = 0; - paired = (lex_match (lexer, '(') && lex_match_id (lexer, "PAIRED") && lex_match (lexer, ')')); + const struct variable **vars2 = NULL; + size_t n_vars2 = 0; - /* Determine the number of pairs needed */ - if (paired) - { - if (n_before_WITH != n_after_WITH) - { - free (vars); - msg (SE, _ ("PAIRED was specified but the number of variables " - "preceding WITH (%zu) did not match the number " - "following (%zu)."), - n_before_WITH, n_after_WITH); - return 0; - } - n_pairs_local = n_before_WITH; - } - else if (n_before_WITH > 0) /* WITH keyword given, but not PAIRED keyword */ - { - n_pairs_local = n_before_WITH * n_after_WITH ; - } - else /* Neither WITH nor PAIRED keyword given */ - { - if (n_vars < 2) - { - free (vars); - msg (SE, _ ("At least two variables must be specified " - "on PAIRS.")); - return 0; - } - - /* how many ways can you pick 2 from n_vars ? */ - n_pairs_local = n_vars * (n_vars - 1) / 2; - } + bool paired = false; + size_t n_total_pairs; + size_t i, j; - /* Allocate storage for the pairs */ - pairs = xnrealloc (pairs, n_pairs + n_pairs_local, sizeof *pairs); + lex_match (lexer, '='); - /* Populate the pairs with the appropriate variables */ - if ( paired ) - { - int i; + if (!parse_variables_const (lexer, dataset_dict (ds), &vars1, &n_vars1, + PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH)) + return 0; - assert (n_pairs_local == n_vars / 2); - for (i = 0; i < n_pairs_local; ++i) - { - pairs[i].v[n_pairs] = vars[i]; - pairs[i].v[n_pairs + 1] = vars[i + n_pairs_local]; - } - } - else if (n_before_WITH > 0) /* WITH keyword given, but not PAIRED keyword */ + if (lex_match (lexer, T_WITH)) { - int i,j; - size_t p = n_pairs; - - for (i=0 ; i < n_before_WITH ; ++i ) - { - for (j=0 ; j < n_after_WITH ; ++j) - { - pairs[p].v[0] = vars[i]; - pairs[p].v[1] = vars[j+n_before_WITH]; - ++p; - } - } + if (!parse_variables_const (lexer, dataset_dict (ds), &vars2, &n_vars2, + PV_DUPLICATE | PV_NUMERIC | PV_NO_SCRATCH)) + { + free (vars1); + return 0; + } + + if (lex_match (lexer, '(') + && lex_match_id (lexer, "PAIRED") + && lex_match (lexer, ')')) + { + paired = true; + if (n_vars1 != n_vars2) + { + msg (SE, _("PAIRED was specified but the number of variables " + "preceding WITH (%zu) did not match the number " + "following (%zu)."), + n_vars1, n_vars2); + free (vars1); + free (vars2); + return 0; + } + } } - else /* Neither WITH nor PAIRED given */ + else { - size_t i,j; - size_t p=n_pairs; - - for (i=0 ; i < n_vars ; ++i ) + if (n_vars1 < 2) { - for (j=i+1 ; j < n_vars ; ++j) - { - pairs[p].v[0] = vars[i]; - pairs[p].v[1] = vars[j]; - ++p; - } + free (vars1); + msg (SE, _("At least two variables must be specified on PAIRS.")); + return 0; } } - n_pairs+=n_pairs_local; + /* Allocate storage for the new pairs. */ + n_total_pairs = proc->n_pairs + (paired ? n_vars1 + : n_vars2 > 0 ? n_vars1 * n_vars2 + : n_vars1 * (n_vars1 - 1) / 2); + proc->pairs = xnrealloc (proc->pairs, n_total_pairs, sizeof *proc->pairs); - free (vars); - return 1; -} - -/* Parses the current token (numeric or string, depending on type) - value v and returns success. */ -static int -parse_value (struct lexer *lexer, union value * v, enum val_type type) -{ - if (type == VAL_NUMERIC) - { - if (!lex_force_num (lexer)) - return 0; - v->f = lex_tokval (lexer); - } + /* Populate the pairs with the appropriate variables. */ + if (paired) + for (i = 0; i < n_vars1; i++) + add_pair (proc, vars1[i], vars2[i]); + else if (n_vars2 > 0) + for (i = 0; i < n_vars1; i++) + for (j = 0; j < n_vars2; j++) + add_pair (proc, vars1[i], vars2[j]); else - { - if (!lex_force_string (lexer)) - return 0; - memset (v->s, ' ', MAX_SHORT_STRING); - strncpy (v->s, ds_cstr (lex_tokstr (lexer)), ds_length (lex_tokstr (lexer))); - } - - lex_get (lexer); + for (i = 0; i < n_vars1; i++) + for (j = i + 1; j < n_vars1; j++) + add_pair (proc, vars1[i], vars1[j]); + assert (proc->n_pairs == n_total_pairs); + free (vars1); + free (vars2); return 1; } + +/* Implementation of the SSBOX object. */ +static void ssbox_base_init (struct ssbox *, int cols, int rows); +static void ssbox_base_finalize (struct ssbox *); +static void ssbox_one_sample_init (struct ssbox *, struct t_test_proc *); +static void ssbox_independent_samples_init (struct ssbox *, struct t_test_proc *); +static void ssbox_paired_init (struct ssbox *, struct t_test_proc *); -/* Implementation of the SSBOX object */ - -void ssbox_base_init (struct ssbox *this, int cols,int rows); - -void ssbox_base_finalize (struct ssbox *ssb); - -void ssbox_one_sample_init (struct ssbox *this, - struct cmd_t_test *cmd ); - -void ssbox_independent_samples_init (struct ssbox *this, - struct cmd_t_test *cmd); - -void ssbox_paired_init (struct ssbox *this, - struct cmd_t_test *cmd); - - -/* Factory to create an ssbox */ -void -ssbox_create (struct ssbox *ssb, struct cmd_t_test *cmd, int mode) +/* Factory to create an ssbox. */ +static void +ssbox_create (struct ssbox *ssb, struct t_test_proc *proc) { - switch (mode) - { - case T_1_SAMPLE: - ssbox_one_sample_init (ssb,cmd); - break; - case T_IND_SAMPLES: - ssbox_independent_samples_init (ssb,cmd); - break; - case T_PAIRED: - ssbox_paired_init (ssb,cmd); - break; - default: - NOT_REACHED (); - } + switch (proc->mode) + { + case T_1_SAMPLE: + ssbox_one_sample_init (ssb, proc); + break; + case T_IND_SAMPLES: + ssbox_independent_samples_init (ssb, proc); + break; + case T_PAIRED: + ssbox_paired_init (ssb, proc); + break; + default: + NOT_REACHED (); + } } - - /* Despatcher for the populate method */ -void -ssbox_populate (struct ssbox *ssb, const struct dictionary *dict, - struct cmd_t_test *cmd) +static void +ssbox_populate (struct ssbox *ssb, struct t_test_proc *proc) { - ssb->populate (ssb, dict, cmd); + ssb->populate (ssb, proc); } - /* Despatcher for finalize */ -void +static void ssbox_finalize (struct ssbox *ssb) { ssb->finalize (ssb); } - /* Submit the box and clear up */ -void +static void ssbox_base_finalize (struct ssbox *ssb) { tab_submit (ssb->t); } - - /* Initialize a ssbox struct */ -void -ssbox_base_init (struct ssbox *this, int cols,int rows) +static void +ssbox_base_init (struct ssbox *this, int cols, int rows) { this->finalize = ssbox_base_finalize; this->t = tab_create (cols, rows, 0); tab_columns (this->t, SOM_COL_DOWN, 1); - tab_headers (this->t,0,0,1,0); - tab_box (this->t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols -1, rows -1 ); - tab_hline (this->t, TAL_2,0,cols-1,1); - tab_dim (this->t, tab_natural_dimensions); + tab_headers (this->t, 0, 0, 1, 0); + tab_box (this->t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1); + tab_hline (this->t, TAL_2, 0, cols- 1, 1); + tab_dim (this->t, tab_natural_dimensions, NULL); } + +/* ssbox implementations. */ -void ssbox_one_sample_populate (struct ssbox *ssb, - const struct dictionary *, - struct cmd_t_test *cmd); +static void ssbox_one_sample_populate (struct ssbox *, struct t_test_proc *); +static void ssbox_independent_samples_populate (struct ssbox *, + struct t_test_proc *); +static void ssbox_paired_populate (struct ssbox *, struct t_test_proc *); /* Initialize the one_sample ssbox */ -void -ssbox_one_sample_init (struct ssbox *this, - struct cmd_t_test *cmd ) +static void +ssbox_one_sample_init (struct ssbox *this, struct t_test_proc *proc) { - const int hsize=5; - const int vsize=cmd->n_variables+1; + const int hsize = 5; + const int vsize = proc->n_vars + 1; this->populate = ssbox_one_sample_populate; - ssbox_base_init (this, hsize,vsize); - tab_title (this->t, _ ("One-Sample Statistics")); - tab_vline (this->t, TAL_2, 1,0,vsize - 1); - tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, _ ("N")); - tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _ ("Mean")); - tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _ ("Std. Deviation")); - tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _ ("SE. Mean")); + ssbox_base_init (this, hsize, vsize); + tab_title (this->t, _("One-Sample Statistics")); + tab_vline (this->t, TAL_2, 1, 0, vsize - 1); + tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, _("N")); + tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean")); + tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); + tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean")); } -static void ssbox_independent_samples_populate (struct ssbox *ssb, - const struct dictionary *, - struct cmd_t_test *cmd); - /* Initialize the independent samples ssbox */ -void -ssbox_independent_samples_init (struct ssbox *this, - struct cmd_t_test *cmd) +static void +ssbox_independent_samples_init (struct ssbox *this, struct t_test_proc *proc) { int hsize=6; - int vsize = cmd->n_variables*2 +1; + int vsize = proc->n_vars * 2 + 1; this->populate = ssbox_independent_samples_populate; - ssbox_base_init (this, hsize,vsize); - tab_vline (this->t, TAL_GAP, 1, 0,vsize - 1); - tab_title (this->t, _ ("Group Statistics")); - tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, var_get_name (indep_var)); - tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _ ("N")); - tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _ ("Mean")); - tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _ ("Std. Deviation")); - tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _ ("SE. Mean")); + ssbox_base_init (this, hsize, vsize); + tab_vline (this->t, TAL_GAP, 1, 0, vsize - 1); + tab_title (this->t, _("Group Statistics")); + tab_text (this->t, 1, 0, TAB_CENTER | TAT_TITLE, + var_get_name (proc->indep_var)); + tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("N")); + tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("Mean")); + tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); + tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean")); } - /* Populate the ssbox for independent samples */ static void ssbox_independent_samples_populate (struct ssbox *ssb, - const struct dictionary *dict, - struct cmd_t_test *cmd) + struct t_test_proc *proc) { int i; - const struct variable *wv = dict_get_weight (dict); - const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; - - char *val_lab[2] = {NULL, NULL}; + char *val_lab[2]; double indep_value[2]; - char prefix[2][3]={"",""}; + char prefix[2][3]; - if ( var_is_numeric (indep_var) ) + for (i = 0; i < 2; i++) { - const char *s; + union value *value = &proc->g_value[i]; + int width = var_get_width (proc->indep_var); - s = var_lookup_value_label (indep_var, &gp.v.g_value[0]); - val_lab[0] = s ? strdup (s) : NULL; + indep_value[i] = (proc->criterion == CMP_LE ? proc->critical_value + : value->f); - s = var_lookup_value_label (indep_var, &gp.v.g_value[1]); - val_lab[1] = s ? strdup (s) : NULL; - } - else - { - val_lab[0] = calloc (sizeof (char), MAX_SHORT_STRING + 1); - val_lab[1] = calloc (sizeof (char), MAX_SHORT_STRING + 1); - memcpy (val_lab[0], gp.v.g_value[0].s, MAX_SHORT_STRING); - memcpy (val_lab[1], gp.v.g_value[1].s, MAX_SHORT_STRING); + if (val_type_from_width (width) == VAL_NUMERIC) + { + const char *s = var_lookup_value_label (proc->indep_var, value); + val_lab[i] = s ? xstrdup (s) : xasprintf ("%g", indep_value[i]); + } + else + val_lab[i] = xmemdup0 (value_str (value, width), width); } - if (gp.criterion == CMP_LE ) + if (proc->criterion == CMP_LE) { - strcpy (prefix[0],">="); - strcpy (prefix[1],"<"); - indep_value[0] = gp.v.critical_value; - indep_value[1] = gp.v.critical_value; + strcpy (prefix[0], ">="); + strcpy (prefix[1], "<"); } else { - indep_value[0] = gp.v.g_value[0].f; - indep_value[1] = gp.v.g_value[1].f; + strcpy (prefix[0], ""); + strcpy (prefix[1], ""); } - assert (ssb->t); - - for (i=0; i < cmd->n_variables; ++i) + for (i = 0; i < proc->n_vars; i++) { - const struct variable *var = cmd->v_variables[i]; + const struct variable *var = proc->vars[i]; struct hsh_table *grp_hash = group_proc_get (var)->group_hash; int count=0; - tab_text (ssb->t, 0, i*2+1, TAB_LEFT, - var_get_name (cmd->v_variables[i])); - - if (val_lab[0]) - tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF, - "%s%s", prefix[0], val_lab[0]); - else - tab_text (ssb->t, 1, i*2+1, TAB_LEFT | TAT_PRINTF, - "%s%g", prefix[0], indep_value[0]); - - - if (val_lab[1]) - tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT | TAT_PRINTF, - "%s%s", prefix[1], val_lab[1]); - else - tab_text (ssb->t, 1, i*2+1+1, TAB_LEFT | TAT_PRINTF, - "%s%g", prefix[1], indep_value[1]); - + tab_text (ssb->t, 0, i * 2 + 1, TAB_LEFT, + var_get_name (proc->vars[i])); + tab_text_format (ssb->t, 1, i * 2 + 1, TAB_LEFT, + "%s%s", prefix[0], val_lab[0]); + tab_text_format (ssb->t, 1, i * 2 + 1+ 1, TAB_LEFT, + "%s%s", prefix[1], val_lab[1]); /* Fill in the group statistics */ - for ( count = 0 ; count < 2 ; ++count ) + for (count = 0; count < 2; count++) { union value search_val; - struct group_statistics *gs; - if ( gp.criterion == CMP_LE ) - { - if ( count == 0 ) - { - /* >= case */ - search_val.f = gp.v.critical_value + 1.0; - } - else - { - /* less than ( < ) case */ - search_val.f = gp.v.critical_value - 1.0; - } - } + if (proc->criterion == CMP_LE) + search_val.f = proc->critical_value + (count == 0 ? 1.0 : -1.0); else - { - search_val = gp.v.g_value[count]; - } + search_val = proc->g_value[count]; - gs = hsh_find (grp_hash, (void *) &search_val); + gs = hsh_find (grp_hash, &search_val); assert (gs); - tab_double (ssb->t, 2, i*2+count+1, TAB_RIGHT, gs->n, wfmt); - tab_double (ssb->t, 3, i*2+count+1, TAB_RIGHT, gs->mean, NULL); - tab_double (ssb->t, 4, i*2+count+1, TAB_RIGHT, gs->std_dev, NULL); - tab_double (ssb->t, 5, i*2+count+1, TAB_RIGHT, gs->se_mean, NULL); + tab_double (ssb->t, 2, i * 2 + count+ 1, TAB_RIGHT, gs->n, + &proc->weight_format); + tab_double (ssb->t, 3, i * 2 + count+ 1, TAB_RIGHT, gs->mean, NULL); + tab_double (ssb->t, 4, i * 2 + count+ 1, TAB_RIGHT, gs->std_dev, + NULL); + tab_double (ssb->t, 5, i * 2 + count+ 1, TAB_RIGHT, gs->se_mean, + NULL); } } free (val_lab[0]); free (val_lab[1]); } - -static void ssbox_paired_populate (struct ssbox *ssb, - const struct dictionary *dict, - struct cmd_t_test *cmd); - /* Initialize the paired values ssbox */ -void -ssbox_paired_init (struct ssbox *this, struct cmd_t_test *cmd UNUSED) +static void +ssbox_paired_init (struct ssbox *this, struct t_test_proc *proc) { - int hsize=6; - - int vsize = n_pairs*2+1; + int hsize = 6; + int vsize = proc->n_pairs * 2 + 1; this->populate = ssbox_paired_populate; - ssbox_base_init (this, hsize,vsize); - tab_title (this->t, _ ("Paired Sample Statistics")); - tab_vline (this->t,TAL_GAP,1,0,vsize-1); - tab_vline (this->t,TAL_2,2,0,vsize-1); - tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _ ("Mean")); - tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _ ("N")); - tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _ ("Std. Deviation")); - tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _ ("SE. Mean")); + ssbox_base_init (this, hsize, vsize); + tab_title (this->t, _("Paired Sample Statistics")); + tab_vline (this->t, TAL_GAP, 1, 0, vsize - 1); + tab_vline (this->t, TAL_2, 2, 0, vsize - 1); + tab_text (this->t, 2, 0, TAB_CENTER | TAT_TITLE, _("Mean")); + tab_text (this->t, 3, 0, TAB_CENTER | TAT_TITLE, _("N")); + tab_text (this->t, 4, 0, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); + tab_text (this->t, 5, 0, TAB_CENTER | TAT_TITLE, _("SE. Mean")); } - /* Populate the ssbox for paired values */ -void -ssbox_paired_populate (struct ssbox *ssb, const struct dictionary *dict, - struct cmd_t_test *cmd UNUSED) +static void +ssbox_paired_populate (struct ssbox *ssb, struct t_test_proc *proc) { int i; - const struct variable *wv = dict_get_weight (dict); - const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; - - assert (ssb->t); - - for (i=0; i < n_pairs; ++i) + for (i = 0; i < proc->n_pairs; i++) { + struct pair *p = &proc->pairs[i]; int j; - tab_text (ssb->t, 0, i*2+1, TAB_LEFT | TAT_PRINTF , _ ("Pair %d"),i); - - for (j=0 ; j < 2 ; ++j) + tab_text_format (ssb->t, 0, i * 2 + 1, TAB_LEFT, _("Pair %d"), i); + for (j=0; j < 2; j++) { - struct group_statistics *gs; - - gs = &group_proc_get (pairs[i].v[j])->ugs; - /* Titles */ - - tab_text (ssb->t, 1, i*2+j+1, TAB_LEFT, - var_get_name (pairs[i].v[j])); + tab_text (ssb->t, 1, i * 2 + j + 1, TAB_LEFT, + var_get_name (p->v[j])); /* Values */ - tab_double (ssb->t,2, i*2+j+1, TAB_RIGHT, pairs[i].mean[j], NULL); - tab_double (ssb->t,3, i*2+j+1, TAB_RIGHT, pairs[i].n, wfmt); - tab_double (ssb->t,4, i*2+j+1, TAB_RIGHT, pairs[i].std_dev[j], NULL); - tab_double (ssb->t,5, i*2+j+1, TAB_RIGHT, - pairs[i].std_dev[j]/sqrt (pairs[i].n), NULL); - + tab_double (ssb->t, 2, i * 2 + j + 1, TAB_RIGHT, p->mean[j], NULL); + tab_double (ssb->t, 3, i * 2 + j + 1, TAB_RIGHT, p->n, + &proc->weight_format); + tab_double (ssb->t, 4, i * 2 + j + 1, TAB_RIGHT, p->std_dev[j], + NULL); + tab_double (ssb->t, 5, i * 2 + j + 1, TAB_RIGHT, + p->std_dev[j] /sqrt (p->n), NULL); } } } /* Populate the one sample ssbox */ -void -ssbox_one_sample_populate (struct ssbox *ssb, const struct dictionary *dict, - struct cmd_t_test *cmd) +static void +ssbox_one_sample_populate (struct ssbox *ssb, struct t_test_proc *proc) { int i; - const struct variable *wv = dict_get_weight (dict); - const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; - - assert (ssb->t); - - for (i=0; i < cmd->n_variables; ++i) + for (i = 0; i < proc->n_vars; i++) { - struct group_statistics *gs = &group_proc_get (cmd->v_variables[i])->ugs; + struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs; - tab_text (ssb->t, 0, i+1, TAB_LEFT, var_get_name (cmd->v_variables[i])); - tab_double (ssb->t,1, i+1, TAB_RIGHT, gs->n, wfmt); - tab_double (ssb->t,2, i+1, TAB_RIGHT, gs->mean, NULL); - tab_double (ssb->t,3, i+1, TAB_RIGHT, gs->std_dev, NULL); - tab_double (ssb->t,4, i+1, TAB_RIGHT, gs->se_mean, NULL); + tab_text (ssb->t, 0, i + 1, TAB_LEFT, var_get_name (proc->vars[i])); + tab_double (ssb->t, 1, i + 1, TAB_RIGHT, gs->n, &proc->weight_format); + tab_double (ssb->t, 2, i + 1, TAB_RIGHT, gs->mean, NULL); + tab_double (ssb->t, 3, i + 1, TAB_RIGHT, gs->std_dev, NULL); + tab_double (ssb->t, 4, i + 1, TAB_RIGHT, gs->se_mean, NULL); } } - - - + /* Implementation of the Test Results box struct */ -void trbox_base_init (struct trbox *self,size_t n_vars, int cols); -void trbox_base_finalize (struct trbox *trb); - -void trbox_independent_samples_init (struct trbox *trb, - struct cmd_t_test *cmd ); - -static void trbox_independent_samples_populate (struct trbox *trb, - const struct dictionary *dict, - struct cmd_t_test *cmd); - -void trbox_one_sample_init (struct trbox *self, - struct cmd_t_test *cmd ); - -static void trbox_one_sample_populate (struct trbox *trb, - const struct dictionary *, - struct cmd_t_test *cmd); - -void trbox_paired_init (struct trbox *self, - struct cmd_t_test *cmd ); - -static void trbox_paired_populate (struct trbox *trb, - const struct dictionary *, - struct cmd_t_test *cmd); - - +static void trbox_base_init (struct trbox *, size_t n_vars, int cols); +static void trbox_base_finalize (struct trbox *); +static void trbox_independent_samples_init (struct trbox *, + struct t_test_proc *); +static void trbox_independent_samples_populate (struct trbox *, + struct t_test_proc *); +static void trbox_one_sample_init (struct trbox *, struct t_test_proc *); +static void trbox_one_sample_populate (struct trbox *, struct t_test_proc *); +static void trbox_paired_init (struct trbox *, struct t_test_proc *); +static void trbox_paired_populate (struct trbox *, struct t_test_proc *); /* Create a trbox according to mode*/ -void -trbox_create (struct trbox *trb, - struct cmd_t_test *cmd, int mode) +static void +trbox_create (struct trbox *trb, struct t_test_proc *proc) { - switch (mode) - { - case T_1_SAMPLE: - trbox_one_sample_init (trb,cmd); - break; - case T_IND_SAMPLES: - trbox_independent_samples_init (trb,cmd); - break; - case T_PAIRED: - trbox_paired_init (trb,cmd); - break; - default: - NOT_REACHED (); - } + switch (proc->mode) + { + case T_1_SAMPLE: + trbox_one_sample_init (trb, proc); + break; + case T_IND_SAMPLES: + trbox_independent_samples_init (trb, proc); + break; + case T_PAIRED: + trbox_paired_init (trb, proc); + break; + default: + NOT_REACHED (); + } } -/* Populate a trbox according to cmd */ +/* Populate a trbox according to proc */ static void -trbox_populate (struct trbox *trb, const struct dictionary *dict, - struct cmd_t_test *cmd) +trbox_populate (struct trbox *trb, struct t_test_proc *proc) { - trb->populate (trb, dict, cmd); + trb->populate (trb, proc); } /* Submit and destroy a trbox */ -void +static void trbox_finalize (struct trbox *trb) { trb->finalize (trb); } /* Initialize the independent samples trbox */ -void +static void trbox_independent_samples_init (struct trbox *self, - struct cmd_t_test *cmd UNUSED) + struct t_test_proc *proc) { - const int hsize=11; - const int vsize=cmd->n_variables*2+3; + const int hsize = 11; + const int vsize = proc->n_vars * 2 + 3; assert (self); self->populate = trbox_independent_samples_populate; - trbox_base_init (self,cmd->n_variables*2,hsize); - tab_title (self->t,_ ("Independent Samples Test")); - tab_hline (self->t,TAL_1,2,hsize-1,1); - tab_vline (self->t,TAL_2,2,0,vsize-1); - tab_vline (self->t,TAL_1,4,0,vsize-1); - tab_box (self->t,-1,-1,-1,TAL_1, 2,1,hsize-2,vsize-1); - tab_hline (self->t,TAL_1, hsize-2,hsize-1,2); - tab_box (self->t,-1,-1,-1,TAL_1, hsize-2,2,hsize-1,vsize-1); + trbox_base_init (self, proc->n_vars * 2, hsize); + tab_title (self->t, _("Independent Samples Test")); + tab_hline (self->t, TAL_1, 2, hsize - 1, 1); + tab_vline (self->t, TAL_2, 2, 0, vsize - 1); + tab_vline (self->t, TAL_1, 4, 0, vsize - 1); + tab_box (self->t, -1, -1, -1, TAL_1, 2, 1, hsize - 2, vsize - 1); + tab_hline (self->t, TAL_1, hsize - 2, hsize - 1, 2); + tab_box (self->t, -1, -1, -1, TAL_1, hsize - 2, 2, hsize - 1, vsize - 1); tab_joint_text (self->t, 2, 0, 3, 0, - TAB_CENTER,_ ("Levene's Test for Equality of Variances")); - tab_joint_text (self->t, 4,0,hsize-1,0, - TAB_CENTER,_ ("t-test for Equality of Means")); - - tab_text (self->t,2,2, TAB_CENTER | TAT_TITLE,_ ("F")); - tab_text (self->t,3,2, TAB_CENTER | TAT_TITLE,_ ("Sig.")); - tab_text (self->t,4,2, TAB_CENTER | TAT_TITLE,_ ("t")); - tab_text (self->t,5,2, TAB_CENTER | TAT_TITLE,_ ("df")); - tab_text (self->t,6,2, TAB_CENTER | TAT_TITLE,_ ("Sig. (2-tailed)")); - tab_text (self->t,7,2, TAB_CENTER | TAT_TITLE,_ ("Mean Difference")); - tab_text (self->t,8,2, TAB_CENTER | TAT_TITLE,_ ("Std. Error Difference")); - tab_text (self->t,9,2, TAB_CENTER | TAT_TITLE,_ ("Lower")); - tab_text (self->t,10,2, TAB_CENTER | TAT_TITLE,_ ("Upper")); - - tab_joint_text (self->t, 9, 1, 10, 1, TAB_CENTER | TAT_PRINTF, - _ ("%g%% Confidence Interval of the Difference"), - cmd->criteria*100.0); - + TAB_CENTER, _("Levene's Test for Equality of Variances")); + tab_joint_text (self->t, 4, 0, hsize- 1, 0, + TAB_CENTER, _("t-test for Equality of Means")); + + tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("F")); + tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Sig.")); + tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _("t")); + tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _("df")); + tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)")); + tab_text (self->t, 7, 2, TAB_CENTER | TAT_TITLE, _("Mean Difference")); + tab_text (self->t, 8, 2, TAB_CENTER | TAT_TITLE, _("Std. Error Difference")); + tab_text (self->t, 9, 2, TAB_CENTER | TAT_TITLE, _("Lower")); + tab_text (self->t, 10, 2, TAB_CENTER | TAT_TITLE, _("Upper")); + + tab_joint_text_format (self->t, 9, 1, 10, 1, TAB_CENTER, + _("%g%% Confidence Interval of the Difference"), + proc->criteria * 100.0); } /* Populate the independent samples trbox */ static void trbox_independent_samples_populate (struct trbox *self, - const struct dictionary *dict UNUSED, - struct cmd_t_test *cmd) + struct t_test_proc *proc) { int i; - assert (self); - for (i=0; i < cmd->n_variables; ++i) + for (i = 0; i < proc->n_vars; i++) { - double p,q; + double p, q; double t; double df; @@@ -782,738 -1058,889 +782,738 @@@ double std_err_diff; double mean_diff; - const struct variable *var = cmd->v_variables[i]; + double se2; + + const struct variable *var = proc->vars[i]; struct group_proc *grp_data = group_proc_get (var); struct hsh_table *grp_hash = grp_data->group_hash; - struct group_statistics *gs0 ; - struct group_statistics *gs1 ; + struct group_statistics *gs0; + struct group_statistics *gs1; union value search_val; - if ( gp.criterion == CMP_LE ) - search_val.f = gp.v.critical_value - 1.0; + if (proc->criterion == CMP_LE) + search_val.f = proc->critical_value - 1.0; else - search_val = gp.v.g_value[0]; + search_val = proc->g_value[0]; - gs0 = hsh_find (grp_hash, (void *) &search_val); + gs0 = hsh_find (grp_hash, &search_val); assert (gs0); - if ( gp.criterion == CMP_LE ) - search_val.f = gp.v.critical_value + 1.0; + if (proc->criterion == CMP_LE) + search_val.f = proc->critical_value + 1.0; else - search_val = gp.v.g_value[1]; + search_val = proc->g_value[1]; - gs1 = hsh_find (grp_hash, (void *) &search_val); + gs1 = hsh_find (grp_hash, &search_val); assert (gs1); - tab_text (self->t, 0, i*2+3, TAB_LEFT, var_get_name (cmd->v_variables[i])); - - tab_text (self->t, 1, i*2+3, TAB_LEFT, _ ("Equal variances assumed")); - - - tab_double (self->t, 2, i*2+3, TAB_CENTER, grp_data->levene, NULL); + tab_text (self->t, 0, i * 2 + 3, TAB_LEFT, var_get_name (proc->vars[i])); + tab_text (self->t, 1, i * 2 + 3, TAB_LEFT, _("Equal variances assumed")); + tab_double (self->t, 2, i * 2 + 3, TAB_CENTER, grp_data->levene, NULL); /* Now work out the significance of the Levene test */ - df1 = 1; df2 = grp_data->ugs.n - 2; + df1 = 1; + df2 = grp_data->ugs.n - 2; q = gsl_cdf_fdist_Q (grp_data->levene, df1, df2); + tab_double (self->t, 3, i * 2 + 3, TAB_CENTER, q, NULL); - tab_double (self->t, 3, i*2+3, TAB_CENTER, q, NULL); + df = gs0->n + gs1->n - 2.0; + tab_double (self->t, 5, i * 2 + 3, TAB_RIGHT, df, NULL); - df = gs0->n + gs1->n - 2.0 ; - tab_double (self->t, 5, i*2+3, TAB_RIGHT, df, NULL); + pooled_variance = (gs0->n * pow2 (gs0->s_std_dev) + + gs1->n *pow2 (gs1->s_std_dev)) / df ; - pooled_variance = ( (gs0->n )*pow2 (gs0->s_std_dev) - + - (gs1->n )*pow2 (gs1->s_std_dev) - ) / df ; + t = (gs0->mean - gs1->mean) / sqrt (pooled_variance); + t /= sqrt ((gs0->n + gs1->n) / (gs0->n * gs1->n)); - t = (gs0->mean - gs1->mean) / sqrt (pooled_variance) ; - t /= sqrt ((gs0->n + gs1->n)/ (gs0->n*gs1->n)); - - tab_double (self->t, 4, i*2+3, TAB_RIGHT, t, NULL); + tab_double (self->t, 4, i * 2 + 3, TAB_RIGHT, t, NULL); p = gsl_cdf_tdist_P (t, df); q = gsl_cdf_tdist_Q (t, df); - tab_double (self->t, 6, i*2+3, TAB_RIGHT, 2.0* (t>0?q:p), NULL); + tab_double (self->t, 6, i * 2 + 3, TAB_RIGHT, 2.0 * (t > 0 ? q : p), + NULL); mean_diff = gs0->mean - gs1->mean; - tab_double (self->t, 7, i*2+3, TAB_RIGHT, mean_diff, NULL); - + tab_double (self->t, 7, i * 2 + 3, TAB_RIGHT, mean_diff, NULL); - std_err_diff = sqrt ( pow2 (gs0->se_mean) + pow2 (gs1->se_mean)); - tab_double (self->t, 8, i*2+3, TAB_RIGHT, std_err_diff, NULL); + std_err_diff = sqrt (pow2 (gs0->se_mean) + pow2 (gs1->se_mean)); + tab_double (self->t, 8, i * 2 + 3, TAB_RIGHT, std_err_diff, NULL); /* Now work out the confidence interval */ - q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ + q = (1 - proc->criteria)/2.0; /* 2-tailed test */ - t = gsl_cdf_tdist_Qinv (q,df); - tab_double (self->t, 9, i*2+3, TAB_RIGHT, - mean_diff - t * std_err_diff, NULL); + t = gsl_cdf_tdist_Qinv (q, df); + tab_double (self->t, 9, i * 2 + 3, TAB_RIGHT, + mean_diff - t * std_err_diff, NULL); - tab_double (self->t, 10, i*2+3, TAB_RIGHT, - mean_diff + t * std_err_diff, NULL); + tab_double (self->t, 10, i * 2 + 3, TAB_RIGHT, + mean_diff + t * std_err_diff, NULL); - { - double se2; /* Now for the \sigma_1 != \sigma_2 case */ - tab_text (self->t, 1, i*2+3+1, - TAB_LEFT, _ ("Equal variances not assumed")); - + tab_text (self->t, 1, i * 2 + 3 + 1, + TAB_LEFT, _("Equal variances not assumed")); - se2 = (pow2 (gs0->s_std_dev)/ (gs0->n -1) ) + - (pow2 (gs1->s_std_dev)/ (gs1->n -1) ); + se2 = ((pow2 (gs0->s_std_dev) / (gs0->n - 1)) + + (pow2 (gs1->s_std_dev) / (gs1->n - 1))); - t = mean_diff / sqrt (se2) ; - tab_double (self->t, 4, i*2+3+1, TAB_RIGHT, t, NULL); + t = mean_diff / sqrt (se2); + tab_double (self->t, 4, i * 2 + 3 + 1, TAB_RIGHT, t, NULL); - df = pow2 (se2) / ( - (pow2 (pow2 (gs0->s_std_dev)/ (gs0->n - 1 )) - / (gs0->n -1 ) - ) - + - (pow2 (pow2 (gs1->s_std_dev)/ (gs1->n - 1 )) - / (gs1->n -1 ) - ) - ) ; - - tab_double (self->t, 5, i*2+3+1, TAB_RIGHT, df, NULL); + df = pow2 (se2) / ((pow2 (pow2 (gs0->s_std_dev) / (gs0->n - 1)) + / (gs0->n - 1)) + + (pow2 (pow2 (gs1->s_std_dev) / (gs1->n - 1)) + / (gs1->n - 1))); + tab_double (self->t, 5, i * 2 + 3 + 1, TAB_RIGHT, df, NULL); p = gsl_cdf_tdist_P (t, df); q = gsl_cdf_tdist_Q (t, df); - tab_double (self->t, 6, i*2+3+1, TAB_RIGHT, 2.0* (t>0?q:p), NULL); + tab_double (self->t, 6, i * 2 + 3 + 1, TAB_RIGHT, 2.0 * (t > 0 ? q : p), + NULL); /* Now work out the confidence interval */ - q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ + q = (1 - proc->criteria) / 2.0; /* 2-tailed test */ t = gsl_cdf_tdist_Qinv (q, df); - tab_double (self->t, 7, i*2+3+1, TAB_RIGHT, mean_diff, NULL); - - - tab_double (self->t, 8, i*2+3+1, TAB_RIGHT, std_err_diff, NULL); - - - tab_double (self->t, 9, i*2+3+1, TAB_RIGHT, - mean_diff - t * std_err_diff, NULL); - - tab_double (self->t, 10, i*2+3+1, TAB_RIGHT, - mean_diff + t * std_err_diff, NULL); - } + tab_double (self->t, 7, i * 2 + 3 + 1, TAB_RIGHT, mean_diff, NULL); + tab_double (self->t, 8, i * 2 + 3 + 1, TAB_RIGHT, std_err_diff, NULL); + tab_double (self->t, 9, i * 2 + 3 + 1, TAB_RIGHT, + mean_diff - t * std_err_diff, NULL); + tab_double (self->t, 10, i * 2 + 3 + 1, TAB_RIGHT, + mean_diff + t * std_err_diff, NULL); } } /* Initialize the paired samples trbox */ -void -trbox_paired_init (struct trbox *self, - struct cmd_t_test *cmd UNUSED) +static void +trbox_paired_init (struct trbox *self, struct t_test_proc *proc) { - const int hsize=10; - const int vsize=n_pairs+3; + const int vsize=proc->n_pairs+ 3; self->populate = trbox_paired_populate; - trbox_base_init (self,n_pairs,hsize); - tab_title (self->t, _ ("Paired Samples Test")); - tab_hline (self->t,TAL_1,2,6,1); - tab_vline (self->t,TAL_2,2,0,vsize - 1); - tab_joint_text (self->t,2,0,6,0,TAB_CENTER,_ ("Paired Differences")); - tab_box (self->t,-1,-1,-1,TAL_1, 2,1,6,vsize-1); - tab_box (self->t,-1,-1,-1,TAL_1, 6,0,hsize-1,vsize-1); - tab_hline (self->t,TAL_1,5,6, 2); - tab_vline (self->t,TAL_GAP,6,0,1); - - tab_joint_text (self->t, 5, 1, 6, 1, TAB_CENTER | TAT_PRINTF, - _ ("%g%% Confidence Interval of the Difference"), - cmd->criteria*100.0); - - tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _ ("Mean")); - tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _ ("Std. Deviation")); - tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _ ("Std. Error Mean")); - tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _ ("Lower")); - tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _ ("Upper")); - tab_text (self->t, 7, 2, TAB_CENTER | TAT_TITLE, _ ("t")); - tab_text (self->t, 8, 2, TAB_CENTER | TAT_TITLE, _ ("df")); - tab_text (self->t, 9, 2, TAB_CENTER | TAT_TITLE, _ ("Sig. (2-tailed)")); + trbox_base_init (self, proc->n_pairs, hsize); + tab_title (self->t, _("Paired Samples Test")); + tab_hline (self->t, TAL_1, 2, 6, 1); + tab_vline (self->t, TAL_2, 2, 0, vsize - 1); + tab_joint_text (self->t, 2, 0, 6, 0, TAB_CENTER, _("Paired Differences")); + tab_box (self->t, -1, -1, -1, TAL_1, 2, 1, 6, vsize - 1); + tab_box (self->t, -1, -1, -1, TAL_1, 6, 0, hsize - 1, vsize - 1); + tab_hline (self->t, TAL_1, 5, 6, 2); + tab_vline (self->t, TAL_GAP, 6, 0, 1); + + tab_joint_text_format (self->t, 5, 1, 6, 1, TAB_CENTER, + _("%g%% Confidence Interval of the Difference"), + proc->criteria*100.0); + + tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("Mean")); + tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Std. Deviation")); + tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _("Std. Error Mean")); + tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _("Lower")); + tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _("Upper")); + tab_text (self->t, 7, 2, TAB_CENTER | TAT_TITLE, _("t")); + tab_text (self->t, 8, 2, TAB_CENTER | TAT_TITLE, _("df")); + tab_text (self->t, 9, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)")); } /* Populate the paired samples trbox */ static void trbox_paired_populate (struct trbox *trb, - const struct dictionary *dict, - struct cmd_t_test *cmd UNUSED) + struct t_test_proc *proc) { int i; - const struct variable *wv = dict_get_weight (dict); - const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; - - for (i=0; i < n_pairs; ++i) + for (i = 0; i < proc->n_pairs; i++) { - double p,q; + struct pair *pair = &proc->pairs[i]; + double p, q; double se_mean; - double n = pairs[i].n; + double n = pair->n; double t; double df = n - 1; - tab_text (trb->t, 0, i+3, TAB_LEFT | TAT_PRINTF, _ ("Pair %d"),i); - - tab_text (trb->t, 1, i+3, TAB_LEFT | TAT_PRINTF, "%s - %s", - var_get_name (pairs[i].v[0]), - var_get_name (pairs[i].v[1])); - - tab_double (trb->t, 2, i+3, TAB_RIGHT, pairs[i].mean_diff, NULL); - - tab_double (trb->t, 3, i+3, TAB_RIGHT, pairs[i].std_dev_diff, NULL); + tab_text_format (trb->t, 0, i + 3, TAB_LEFT, _("Pair %d"), i); + tab_text_format (trb->t, 1, i + 3, TAB_LEFT, "%s - %s", + var_get_name (pair->v[0]), + var_get_name (pair->v[1])); + tab_double (trb->t, 2, i + 3, TAB_RIGHT, pair->mean_diff, NULL); + tab_double (trb->t, 3, i + 3, TAB_RIGHT, pair->std_dev_diff, NULL); /* SE Mean */ - se_mean = pairs[i].std_dev_diff / sqrt (n) ; - tab_double (trb->t, 4, i+3, TAB_RIGHT, se_mean, NULL); + se_mean = pair->std_dev_diff / sqrt (n); + tab_double (trb->t, 4, i + 3, TAB_RIGHT, se_mean, NULL); /* Now work out the confidence interval */ - q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ + q = (1 - proc->criteria) / 2.0; /* 2-tailed test */ t = gsl_cdf_tdist_Qinv (q, df); - tab_double (trb->t, 5, i+3, TAB_RIGHT, - pairs[i].mean_diff - t * se_mean , NULL); + tab_double (trb->t, 5, i + 3, TAB_RIGHT, + pair->mean_diff - t * se_mean, NULL); + tab_double (trb->t, 6, i + 3, TAB_RIGHT, + pair->mean_diff + t * se_mean, NULL); - tab_double (trb->t, 6, i+3, TAB_RIGHT, - pairs[i].mean_diff + t * se_mean , NULL); + t = ((pair->mean[0] - pair->mean[1]) + / sqrt ((pow2 (pair->s_std_dev[0]) + pow2 (pair->s_std_dev[1]) + - (2 * pair->correlation + * pair->s_std_dev[0] * pair->s_std_dev[1])) + / (n - 1))); - t = (pairs[i].mean[0] - pairs[i].mean[1]) - / sqrt ( - ( pow2 (pairs[i].s_std_dev[0]) + pow2 (pairs[i].s_std_dev[1]) - - 2 * pairs[i].correlation * - pairs[i].s_std_dev[0] * pairs[i].s_std_dev[1] ) - / (n - 1) - ); - - tab_double (trb->t, 7, i+3, TAB_RIGHT, t, NULL); + tab_double (trb->t, 7, i + 3, TAB_RIGHT, t, NULL); /* Degrees of freedom */ - tab_double (trb->t, 8, i+3, TAB_RIGHT, df, wfmt); + tab_double (trb->t, 8, i + 3, TAB_RIGHT, df, &proc->weight_format); - p = gsl_cdf_tdist_P (t, df); - q = gsl_cdf_tdist_P (t, df); + p = gsl_cdf_tdist_P (t,df); + q = gsl_cdf_tdist_Q (t,df); - tab_double (trb->t, 9, i+3, TAB_RIGHT, 2.0* (t>0?q:p), NULL); - + tab_double (trb->t, 9, i + 3, TAB_RIGHT, 2.0 * (t > 0 ? q : p), NULL); } } /* Initialize the one sample trbox */ -void -trbox_one_sample_init (struct trbox *self, struct cmd_t_test *cmd ) +static void +trbox_one_sample_init (struct trbox *self, struct t_test_proc *proc) { - const int hsize=7; - const int vsize=cmd->n_variables+3; + const int hsize = 7; + const int vsize = proc->n_vars + 3; self->populate = trbox_one_sample_populate; - trbox_base_init (self, cmd->n_variables,hsize); - tab_title (self->t, _ ("One-Sample Test")); + trbox_base_init (self, proc->n_vars, hsize); + tab_title (self->t, _("One-Sample Test")); tab_hline (self->t, TAL_1, 1, hsize - 1, 1); tab_vline (self->t, TAL_2, 1, 0, vsize - 1); - tab_joint_text (self->t, 1, 0, hsize-1,0, TAB_CENTER | TAT_PRINTF, - _ ("Test Value = %f"), cmd->n_testval[0]); - - tab_box (self->t, -1, -1, -1, TAL_1, 1,1,hsize-1,vsize-1); + tab_joint_text_format (self->t, 1, 0, hsize - 1, 0, TAB_CENTER, + _("Test Value = %f"), proc->testval); + tab_box (self->t, -1, -1, -1, TAL_1, 1, 1, hsize - 1, vsize - 1); - tab_joint_text (self->t,5,1,6,1,TAB_CENTER | TAT_PRINTF, - _ ("%g%% Confidence Interval of the Difference"), - cmd->criteria*100.0); - tab_vline (self->t,TAL_GAP,6,1,1); - tab_hline (self->t,TAL_1,5,6,2); - tab_text (self->t, 1, 2, TAB_CENTER | TAT_TITLE, _ ("t")); - tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _ ("df")); - tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _ ("Sig. (2-tailed)")); - tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _ ("Mean Difference")); - tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _ ("Lower")); - tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _ ("Upper")); + tab_joint_text_format (self->t, 5, 1, 6, 1, TAB_CENTER, + _("%g%% Confidence Interval of the Difference"), + proc->criteria * 100.0); + tab_vline (self->t, TAL_GAP, 6, 1, 1); + tab_hline (self->t, TAL_1, 5, 6, 2); + tab_text (self->t, 1, 2, TAB_CENTER | TAT_TITLE, _("t")); + tab_text (self->t, 2, 2, TAB_CENTER | TAT_TITLE, _("df")); + tab_text (self->t, 3, 2, TAB_CENTER | TAT_TITLE, _("Sig. (2-tailed)")); + tab_text (self->t, 4, 2, TAB_CENTER | TAT_TITLE, _("Mean Difference")); + tab_text (self->t, 5, 2, TAB_CENTER | TAT_TITLE, _("Lower")); + tab_text (self->t, 6, 2, TAB_CENTER | TAT_TITLE, _("Upper")); } - /* Populate the one sample trbox */ static void -trbox_one_sample_populate (struct trbox *trb, - const struct dictionary *dict, - struct cmd_t_test *cmd) +trbox_one_sample_populate (struct trbox *trb, struct t_test_proc *proc) { int i; - const struct variable *wv = dict_get_weight (dict); - const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; - assert (trb->t); - for (i=0; i < cmd->n_variables; ++i) + for (i = 0; i < proc->n_vars; i++) { double t; - double p,q; + double p, q; double df; - struct group_statistics *gs = &group_proc_get (cmd->v_variables[i])->ugs; - + struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs; - tab_text (trb->t, 0, i+3, TAB_LEFT, var_get_name (cmd->v_variables[i])); + tab_text (trb->t, 0, i + 3, TAB_LEFT, var_get_name (proc->vars[i])); - t = (gs->mean - cmd->n_testval[0] ) * sqrt (gs->n) / gs->std_dev ; + t = (gs->mean - proc->testval) * sqrt (gs->n) / gs->std_dev; - tab_double (trb->t, 1, i+3, TAB_RIGHT, t, NULL); + tab_double (trb->t, 1, i + 3, TAB_RIGHT, t, NULL); /* degrees of freedom */ df = gs->n - 1; - tab_double (trb->t, 2, i+3, TAB_RIGHT, df, wfmt); + tab_double (trb->t, 2, i + 3, TAB_RIGHT, df, &proc->weight_format); p = gsl_cdf_tdist_P (t, df); q = gsl_cdf_tdist_Q (t, df); /* Multiply by 2 to get 2-tailed significance, makeing sure we've got the correct tail*/ - tab_double (trb->t, 3, i+3, TAB_RIGHT, 2.0* (t>0?q:p), NULL); + tab_double (trb->t, 3, i + 3, TAB_RIGHT, 2.0 * (t > 0 ? q : p), NULL); + tab_double (trb->t, 4, i + 3, TAB_RIGHT, gs->mean_diff, NULL); - tab_double (trb->t, 4, i+3, TAB_RIGHT, gs->mean_diff, NULL); - - q = (1 - cmd->criteria)/2.0; /* 2-tailed test */ + q = (1 - proc->criteria) / 2.0; /* 2-tailed test */ t = gsl_cdf_tdist_Qinv (q, df); - tab_double (trb->t, 5, i+3, TAB_RIGHT, + tab_double (trb->t, 5, i + 3, TAB_RIGHT, gs->mean_diff - t * gs->se_mean, NULL); - - tab_double (trb->t, 6, i+3, TAB_RIGHT, + tab_double (trb->t, 6, i + 3, TAB_RIGHT, gs->mean_diff + t * gs->se_mean, NULL); } } /* Base initializer for the generalized trbox */ -void +static void trbox_base_init (struct trbox *self, size_t data_rows, int cols) { const size_t rows = 3 + data_rows; self->finalize = trbox_base_finalize; self->t = tab_create (cols, rows, 0); - tab_headers (self->t,0,0,3,0); - tab_box (self->t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols -1, rows -1); - tab_hline (self->t, TAL_2,0,cols-1,3); - tab_dim (self->t, tab_natural_dimensions); + tab_headers (self->t, 0, 0, 3, 0); + tab_box (self->t, TAL_2, TAL_2, TAL_0, TAL_0, 0, 0, cols - 1, rows - 1); + tab_hline (self->t, TAL_2, 0, cols- 1, 3); + tab_dim (self->t, tab_natural_dimensions, NULL); } - /* Base finalizer for the trbox */ -void +static void trbox_base_finalize (struct trbox *trb) { tab_submit (trb->t); } - -/* Create , populate and submit the Paired Samples Correlation box */ +/* Create, populate and submit the Paired Samples Correlation box */ static void -pscbox (const struct dictionary *dict) +pscbox (struct t_test_proc *proc) { - const struct variable *wv = dict_get_weight (dict); - const struct fmt_spec *wfmt = wv ? var_get_print_format (wv) : &F_8_0; - - const int rows = 1 + n_pairs; - const int cols = 5; + const int rows=1+proc->n_pairs; + const int cols=5; int i; struct tab_table *table; - table = tab_create (cols,rows,0); + table = tab_create (cols, rows, 0); tab_columns (table, SOM_COL_DOWN, 1); - tab_headers (table,0,0,1,0); - tab_box (table, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols -1, rows -1 ); + tab_headers (table, 0, 0, 1, 0); + tab_box (table, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, cols - 1, rows - 1); tab_hline (table, TAL_2, 0, cols - 1, 1); tab_vline (table, TAL_2, 2, 0, rows - 1); - tab_dim (table, tab_natural_dimensions); - tab_title (table, _ ("Paired Samples Correlations")); + tab_dim (table, tab_natural_dimensions, NULL); + tab_title (table, _("Paired Samples Correlations")); /* column headings */ - tab_text (table, 2,0, TAB_CENTER | TAT_TITLE, _ ("N")); - tab_text (table, 3,0, TAB_CENTER | TAT_TITLE, _ ("Correlation")); - tab_text (table, 4,0, TAB_CENTER | TAT_TITLE, _ ("Sig.")); + tab_text (table, 2, 0, TAB_CENTER | TAT_TITLE, _("N")); + tab_text (table, 3, 0, TAB_CENTER | TAT_TITLE, _("Correlation")); + tab_text (table, 4, 0, TAB_CENTER | TAT_TITLE, _("Sig.")); - for (i=0; i < n_pairs; ++i) + for (i = 0; i < proc->n_pairs; i++) { - double p,q; - - double df = pairs[i].n -2; - - double correlation_t = - pairs[i].correlation * sqrt (df) / - sqrt (1 - pow2 (pairs[i].correlation)); - + struct pair *pair = &proc->pairs[i]; + double p, q; + double df = pair->n -2; + double correlation_t = (pair->correlation * sqrt (df) / + sqrt (1 - pow2 (pair->correlation))); /* row headings */ - tab_text (table, 0,i+1, TAB_LEFT | TAT_TITLE | TAT_PRINTF, - _ ("Pair %d"), i); - - tab_text (table, 1,i+1, TAB_LEFT | TAT_TITLE | TAT_PRINTF, - _ ("%s & %s"), - var_get_name (pairs[i].v[0]), - var_get_name (pairs[i].v[1])); - + tab_text_format (table, 0, i + 1, TAB_LEFT | TAT_TITLE, + _("Pair %d"), i); + tab_text_format (table, 1, i + 1, TAB_LEFT | TAT_TITLE, + _("%s & %s"), + var_get_name (pair->v[0]), + var_get_name (pair->v[1])); /* row data */ - tab_double (table, 2, i+1, TAB_RIGHT, pairs[i].n, wfmt); - tab_double (table, 3, i+1, TAB_RIGHT, pairs[i].correlation, NULL); + tab_double (table, 2, i + 1, TAB_RIGHT, pair->n, &proc->weight_format); + tab_double (table, 3, i + 1, TAB_RIGHT, pair->correlation, NULL); p = gsl_cdf_tdist_P (correlation_t, df); q = gsl_cdf_tdist_Q (correlation_t, df); - - tab_double (table, 4, i+1, TAB_RIGHT, 2.0* (correlation_t>0?q:p), NULL); + tab_double (table, 4, i + 1, TAB_RIGHT, + 2.0 * (correlation_t > 0 ? q : p), NULL); } tab_submit (table); } - - - - + /* Calculation Implementation */ -/* Per case calculations common to all variants of the T test */ -static int +/* Calculations common to all variants of the T test. */ +static void common_calc (const struct dictionary *dict, - const struct ccase *c, - void *_cmd, - enum mv_class exclude) + struct t_test_proc *proc, + struct casereader *reader) { + struct ccase *c; int i; - struct cmd_t_test *cmd = (struct cmd_t_test *)_cmd; - - double weight = dict_get_case_weight (dict, c, NULL); - - - /* Listwise has to be implicit if the independent variable is missing ?? */ - if ( cmd->sbc_groups ) - { - if (var_is_value_missing (indep_var, case_data (c, indep_var), exclude)) - return 0; - } - for (i = 0; i < cmd->n_variables ; ++i) + for (i = 0; i < proc->n_vars; i++) { - const struct variable *v = cmd->v_variables[i]; - const union value *val = case_data (c, v); - - if (!var_is_value_missing (v, val, exclude)) - { - struct group_statistics *gs; - gs = &group_proc_get (v)->ugs; - - gs->n += weight; - gs->sum += weight * val->f; - gs->ssq += weight * val->f * val->f; - } + struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs; + gs->sum = 0; + gs->n = 0; + gs->ssq = 0; + gs->sum_diff = 0; } - return 0; -} - -/* Pre calculations common to all variants of the T test */ -static void -common_precalc ( struct cmd_t_test *cmd ) -{ - int i=0; - for (i=0; i< cmd->n_variables ; ++i) + for (; (c = casereader_read (reader)) != NULL; case_unref (c)) { - struct group_statistics *gs; - gs= &group_proc_get (cmd->v_variables[i])->ugs; - - gs->sum=0; - gs->n=0; - gs->ssq=0; - gs->sum_diff=0; + double weight = dict_get_case_weight (dict, c, NULL); + + /* Listwise has to be implicit if the independent variable + is missing ?? */ + if (proc->mode == T_IND_SAMPLES) + { + if (var_is_value_missing (proc->indep_var, + case_data (c, proc->indep_var), + proc->exclude)) + continue; + } + + for (i = 0; i < proc->n_vars; i++) + { + const struct variable *v = proc->vars[i]; + const union value *val = case_data (c, v); + + if (!var_is_value_missing (v, val, proc->exclude)) + { + struct group_statistics *gs; + gs = &group_proc_get (v)->ugs; + + gs->n += weight; + gs->sum += weight * val->f; + gs->ssq += weight * pow2 (val->f); + } + } } -} + casereader_destroy (reader); -/* Post calculations common to all variants of the T test */ -void -common_postcalc (struct cmd_t_test *cmd) -{ - int i=0; - - for (i=0; i< cmd->n_variables ; ++i) + for (i = 0; i < proc->n_vars; i++) { - struct group_statistics *gs; - gs= &group_proc_get (cmd->v_variables[i])->ugs; - - gs->mean=gs->sum / gs->n; - gs->s_std_dev= sqrt ( - ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) - ) ; - - gs->std_dev= sqrt ( - gs->n/ (gs->n-1) * - ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) - ) ; + struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs; + gs->mean = gs->sum / gs->n; + gs->s_std_dev = sqrt (((gs->ssq / gs->n) - pow2 (gs->mean))); + gs->std_dev = sqrt (gs->n / (gs->n- 1) + * ((gs->ssq / gs->n) - pow2 (gs->mean))); gs->se_mean = gs->std_dev / sqrt (gs->n); - gs->mean_diff= gs->sum_diff / gs->n; + gs->mean_diff = gs->sum_diff / gs->n; } } -/* Per case calculations for one sample t test */ +/* Calculations for one sample T test. */ static int -one_sample_calc (const struct dictionary *dict, - const struct ccase *c, void *cmd_, - enum mv_class exclude) +one_sample_calc (const struct dictionary *dict, struct t_test_proc *proc, + struct casereader *reader) { + struct ccase *c; int i; - struct cmd_t_test *cmd = (struct cmd_t_test *)cmd_; - - double weight = dict_get_case_weight (dict, c, NULL); - - - for (i=0; i< cmd->n_variables ; ++i) + for (i = 0; i < proc->n_vars; i++) { - struct group_statistics *gs; - const struct variable *v = cmd->v_variables[i]; - const union value *val = case_data (c, v); - - gs= &group_proc_get (cmd->v_variables[i])->ugs; - - if (!var_is_value_missing (v, val, exclude)) - gs->sum_diff += weight * (val->f - cmd->n_testval[0]); + struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs; + gs->sum_diff = 0; } - return 0; -} - -/* Pre calculations for one sample t test */ -static void -one_sample_precalc ( struct cmd_t_test *cmd ) -{ - int i=0; - - for (i=0; i< cmd->n_variables ; ++i) + for (; (c = casereader_read (reader)) != NULL; case_unref (c)) { - struct group_statistics *gs; - gs= &group_proc_get (cmd->v_variables[i])->ugs; - - gs->sum_diff=0; + double weight = dict_get_case_weight (dict, c, NULL); + for (i = 0; i < proc->n_vars; i++) + { + const struct variable *v = proc->vars[i]; + struct group_statistics *gs = &group_proc_get (v)->ugs; + const union value *val = case_data (c, v); + if (!var_is_value_missing (v, val, proc->exclude)) + gs->sum_diff += weight * (val->f - proc->testval); + } } -} -/* Post calculations for one sample t test */ -static void -one_sample_postcalc (struct cmd_t_test *cmd) -{ - int i=0; - - for (i=0; i< cmd->n_variables ; ++i) + for (i = 0; i < proc->n_vars; i++) { - struct group_statistics *gs; - gs= &group_proc_get (cmd->v_variables[i])->ugs; - - gs->mean_diff = gs->sum_diff / gs->n ; + struct group_statistics *gs = &group_proc_get (proc->vars[i])->ugs; + gs->mean_diff = gs->sum_diff / gs->n; } -} - + casereader_destroy (reader); -static void -paired_precalc (struct cmd_t_test *cmd UNUSED) -{ - int i; - - for (i=0; i < n_pairs ; ++i ) - { - pairs[i].n = 0; - pairs[i].sum[0] = 0; pairs[i].sum[1] = 0; - pairs[i].ssq[0] = 0; pairs[i].ssq[1] = 0; - pairs[i].sum_of_prod = 0; - pairs[i].correlation = 0; - pairs[i].sum_of_diffs = 0; - pairs[i].ssq_diffs = 0; - } - + return 0; } - static int -paired_calc (const struct dictionary *dict, const struct ccase *c, - struct cmd_t_test *cmd UNUSED, enum mv_class exclude) +paired_calc (const struct dictionary *dict, struct t_test_proc *proc, + struct casereader *reader) { + struct ccase *c; int i; - double weight = dict_get_case_weight (dict, c, NULL); - - for (i=0; i < n_pairs ; ++i ) + for (i = 0; i < proc->n_pairs; i++) { - const struct variable *v0 = pairs[i].v[0]; - const struct variable *v1 = pairs[i].v[1]; - - const union value *val0 = case_data (c, v0); - const union value *val1 = case_data (c, v1); - - if (!var_is_value_missing (v0, val0, exclude) && - !var_is_value_missing (v1, val1, exclude)) - { - pairs[i].n += weight; - pairs[i].sum[0] += weight * val0->f; - pairs[i].sum[1] += weight * val1->f; - - pairs[i].ssq[0] += weight * pow2 (val0->f); - pairs[i].ssq[1] += weight * pow2 (val1->f); - - pairs[i].sum_of_prod += weight * val0->f * val1->f ; - - pairs[i].sum_of_diffs += weight * ( val0->f - val1->f ) ; - pairs[i].ssq_diffs += weight * pow2 (val0->f - val1->f); - } + struct pair *pair = &proc->pairs[i]; + pair->n = 0; + pair->sum[0] = pair->sum[1] = 0; + pair->ssq[0] = pair->ssq[1] = 0; + pair->sum_of_prod = 0; + pair->correlation = 0; + pair->sum_of_diffs = 0; + pair->ssq_diffs = 0; } - return 0; -} - -static void -paired_postcalc (struct cmd_t_test *cmd UNUSED) -{ - int i; + for (; (c = casereader_read (reader)) != NULL; case_unref (c)) + { + double weight = dict_get_case_weight (dict, c, NULL); + for (i = 0; i < proc->n_pairs; i++) + { + struct pair *pair = &proc->pairs[i]; + const struct variable *v0 = pair->v[0]; + const struct variable *v1 = pair->v[1]; + + const union value *val0 = case_data (c, v0); + const union value *val1 = case_data (c, v1); + + if (!var_is_value_missing (v0, val0, proc->exclude) + && !var_is_value_missing (v1, val1, proc->exclude)) + { + pair->n += weight; + pair->sum[0] += weight * val0->f; + pair->sum[1] += weight * val1->f; + pair->ssq[0] += weight * pow2 (val0->f); + pair->ssq[1] += weight * pow2 (val1->f); + pair->sum_of_prod += weight * val0->f * val1->f; + pair->sum_of_diffs += weight * (val0->f - val1->f); + pair->ssq_diffs += weight * pow2 (val0->f - val1->f); + } + } + } - for (i=0; i < n_pairs ; ++i ) + for (i = 0; i < proc->n_pairs; i++) { + struct pair *pair = &proc->pairs[i]; + const double n = pair->n; int j; - const double n = pairs[i].n; - for (j=0; j < 2 ; ++j) + for (j=0; j < 2; j++) { - pairs[i].mean[j] = pairs[i].sum[j] / n ; - pairs[i].s_std_dev[j] = sqrt ((pairs[i].ssq[j] / n - - pow2 (pairs[i].mean[j])) - ); - - pairs[i].std_dev[j] = sqrt (n/ (n-1)* (pairs[i].ssq[j] / n - - pow2 (pairs[i].mean[j])) - ); + pair->mean[j] = pair->sum[j] / n; + pair->s_std_dev[j] = sqrt ((pair->ssq[j] / n + - pow2 (pair->mean[j]))); + pair->std_dev[j] = sqrt (n / (n- 1) * (pair->ssq[j] / n + - pow2 (pair->mean[j]))); } - pairs[i].correlation = pairs[i].sum_of_prod / pairs[i].n - - pairs[i].mean[0] * pairs[i].mean[1] ; + pair->correlation = (pair->sum_of_prod / pair->n + - pair->mean[0] * pair->mean[1]); /* correlation now actually contains the covariance */ + pair->correlation /= pair->std_dev[0] * pair->std_dev[1]; + pair->correlation *= pair->n / (pair->n - 1); - pairs[i].correlation /= pairs[i].std_dev[0] * pairs[i].std_dev[1]; - pairs[i].correlation *= pairs[i].n / ( pairs[i].n - 1 ); - - pairs[i].mean_diff = pairs[i].sum_of_diffs / n ; - - pairs[i].std_dev_diff = sqrt ( n / (n - 1) * ( - ( pairs[i].ssq_diffs / n ) - - - pow2 (pairs[i].mean_diff ) - ) ); + pair->mean_diff = pair->sum_of_diffs / n; + pair->std_dev_diff = sqrt (n / (n - 1) * ((pair->ssq_diffs / n) + - pow2 (pair->mean_diff))); } + + casereader_destroy (reader); + return 0; } -static void -group_precalc (struct cmd_t_test *cmd ) +static int +group_calc (const struct dictionary *dict, struct t_test_proc *proc, + struct casereader *reader) { + struct ccase *c; int i; - int j; - for (i=0; i< cmd->n_variables ; ++i) + for (i = 0; i < proc->n_vars; i++) { - struct group_proc *ttpr = group_proc_get (cmd->v_variables[i]); + struct group_proc *ttpr = group_proc_get (proc->vars[i]); + int j; /* There's always 2 groups for a T - TEST */ ttpr->n_groups = 2; - - gp.indep_width = var_get_width (indep_var); - ttpr->group_hash = hsh_create (2, - (hsh_compare_func *) compare_group_binary, - (hsh_hash_func *) hash_group_binary, - (hsh_free_func *) free_group, - (void *) &gp ); + (hsh_compare_func *) compare_group_binary, + (hsh_hash_func *) hash_group_binary, + (hsh_free_func *) free_group, + proc); - for (j=0 ; j < 2 ; ++j) + for (j = 0; j < 2; j++) { struct group_statistics *gs = xmalloc (sizeof *gs); - gs->sum = 0; gs->n = 0; gs->ssq = 0; - - if ( gp.criterion == CMP_EQ ) - { - gs->id = gp.v.g_value[j]; - } + if (proc->criterion == CMP_EQ) + gs->id = proc->g_value[j]; else { - if ( j == 0 ) - gs->id.f = gp.v.critical_value - 1.0 ; + if (j == 0) + gs->id.f = proc->critical_value - 1.0; else - gs->id.f = gp.v.critical_value + 1.0 ; + gs->id.f = proc->critical_value + 1.0; } - hsh_insert ( ttpr->group_hash, (void *) gs ); + hsh_insert (ttpr->group_hash, gs); } } -} - -static int -group_calc (const struct dictionary *dict, - const struct ccase *c, struct cmd_t_test *cmd, - enum mv_class exclude) -{ - int i; - - const double weight = dict_get_case_weight (dict, c, NULL); - - const union value *gv; - - if (var_is_value_missing (indep_var, case_data (c, indep_var), exclude)) - return 0; - - gv = case_data (c, indep_var); - - for (i=0; i< cmd->n_variables ; ++i) + for (; (c = casereader_read (reader)) != NULL; case_unref (c)) { - const struct variable *var = cmd->v_variables[i]; - const union value *val = case_data (c, var); - struct hsh_table *grp_hash = group_proc_get (var)->group_hash; - struct group_statistics *gs; - - gs = hsh_find (grp_hash, (void *) gv); - - /* If the independent variable doesn't match either of the values - for this case then move on to the next case */ - if ( ! gs ) - return 0; - - if (!var_is_value_missing (var, val, exclude)) - { - gs->n += weight; - gs->sum += weight * val->f; - gs->ssq += weight * pow2 (val->f); - } + const double weight = dict_get_case_weight (dict, c, NULL); + const union value *gv; + + if (var_is_value_missing (proc->indep_var, + case_data (c, proc->indep_var), proc->exclude)) + continue; + + gv = case_data (c, proc->indep_var); + for (i = 0; i < proc->n_vars; i++) + { + const struct variable *var = proc->vars[i]; + const union value *val = case_data (c, var); + struct hsh_table *grp_hash = group_proc_get (var)->group_hash; + struct group_statistics *gs = hsh_find (grp_hash, gv); + + /* If the independent variable doesn't match either of the values + for this case then move on to the next case. */ + if (gs == NULL) + break; + + if (!var_is_value_missing (var, val, proc->exclude)) + { + gs->n += weight; + gs->sum += weight * val->f; + gs->ssq += weight * pow2 (val->f); + } + } } - return 0; -} - - -static void -group_postcalc ( struct cmd_t_test *cmd ) -{ - int i; - - for (i = 0; i < cmd->n_variables ; ++i) + for (i = 0; i < proc->n_vars; i++) { - const struct variable *var = cmd->v_variables[i]; + const struct variable *var = proc->vars[i]; struct hsh_table *grp_hash = group_proc_get (var)->group_hash; struct hsh_iterator g; struct group_statistics *gs; - int count=0; + int count = 0; - for (gs = hsh_first (grp_hash,&g); - gs != 0; - gs = hsh_next (grp_hash,&g)) + for (gs = hsh_first (grp_hash, &g); gs != NULL; + gs = hsh_next (grp_hash, &g)) { gs->mean = gs->sum / gs->n; - - gs->s_std_dev= sqrt ( - ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) - ) ; - - gs->std_dev= sqrt ( - gs->n/ (gs->n-1) * - ( (gs->ssq / gs->n ) - gs->mean * gs->mean ) - ) ; - + gs->s_std_dev = sqrt (((gs->ssq / gs->n) - pow2 (gs->mean))); + gs->std_dev = sqrt (gs->n / (gs->n- 1) + * ((gs->ssq / gs->n) - pow2 (gs->mean))); gs->se_mean = gs->std_dev / sqrt (gs->n); - count ++; + count++; } assert (count == 2); } -} + casereader_destroy (reader); + return 0; +} static void -calculate (struct cmd_t_test *cmd, - struct casereader *input, const struct dataset *ds) +calculate (struct t_test_proc *proc, + struct casereader *input, const struct dataset *ds) { const struct dictionary *dict = dataset_dict (ds); struct ssbox stat_summary_box; struct trbox test_results_box; - - struct casereader *pass1, *pass2, *pass3; struct taint *taint; - struct ccase c; - - enum mv_class exclude = cmd->miss != TTS_INCLUDE ? MV_ANY : MV_SYSTEM; + struct ccase *c; - if (!casereader_peek (input, 0, &c)) + c = casereader_peek (input, 0); + if (c == NULL) { casereader_destroy (input); return; } - output_split_file_values (ds, &c); - case_destroy (&c); + output_split_file_values (ds, c); + case_unref (c); - if ( cmd->miss == TTS_LISTWISE ) + if (proc->listwise_missing) input = casereader_create_filter_missing (input, - cmd->v_variables, - cmd->n_variables, - exclude, NULL); - + proc->vars, + proc->n_vars, + proc->exclude, NULL, NULL); input = casereader_create_filter_weight (input, dict, NULL, NULL); - taint = taint_clone (casereader_get_taint (input)); - casereader_split (input, &pass1, &pass2); - - common_precalc (cmd); - for (; casereader_read (pass1, &c); case_destroy (&c)) - common_calc (dict, &c, cmd, exclude); - casereader_destroy (pass1); - common_postcalc (cmd); - switch (mode) + common_calc (dict, proc, casereader_clone (input)); + switch (proc->mode) { case T_1_SAMPLE: - one_sample_precalc (cmd); - for (; casereader_read (pass2, &c); case_destroy (&c)) - one_sample_calc (dict, &c, cmd, exclude); - one_sample_postcalc (cmd); + one_sample_calc (dict, proc, input); break; case T_PAIRED: - paired_precalc (cmd); - for (; casereader_read (pass2, &c); case_destroy (&c)) - paired_calc (dict, &c, cmd, exclude); - paired_postcalc (cmd); + paired_calc (dict, proc, input); break; case T_IND_SAMPLES: - pass3 = casereader_clone (pass2); - - group_precalc (cmd); - for (; casereader_read (pass2, &c); case_destroy (&c)) - group_calc (dict, &c, cmd, exclude); - group_postcalc (cmd); - - levene (dict, pass3, indep_var, cmd->n_variables, cmd->v_variables, - exclude); + group_calc (dict, proc, casereader_clone (input)); + levene (dict, input, proc->indep_var, proc->n_vars, proc->vars, + proc->exclude); break; + default: + NOT_REACHED (); } - casereader_destroy (pass2); if (!taint_has_tainted_successor (taint)) { - ssbox_create (&stat_summary_box,cmd,mode); - ssbox_populate (&stat_summary_box, dict, cmd); + ssbox_create (&stat_summary_box, proc); + ssbox_populate (&stat_summary_box, proc); ssbox_finalize (&stat_summary_box); - if ( mode == T_PAIRED ) - pscbox (dict); + if (proc->mode == T_PAIRED) + pscbox (proc); - trbox_create (&test_results_box, cmd, mode); - trbox_populate (&test_results_box, dict, cmd); + trbox_create (&test_results_box, proc); + trbox_populate (&test_results_box, proc); trbox_finalize (&test_results_box); } taint_destroy (taint); } -short which_group (const struct group_statistics *g, - const struct group_properties *p); +/* return 0 if G belongs to group 0, + 1 if it belongs to group 1, + 2 if it belongs to neither group */ +static int +which_group (const struct group_statistics *g, + const struct t_test_proc *proc) +{ + int width = var_get_width (proc->indep_var); + + if (0 == value_compare_3way (&g->id, &proc->g_value[0], width)) + return 0; + + if (0 == value_compare_3way (&g->id, &proc->g_value[1], width)) + return 1; + + return 2; +} /* Return -1 if the id of a is less than b; +1 if greater than and 0 if equal */ static int compare_group_binary (const struct group_statistics *a, - const struct group_statistics *b, - const struct group_properties *p) + const struct group_statistics *b, + const struct t_test_proc *proc) { - short flag_a; - short flag_b; + int flag_a; + int flag_b; - if ( p->criterion == CMP_LE ) + if (proc->criterion == CMP_LE) { - /* less-than comparision is not meaningfull for - alpha variables, so we shouldn't ever arrive here */ - assert (p->indep_width == 0 ) ; - - flag_a = ( a->id.f < p->v.critical_value ) ; - flag_b = ( b->id.f < p->v.critical_value ) ; + flag_a = (a->id.f < proc->critical_value); + flag_b = (b->id.f < proc->critical_value); } else { - flag_a = which_group (a, p); - flag_b = which_group (b, p); + flag_a = which_group (a, proc); + flag_b = which_group (b, proc); } - if (flag_a < flag_b ) - return -1; + if (flag_a < flag_b) + return - 1; return (flag_a > flag_b); } @@@ -1523,11 -1950,40 +1523,11 @@@ static unsigned hash_group_binary (const struct group_statistics *g, - const struct group_properties *p) -{ - short flag = -1; - - if ( p->criterion == CMP_LE ) - { - /* Not meaningfull to do a less than compare for alpha values ? */ - assert (p->indep_width == 0 ) ; - flag = ( g->id.f < p->v.critical_value ) ; - } - else if ( p->criterion == CMP_EQ) - { - flag = which_group (g,p); - } - else - NOT_REACHED (); - - return flag; -} - -/* return 0 if G belongs to group 0, - 1 if it belongs to group 1, - 2 if it belongs to neither group */ -short -which_group (const struct group_statistics *g, - const struct group_properties *p) + const struct t_test_proc *proc) { - if ( 0 == compare_values (&g->id, &p->v.g_value[0], p->indep_width)) - return 0; - - if ( 0 == compare_values (&g->id, &p->v.g_value[1], p->indep_width)) - return 1; - - return 2; + return (proc->criterion == CMP_LE + ? g->id.f < proc->critical_value + : which_group (g, proc)); } /* diff --combined tests/automake.mk index aba1a62c,5092eca2..15e297a0 --- a/tests/automake.mk +++ b/tests/automake.mk @@@ -6,16 -6,13 +6,16 @@@ TESTS_ENVIRONMENT += PERL='@PERL@' PG_C # Allow locale_charset to find charset.alias before running "make install". TESTS_ENVIRONMENT += CHARSETALIASDIR='$(abs_top_builddir)/gl' +TESTS_ENVIRONMENT += LC_ALL=C + dist_TESTS = \ + tests/command/add-files.sh \ tests/command/aggregate.sh \ + tests/command/attributes.sh \ tests/command/autorecod.sh \ tests/command/beg-data.sh \ tests/command/bignum.sh \ tests/command/count.sh \ - tests/command/datasheet.sh \ tests/command/data-list.sh \ tests/command/do-if.sh \ tests/command/do-repeat.sh \ @@@ -31,9 -28,9 +31,9 @@@ tests/command/get-data-txt-examples.sh \ tests/command/get-data-txt-importcases.sh \ tests/command/import-export.sh \ - tests/command/input-program.sh \ tests/command/insert.sh \ tests/command/lag.sh \ + tests/command/line-ends.sh \ tests/command/list.sh \ tests/command/loop.sh \ tests/command/longvars.sh \ @@@ -43,8 -40,6 +43,8 @@@ tests/command/n_of_cases.sh \ tests/command/npar-binomial.sh \ tests/command/npar-chisquare.sh \ + tests/command/npar-wilcoxon.sh \ + tests/command/npar-sign.sh \ tests/command/oneway.sh \ tests/command/oneway-missing.sh \ tests/command/oneway-with-splits.sh \ @@@ -55,9 -50,6 +55,9 @@@ tests/command/rename.sh \ tests/command/regression.sh \ tests/command/regression-qr.sh \ + tests/command/reliability.sh \ + tests/command/roc.sh \ + tests/command/roc2.sh \ tests/command/sample.sh \ tests/command/sort.sh \ tests/command/sysfiles.sh \ @@@ -76,7 -68,6 +76,7 @@@ tests/command/t-test-pairs.sh \ tests/command/trimmed-mean.sh \ tests/command/tabs.sh \ + tests/command/update.sh \ tests/command/use.sh \ tests/command/variable-display.sh \ tests/command/vector.sh \ @@@ -109,7 -100,6 +109,7 @@@ tests/bugs/compute-fmt.sh \ tests/bugs/compression.sh \ tests/bugs/crosstabs.sh \ + tests/bugs/crosstabs2.sh \ tests/bugs/crosstabs-crash.sh \ tests/bugs/crosstabs-crash2.sh \ tests/bugs/curtailed.sh \ @@@ -117,9 -107,6 +117,9 @@@ tests/bugs/double-frequency.sh \ tests/bugs/empty-do-repeat.sh \ tests/bugs/get.sh \ + tests/bugs/examine-crash.sh \ + tests/bugs/examine-crash2.sh \ + tests/bugs/examine-crash3.sh \ tests/bugs/examine-1sample.sh \ tests/bugs/examine-missing.sh \ tests/bugs/examine-missing2.sh \ @@@ -136,13 -123,13 +136,14 @@@ tests/bugs/overwrite-special-file.sh \ tests/bugs/piechart.sh \ tests/bugs/random.sh \ + tests/bugs/shbang.sh \ tests/bugs/signals.sh \ tests/bugs/t-test-with-temp.sh \ tests/bugs/t-test.sh \ tests/bugs/t-test-alpha.sh \ tests/bugs/t-test-alpha2.sh \ tests/bugs/t-test-alpha3.sh \ + tests/bugs/t-test-paired.sh \ tests/bugs/temporary.sh \ tests/bugs/unwritable-dir.sh \ tests/bugs/val-labs.sh \ @@@ -154,8 -141,6 +155,8 @@@ tests/bugs/temp-freq.sh \ tests/bugs/print-crash.sh \ tests/bugs/keep-all.sh \ + tests/data/datasheet-test.sh \ + tests/libpspp/sparse-xarray-test.sh \ tests/output/paper-size.sh \ tests/xforms/recode.sh \ tests/stats/descript-basic.sh \ @@@ -184,8 -169,6 +185,8 @@@ nodist_TESTS = tests/libpspp/abt-test \ tests/libpspp/bt-test \ tests/libpspp/heap-test \ + tests/libpspp/hmap-test \ + tests/libpspp/hmapx-test \ tests/libpspp/ll-test \ tests/libpspp/llx-test \ tests/libpspp/range-map-test \ @@@ -198,20 -181,13 +199,20 @@@ TESTS = $(dist_TESTS) $(nodist_TESTS check_PROGRAMS += \ $(nodist_TESTS) \ - tests/formats/inexactify + tests/data/datasheet-test \ + tests/formats/inexactify \ + tests/libpspp/sparse-xarray-test + +tests_data_datasheet_test_SOURCES = \ + tests/data/datasheet-test.c +tests_data_datasheet_test_LDADD = src/libpspp-core.la @LIBINTL@ tests_libpspp_ll_test_SOURCES = \ src/libpspp/ll.c \ src/libpspp/ll.h \ tests/libpspp/ll-test.c tests_libpspp_ll_test_LDADD = gl/libgl.la @LIBINTL@ +tests_libpspp_ll_test_CFLAGS = $(AM_CFLAGS) tests_libpspp_llx_test_SOURCES = \ src/libpspp/ll.c \ @@@ -220,7 -196,6 +221,7 @@@ src/libpspp/llx.h \ tests/libpspp/llx-test.c tests_libpspp_llx_test_LDADD = gl/libgl.la @LIBINTL@ +tests_libpspp_llx_test_CFLAGS = $(AM_CFLAGS) tests_libpspp_heap_test_SOURCES = \ src/libpspp/heap.c \ @@@ -228,25 -203,9 +229,25 @@@ src/libpspp/pool.c \ src/libpspp/pool.h \ tests/libpspp/heap-test.c -tests_libpspp_heap_test_LDADD = gl/libgl.la @LIBINTL@ +tests_libpspp_heap_test_LDADD = gl/libgl.la @LIBINTL@ tests_libpspp_heap_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 +tests_libpspp_hmap_test_SOURCES = \ + src/libpspp/hmap.c \ + src/libpspp/hmap.h \ + tests/libpspp/hmap-test.c +tests_libpspp_hmap_test_LDADD = gl/libgl.la @LIBINTL@ +tests_libpspp_hmap_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 + +tests_libpspp_hmapx_test_SOURCES = \ + src/libpspp/hmap.c \ + src/libpspp/hmap.h \ + src/libpspp/hmapx.c \ + src/libpspp/hmapx.h \ + tests/libpspp/hmapx-test.c +tests_libpspp_hmapx_test_LDADD = gl/libgl.la @LIBINTL@ +tests_libpspp_hmapx_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 + tests_libpspp_abt_test_SOURCES = \ src/libpspp/abt.c \ src/libpspp/abt.h \ @@@ -278,12 -237,12 +279,12 @@@ tests_libpspp_range_set_test_SOURCES = src/libpspp/range-set.c \ src/libpspp/range-set.h \ tests/libpspp/range-set-test.c -tests_libpspp_range_set_test_LDADD = gl/libgl.la @LIBINTL@ +tests_libpspp_range_set_test_LDADD = gl/libgl.la @LIBINTL@ tests_libpspp_range_set_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 tests_libpspp_str_test_SOURCES = \ tests/libpspp/str-test.c -tests_libpspp_str_test_LDADD = src/libpspp/libpspp.a gl/libgl.la @LIBINTL@ +tests_libpspp_str_test_LDADD = src/libpspp/libpspp.la gl/libgl.la @LIBINTL@ tests_libpspp_tower_test_SOURCES = \ src/libpspp/abt.c \ @@@ -293,7 -252,7 +294,7 @@@ src/libpspp/tower.c \ src/libpspp/tower.h \ tests/libpspp/tower-test.c -tests_libpspp_tower_test_LDADD = gl/libgl.la @LIBINTL@ +tests_libpspp_tower_test_LDADD = gl/libgl.la @LIBINTL@ tests_libpspp_tower_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 tests_libpspp_sparse_array_test_SOURCES = \ @@@ -302,24 -261,9 +303,24 @@@ src/libpspp/pool.c \ src/libpspp/pool.h \ tests/libpspp/sparse-array-test.c -tests_libpspp_sparse_array_test_LDADD = gl/libgl.la @LIBINTL@ +tests_libpspp_sparse_array_test_LDADD = gl/libgl.la @LIBINTL@ tests_libpspp_sparse_array_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 +tests_libpspp_sparse_xarray_test_SOURCES = \ + src/libpspp/argv-parser.c \ + src/libpspp/bt.c \ + src/libpspp/deque.c \ + src/libpspp/model-checker.c \ + src/libpspp/range-set.c \ + src/libpspp/sparse-array.c \ + src/libpspp/sparse-xarray.c \ + src/libpspp/str.c \ + src/libpspp/pool.c \ + src/libpspp/tmpfile.c \ + tests/libpspp/sparse-xarray-test.c +tests_libpspp_sparse_xarray_test_LDADD = gl/libgl.la @LIBINTL@ +tests_libpspp_sparse_xarray_test_CPPFLAGS = $(AM_CPPFLAGS) -DASSERT_LEVEL=10 + tests_formats_inexactify_SOURCES = tests/formats/inexactify.c noinst_PROGRAMS += tests/dissect-sysfile @@@ -327,7 -271,7 +328,7 @@@ tests_dissect_sysfile_SOURCES = src/libpspp/integer-format.c \ src/libpspp/float-format.c \ tests/dissect-sysfile.c -tests_dissect_sysfile_LDADD = gl/libgl.la @LIBINTL@ +tests_dissect_sysfile_LDADD = gl/libgl.la @LIBINTL@ tests_dissect_sysfile_CPPFLAGS = $(AM_CPPFLAGS) -DINSTALLDIR=\"$(bindir)\" EXTRA_DIST += \ @@@ -370,12 -314,11 +371,12 @@@ CLEANFILES += *.save pspp.* foo* -DIST_HOOKS += check-for-export-var-val check-for-export-var-val: @if grep -q 'export .*=' $(dist_TESTS) ; then \ echo 'One or more tests contain non-portable "export VAR=val" syntax' ; \ false ; \ fi +DIST_HOOKS += check-for-export-var-val + EXTRA_DIST += tests/OChangeLog