From: John Darrington Date: Mon, 2 Mar 2009 22:12:26 +0000 (+0900) Subject: Merge branch 'master'; commit 'origin/stable' X-Git-Tag: v0.7.3~263 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8af88c0b7ea2fe75df7e45497988ed0371006a86;hp=-c;p=pspp-builds.git Merge branch 'master'; commit 'origin/stable' Conflicts: src/math/linreg.h src/ui/gui/psppire-data-editor.c --- 8af88c0b7ea2fe75df7e45497988ed0371006a86 diff --combined src/data/sys-file-reader.c index af09189e,e8349abf..84d7f83c --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@@ -34,7 -34,6 +34,7 @@@ #include #include +#include #include #include #include @@@ -99,11 -98,9 +99,11 @@@ static struct variable *lookup_var_by_v struct variable **, int value_idx); +static void sys_msg (struct sfm_reader *r, int class, + const char *format, va_list args) + PRINTF_FORMAT (3, 0); static void sys_warn (struct sfm_reader *, const char *, ...) PRINTF_FORMAT (2, 3); - static void sys_error (struct sfm_reader *, const char *, ...) PRINTF_FORMAT (2, 3) NO_RETURN; @@@ -115,23 -112,15 +115,23 @@@ static double read_float (struct sfm_re static void read_string (struct sfm_reader *, char *, size_t); static void skip_bytes (struct sfm_reader *, size_t); -static struct variable_to_value_map *open_variable_to_value_map ( - struct sfm_reader *, size_t size); -static void close_variable_to_value_map (struct sfm_reader *r, - struct variable_to_value_map *); -static bool read_variable_to_value_map (struct sfm_reader *, - struct dictionary *, - struct variable_to_value_map *, - struct variable **var, char **value, - int *warning_cnt); +static struct text_record *open_text_record (struct sfm_reader *, size_t size); +static void close_text_record (struct sfm_reader *r, + struct text_record *); +static bool read_variable_to_value_pair (struct sfm_reader *, + struct dictionary *, + struct text_record *, + struct variable **var, char **value); +static void text_warn (struct sfm_reader *r, struct text_record *text, + const char *format, ...) + PRINTF_FORMAT (3, 4); +static char *text_get_token (struct text_record *, + struct substring delimiters); +static bool text_match (struct text_record *, char c); +static bool text_read_short_name (struct sfm_reader *, struct dictionary *, + struct text_record *, + struct substring delimiters, + struct variable **); static bool close_reader (struct sfm_reader *r); @@@ -174,12 -163,7 +174,12 @@@ static void read_long_var_name_map (str static void read_long_string_map (struct sfm_reader *, size_t size, size_t count, struct dictionary *); - +static void read_data_file_attributes (struct sfm_reader *, + size_t size, size_t count, + struct dictionary *); +static void read_variable_attributes (struct sfm_reader *, + size_t size, size_t count, + struct dictionary *); /* Opens the system file designated by file handle FH for reading. Reads the system file's dictionary into *DICT. @@@ -744,7 -728,11 +744,11 @@@ read_extension_record (struct sfm_reade break; case 7: - /* Unknown purpose. */ + /* Used by the MRSETS command. */ + break; + + case 8: + /* Used by the SPSS Data Entry software. */ break; case 11: @@@ -764,12 -752,9 +768,12 @@@ break; case 17: - /* Text field that defines variable attributes. New in - SPSS 14. */ - break; + read_data_file_attributes (r, size, count, dict); + return; + + case 18: + read_variable_attributes (r, size, count, dict); + return; case 20: /* New in SPSS 16. Contains a single string that describes @@@ -784,7 -769,8 +788,8 @@@ break; default: - sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype); + sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"), + subtype, PACKAGE_BUGREPORT); break; } @@@ -946,12 -932,14 +951,12 @@@ static voi read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count, struct dictionary *dict) { - struct variable_to_value_map *map; + struct text_record *text; struct variable *var; char *long_name; - int warning_cnt = 0; - map = open_variable_to_value_map (r, size * count); - while (read_variable_to_value_map (r, dict, map, &var, &long_name, - &warning_cnt)) + text = open_text_record (r, size * count); + while (read_variable_to_value_pair (r, dict, text, &var, &long_name)) { char **short_names; size_t short_name_cnt; @@@ -997,7 -985,7 +1002,7 @@@ } free (short_names); } - close_variable_to_value_map (r, map); + close_text_record (r, text); r->has_long_var_names = true; } @@@ -1007,12 -995,14 +1012,12 @@@ static voi read_long_string_map (struct sfm_reader *r, size_t size, size_t count, struct dictionary *dict) { - struct variable_to_value_map *map; + struct text_record *text; struct variable *var; char *length_s; - int warning_cnt = 0; - map = open_variable_to_value_map (r, size * count); - while (read_variable_to_value_map (r, dict, map, &var, &length_s, - &warning_cnt)) + text = open_text_record (r, size * count); + while (read_variable_to_value_pair (r, dict, text, &var, &length_s)) { size_t idx = var_get_dict_index (var); long int length; @@@ -1060,7 -1050,7 +1065,7 @@@ dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1); var_set_width (var, length); } - close_variable_to_value_map (r, map); + close_text_record (r, text); dict_compact_values (dict); } @@@ -1198,96 -1188,6 +1203,96 @@@ read_value_labels (struct sfm_reader *r pool_destroy (subpool); } + +/* Reads a set of custom attributes from TEXT into ATTRS. + ATTRS may be a null pointer, in which case the attributes are + read but discarded. */ +static void +read_attributes (struct sfm_reader *r, struct text_record *text, + struct attrset *attrs) +{ + do + { + struct attribute *attr; + char *key; + int index; + + /* Parse the key. */ + key = text_get_token (text, ss_cstr ("(")); + if (key == NULL) + return; + + attr = attribute_create (key); + for (index = 1; ; index++) + { + /* Parse the value. */ + char *value; + size_t length; + + value = text_get_token (text, ss_cstr ("\n")); + if (value == NULL) + { + text_warn (r, text, _("Error parsing attribute value %s[%d]"), + key, index); + break; + } + + length = strlen (value); + if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'') + { + value[length - 1] = '\0'; + attribute_add_value (attr, value + 1); + } + else + { + text_warn (r, text, + _("Attribute value %s[%d] is not quoted: %s"), + key, index, value); + attribute_add_value (attr, value); + } + + /* Was this the last value for this attribute? */ + if (text_match (text, ')')) + break; + } + if (attrs != NULL) + attrset_add (attrs, attr); + else + attribute_destroy (attr); + } + while (!text_match (text, '/')); +} + +/* Reads record type 7, subtype 17, which lists custom + attributes on the data file. */ +static void +read_data_file_attributes (struct sfm_reader *r, + size_t size, size_t count, + struct dictionary *dict) +{ + struct text_record *text = open_text_record (r, size * count); + read_attributes (r, text, dict_get_attributes (dict)); + close_text_record (r, text); +} + +/* Reads record type 7, subtype 18, which lists custom + attributes on individual variables. */ +static void +read_variable_attributes (struct sfm_reader *r, + size_t size, size_t count, + struct dictionary *dict) +{ + struct text_record *text = open_text_record (r, size * count); + for (;;) + { + struct variable *var; + if (!text_read_short_name (r, dict, text, ss_cstr (":"), &var)) + break; + read_attributes (r, text, var != NULL ? var_get_attributes (var) : NULL); + } + close_text_record (r, text); +} + /* Case reader. */ @@@ -1304,24 -1204,24 +1309,24 @@@ static bool read_compressed_string (str static bool read_whole_strings (struct sfm_reader *, char *, size_t); static bool skip_whole_strings (struct sfm_reader *, size_t); -/* Reads one case from READER's file into C. Returns true only - if successful. */ -static bool -sys_file_casereader_read (struct casereader *reader, void *r_, - struct ccase *c) +/* Reads and returns one case from READER's file. Returns a null + pointer if not successful. */ +static struct ccase * +sys_file_casereader_read (struct casereader *reader, void *r_) { struct sfm_reader *r = r_; + struct ccase *volatile c; int i; if (r->error) - return false; + return NULL; - case_create (c, r->value_cnt); + c = case_create (r->value_cnt); if (setjmp (r->bail_out)) { casereader_force_error (reader); - case_destroy (c); - return false; + case_unref (c); + return NULL; } for (i = 0; i < r->sfm_var_cnt; i++) @@@ -1342,15 -1242,15 +1347,15 @@@ partial_record (r); } } - return true; + return c; eof: - case_destroy (c); + case_unref (c); if (i != 0) partial_record (r); if (r->case_cnt != -1) read_error (reader, r); - return false; + return NULL; } /* Issues an error that R ends in a partial record. */ @@@ -1623,124 -1523,82 +1628,124 @@@ lookup_var_by_short_name (struct dictio return NULL; } -/* Helpers for reading records that contain "variable=value" - pairs. */ +/* Helpers for reading records that contain structured text + strings. */ + +/* Maximum number of warnings to issue for a single text + record. */ +#define MAX_TEXT_WARNINGS 5 /* State. */ -struct variable_to_value_map +struct text_record { struct substring buffer; /* Record contents. */ size_t pos; /* Current position in buffer. */ + int n_warnings; /* Number of warnings issued or suppressed. */ }; -/* Reads SIZE bytes into a "variable=value" map for R, - and returns the map. */ -static struct variable_to_value_map * -open_variable_to_value_map (struct sfm_reader *r, size_t size) +/* Reads SIZE bytes into a text record for R, + and returns the new text record. */ +static struct text_record * +open_text_record (struct sfm_reader *r, size_t size) { - struct variable_to_value_map *map = pool_alloc (r->pool, sizeof *map); + struct text_record *text = pool_alloc (r->pool, sizeof *text); char *buffer = pool_malloc (r->pool, size + 1); read_bytes (r, buffer, size); - map->buffer = ss_buffer (buffer, size); - map->pos = 0; - return map; + text->buffer = ss_buffer (buffer, size); + text->pos = 0; + text->n_warnings = 0; + return text; } -/* Closes MAP and frees its storage. - Not really needed, because the pool will free the map anyway, - but can be used to free it earlier. */ +/* Closes TEXT, frees its storage, and issues a final warning + about suppressed warnings if necesary. */ static void -close_variable_to_value_map (struct sfm_reader *r, - struct variable_to_value_map *map) +close_text_record (struct sfm_reader *r, struct text_record *text) { - pool_free (r->pool, ss_data (map->buffer)); + if (text->n_warnings > MAX_TEXT_WARNINGS) + sys_warn (r, _("Suppressed %d additional related warnings."), + text->n_warnings - MAX_TEXT_WARNINGS); + pool_free (r->pool, ss_data (text->buffer)); } -/* Reads the next variable=value pair from MAP. +/* Reads a variable=value pair from TEXT. Looks up the variable in DICT and stores it into *VAR. Stores a null-terminated value into *VALUE. */ static bool -read_variable_to_value_map (struct sfm_reader *r, struct dictionary *dict, - struct variable_to_value_map *map, - struct variable **var, char **value, - int *warning_cnt) +read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict, + struct text_record *text, + struct variable **var, char **value) { - int max_warnings = 5; - for (;;) { - struct substring short_name_ss, value_ss; + if (!text_read_short_name (r, dict, text, ss_cstr ("="), var)) + return false; + + *value = text_get_token (text, ss_buffer ("\t\0", 2)); + if (*value == NULL) + return false; - if (!ss_tokenize (map->buffer, ss_cstr ("="), &map->pos, &short_name_ss) - || !ss_tokenize (map->buffer, ss_buffer ("\t\0", 2), &map->pos, - &value_ss)) - { - if (*warning_cnt > max_warnings) - sys_warn (r, _("Suppressed %d additional variable map warnings."), - *warning_cnt - max_warnings); - return false; - } + text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX), + ss_buffer ("\t\0", 2)); - map->pos += ss_span (ss_substr (map->buffer, map->pos, SIZE_MAX), - ss_buffer ("\t\0", 2)); + if (*var != NULL) + return true; + } +} - ss_data (short_name_ss)[ss_length (short_name_ss)] = '\0'; - *var = lookup_var_by_short_name (dict, ss_data (short_name_ss)); - if (*var == NULL) - { - if (++*warning_cnt <= max_warnings) - sys_warn (r, _("Variable map refers to unknown variable %s."), - ss_data (short_name_ss)); - continue; - } +static bool +text_read_short_name (struct sfm_reader *r, struct dictionary *dict, + struct text_record *text, struct substring delimiters, + struct variable **var) +{ + char *short_name = text_get_token (text, delimiters); + if (short_name == NULL) + return false; - ss_data (value_ss)[ss_length (value_ss)] = '\0'; - *value = ss_data (value_ss); + *var = lookup_var_by_short_name (dict, short_name); + if (*var == NULL) + text_warn (r, text, _("Variable map refers to unknown variable %s."), + short_name); + return true; +} + +/* Displays a warning for the current file position, limiting the + number to MAX_TEXT_WARNINGS for TEXT. */ +static void +text_warn (struct sfm_reader *r, struct text_record *text, + const char *format, ...) +{ + if (text->n_warnings++ < MAX_TEXT_WARNINGS) + { + va_list args; + + va_start (args, format); + sys_msg (r, MW, format, args); + va_end (args); + } +} +static char * +text_get_token (struct text_record *text, struct substring delimiters) +{ + struct substring token; + + if (!ss_tokenize (text->buffer, delimiters, &text->pos, &token)) + return NULL; + ss_data (token)[ss_length (token)] = '\0'; + return ss_data (token); +} + +static bool +text_match (struct text_record *text, char c) +{ + if (text->buffer.string[text->pos] == c) + { + text->pos++; return true; } + else + return false; } /* Messages. */ diff --combined src/libpspp/str.c index f054c9ef,552968b5..3cff0492 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@@ -1,5 -1,5 +1,5 @@@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@@ -26,6 -26,7 +26,7 @@@ #include #include + #include #include "minmax.h" #include "xalloc.h" #include "xsize.h" @@@ -194,18 -195,15 +195,18 @@@ buf_copy_rpad (char *dst, size_t dst_si void str_copy_rpad (char *dst, size_t dst_size, const char *src) { - size_t src_len = strlen (src); - if (src_len < dst_size - 1) + if (dst_size > 0) { - memcpy (dst, src, src_len); - memset (&dst[src_len], ' ', dst_size - 1 - src_len); + size_t src_len = strlen (src); + if (src_len < dst_size - 1) + { + memcpy (dst, src, src_len); + memset (&dst[src_len], ' ', dst_size - 1 - src_len); + } + else + memcpy (dst, src, dst_size - 1); + dst[dst_size - 1] = 0; } - else - memcpy (dst, src, dst_size - 1); - dst[dst_size - 1] = 0; } /* Copies SRC to DST, which is in a buffer DST_SIZE bytes long. @@@ -1190,42 -1188,48 +1191,42 @@@ ds_cstr (const struct string *st_ return st->ss.string; } -/* Appends to ST a newline-terminated line read from STREAM, but - no more than MAX_LENGTH characters. - Newline is the last character of ST on return, if encountering - a newline was the reason for terminating. - Returns true if at least one character was read from STREAM - and appended to ST, false if no characters at all were read - before an I/O error or end of file was encountered (or - MAX_LENGTH was 0). */ +/* Reads characters from STREAM and appends them to ST, stopping + after MAX_LENGTH characters, after appending a newline, or + after an I/O error or end of file was encountered, whichever + comes first. Returns true if at least one character was added + to ST, false if no characters were read before an I/O error or + end of file (or if MAX_LENGTH was 0). + + This function accepts LF, CR LF, and CR sequences as new-line, + and translates each of them to a single '\n' new-line + character in ST. */ bool ds_read_line (struct string *st, FILE *stream, size_t max_length) { - if (!st->ss.length && max_length == SIZE_MAX) - { - size_t capacity = st->capacity ? st->capacity + 1 : 0; - ssize_t n = getline (&st->ss.string, &capacity, stream); - if (capacity) - st->capacity = capacity - 1; - if (n > 0) - { - st->ss.length = n; - return true; - } - else - return false; - } - else + size_t length; + + for (length = 0; length < max_length; length++) { - size_t length; + int c = getc (stream); + if (c == EOF) + break; - for (length = 0; length < max_length; length++) + if (c == '\r') { - int c = getc (stream); - if (c == EOF) - break; - - ds_put_char (st, c); - if (c == '\n') - return true; + c = getc (stream); + if (c != '\n') + { + ungetc (c, stream); + c = '\n'; + } } - - return length > 0; + ds_put_char (st, c); + if (c == '\n') + return true; } + + return length > 0; } /* Removes a comment introduced by `#' from ST, @@@ -1393,3 -1397,20 +1394,20 @@@ ds_put_char_multiple (struct string *st { memset (ds_put_uninit (st, cnt), ch, cnt); } + + + /* If relocation has been enabled, replace ST, + with its relocated version */ + void + ds_relocate (struct string *st) + { + const char *orig = ds_cstr (st); + const char *rel = relocate (orig); + + if ( orig != rel) + { + ds_clear (st); + ds_put_cstr (st, rel); + free ((char *) rel); + } + } diff --combined src/math/linreg.c index f5ae33e5,609a78b6..811f9d23 --- a/src/math/linreg.c +++ b/src/math/linreg.c @@@ -24,6 -24,7 +24,6 @@@ #include #include #include -#include #include #include #include @@@ -137,15 -138,12 +137,15 @@@ pspp_linreg_get_vars (const void *c_, c independent variables. */ pspp_linreg_cache * -pspp_linreg_cache_alloc (size_t n, size_t p) +pspp_linreg_cache_alloc (const struct variable *depvar, const struct variable **indep_vars, + size_t n, size_t p) { + size_t i; pspp_linreg_cache *c; c = (pspp_linreg_cache *) malloc (sizeof (pspp_linreg_cache)); - c->depvar = NULL; + c->depvar = depvar; + c->indep_vars = indep_vars; c->indep_means = gsl_vector_alloc (p); c->indep_std = gsl_vector_alloc (p); c->ssx = gsl_vector_alloc (p); /* Sums of squares for the @@@ -154,22 -152,9 +154,22 @@@ c->ss_indeps = gsl_vector_alloc (p); /* Sums of squares for the model parameters. */ - c->cov = gsl_matrix_alloc (p + 1, p + 1); /* Covariance matrix. */ c->n_obs = n; c->n_indeps = p; + c->n_coeffs = 0; + for (i = 0; i < p; i++) + { + if (var_is_numeric (indep_vars[i])) + { + c->n_coeffs++; + } + else + { + c->n_coeffs += cat_get_n_categories (indep_vars[i]) - 1; + } + } + + c->cov = gsl_matrix_alloc (c->n_coeffs + 1, c->n_coeffs + 1); /* Default settings. */ @@@ -209,12 -194,13 +209,12 @@@ pspp_linreg_cache_free (void *m return true; } static void -cache_init (pspp_linreg_cache *cache, const struct design_matrix *dm) +cache_init (pspp_linreg_cache *cache) { assert (cache != NULL); cache->dft = cache->n_obs - 1; cache->dfm = cache->n_indeps; cache->dfe = cache->dft - cache->dfm; - cache->n_coeffs = dm->m->size2; cache->intercept = 0.0; } @@@ -334,8 -320,7 +334,8 @@@ pspp_linreg (const gsl_vector * Y, cons cache->depvar_std = s; cache->sst = ss; } - cache_init (cache, dm); + cache_init (cache); + cache->n_coeffs = dm->m->size2; for (i = 0; i < dm->m->size2; i++) { if (opts->get_indep_mean_std[i]) @@@ -660,81 -645,100 +660,81 @@@ void pspp_linreg_set_indep_variable_mea only variables in the model are in the covariance matrix. */ static struct design_matrix * -rearrange_covariance_matrix (const struct design_matrix *cov, pspp_linreg_cache *c) +rearrange_covariance_matrix (const struct covariance_matrix *cm, pspp_linreg_cache *c) { - struct variable **v; - struct variable **model_vars; - struct variable *tmp; + const struct variable **model_vars; + struct design_matrix *cov; struct design_matrix *result; - int n_vars; - int found; - size_t *columns; + size_t *permutation; size_t i; size_t j; size_t k; - size_t dep_col; + assert (cm != NULL); + cov = covariance_to_design (cm); assert (cov != NULL); assert (c != NULL); assert (cov->m->size1 > 0); assert (cov->m->size2 == cov->m->size1); - v = xnmalloc (c->n_coeffs, sizeof (*v)); - model_vars = xnmalloc (c->n_coeffs, sizeof (*model_vars)); - columns = xnmalloc (cov->m->size2, sizeof (*columns)); - n_vars = pspp_linreg_get_vars (c, (const struct variable **) v); - dep_col = 0; - k = 0; - for (i = 0; i < cov->m->size2; i++) - { - tmp = design_matrix_col_to_var (cov, i); - found = 0; - j = 0; - while (!found && j < n_vars) - { - if (tmp == v[j]) - { - found = 1; - if (tmp == c->depvar) - { - dep_col = j; - } - else - { - columns[k] = j; - k++; - } - } - j++; - } - } - k++; - columns[k] = dep_col; - /* - K should now be equal to C->N_INDEPS + 1. If it is not, then - either the code above is wrong or the caller didn't send us the - correct values in C. - */ - assert (k == c->n_indeps + 1); + permutation = xnmalloc (1 + c->n_indeps, sizeof (*permutation)); + model_vars = xnmalloc (1 + c->n_indeps, sizeof (*model_vars)); + /* Put the model variables in the right order in MODEL_VARS. */ - for (i = 0; i < k; i++) + for (i = 0; i < c->n_indeps; i++) { - model_vars[i] = v[columns[i]]; + model_vars[i] = c->indep_vars[i]; } - - result = covariance_matrix_create (k, model_vars); - for (i = 0; i < result->m->size1; i++) + model_vars[i] = c->depvar; + result = covariance_matrix_create (1 + c->n_indeps, model_vars); + for (j = 0; j < cov->m->size2; j++) { - for (j = 0; j < result->m->size2; j++) + k = 0; + while (k < result->m->size2) { - gsl_matrix_set (result->m, i, j, gsl_matrix_get (cov->m, columns[i], columns[j])); + if (design_matrix_col_to_var (cov, j) == design_matrix_col_to_var (result, k)) + { + permutation[k] = j; + } + k++; } } - free (columns); - free (v); + for (i = 0; i < result->m->size1; i++) + for (j = 0; j < result->m->size2; j++) + { + gsl_matrix_set (result->m, i, j, gsl_matrix_get (cov->m, permutation[i], permutation[j])); + } + free (permutation); + free (model_vars); return result; } /* Estimate the model parameters from the covariance matrix only. This method uses less memory than PSPP_LINREG, which requires the entire data set to be stored in memory. + + The function assumes FULL_COV may contain columns corresponding to + variables that are not in the model. It fixes this in + REARRANG_COVARIANCE_MATRIX. This allows the caller to compute a + large covariance matrix once before, then pass it to this without + having to alter it. The problem is that this means the caller must + set CACHE->N_COEFFS. */ -int -pspp_linreg_with_cov (const struct design_matrix *full_cov, +void +pspp_linreg_with_cov (const struct covariance_matrix *full_cov, pspp_linreg_cache * cache) { struct design_matrix *cov; - assert (cov != NULL); + assert (full_cov != NULL); assert (cache != NULL); cov = rearrange_covariance_matrix (full_cov, cache); - cache_init (cache, cov); + cache_init (cache); reg_sweep (cov->m); post_sweep_computations (cache, cov, cov->m); - covariance_matrix_destroy (cov); + design_matrix_destroy (cov); } double pspp_linreg_mse (const pspp_linreg_cache *c) @@@ -742,4 -746,3 +742,3 @@@ assert (c != NULL); return (c->sse / c->dfe); } -