#include <libpspp/hash.h>
#include <libpspp/array.h>
+#include <data/attributes.h>
#include <data/case.h>
#include <data/casereader-provider.h>
#include <data/casereader.h>
struct variable **,
int value_idx);
+static void sys_msg (struct sfm_reader *r, int class,
+ const char *format, va_list args)
+ PRINTF_FORMAT (3, 0);
static void sys_warn (struct sfm_reader *, const char *, ...)
PRINTF_FORMAT (2, 3);
-
static void sys_error (struct sfm_reader *, const char *, ...)
PRINTF_FORMAT (2, 3)
NO_RETURN;
static void read_string (struct sfm_reader *, char *, size_t);
static void skip_bytes (struct sfm_reader *, size_t);
-static struct variable_to_value_map *open_variable_to_value_map (
- struct sfm_reader *, size_t size);
-static void close_variable_to_value_map (struct sfm_reader *r,
- struct variable_to_value_map *);
-static bool read_variable_to_value_map (struct sfm_reader *,
- struct dictionary *,
- struct variable_to_value_map *,
- struct variable **var, char **value,
- int *warning_cnt);
+static struct text_record *open_text_record (struct sfm_reader *, size_t size);
+static void close_text_record (struct sfm_reader *r,
+ struct text_record *);
+static bool read_variable_to_value_pair (struct sfm_reader *,
+ struct dictionary *,
+ struct text_record *,
+ struct variable **var, char **value);
+static void text_warn (struct sfm_reader *r, struct text_record *text,
+ const char *format, ...)
+ PRINTF_FORMAT (3, 4);
+static char *text_get_token (struct text_record *,
+ struct substring delimiters);
+static bool text_match (struct text_record *, char c);
+static bool text_read_short_name (struct sfm_reader *, struct dictionary *,
+ struct text_record *,
+ struct substring delimiters,
+ struct variable **);
static bool close_reader (struct sfm_reader *r);
\f
static void read_long_string_map (struct sfm_reader *,
size_t size, size_t count,
struct dictionary *);
-
+static void read_data_file_attributes (struct sfm_reader *,
+ size_t size, size_t count,
+ struct dictionary *);
+static void read_variable_attributes (struct sfm_reader *,
+ size_t size, size_t count,
+ struct dictionary *);
/* Opens the system file designated by file handle FH for
reading. Reads the system file's dictionary into *DICT.
break;
case 7:
- /* Unknown purpose. */
+ /* Used by the MRSETS command. */
+ break;
+
+ case 8:
+ /* Used by the SPSS Data Entry software. */
break;
case 11:
break;
case 17:
- /* Text field that defines variable attributes. New in
- SPSS 14. */
- break;
+ read_data_file_attributes (r, size, count, dict);
+ return;
+
+ case 18:
+ read_variable_attributes (r, size, count, dict);
+ return;
case 20:
/* New in SPSS 16. Contains a single string that describes
break;
default:
- sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype);
+ sys_warn (r, _("Unrecognized record type 7, subtype %d. Please send a copy of this file, and the syntax which created it to %s"),
+ subtype, PACKAGE_BUGREPORT);
break;
}
read_long_var_name_map (struct sfm_reader *r, size_t size, size_t count,
struct dictionary *dict)
{
- struct variable_to_value_map *map;
+ struct text_record *text;
struct variable *var;
char *long_name;
- int warning_cnt = 0;
- map = open_variable_to_value_map (r, size * count);
- while (read_variable_to_value_map (r, dict, map, &var, &long_name,
- &warning_cnt))
+ text = open_text_record (r, size * count);
+ while (read_variable_to_value_pair (r, dict, text, &var, &long_name))
{
char **short_names;
size_t short_name_cnt;
}
free (short_names);
}
- close_variable_to_value_map (r, map);
+ close_text_record (r, text);
r->has_long_var_names = true;
}
read_long_string_map (struct sfm_reader *r, size_t size, size_t count,
struct dictionary *dict)
{
- struct variable_to_value_map *map;
+ struct text_record *text;
struct variable *var;
char *length_s;
- int warning_cnt = 0;
- map = open_variable_to_value_map (r, size * count);
- while (read_variable_to_value_map (r, dict, map, &var, &length_s,
- &warning_cnt))
+ text = open_text_record (r, size * count);
+ while (read_variable_to_value_pair (r, dict, text, &var, &length_s))
{
size_t idx = var_get_dict_index (var);
long int length;
dict_delete_consecutive_vars (dict, idx + 1, segment_cnt - 1);
var_set_width (var, length);
}
- close_variable_to_value_map (r, map);
+ close_text_record (r, text);
dict_compact_values (dict);
}
pool_destroy (subpool);
}
+
+/* Reads a set of custom attributes from TEXT into ATTRS.
+ ATTRS may be a null pointer, in which case the attributes are
+ read but discarded.
+
+ As parsed here, each attribute is a key token delimited by '(',
+ followed by one or more value tokens delimited by new-lines,
+ with ')' after the last value. Values are expected to be
+ enclosed in single quotes, which are stripped; an unquoted
+ value draws a warning through text_warn and is stored as is.
+ Parsing stops when no further key can be read or when '/'
+ follows an attribute. */
+static void
+read_attributes (struct sfm_reader *r, struct text_record *text,
+ struct attrset *attrs)
+{
+ do
+ {
+ struct attribute *attr;
+ char *key;
+ int index;
+
+ /* Parse the key, which is delimited by '('. A null token
+ means there is nothing more to read, so stop. */
+ key = text_get_token (text, ss_cstr ("("));
+ if (key == NULL)
+ return;
+
+ attr = attribute_create (key);
+ for (index = 1; ; index++)
+ {
+ /* Parse the value, which is delimited by a new-line.
+ INDEX counts values from 1 and is only used in
+ warning messages. */
+ char *value;
+ size_t length;
+
+ value = text_get_token (text, ss_cstr ("\n"));
+ if (value == NULL)
+ {
+ text_warn (r, text, _("Error parsing attribute value %s[%d]"),
+ key, index);
+ break;
+ }
+
+ length = strlen (value);
+ if (length >= 2 && value[0] == '\'' && value[length - 1] == '\'')
+ {
+ value[length - 1] = '\0'; /* Drop the closing quote... */
+ attribute_add_value (attr, value + 1); /* ...and the opening one. */
+ }
+ else
+ {
+ text_warn (r, text,
+ _("Attribute value %s[%d] is not quoted: %s"),
+ key, index, value);
+ attribute_add_value (attr, value);
+ }
+
+ /* Was this the last value for this attribute? A ')' at
+ the current position says so. */
+ if (text_match (text, ')'))
+ break;
+ }
+ if (attrs != NULL)
+ attrset_add (attrs, attr);
+ else
+ attribute_destroy (attr); /* Caller asked for the attributes to be discarded. */
+ }
+ while (!text_match (text, '/')); /* A '/' after an attribute ends the set. */
+}
+
+/* Handles record type 7, subtype 17: the custom attributes
+   attached to the data file.  The record's SIZE * COUNT bytes are
+   loaded as a text record and parsed into the attribute set
+   obtained from DICT. */
+static void
+read_data_file_attributes (struct sfm_reader *r,
+                           size_t size, size_t count,
+                           struct dictionary *dict)
+{
+  struct text_record *record;
+  struct attrset *file_attrs;
+
+  record = open_text_record (r, size * count);
+  file_attrs = dict_get_attributes (dict);
+  read_attributes (r, record, file_attrs);
+  close_text_record (r, record);
+}
+
+/* Handles record type 7, subtype 18: custom attributes attached
+   to individual variables.  The record is read as a sequence of
+   short variable names, each delimited by ':' and followed by
+   that variable's attributes.  When a name is not found in DICT,
+   its attributes are still parsed, but into a null attribute set,
+   which read_attributes discards. */
+static void
+read_variable_attributes (struct sfm_reader *r,
+                          size_t size, size_t count,
+                          struct dictionary *dict)
+{
+  struct text_record *record = open_text_record (r, size * count);
+  struct variable *v;
+
+  while (text_read_short_name (r, dict, record, ss_cstr (":"), &v))
+    {
+      struct attrset *target = NULL;
+
+      if (v != NULL)
+        target = var_get_attributes (v);
+      read_attributes (r, record, target);
+    }
+  close_text_record (r, record);
+}
+
\f
/* Case reader. */
static bool read_whole_strings (struct sfm_reader *, char *, size_t);
static bool skip_whole_strings (struct sfm_reader *, size_t);
-/* Reads one case from READER's file into C. Returns true only
- if successful. */
-static bool
-sys_file_casereader_read (struct casereader *reader, void *r_,
- struct ccase *c)
+/* Reads and returns one case from READER's file. Returns a null
+ pointer if not successful. */
+static struct ccase *
+sys_file_casereader_read (struct casereader *reader, void *r_)
{
struct sfm_reader *r = r_;
+ struct ccase *volatile c;
int i;
if (r->error)
- return false;
+ return NULL;
- case_create (c, r->value_cnt);
+ c = case_create (r->value_cnt);
if (setjmp (r->bail_out))
{
casereader_force_error (reader);
- case_destroy (c);
- return false;
+ case_unref (c);
+ return NULL;
}
for (i = 0; i < r->sfm_var_cnt; i++)
partial_record (r);
}
}
- return true;
+ return c;
eof:
- case_destroy (c);
+ case_unref (c);
if (i != 0)
partial_record (r);
if (r->case_cnt != -1)
read_error (reader, r);
- return false;
+ return NULL;
}
/* Issues an error that R ends in a partial record. */
return NULL;
}
\f
-/* Helpers for reading records that contain "variable=value"
- pairs. */
+/* Helpers for reading records that contain structured text
+ strings. */
+
+/* Maximum number of warnings to issue for a single text
+ record. */
+#define MAX_TEXT_WARNINGS 5
/* State. */
-struct variable_to_value_map
+struct text_record
{
struct substring buffer; /* Record contents. */
size_t pos; /* Current position in buffer. */
+ int n_warnings; /* Number of warnings issued or suppressed
+ (text_warn displays only the first MAX_TEXT_WARNINGS). */
};
-/* Reads SIZE bytes into a "variable=value" map for R,
- and returns the map. */
-static struct variable_to_value_map *
-open_variable_to_value_map (struct sfm_reader *r, size_t size)
+/* Reads SIZE bytes into a text record for R,
+ and returns the new text record.
+
+ Both the record and its buffer are allocated from R's pool.
+ The buffer is one byte longer than SIZE, presumably so that
+ text_get_token can null-terminate a token that ends at the very
+ end of the record; that extra byte is not read from the file.
+ The position and the warning count both start at 0. */
+static struct text_record *
+open_text_record (struct sfm_reader *r, size_t size)
{
- struct variable_to_value_map *map = pool_alloc (r->pool, sizeof *map);
+ struct text_record *text = pool_alloc (r->pool, sizeof *text);
char *buffer = pool_malloc (r->pool, size + 1);
read_bytes (r, buffer, size);
- map->buffer = ss_buffer (buffer, size);
- map->pos = 0;
- return map;
+ text->buffer = ss_buffer (buffer, size);
+ text->pos = 0;
+ text->n_warnings = 0;
+ return text;
}
-/* Closes MAP and frees its storage.
- Not really needed, because the pool will free the map anyway,
- but can be used to free it earlier. */
+/* Closes TEXT, frees its storage, and issues a final warning
+ about suppressed warnings if necessary.
+
+ The final warning reports how many warnings beyond
+ MAX_TEXT_WARNINGS were counted for TEXT. Only TEXT's buffer is
+ returned to R's pool here. NOTE(review): the struct text_record
+ itself appears to remain allocated in the pool; confirm that is
+ intended. */
static void
-close_variable_to_value_map (struct sfm_reader *r,
- struct variable_to_value_map *map)
+close_text_record (struct sfm_reader *r, struct text_record *text)
{
- pool_free (r->pool, ss_data (map->buffer));
+ if (text->n_warnings > MAX_TEXT_WARNINGS)
+ sys_warn (r, _("Suppressed %d additional related warnings."),
+ text->n_warnings - MAX_TEXT_WARNINGS);
+ pool_free (r->pool, ss_data (text->buffer));
}
-/* Reads the next variable=value pair from MAP.
+/* Reads a variable=value pair from TEXT.
+ Pairs whose variable is not in DICT are skipped; the name
+ lookup in text_read_short_name has already warned about them.
+ Returns true if a pair was read, false once a name or value
+ token can no longer be read from TEXT.
Looks up the variable in DICT and stores it into *VAR.
Stores a null-terminated value into *VALUE. */
static bool
-read_variable_to_value_map (struct sfm_reader *r, struct dictionary *dict,
- struct variable_to_value_map *map,
- struct variable **var, char **value,
- int *warning_cnt)
+read_variable_to_value_pair (struct sfm_reader *r, struct dictionary *dict,
+ struct text_record *text,
+ struct variable **var, char **value)
{
- int max_warnings = 5;
-
for (;;)
{
- struct substring short_name_ss, value_ss;
+ if (!text_read_short_name (r, dict, text, ss_cstr ("="), var))
+ return false;
+
+ *value = text_get_token (text, ss_buffer ("\t\0", 2)); /* The value is delimited by a tab or a null byte. */
+ if (*value == NULL)
+ return false;
- if (!ss_tokenize (map->buffer, ss_cstr ("="), &map->pos, &short_name_ss)
- || !ss_tokenize (map->buffer, ss_buffer ("\t\0", 2), &map->pos,
- &value_ss))
- {
- if (*warning_cnt > max_warnings)
- sys_warn (r, _("Suppressed %d additional variable map warnings."),
- *warning_cnt - max_warnings);
- return false;
- }
+ text->pos += ss_span (ss_substr (text->buffer, text->pos, SIZE_MAX),
+ ss_buffer ("\t\0", 2)); /* NOTE(review): presumably skips any further tab/null
+ delimiters after the value; confirm against ss_span. */
- map->pos += ss_span (ss_substr (map->buffer, map->pos, SIZE_MAX),
- ss_buffer ("\t\0", 2));
+ if (*var != NULL) /* Otherwise the variable was unknown: try the next pair. */
+ return true;
+ }
+}
- ss_data (short_name_ss)[ss_length (short_name_ss)] = '\0';
- *var = lookup_var_by_short_name (dict, ss_data (short_name_ss));
- if (*var == NULL)
- {
- if (++*warning_cnt <= max_warnings)
- sys_warn (r, _("Variable map refers to unknown variable %s."),
- ss_data (short_name_ss));
- continue;
- }
+/* Reads a token from TEXT, delimited by DELIMITERS, and looks it
+   up in DICT as a short variable name, storing the result in
+   *VAR.  An unknown name is reported through text_warn and
+   leaves *VAR null, but still counts as success.  Returns false,
+   without storing into *VAR, only when no token could be read. */
+static bool
+text_read_short_name (struct sfm_reader *r, struct dictionary *dict,
+                      struct text_record *text, struct substring delimiters,
+                      struct variable **var)
+{
+  char *name = text_get_token (text, delimiters);
+
+  if (name != NULL)
+    {
- ss_data (value_ss)[ss_length (value_ss)] = '\0';
- *value = ss_data (value_ss);
+      *var = lookup_var_by_short_name (dict, name);
+      if (*var == NULL)
+        text_warn (r, text, _("Variable map refers to unknown variable %s."),
+                   name);
+      return true;
+    }
+  return false;
+}
+
+/* Issues a printf-style warning for the current file position
+   on behalf of TEXT.  Every call is counted in TEXT's warning
+   count, but only the first MAX_TEXT_WARNINGS calls for a given
+   TEXT actually display anything. */
+static void
+text_warn (struct sfm_reader *r, struct text_record *text,
+           const char *format, ...)
+{
+  va_list ap;
+  bool show = text->n_warnings < MAX_TEXT_WARNINGS;
+
+  text->n_warnings++;
+  if (!show)
+    return;
+
+  va_start (ap, format);
+  sys_msg (r, MW, format, ap);
+  va_end (ap);
+}
+/* Extracts the next token from TEXT by calling ss_tokenize on
+   TEXT's buffer with DELIMITERS, which also updates TEXT's
+   current position.  The token is null-terminated in place by
+   overwriting the byte just past it in TEXT's buffer, and a
+   pointer to its first byte is returned.  Returns a null pointer
+   if ss_tokenize reports that there is no token. */
+static char *
+text_get_token (struct text_record *text, struct substring delimiters)
+{
+  struct substring tok;
+
+  if (ss_tokenize (text->buffer, delimiters, &text->pos, &tok))
+    {
+      char *start = ss_data (tok);
+
+      start[ss_length (tok)] = '\0';
+      return start;
+    }
+  return NULL;
+}
+
+/* If the byte at TEXT's current position is C, advances past it
+   and returns true.  Otherwise, including when the current
+   position is at or beyond the end of the record, leaves TEXT
+   unchanged and returns false. */
+static bool
+text_match (struct text_record *text, char c)
+{
+  /* Nothing here guarantees that the position is still inside
+     the record: it is advanced by the tokenizer (NOTE(review):
+     confirm how far ss_tokenize moves it after the final token),
+     and the one spare byte allocated past the record is never
+     read from the file.  Do not examine bytes outside the
+     record. */
+  if (text->pos >= ss_length (text->buffer))
+    return false;
+
+  if (text->buffer.string[text->pos] == c)
+    {
+      text->pos++;
return true;
}
+  else
+    return false;
}
\f
/* Messages. */
/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <libpspp/message.h>
#include <libpspp/pool.h>
+ #include <relocatable.h>
#include "minmax.h"
#include "xalloc.h"
#include "xsize.h"
void
str_copy_rpad (char *dst, size_t dst_size, const char *src)
{
- size_t src_len = strlen (src);
- if (src_len < dst_size - 1)
+ if (dst_size > 0) /* A zero-size DST has no room even for the
+ terminator, and dst_size - 1 would wrap around. */
{
- memcpy (dst, src, src_len);
- memset (&dst[src_len], ' ', dst_size - 1 - src_len);
+ size_t src_len = strlen (src);
+ if (src_len < dst_size - 1) /* SRC fits: copy it, then pad with spaces. */
+ {
+ memcpy (dst, src, src_len);
+ memset (&dst[src_len], ' ', dst_size - 1 - src_len);
+ }
+ else
+ memcpy (dst, src, dst_size - 1); /* SRC is too long: truncate it. */
+ dst[dst_size - 1] = 0; /* Null-terminate in either case. */
}
- else
- memcpy (dst, src, dst_size - 1);
- dst[dst_size - 1] = 0;
}
/* Copies SRC to DST, which is in a buffer DST_SIZE bytes long.
return st->ss.string;
}
-/* Appends to ST a newline-terminated line read from STREAM, but
- no more than MAX_LENGTH characters.
- Newline is the last character of ST on return, if encountering
- a newline was the reason for terminating.
- Returns true if at least one character was read from STREAM
- and appended to ST, false if no characters at all were read
- before an I/O error or end of file was encountered (or
- MAX_LENGTH was 0). */
+/* Reads characters from STREAM and appends them to ST, stopping
+ after MAX_LENGTH characters, after appending a newline, or
+ after an I/O error or end of file was encountered, whichever
+ comes first. Returns true if at least one character was added
+ to ST, false if no characters were read before an I/O error or
+ end of file (or if MAX_LENGTH was 0).
+
+ This function accepts LF, CR LF, and CR sequences as new-line,
+ and translates each of them to a single '\n' new-line
+ character in ST. A CR LF pair therefore counts as one
+ character toward MAX_LENGTH. */
bool
ds_read_line (struct string *st, FILE *stream, size_t max_length)
{
- if (!st->ss.length && max_length == SIZE_MAX)
- {
- size_t capacity = st->capacity ? st->capacity + 1 : 0;
- ssize_t n = getline (&st->ss.string, &capacity, stream);
- if (capacity)
- st->capacity = capacity - 1;
- if (n > 0)
- {
- st->ss.length = n;
- return true;
- }
- else
- return false;
- }
- else
+ size_t length;
+
+ for (length = 0; length < max_length; length++)
{
- size_t length;
+ int c = getc (stream);
+ if (c == EOF)
+ break;
- for (length = 0; length < max_length; length++)
+ if (c == '\r') /* CR or CR LF: look one character ahead. */
{
- int c = getc (stream);
- if (c == EOF)
- break;
-
- ds_put_char (st, c);
- if (c == '\n')
- return true;
+ c = getc (stream);
+ if (c != '\n') /* Lone CR: give the look-ahead back and treat
+ the CR itself as the new-line. */
+ {
+ ungetc (c, stream); /* If the look-ahead hit end of file, C is EOF
+ here, which ISO C defines as a failed push-back
+ that leaves the stream unchanged. */
+ c = '\n';
+ }
}
-
- return length > 0;
+ ds_put_char (st, c);
+ if (c == '\n')
+ return true;
}
+
+ return length > 0; /* Stopped by EOF, an error, or MAX_LENGTH. */
}
/* Removes a comment introduced by `#' from ST,
{
memset (ds_put_uninit (st, cnt), ch, cnt);
}
+
+
+/* Replaces the contents of ST by the result of calling
+   relocate() on them.  When relocate() hands back the very
+   pointer it was given, ST is left untouched; otherwise the
+   returned string is copied into ST and then freed. */
+void
+ds_relocate (struct string *st)
+{
+  const char *before = ds_cstr (st);
+  const char *after = relocate (before);
+
+  if (after == before)
+    return;
+
+  ds_clear (st);
+  ds_put_cstr (st, after);
+  free ((char *) after);
+}
#include <math/coefficient.h>
#include <math/linreg.h>
#include <math/coefficient.h>
-#include <math/covariance-matrix.h>
#include <math/design-matrix.h>
#include <src/data/category.h>
#include <src/data/variable.h>
independent variables.
*/
pspp_linreg_cache *
-pspp_linreg_cache_alloc (size_t n, size_t p)
+pspp_linreg_cache_alloc (const struct variable *depvar, const struct variable **indep_vars,
+ size_t n, size_t p)
{
+ size_t i;
pspp_linreg_cache *c;
c = (pspp_linreg_cache *) malloc (sizeof (pspp_linreg_cache));
- c->depvar = NULL;
+ c->depvar = depvar;
+ c->indep_vars = indep_vars;
c->indep_means = gsl_vector_alloc (p);
c->indep_std = gsl_vector_alloc (p);
c->ssx = gsl_vector_alloc (p); /* Sums of squares for the
c->ss_indeps = gsl_vector_alloc (p); /* Sums of squares for the
model parameters.
*/
- c->cov = gsl_matrix_alloc (p + 1, p + 1); /* Covariance matrix. */
c->n_obs = n;
c->n_indeps = p;
+ c->n_coeffs = 0;
+ for (i = 0; i < p; i++)
+ {
+ if (var_is_numeric (indep_vars[i]))
+ {
+ c->n_coeffs++;
+ }
+ else
+ {
+ c->n_coeffs += cat_get_n_categories (indep_vars[i]) - 1;
+ }
+ }
+
+ c->cov = gsl_matrix_alloc (c->n_coeffs + 1, c->n_coeffs + 1);
/*
Default settings.
*/
return true;
}
static void
-cache_init (pspp_linreg_cache *cache, const struct design_matrix *dm)
+cache_init (pspp_linreg_cache *cache) /* Sets CACHE's degrees of freedom
+ from its N_OBS and N_INDEPS and zeroes the intercept. N_COEFFS is
+ not set here. */
{
assert (cache != NULL);
cache->dft = cache->n_obs - 1;
cache->dfm = cache->n_indeps;
cache->dfe = cache->dft - cache->dfm;
- cache->n_coeffs = dm->m->size2;
cache->intercept = 0.0;
}
cache->depvar_std = s;
cache->sst = ss;
}
- cache_init (cache, dm);
+ cache_init (cache);
+ cache->n_coeffs = dm->m->size2;
for (i = 0; i < dm->m->size2; i++)
{
if (opts->get_indep_mean_std[i])
only variables in the model are in the covariance matrix.
*/
static struct design_matrix *
-rearrange_covariance_matrix (const struct design_matrix *cov, pspp_linreg_cache *c)
+rearrange_covariance_matrix (const struct covariance_matrix *cm, pspp_linreg_cache *c)
{
- struct variable **v;
- struct variable **model_vars;
- struct variable *tmp;
+ const struct variable **model_vars;
+ struct design_matrix *cov;
struct design_matrix *result;
- int n_vars;
- int found;
- size_t *columns;
+ size_t *permutation;
size_t i;
size_t j;
size_t k;
- size_t dep_col;
+ assert (cm != NULL);
+ cov = covariance_to_design (cm); /* NOTE(review): COV is not released in this
+ function; confirm who owns what covariance_to_design returns. */
assert (cov != NULL);
assert (c != NULL);
assert (cov->m->size1 > 0);
assert (cov->m->size2 == cov->m->size1);
- v = xnmalloc (c->n_coeffs, sizeof (*v));
- model_vars = xnmalloc (c->n_coeffs, sizeof (*model_vars));
- columns = xnmalloc (cov->m->size2, sizeof (*columns));
- n_vars = pspp_linreg_get_vars (c, (const struct variable **) v);
- dep_col = 0;
- k = 0;
- for (i = 0; i < cov->m->size2; i++)
- {
- tmp = design_matrix_col_to_var (cov, i);
- found = 0;
- j = 0;
- while (!found && j < n_vars)
- {
- if (tmp == v[j])
- {
- found = 1;
- if (tmp == c->depvar)
- {
- dep_col = j;
- }
- else
- {
- columns[k] = j;
- k++;
- }
- }
- j++;
- }
- }
- k++;
- columns[k] = dep_col;
- /*
- K should now be equal to C->N_INDEPS + 1. If it is not, then
- either the code above is wrong or the caller didn't send us the
- correct values in C.
- */
- assert (k == c->n_indeps + 1);
+ permutation = xnmalloc (1 + c->n_indeps, sizeof (*permutation)); /* NOTE(review): indexed
+ below by column of RESULT; confirm RESULT never has more than
+ 1 + N_INDEPS columns. */
+ model_vars = xnmalloc (1 + c->n_indeps, sizeof (*model_vars));
+
/*
Put the model variables in the right order in MODEL_VARS.
*/
- for (i = 0; i < k; i++)
+ for (i = 0; i < c->n_indeps; i++)
{
- model_vars[i] = v[columns[i]];
+ model_vars[i] = c->indep_vars[i];
}
-
- result = covariance_matrix_create (k, model_vars);
- for (i = 0; i < result->m->size1; i++)
+ model_vars[i] = c->depvar; /* The dependent variable goes last. */
+ result = covariance_matrix_create (1 + c->n_indeps, model_vars);
+ for (j = 0; j < cov->m->size2; j++) /* For each column K of RESULT, record the
+ column J of COV that maps to the same variable. */
{
- for (j = 0; j < result->m->size2; j++)
+ k = 0;
+ while (k < result->m->size2)
{
- gsl_matrix_set (result->m, i, j, gsl_matrix_get (cov->m, columns[i], columns[j]));
+ if (design_matrix_col_to_var (cov, j) == design_matrix_col_to_var (result, k))
+ {
+ permutation[k] = j; /* NOTE(review): an entry with no matching column
+ in COV is never assigned. */
+ }
+ k++;
}
}
- free (columns);
- free (v);
+ for (i = 0; i < result->m->size1; i++) /* Copy the selected rows and columns of COV. */
+ for (j = 0; j < result->m->size2; j++)
+ {
+ gsl_matrix_set (result->m, i, j, gsl_matrix_get (cov->m, permutation[i], permutation[j]));
+ }
+ free (permutation);
+ free (model_vars);
return result;
}
/*
Estimate the model parameters from the covariance matrix only. This
method uses less memory than PSPP_LINREG, which requires the entire
data set to be stored in memory.
+
+ FULL_COV may contain columns corresponding to variables that are
+ not in the model; REARRANGE_COVARIANCE_MATRIX builds the matrix
+ for just the model's variables. This allows the caller to
+ compute one large covariance matrix once and then pass it to
+ this function without having to alter it. The drawback is that
+ the caller must set CACHE->N_COEFFS.
*/
-int
-pspp_linreg_with_cov (const struct design_matrix *full_cov,
+void
+pspp_linreg_with_cov (const struct covariance_matrix *full_cov,
pspp_linreg_cache * cache)
{
struct design_matrix *cov;
- assert (cov != NULL);
+ assert (full_cov != NULL);
assert (cache != NULL);
cov = rearrange_covariance_matrix (full_cov, cache);
- cache_init (cache, cov);
+ cache_init (cache);
reg_sweep (cov->m);
post_sweep_computations (cache, cov, cov->m);
- covariance_matrix_destroy (cov);
+ design_matrix_destroy (cov); /* COV is the rearranged copy made above, not FULL_COV. */
}
double pspp_linreg_mse (const pspp_linreg_cache *c)
assert (c != NULL);
return (c->sse / c->dfe);
}
-