projects
/
pspp
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
work on lexer
[pspp]
/
src
/
data
/
pc+-file-reader.c
diff --git
a/src/data/pc+-file-reader.c
b/src/data/pc+-file-reader.c
index a127323c682d4d57b3be3f9c79a316af1eda92ab..73b9ea804b715d83332d4af132d0225884dc0e48 100644
(file)
--- a/
src/data/pc+-file-reader.c
+++ b/
src/data/pc+-file-reader.c
@@
-1,5
+1,5
@@
/* PSPP - a program for statistical analysis.
/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-2000, 2006-2007, 2009-201
4
Free Software Foundation, Inc.
+ Copyright (C) 1997-2000, 2006-2007, 2009-201
6
Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@
-73,22
+73,26
@@
struct pcp_main_header
char creation_date[9]; /* "[m]m/dd/yy". */
char creation_time[9]; /* "[H]H:MM:SS". */
char file_label[65]; /* File label. */
char creation_date[9]; /* "[m]m/dd/yy". */
char creation_time[9]; /* "[H]H:MM:SS". */
char file_label[65]; /* File label. */
+ unsigned int weight_index; /* Index of weighting variable, 0 if none. */
};
struct pcp_var_record
{
unsigned int pos;
};
struct pcp_var_record
{
unsigned int pos;
+ bool drop;
+ union value tmp;
+
char name[9];
int width;
struct fmt_spec format;
uint8_t missing[8];
char *label;
char name[9];
int width;
struct fmt_spec format;
uint8_t missing[8];
char *label;
+ bool weight;
+
struct pcp_value_label *val_labs;
size_t n_val_labs;
struct pcp_value_label *val_labs;
size_t n_val_labs;
-
- struct variable *var;
};
struct pcp_value_label
};
struct pcp_value_label
@@
-187,11
+191,10
@@
static bool parse_variable_records (struct pcp_reader *, struct dictionary *,
static struct any_reader *
pcp_open (struct file_handle *fh)
{
static struct any_reader *
pcp_open (struct file_handle *fh)
{
- struct pcp_reader *r;
struct stat s;
/* Create and initialize reader. */
struct stat s;
/* Create and initialize reader. */
-
r = xzalloc (sizeof *
r);
+
struct pcp_reader *r = XZALLOC (struct pcp_reade
r);
r->any_reader.klass = &pcp_file_reader_class;
r->pool = pool_create ();
pool_register (r->pool, free, r);
r->any_reader.klass = &pcp_file_reader_class;
r->pool = pool_create ();
pool_register (r->pool, free, r);
@@
-206,7
+209,7
@@
pcp_open (struct file_handle *fh)
goto error;
/* Open file. */
goto error;
/* Open file. */
- r->file = fn_open (fh
_get_file_name (fh)
, "rb");
+ r->file = fn_open (fh, "rb");
if (r->file == NULL)
{
msg (ME, _("Error opening `%s' for reading as an SPSS/PC+ "
if (r->file == NULL)
{
msg (ME, _("Error opening `%s' for reading as an SPSS/PC+ "
@@
-218,13
+221,13
@@
pcp_open (struct file_handle *fh)
/* Fetch file size. */
if (fstat (fileno (r->file), &s))
{
/* Fetch file size. */
if (fstat (fileno (r->file), &s))
{
- pcp_error (
ME
, 0, _("%s: stat failed (%s)."),
+ pcp_error (
r
, 0, _("%s: stat failed (%s)."),
fh_get_file_name (r->fh), strerror (errno));
goto error;
}
if (s.st_size > UINT_MAX)
{
fh_get_file_name (r->fh), strerror (errno));
goto error;
}
if (s.st_size > UINT_MAX)
{
- pcp_error (
ME
, 0, _("%s: file too large."), fh_get_file_name (r->fh));
+ pcp_error (
r
, 0, _("%s: file too large."), fh_get_file_name (r->fh));
goto error;
}
r->file_size = s.st_size;
goto error;
}
r->file_size = s.st_size;
@@
-393,15
+396,7
@@
pcp_get_strings (const struct any_reader *r_, struct pool *pool,
return aux.n;
}
return aux.n;
}
-static void
-find_and_delete_var (struct dictionary *dict, const char *name)
-{
- struct variable *var = dict_lookup_var (dict, name);
- if (var)
- dict_delete_var (dict, var);
-}
-
-/* Decodes the dictionary read from R, saving it into into *DICT. Character
+/* Decodes the dictionary read from R, saving it into *DICT. Character
strings in R are decoded using ENCODING, or an encoding obtained from R if
ENCODING is null, or the locale encoding if R specifies no encoding.
strings in R are decoded using ENCODING, or an encoding obtained from R if
ENCODING is null, or the locale encoding if R specifies no encoding.
@@
-442,10
+437,6
@@
pcp_decode (struct any_reader *r_, const char *encoding,
dictionary and may destroy or modify its variables. */
r->proto = caseproto_ref_pool (dict_get_proto (dict), r->pool);
dictionary and may destroy or modify its variables. */
r->proto = caseproto_ref_pool (dict_get_proto (dict), r->pool);
- find_and_delete_var (dict, "CASENUM_");
- find_and_delete_var (dict, "DATE_");
- find_and_delete_var (dict, "WEIGHT_");
-
*dictp = dict;
if (infop)
{
*dictp = dict;
if (infop)
{
@@
-458,7
+449,7
@@
pcp_decode (struct any_reader *r_, const char *encoding,
error:
pcp_close (&r->any_reader);
error:
pcp_close (&r->any_reader);
- dict_
destroy
(dict);
+ dict_
unref
(dict);
*dictp = NULL;
return NULL;
}
*dictp = NULL;
return NULL;
}
@@
-475,7
+466,7
@@
pcp_close (struct any_reader *r_)
if (r->file)
{
if (r->file)
{
- if (fn_close (
fh_get_file_name (r->fh)
, r->file) == EOF)
+ if (fn_close (
r->fh
, r->file) == EOF)
{
msg (ME, _("Error closing system file `%s': %s."),
fh_get_file_name (r->fh), strerror (errno));
{
msg (ME, _("Error closing system file `%s': %s."),
fh_get_file_name (r->fh), strerror (errno));
@@
-502,18
+493,20
@@
pcp_file_casereader_destroy (struct casereader *reader UNUSED, void *r_)
pcp_close (&r->any_reader);
}
pcp_close (&r->any_reader);
}
-/*
Returns true if FILE is an SPSS/PC+ system file,
-
false otherwise
. */
+/*
Detects whether FILE is an SPSS/PC+ system file. Returns 1 if so, 0 if
+
not, and a negative errno value if there is an error reading FILE
. */
static int
pcp_detect (FILE *file)
{
static const char signature[4] = "SPSS";
char buf[sizeof signature];
static int
pcp_detect (FILE *file)
{
static const char signature[4] = "SPSS";
char buf[sizeof signature];
- if (fseek (file, 0x104, SEEK_SET)
- || (fread (buf, sizeof buf, 1, file) != 1 && !feof (file)))
+ if (fseek (file, 0x104, SEEK_SET))
return -errno;
return -errno;
+ if (fread (buf, sizeof buf, 1, file) != 1)
+ return ferror (file) ? -errno : 0;
+
return !memcmp (buf, signature, sizeof buf);
}
\f
return !memcmp (buf, signature, sizeof buf);
}
\f
@@
-524,8
+517,8
@@
static bool
read_main_header (struct pcp_reader *r, struct pcp_main_header *header)
{
unsigned int base_ofs = r->directory.main.ofs;
read_main_header (struct pcp_reader *r, struct pcp_main_header *header)
{
unsigned int base_ofs = r->directory.main.ofs;
+ unsigned int zero0, zero1, zero2, zero3;
size_t min_values, min_data_size;
size_t min_values, min_data_size;
- unsigned int zero0, zero1, zero2;
unsigned int one0, one1;
unsigned int compressed;
unsigned int n_cases1;
unsigned int one0, one1;
unsigned int compressed;
unsigned int n_cases1;
@@
-551,9
+544,11
@@
read_main_header (struct pcp_reader *r, struct pcp_main_header *header)
|| !read_uint16 (r, &one1)
|| !read_uint16 (r, &compressed)
|| !read_uint16 (r, &header->nominal_case_size)
|| !read_uint16 (r, &one1)
|| !read_uint16 (r, &compressed)
|| !read_uint16 (r, &header->nominal_case_size)
- || !read_uint32 (r, &r->n_cases)
+ || !read_uint16 (r, &r->n_cases)
+ || !read_uint16 (r, &header->weight_index)
|| !read_uint16 (r, &zero2)
|| !read_uint16 (r, &zero2)
- || !read_uint32 (r, &n_cases1)
+ || !read_uint16 (r, &n_cases1)
+ || !read_uint16 (r, &zero3)
|| !read_string (r, header->creation_date, sizeof header->creation_date)
|| !read_string (r, header->creation_time, sizeof header->creation_time)
|| !read_string (r, header->file_label, sizeof header->file_label))
|| !read_string (r, header->creation_date, sizeof header->creation_date)
|| !read_string (r, header->creation_time, sizeof header->creation_time)
|| !read_string (r, header->file_label, sizeof header->file_label))
@@
-565,10
+560,11
@@
read_main_header (struct pcp_reader *r, struct pcp_main_header *header)
pcp_warn (r, base_ofs, _("Record 0 specifies unexpected system missing "
"value %g (%a)."), d, d);
}
pcp_warn (r, base_ofs, _("Record 0 specifies unexpected system missing "
"value %g (%a)."), d, d);
}
- if (one0 != 1 || one1 != 1 || zero0 != 0 || zero1 != 0 || zero2 != 0)
+ if (one0 != 1 || one1 != 1
+ || zero0 != 0 || zero1 != 0 || zero2 != 0 || zero3 != 0)
pcp_warn (r, base_ofs, _("Record 0 reserved fields have unexpected values "
pcp_warn (r, base_ofs, _("Record 0 reserved fields have unexpected values "
- "(%u,%u,%u,%u,%u)."),
- one0, one1, zero0, zero1, zero2);
+ "(%u,%u,%u,%u,%u
,%u
)."),
+ one0, one1, zero0, zero1, zero2
, zero3
);
if (n_cases1 != r->n_cases)
pcp_warn (r, base_ofs, _("Record 0 case counts differ (%u versus %u)."),
r->n_cases, n_cases1);
if (n_cases1 != r->n_cases)
pcp_warn (r, base_ofs, _("Record 0 case counts differ (%u versus %u)."),
r->n_cases, n_cases1);
@@
-633,8
+629,9
@@
read_value_labels (struct pcp_reader *r, struct pcp_var_record *var,
uint8_t len;
if (var->n_val_labs >= allocated_val_labs)
uint8_t len;
if (var->n_val_labs >= allocated_val_labs)
- var->val_labs = x2nrealloc (var->val_labs, &allocated_val_labs,
- sizeof *var->val_labs);
+ var->val_labs = pool_2nrealloc (r->pool, var->val_labs,
+ &allocated_val_labs,
+ sizeof *var->val_labs);
vl = &var->val_labs[var->n_val_labs];
if (!read_bytes (r, vl->value, sizeof vl->value)
vl = &var->val_labs[var->n_val_labs];
if (!read_bytes (r, vl->value, sizeof vl->value)
@@
-701,6
+698,7
@@
static bool
read_variables_record (struct pcp_reader *r)
{
unsigned int i;
read_variables_record (struct pcp_reader *r)
{
unsigned int i;
+ bool weighted;
if (!pcp_seek (r, r->directory.variables.ofs))
return false;
if (!pcp_seek (r, r->directory.variables.ofs))
return false;
@@
-713,6
+711,7
@@
read_variables_record (struct pcp_reader *r)
r->vars = pool_calloc (r->pool,
r->header.nominal_case_size, sizeof *r->vars);
r->vars = pool_calloc (r->pool,
r->header.nominal_case_size, sizeof *r->vars);
+ weighted = false;
for (i = 0; i < r->header.nominal_case_size; i++)
{
struct pcp_var_record *var = &r->vars[r->n_vars++];
for (i = 0; i < r->header.nominal_case_size; i++)
{
struct pcp_var_record *var = &r->vars[r->n_vars++];
@@
-730,6
+729,10
@@
read_variables_record (struct pcp_reader *r)
|| !read_bytes (r, var->missing, sizeof var->missing))
return false;
|| !read_bytes (r, var->missing, sizeof var->missing))
return false;
+ var->weight = r->header.weight_index && i == r->header.weight_index - 1;
+ if (var->weight)
+ weighted = true;
+
raw_type = format >> 16;
if (!fmt_from_io (raw_type, &var->format.type))
{
raw_type = format >> 16;
if (!fmt_from_io (raw_type, &var->format.type))
{
@@
-741,7
+744,7
@@
read_variables_record (struct pcp_reader *r)
var->format.w = (format >> 8) & 0xff;
var->format.d = format & 0xff;
fmt_fix_output (&var->format);
var->format.w = (format >> 8) & 0xff;
var->format.d = format & 0xff;
fmt_fix_output (&var->format);
- var->width = fmt_var_width (
&
var->format);
+ var->width = fmt_var_width (var->format);
if (var_label_ofs)
{
if (var_label_ofs)
{
@@
-768,6
+771,9
@@
read_variables_record (struct pcp_reader *r)
}
}
}
}
+ if (r->header.weight_index && !weighted)
+ pcp_warn (r, -1, _("Invalid weight index %u."), r->header.weight_index);
+
return true;
}
return true;
}
@@
-793,7
+799,7
@@
parse_header (struct pcp_reader *r, const struct pcp_main_header *header,
info->integer_format = INTEGER_LSB_FIRST;
info->float_format = FLOAT_IEEE_DOUBLE_LE;
info->compression = r->compressed ? ANY_COMP_SIMPLE : ANY_COMP_NONE;
info->integer_format = INTEGER_LSB_FIRST;
info->float_format = FLOAT_IEEE_DOUBLE_LE;
info->compression = r->compressed ? ANY_COMP_SIMPLE : ANY_COMP_NONE;
- info->
case_cnt
= r->n_cases;
+ info->
n_cases
= r->n_cases;
/* Convert file label to UTF-8 and put it into DICT. */
label = recode_and_trim_string (r->pool, dict_encoding, header->file_label);
/* Convert file label to UTF-8 and put it into DICT. */
label = recode_and_trim_string (r->pool, dict_encoding, header->file_label);
@@
-822,38
+828,45
@@
parse_variable_records (struct pcp_reader *r, struct dictionary *dict,
for (rec = var_recs; rec < &var_recs[n_var_recs]; rec++)
{
for (rec = var_recs; rec < &var_recs[n_var_recs]; rec++)
{
- struct variable *var;
- bool weight;
char *name;
size_t i;
name = recode_string_pool ("UTF-8", dict_encoding,
rec->name, -1, r->pool);
name[strcspn (name, " ")] = '\0';
char *name;
size_t i;
name = recode_string_pool ("UTF-8", dict_encoding,
rec->name, -1, r->pool);
name[strcspn (name, " ")] = '\0';
- weight = !strcmp (name, "$WEIGHT") && rec->width == 0;
- /* Transform $DATE => DATE_, $WEIGHT => WEIGHT_, $CASENUM => CASENUM_. */
- if (name[0] == '$')
- name = pool_asprintf (r->pool, "%s_", name + 1);
+ /* Drop system variables. */
+ rec->drop = name[0] == '$';
+ if (rec->drop)
+ {
+ value_init_pool (r->pool, &rec->tmp, rec->width);
+ continue;
+ }
- if (!dict_id_is_valid (dict, name
, false
) || name[0] == '#')
+ if (!dict_id_is_valid (dict, name) || name[0] == '#')
{
pcp_error (r, rec->pos, _("Invalid variable name `%s'."), name);
return false;
}
{
pcp_error (r, rec->pos, _("Invalid variable name `%s'."), name);
return false;
}
-
var = rec->
var = dict_create_var (dict, name, rec->width);
+
struct variable *
var = dict_create_var (dict, name, rec->width);
if (var == NULL)
{
char *new_name = dict_make_unique_var_name (dict, NULL, NULL);
pcp_warn (r, rec->pos, _("Renaming variable with duplicate name "
"`%s' to `%s'."),
name, new_name);
if (var == NULL)
{
char *new_name = dict_make_unique_var_name (dict, NULL, NULL);
pcp_warn (r, rec->pos, _("Renaming variable with duplicate name "
"`%s' to `%s'."),
name, new_name);
- var =
rec->var =
dict_create_var_assert (dict, new_name, rec->width);
+ var = dict_create_var_assert (dict, new_name, rec->width);
free (new_name);
}
free (new_name);
}
- if (weight)
- dict_set_weight (dict, var);
+ if (rec->weight)
+ {
+ if (!rec->width)
+ dict_set_weight (dict, var);
+ else
+ pcp_warn (r, rec->pos,
+ _("Cannot weight by string variable `%s'."), name);
+ }
/* Set the short name the same as the long name. */
var_set_short_name (var, 0, name);
/* Set the short name the same as the long name. */
var_set_short_name (var, 0, name);
@@
-878,8
+891,7
@@
parse_variable_records (struct pcp_reader *r, struct dictionary *dict,
if (var_is_numeric (var))
value.f = parse_float (rec->val_labs[i].value);
else
if (var_is_numeric (var))
value.f = parse_float (rec->val_labs[i].value);
else
- memcpy (value_str_rw (&value, rec->width),
- rec->val_labs[i].value, rec->width);
+ memcpy (value.s, rec->val_labs[i].value, rec->width);
utf8_label = recode_string ("UTF-8", dict_encoding,
rec->val_labs[i].label, -1);
utf8_label = recode_string ("UTF-8", dict_encoding,
rec->val_labs[i].label, -1);
@@
-904,7
+916,7
@@
parse_variable_records (struct pcp_reader *r, struct dictionary *dict,
}
/* Set formats. */
}
/* Set formats. */
- var_set_both_formats (var,
&
rec->format);
+ var_set_both_formats (var, rec->format);
}
return true;
}
return true;
@@
-937,16
+949,16
@@
pcp_file_casereader_read (struct casereader *reader, void *r_)
r->n_cases--;
c = case_create (r->proto);
r->n_cases--;
c = case_create (r->proto);
+ size_t case_idx = 0;
for (i = 0; i < r->n_vars; i++)
{
struct pcp_var_record *var = &r->vars[i];
for (i = 0; i < r->n_vars; i++)
{
struct pcp_var_record *var = &r->vars[i];
- union value *v =
case_data_rw_idx (c, i
);
+ union value *v =
var->drop ? &var->tmp : case_data_rw_idx (c, case_idx++
);
if (var->width == 0)
retval = read_case_number (r, &v->f);
else
if (var->width == 0)
retval = read_case_number (r, &v->f);
else
- retval = read_case_string (r, value_str_rw (v, var->width),
- var->width);
+ retval = read_case_string (r, v->s, var->width);
if (retval != 1)
{
if (retval != 1)
{
@@
-1130,9
+1142,7
@@
static void
pcp_msg (struct pcp_reader *r, off_t offset,
int class, const char *format, va_list args)
{
pcp_msg (struct pcp_reader *r, off_t offset,
int class, const char *format, va_list args)
{
- struct msg m;
struct string text;
struct string text;
-
ds_init_empty (&text);
if (offset >= 0)
ds_put_format (&text, _("`%s' near offset 0x%llx: "),
ds_init_empty (&text);
if (offset >= 0)
ds_put_format (&text, _("`%s' near offset 0x%llx: "),
@@
-1141,16
+1151,13
@@
pcp_msg (struct pcp_reader *r, off_t offset,
ds_put_format (&text, _("`%s': "), fh_get_file_name (r->fh));
ds_put_vformat (&text, format, args);
ds_put_format (&text, _("`%s': "), fh_get_file_name (r->fh));
ds_put_vformat (&text, format, args);
- m.category = msg_class_to_category (class);
- m.severity = msg_class_to_severity (class);
- m.file_name = NULL;
- m.first_line = 0;
- m.last_line = 0;
- m.first_column = 0;
- m.last_column = 0;
- m.text = ds_cstr (&text);
-
- msg_emit (&m);
+ struct msg *m = xmalloc (sizeof *m);
+ *m = (struct msg) {
+ .category = msg_class_to_category (class),
+ .severity = msg_class_to_severity (class),
+ .text = ds_steal_cstr (&text),
+ };
+ msg_emit (m);
}
/* Displays a warning for offset OFFSET in the file. */
}
/* Displays a warning for offset OFFSET in the file. */
@@
-1186,11
+1193,11
@@
pcp_error (struct pcp_reader *r, off_t offset, const char *format, ...)
an error. */
static inline int
read_bytes_internal (struct pcp_reader *r, bool eof_is_ok,
an error. */
static inline int
read_bytes_internal (struct pcp_reader *r, bool eof_is_ok,
- void *buf, size_t
byte_cnt
)
+ void *buf, size_t
n_bytes
)
{
{
- size_t bytes_read = fread (buf, 1,
byte_cnt
, r->file);
+ size_t bytes_read = fread (buf, 1,
n_bytes
, r->file);
r->pos += bytes_read;
r->pos += bytes_read;
- if (bytes_read ==
byte_cnt
)
+ if (bytes_read ==
n_bytes
)
return 1;
else if (ferror (r->file))
{
return 1;
else if (ferror (r->file))
{
@@
-1210,9
+1217,9
@@
read_bytes_internal (struct pcp_reader *r, bool eof_is_ok,
Returns true if successful.
Returns false upon I/O error or if end-of-file is encountered. */
static bool
Returns true if successful.
Returns false upon I/O error or if end-of-file is encountered. */
static bool
-read_bytes (struct pcp_reader *r, void *buf, size_t
byte_cnt
)
+read_bytes (struct pcp_reader *r, void *buf, size_t
n_bytes
)
{
{
- return read_bytes_internal (r, false, buf,
byte_cnt
) == 1;
+ return read_bytes_internal (r, false, buf,
n_bytes
) == 1;
}
/* Reads BYTE_CNT bytes into BUF.
}
/* Reads BYTE_CNT bytes into BUF.
@@
-1220,9
+1227,9
@@
read_bytes (struct pcp_reader *r, void *buf, size_t byte_cnt)
Returns 0 if an immediate end-of-file is encountered.
Returns -1 if an I/O error or a partial read occurs. */
static int
Returns 0 if an immediate end-of-file is encountered.
Returns -1 if an I/O error or a partial read occurs. */
static int
-try_read_bytes (struct pcp_reader *r, void *buf, size_t
byte_cnt
)
+try_read_bytes (struct pcp_reader *r, void *buf, size_t
n_bytes
)
{
{
- return read_bytes_internal (r, true, buf,
byte_cnt
);
+ return read_bytes_internal (r, true, buf,
n_bytes
);
}
/* Reads a 16-bit signed integer from R and stores its value in host format in
}
/* Reads a 16-bit signed integer from R and stores its value in host format in