size_t n_lines;
};
+struct sfm_mrset /* One multiple response set as read by parse_mrsets(),
+                    kept in raw string form until decode_mrsets() turns it
+                    into a dictionary mrset. */
+ {
+ const char *name; /* Name: the token before '=' in the record.
+                      decode_mrsets() recodes it and rejects it unless it
+                      begins with '$'. */
+ const char *label; /* Human-readable label for group.  An empty string
+                       means the decoded mrset gets no label. */
+ enum mrset_type type; /* Group type. */
+ const char **vars; /* Constituent variables' names, as tokens from the
+                       record; decode_mrsets() looks each one up in the
+                       dictionary. */
+ size_t n_vars; /* Number of constituent variables. */
+
+ /* MRSET_MD only. */
+ enum mrset_md_cat_source cat_source; /* Source of category labels. */
+ bool label_from_var_label; /* 'label' taken from variable label? */
+ const char *counted; /* Counted value, as string.  Only parsed when
+                         type is MRSET_MD; decode_mrsets() converts it to
+                         a number or a space-padded string according to
+                         the set's width. */
+ };
+
struct sfm_extension_record
{
int subtype; /* Record subtype. */
struct sfm_value_label_record *labels;
size_t n_labels;
struct sfm_document_record *document;
+ struct sfm_mrset *mrsets;
+ size_t n_mrsets;
struct sfm_extension_record *extensions[32];
/* File state. */
struct sfm_read_info *);
static void parse_mrsets (struct sfm_reader *,
const struct sfm_extension_record *,
- struct dictionary *);
+ size_t *allocated_mrsets);
+static void decode_mrsets (struct sfm_reader *, struct dictionary *);
static void parse_long_var_name_map (struct sfm_reader *,
const struct sfm_extension_record *,
struct dictionary *);
struct sfm_reader *
sfm_open (struct file_handle *fh)
{
+ size_t allocated_mrsets = 0;
struct sfm_reader *r;
/* Create and initialize reader. */
if (!read_dictionary (r))
goto error;
+ if (r->extensions[EXT_MRSETS] != NULL)
+ parse_mrsets (r, r->extensions[EXT_MRSETS], &allocated_mrsets);
+
+ if (r->extensions[EXT_MRSETS2] != NULL)
+ parse_mrsets (r, r->extensions[EXT_MRSETS2], &allocated_mrsets);
+
return r;
error:
sfm_close (r);
/* The following records use short names, so they need to be parsed before
parse_long_var_name_map() changes short names to long names. */
- if (r->extensions[EXT_MRSETS] != NULL)
- parse_mrsets (r, r->extensions[EXT_MRSETS], dict);
-
- if (r->extensions[EXT_MRSETS2] != NULL)
- parse_mrsets (r, r->extensions[EXT_MRSETS2], dict);
+ decode_mrsets (r, dict);
if (r->extensions[EXT_LONG_STRINGS] != NULL
&& !parse_long_string_map (r, r->extensions[EXT_LONG_STRINGS], dict))
/* Parses record type 7, subtype 7 or 19. */
static void
parse_mrsets (struct sfm_reader *r, const struct sfm_extension_record *record,
- struct dictionary *dict)
+ size_t *allocated_mrsets)
{
struct text_record *text;
- struct mrset *mrset;
text = open_text_record (r, record, false);
for (;;)
{
- const char *counted = NULL;
- const char *name;
- const char *label;
- struct stringi_set var_names;
+ struct sfm_mrset *mrset;
size_t allocated_vars;
char delimiter;
- int width;
/* Skip extra line feeds if present. */
while (text_match (text, '\n'))
continue;
- mrset = xzalloc (sizeof *mrset);
+ if (r->n_mrsets >= *allocated_mrsets)
+ r->mrsets = pool_2nrealloc (r->pool, r->mrsets, allocated_mrsets,
+ sizeof *r->mrsets);
+ mrset = &r->mrsets[r->n_mrsets];
+ memset(mrset, 0, sizeof *mrset);
- name = text_get_token (text, ss_cstr ("="), NULL);
- if (name == NULL)
+ mrset->name = text_get_token (text, ss_cstr ("="), NULL);
+ if (mrset->name == NULL)
break;
- mrset->name = recode_string ("UTF-8", r->encoding, name, -1);
-
- if (mrset->name[0] != '$')
- {
- sys_warn (r, record->pos,
- _("`%s' does not begin with `$' at offset %zu "
- "in MRSETS record."), mrset->name, text_pos (text));
- break;
- }
if (text_match (text, 'C'))
{
mrset->label_from_var_label = true;
else if (strcmp (number, "1"))
sys_warn (r, record->pos,
- _("Unexpected label source value `%s' following `E' "
+ _("Unexpected label source value following `E' "
"at offset %zu in MRSETS record."),
- number, text_pos (text));
+ text_pos (text));
}
else
{
if (mrset->type == MRSET_MD)
{
- counted = text_parse_counted_string (r, text);
- if (counted == NULL)
+ mrset->counted = text_parse_counted_string (r, text);
+ if (mrset->counted == NULL)
break;
}
- label = text_parse_counted_string (r, text);
- if (label == NULL)
+ mrset->label = text_parse_counted_string (r, text);
+ if (mrset->label == NULL)
break;
- if (label[0] != '\0')
- mrset->label = recode_string ("UTF-8", r->encoding, label, -1);
- stringi_set_init (&var_names);
allocated_vars = 0;
- width = INT_MAX;
do
{
- const char *raw_var_name;
- struct variable *var;
- char *var_name;
+ const char *var;
- raw_var_name = text_get_token (text, ss_cstr (" \n"), &delimiter);
- if (raw_var_name == NULL)
+ var = text_get_token (text, ss_cstr (" \n"), &delimiter);
+ if (var == NULL)
{
if (delimiter != '\n')
sys_warn (r, record->pos,
text_pos (text));
break;
}
- var_name = recode_string ("UTF-8", r->encoding, raw_var_name, -1);
+
+ if (mrset->n_vars >= allocated_vars)
+ mrset->vars = pool_2nrealloc (r->pool, mrset->vars,
+ &allocated_vars,
+ sizeof *mrset->vars);
+ mrset->vars[mrset->n_vars++] = var;
+ }
+ while (delimiter != '\n');
+
+ r->n_mrsets++;
+ }
+ close_text_record (r, text);
+}
+
+static void
+decode_mrsets (struct sfm_reader *r, struct dictionary *dict)
+{
+ const struct sfm_mrset *s;
+
+ for (s = r->mrsets; s < &r->mrsets[r->n_mrsets]; s++)
+ {
+ struct stringi_set var_names;
+ struct mrset *mrset;
+ char *name;
+ int width;
+ size_t i;
+
+ name = recode_string ("UTF-8", r->encoding, s->name, -1);
+ if (name[0] != '$')
+ {
+ sys_warn (r, -1, _("Multiple response set name `%s' does not begin "
+ "with `$'."),
+ name);
+ free (name);
+ continue;
+ }
+
+ mrset = xzalloc (sizeof *mrset);
+ mrset->name = name;
+ mrset->type = s->type;
+ mrset->cat_source = s->cat_source;
+ mrset->label_from_var_label = s->label_from_var_label;
+ if (s->label[0] != '\0')
+ mrset->label = recode_string ("UTF-8", r->encoding, s->label, -1);
+
+ stringi_set_init (&var_names);
+ mrset->vars = xmalloc (s->n_vars * sizeof *mrset->vars);
+ width = INT_MAX;
+ for (i = 0; i < s->n_vars; i++)
+ {
+ struct variable *var;
+ char *var_name;
+
+ var_name = recode_string ("UTF-8", r->encoding, s->vars[i], -1);
var = dict_lookup_var (dict, var_name);
if (var == NULL)
}
if (!stringi_set_insert (&var_names, var_name))
{
- sys_warn (r, record->pos,
- _("Duplicate variable name %s "
- "at offset %zu in MRSETS record."),
- var_name, text_pos (text));
+ sys_warn (r, -1,
+ _("MRSET %s contains duplicate variable name %s."),
+ mrset->name, var_name);
free (var_name);
continue;
}
if (mrset->n_vars
&& var_get_type (var) != var_get_type (mrset->vars[0]))
{
- sys_warn (r, record->pos,
+ sys_warn (r, -1,
_("MRSET %s contains both string and "
- "numeric variables."), name);
+ "numeric variables."), mrset->name);
continue;
}
width = MIN (width, var_get_width (var));
- if (mrset->n_vars >= allocated_vars)
- mrset->vars = x2nrealloc (mrset->vars, &allocated_vars,
- sizeof *mrset->vars);
mrset->vars[mrset->n_vars++] = var;
}
- while (delimiter != '\n');
if (mrset->n_vars < 2)
{
- sys_warn (r, record->pos,
- _("MRSET %s has only %zu variables."), mrset->name,
- mrset->n_vars);
+ if (mrset->n_vars == 0)
+ sys_warn (r, -1, _("MRSET %s has no variables."), mrset->name);
+ else
+ sys_warn (r, -1, _("MRSET %s has only one variable."),
+ mrset->name);
mrset_destroy (mrset);
stringi_set_destroy (&var_names);
continue;
mrset->width = width;
value_init (&mrset->counted, width);
if (width == 0)
- mrset->counted.f = c_strtod (counted, NULL);
+ mrset->counted.f = c_strtod (s->counted, NULL);
else
value_copy_str_rpad (&mrset->counted, width,
- (const uint8_t *) counted, ' ');
+ (const uint8_t *) s->counted, ' ');
}
dict_add_mrset (dict, mrset);
- mrset = NULL;
stringi_set_destroy (&var_names);
}
- mrset_destroy (mrset);
- close_text_record (r, text);
}
/* Read record type 7, subtype 11, which specifies how variables
AT_DATA([sys-file.sack], [dnl
dnl File header.
"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file";
-2; 1; 1; 0; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3;
+2; dnl Layout code
+16; dnl Nominal case size
+0; dnl Not compressed
+0; dnl Not weighted
+0; dnl No cases.
+100.0; dnl Bias.
+"01 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file";
+i8 0 *3;
-dnl Numeric variable, no label or missing values.
-2; 0; 0; 0; 0x050800 *2; s8 "NUM1";
+dnl $a
+2; 0; 0; 0; 0x050800 *2; i8 0x82; i8 0xa0; s6 "";
+2; 0; 0; 0; 0x050800 *2; s8 "B";
+2; 0; 0; 0; 0x050800 *2; s8 "C";
-dnl Multiple response sets.
-7; 7; 1; COUNT("a=C");
-7; 19; 1; COUNT("xyz=D");
+dnl $b
+2; 0; 0; 0; 0x050800 *2; s8 "D";
+2; 0; 0; 0; 0x050800 *2; s8 "E";
+2; 0; 0; 0; 0x050800 *2; s8 "F";
+2; 0; 0; 0; 0x050800 *2; s8 "G";
+
+dnl $c
+2; 4; 0; 0; 0x010400 *2; s8 "H";
+2; 4; 0; 0; 0x010400 *2; s8 "I";
+2; 4; 0; 0; 0x010400 *2; s8 "J";
+
+dnl $d
+2; 0; 0; 0; 0x050800 *2; s8 "K";
+2; 0; 0; 0; 0x050800 *2; s8 "L";
+2; 0; 0; 0; 0x050800 *2; s8 "M";
+
+dnl $e
+2; 6; 0; 0; 0x010600 *2; s8 "N";
+2; 6; 0; 0; 0x010600 *2; s8 "O";
+2; 6; 0; 0; 0x010600 *2; s8 "P";
+
+dnl Machine integer info record.
+7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 932;
+
+7; 7; 1;
+COUNT(
+ "$a=C 10 my mcgroup "; i8 0x82; i8 0xa0; " b c"; i8 10;
+ "b=D2 55 0 g e f d"; i8 10;
+ "$c=D4 "; i8 0x82; i8 0xcd; i8 0x82; i8 0xa2; " 10 mdgroup #2 h i j"; i8 10);
+
+7; 19; 1;
+COUNT(
+ "$d=E 1 2 34 13 third mdgroup k l m"; i8 10;
+ "e=E 11 6 choice 0 n o p"; i8 10);
+
+dnl Character encoding record.
+7; 20; 1; 9; "shift_jis";
+dnl Dictionary termination record.
999; 0;
])
for variant in \
- "be 15a9bf44d0cd6186a60629b77079c5a5" \
- "le 161c99aca5e7a3684df096137e72ce5b"
+ "be 8832b331e09557a1ac0bf4e31611428a" \
+ "le 1c1dcd9930864632402b5c635395cb51"
do
set $variant
AT_CHECK_UNQUOTED([sack --$[1] sys-file.sack > sys-file.sav], [0], [], [$[2]
])
AT_DATA([sys-file.sps], [GET FILE='sys-file.sav'.
+MRSETS /DISPLAY NAME=ALL.
])
AT_CHECK([pspp -O format=csv sys-file.sps], [0], [dnl
-warning: `sys-file.sav' near offset 0xd8: `a' does not begin with `$' at offset 2 in MRSETS record.
+warning: `sys-file.sav': Multiple response set name `b' does not begin with `$'.
-warning: `sys-file.sav' near offset 0xeb: `xyz' does not begin with `$' at offset 4 in MRSETS record.
+warning: `sys-file.sav': Multiple response set name `e' does not begin with `$'.
+
+Table: Multiple Response Sets
+Name,Variables,Details
+$a,"あ
+b
+c
+","Multiple category set
+Label: my mcgroup
+"
+$c,"h
+i
+j
+","Multiple dichotomy set
+Label: mdgroup #2
+Label source: Provided by user
+Counted value: `はい'
+Category label source: Variable labels
+"
+$d,"k
+l
+m
+","Multiple dichotomy set
+Label: third mdgroup
+Label source: Provided by user
+Counted value: 34
+Category label source: Value labels of counted value
+"
])
done
AT_CLEANUP
AT_DATA([sys-file.sps], [GET FILE='sys-file.sav'.
])
AT_CHECK([pspp -O format=csv sys-file.sps], [0], [dnl
-warning: `sys-file.sav' near offset 0xd8: Unexpected label source value `2' following `E' at offset 7 in MRSETS record.
+warning: `sys-file.sav' near offset 0xd8: Unexpected label source value following `E' at offset 7 in MRSETS record.
warning: `sys-file.sav' near offset 0xd8: Expecting digit at offset 7 in MRSETS record.
])
AT_CHECK([pspp -O format=csv sys-file.sps], [0], [dnl
warning: `sys-file.sav' near offset 0xd8: Missing new-line parsing variable names at offset 13 in MRSETS record.
-warning: `sys-file.sav' near offset 0xd8: MRSET $a has only 1 variables.
+warning: `sys-file.sav': MRSET $a has only one variable.
])
done
AT_CLEANUP
AT_DATA([sys-file.sps], [GET FILE='sys-file.sav'.
])
AT_CHECK([pspp -O format=csv sys-file.sps], [0], [dnl
-warning: `sys-file.sav' near offset 0xd8: Duplicate variable name NUM1 at offset 18 in MRSETS record.
+warning: `sys-file.sav': MRSET $a contains duplicate variable name NUM1.
-warning: `sys-file.sav' near offset 0xd8: MRSET $a has only 1 variables.
+warning: `sys-file.sav': MRSET $a has only one variable.
])
done
AT_CLEANUP
AT_DATA([sys-file.sps], [GET FILE='sys-file.sav'.
])
AT_CHECK([pspp -O format=csv sys-file.sps], [0], [dnl
-warning: `sys-file.sav' near offset 0xf8: MRSET $a contains both string and numeric variables.
+warning: `sys-file.sav': MRSET $a contains both string and numeric variables.
-warning: `sys-file.sav' near offset 0xf8: MRSET $a has only 1 variables.
+warning: `sys-file.sav': MRSET $a has only one variable.
])
done
AT_CLEANUP
AT_DATA([sys-file.sps], [GET FILE='sys-file.sav'.
])
AT_CHECK([pspp -O format=csv sys-file.sps], [0], [dnl
-warning: `sys-file.sav' near offset 0xd8: MRSET $a has only 1 variables.
+warning: `sys-file.sav': MRSET $a has only one variable.
])
done
AT_CLEANUP
AT_DATA([sys-file.sps], [GET FILE='sys-file.sav'.
])
AT_CHECK([pspp -O format=csv sys-file.sps], [0], [dnl
-warning: `sys-file.sav' near offset 0xd8: MRSET $a has only 1 variables.
+warning: `sys-file.sav': MRSET $a has only one variable.
-warning: `sys-file.sav' near offset 0xd8: MRSET $b has only 0 variables.
+warning: `sys-file.sav': MRSET $b has no variables.
])
done
AT_CLEANUP