Based on a file provided by Alan Mead.
uint16 one1;
uint16 compressed;
uint16 nominal_case_size;
-uint32 n_cases0;
+uint16 n_cases0;
+uint16 weight_index;
uint16 zero2;
-uint32 n_cases1;
+uint16 n_cases1;
+uint16 zero3;
char creation_date[8];
char creation_time[8];
char label[64];
@item uint32 zero0;
@itemx uint32 zero1;
@itemx uint16 zero2;
+@itemx uint16 zero3;
Always set to 0.
It seems likely that one of these variables is set to 1 if weighting
every 8 bytes after the first 8). String variables in SPSS/PC+ system
files are limited to 255 bytes.
-@item uint32 n_cases0;
-@itemx uint32 n_cases1;
+@item uint16 n_cases0;
+@itemx uint16 n_cases1;
The number of cases in the data record. Both values are the same.
Some files in the corpus contain data for the number of cases noted
here, followed by garbage that somewhat resembles data.
+@item uint16 weight_index;
+0 if the file is unweighted; otherwise, the 1-based index within the
+data record of the weighting variable, e.g.@: 4 for the first variable
+after the 3 system-defined variables.
+
@item char creation_date[8];
The date that the file was created, in @samp{mm/dd/yy} format.
Single-digit days and months are not prefixed by zeros. The string is
char creation_date[9]; /* "[m]m/dd/yy". */
char creation_time[9]; /* "[H]H:MM:SS". */
char file_label[65]; /* File label. */
+ unsigned int weight_index; /* Index of weighting variable, 0 if none. */
};
struct pcp_var_record
uint8_t missing[8];
char *label;
+ bool weight;
+
struct pcp_value_label *val_labs;
size_t n_val_labs;
read_main_header (struct pcp_reader *r, struct pcp_main_header *header)
{
unsigned int base_ofs = r->directory.main.ofs;
+ unsigned int zero0, zero1, zero2, zero3;
size_t min_values, min_data_size;
- unsigned int zero0, zero1, zero2;
unsigned int one0, one1;
unsigned int compressed;
unsigned int n_cases1;
|| !read_uint16 (r, &one1)
|| !read_uint16 (r, &compressed)
|| !read_uint16 (r, &header->nominal_case_size)
- || !read_uint32 (r, &r->n_cases)
+ || !read_uint16 (r, &r->n_cases)
+ || !read_uint16 (r, &header->weight_index)
|| !read_uint16 (r, &zero2)
- || !read_uint32 (r, &n_cases1)
+ || !read_uint16 (r, &n_cases1)
+ || !read_uint16 (r, &zero3)
|| !read_string (r, header->creation_date, sizeof header->creation_date)
|| !read_string (r, header->creation_time, sizeof header->creation_time)
|| !read_string (r, header->file_label, sizeof header->file_label))
pcp_warn (r, base_ofs, _("Record 0 specifies unexpected system missing "
"value %g (%a)."), d, d);
}
- if (one0 != 1 || one1 != 1 || zero0 != 0 || zero1 != 0 || zero2 != 0)
+ if (one0 != 1 || one1 != 1
+ || zero0 != 0 || zero1 != 0 || zero2 != 0 || zero3 != 0)
pcp_warn (r, base_ofs, _("Record 0 reserved fields have unexpected values "
- "(%u,%u,%u,%u,%u)."),
- one0, one1, zero0, zero1, zero2);
+ "(%u,%u,%u,%u,%u,%u)."),
+ one0, one1, zero0, zero1, zero2, zero3);
if (n_cases1 != r->n_cases)
pcp_warn (r, base_ofs, _("Record 0 case counts differ (%u versus %u)."),
r->n_cases, n_cases1);
read_variables_record (struct pcp_reader *r)
{
unsigned int i;
+ bool weighted;
if (!pcp_seek (r, r->directory.variables.ofs))
return false;
r->vars = pool_calloc (r->pool,
r->header.nominal_case_size, sizeof *r->vars);
+ weighted = false;
for (i = 0; i < r->header.nominal_case_size; i++)
{
struct pcp_var_record *var = &r->vars[r->n_vars++];
|| !read_bytes (r, var->missing, sizeof var->missing))
return false;
+ var->weight = r->header.weight_index && i == r->header.weight_index - 1;
+ if (var->weight)
+ weighted = true;
+
raw_type = format >> 16;
if (!fmt_from_io (raw_type, &var->format.type))
{
}
}
+ if (r->header.weight_index && !weighted)
+ pcp_warn (r, -1, _("Invalid weight index %u."), r->header.weight_index);
+
return true;
}
for (rec = var_recs; rec < &var_recs[n_var_recs]; rec++)
{
struct variable *var;
- bool weight;
char *name;
size_t i;
name = recode_string_pool ("UTF-8", dict_encoding,
rec->name, -1, r->pool);
name[strcspn (name, " ")] = '\0';
- weight = !strcmp (name, "$WEIGHT") && rec->width == 0;
/* Transform $DATE => DATE_, $WEIGHT => WEIGHT_, $CASENUM => CASENUM_. */
if (name[0] == '$')
var = rec->var = dict_create_var_assert (dict, new_name, rec->width);
free (new_name);
}
- if (weight)
- dict_set_weight (dict, var);
+ if (rec->weight)
+ {
+ if (!rec->width)
+ dict_set_weight (dict, var);
+ else
+ pcp_warn (r, rec->pos,
+ _("Cannot weight by string variable `%s'."), name);
+ }
/* Set the short name the same as the long name. */
var_set_short_name (var, 0, name);
1000,.,PQRS,TUVWXYZa,bcdefghijklmnop
])
AT_CLEANUP
+
+AT_SETUP([weighted])
+AT_KEYWORDS([sack synthetic PC+ file positive])
+AT_DATA([pc+-file.sack], [dnl
+dnl File header.
+2; 0;
+@MAIN; @MAIN_END - @MAIN;
+@VARS; @VARS_END - @VARS;
+0; 0;
+@DATA; @DATA_END - @DATA;
+(0; 0) * 11;
+i8 0 * 128;
+
+MAIN:
+ i16 1; dnl Fixed.
+ s62 "PCSPSS PSPP synthetic test product";
+ PCSYSMIS;
+ 0; 0; i16 1; dnl Fixed.
+ i16 0;
+ i16 7;
+ i16 1; i16 6;
+ i16 0; dnl Fixed.
+ i16 1; i16 0;
+ s8 "11/28/14";
+ s8 "15:11:00";
+ s64 "PSPP synthetic test file";
+MAIN_END:
+
+VARS:
+ 0; 0; 0; 0x050800; s8 "$CASENUM"; PCSYSMIS;
+ 0; 0; 0; 0x010800; s8 "$DATE"; PCSYSMIS;
+ 0; 0; 0; 0x050802; s8 "$WEIGHT"; PCSYSMIS;
+
+ dnl Numeric variables.
+ 0; 0; 0; 0x050800; s8 "NUM1"; PCSYSMIS;
+ 0; 0; 0; 0x050800; s8 "NUM2"; PCSYSMIS;
+ 0; 0; 0; 0x050800; s8 "NUM3"; PCSYSMIS;
+ 0; 0; 0; 0x050800; s8 "NUM4"; PCSYSMIS;
+VARS_END:
+
+DATA:
+ 0.0; "11/28/14"; 1.0; 2.0; 3.0; 4.0; 5.0;
+DATA_END:
+])
+AT_CHECK([sack --le pc+-file.sack > pc+-file.sav])
+AT_DATA([pc+-file.sps], [dnl
+SYSFILE INFO FILE='pc+-file.sav' ENCODING='us-ascii'.
+])
+AT_CHECK([pspp -O format=csv pc+-file.sps], [0], [dnl
+File:,pc+-file.sav
+Label:,PSPP synthetic test file
+Created:,11/28/14 15:11:00 by PCSPSS PSPP synthetic test product
+Integer Format:,Little Endian
+Real Format:,IEEE 754 LE.
+Variables:,4
+Cases:,1
+Type:,SPSS/PC+ System File
+Weight:,NUM3
+Compression:,None
+Encoding:,us-ascii
+
+Variable,Description,Position
+NUM1,"Format: F8.0
+Measure: Scale
+Role: Input
+Display Alignment: Right
+Display Width: 8",1
+NUM2,"Format: F8.0
+Measure: Scale
+Role: Input
+Display Alignment: Right
+Display Width: 8",2
+NUM3,"Format: F8.0
+Measure: Scale
+Role: Input
+Display Alignment: Right
+Display Width: 8",3
+NUM4,"Format: F8.0
+Measure: Scale
+Role: Input
+Display Alignment: Right
+Display Width: 8",4
+])
+AT_CLEANUP
\f
AT_BANNER([SPSS/PC+ file reader - negative])
warning: `pc+-file.sav' near offset 0x100: Record 0 specifies unexpected system missing value 1 (0x1p+0).
-"warning: `pc+-file.sav' near offset 0x100: Record 0 reserved fields have unexpected values (1,1,0,2,0)."
+"warning: `pc+-file.sav' near offset 0x100: Record 0 reserved fields have unexpected values (1,1,0,2,0,0)."
warning: `pc+-file.sav' near offset 0x100: Record 0 case counts differ (1 versus 3).
])
1000,.,PQRS,TUVWXYZa,bcdefghijklmnop
])
AT_CLEANUP
+
+AT_SETUP([invalid weight index])
+AT_KEYWORDS([sack synthetic PC+ file negative])
+AT_DATA([pc+-file.sack], [dnl
+dnl File header.
+2; 0;
+@MAIN; @MAIN_END - @MAIN;
+@VARS; @VARS_END - @VARS;
+0; 0;
+@DATA; @DATA_END - @DATA;
+(0; 0) * 11;
+i8 0 * 128;
+
+MAIN:
+ i16 1; dnl Fixed.
+ s62 "PCSPSS PSPP synthetic test product";
+ PCSYSMIS;
+ 0; 0; i16 1; dnl Fixed.
+ i16 0;
+ i16 7;
+ i16 1; i16 10;
+ i16 0; dnl Fixed.
+ i16 1; i16 0;
+ s8 "11/28/14";
+ s8 "15:11:00";
+ s64 "PSPP synthetic test file";
+MAIN_END:
+
+VARS:
+ 0; 0; 0; 0x050800; s8 "$CASENUM"; PCSYSMIS;
+ 0; 0; 0; 0x010800; s8 "$DATE"; PCSYSMIS;
+ 0; 0; 0; 0x050802; s8 "$WEIGHT"; PCSYSMIS;
+
+ dnl Numeric variables.
+ 0; 0; 0; 0x050800; s8 "NUM1"; PCSYSMIS;
+ 0; 0; 0; 0x050800; s8 "NUM2"; PCSYSMIS;
+ 0; 0; 0; 0x050800; s8 "NUM3"; PCSYSMIS;
+ 0; 0; 0; 0x050800; s8 "NUM4"; PCSYSMIS;
+VARS_END:
+
+DATA:
+ 0.0; "11/28/14"; 1.0; 2.0; 3.0; 4.0; 5.0;
+DATA_END:
+])
+AT_CHECK([sack --le pc+-file.sack > pc+-file.sav])
+AT_DATA([pc+-file.sps], [dnl
+GET FILE='pc+-file.sav' ENCODING='us-ascii'.
+])
+AT_CHECK([pspp -O format=csv pc+-file.sps], [0],
+ [warning: `pc+-file.sav': Invalid weight index 10.
+])
+AT_CLEANUP
+
+AT_SETUP([string weight])
+AT_KEYWORDS([sack synthetic PC+ file negative])
+AT_DATA([pc+-file.sack], [dnl
+dnl File header.
+2; 0;
+@MAIN; @MAIN_END - @MAIN;
+@VARS; @VARS_END - @VARS;
+0; 0;
+@DATA; @DATA_END - @DATA;
+(0; 0) * 11;
+i8 0 * 128;
+
+MAIN:
+ i16 1; dnl Fixed.
+ s62 "PCSPSS PSPP synthetic test product";
+ PCSYSMIS;
+ 0; 0; i16 1; dnl Fixed.
+ i16 0;
+ i16 7;
+ i16 1; i16 6;
+ i16 0; dnl Fixed.
+ i16 1; i16 0;
+ s8 "11/28/14";
+ s8 "15:11:00";
+ s64 "PSPP synthetic test file";
+MAIN_END:
+
+VARS:
+ 0; 0; 0; 0x050800; s8 "$CASENUM"; PCSYSMIS;
+ 0; 0; 0; 0x010800; s8 "$DATE"; PCSYSMIS;
+ 0; 0; 0; 0x050802; s8 "$WEIGHT"; PCSYSMIS;
+
+ dnl Variables.
+ 0; 0; 0; 0x050800; s8 "NUM1"; PCSYSMIS;
+ 0; 0; 0; 0x050800; s8 "NUM2"; PCSYSMIS;
+ 0; 0; 0; 0x010800; s8 "STR1"; PCSYSMIS;
+ 0; 0; 0; 0x050800; s8 "NUM4"; PCSYSMIS;
+VARS_END:
+
+DATA:
+ 0.0; "11/28/14"; 1.0; s8 "acbdefgh"; 3.0; 4.0; 5.0;
+DATA_END:
+])
+AT_CHECK([sack --le pc+-file.sack > pc+-file.sav])
+AT_DATA([pc+-file.sps], [dnl
+GET FILE='pc+-file.sav' ENCODING='us-ascii'.
+])
+AT_CHECK([pspp -O format=csv pc+-file.sps], [0],
+ [warning: `pc+-file.sav' near offset 0x250: Cannot weight by string variable `STR1'.
+])
+AT_CLEANUP