@table @code
@item char rec_type[4];
-Record type code, set to @samp{$FL2}.
+Record type code, set to @samp{$FL2}, that is, either @code{24 46 4c
+32} if the file uses an ASCII-based character encoding, or @code{5b c6
+d3 f2} if the file uses an EBCDIC-based character encoding.
@item char prod_name[60];
Product identification string. This always begins with the characters
been actually observed in system files:
@table @asis
+@item 1
+EBCDIC.
+
@item 2
7-bit ASCII.
The following additional values are known to be defined:
@table @asis
-@item 1
-EBCDIC.
-
@item 3
8-bit ``ASCII''.
Other Windows code page numbers are known to be generally valid.
-Old versions of SPSS always wrote value 2 in this field, regardless of
-the encoding in use. Newer versions also write the character encoding
-as a string (see @ref{Character Encoding Record}).
+Old versions of SPSS for Unix and Windows always wrote value 2 in this
+field, regardless of the encoding in use. Newer versions also write
+the character encoding as a string (see @ref{Character Encoding
+Record}).
@end table
@node Machine Floating-Point Info Record
/* PSPP - a program for statistical analysis.
- Copyright (C) 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+ Copyright (C) 2006-2007, 2009-2012 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
struct dictionary;
+/* Magic numbers.
+
+ Both of these are actually $FL2 in the respective character set. The "FL2"
+ part is invariant among national variants of each character set, but "$" has
+ different encodings, so it is safer to write them as hexadecimal. */
+#define ASCII_MAGIC "\x24\x46\x4c\x32"
+#define EBCDIC_MAGIC "\x5b\xc6\xd3\xf2"
+
/* A variable in a system file. */
struct sfm_var
{
/* Fields from the top-level header record. */
struct sfm_header_record
{
+ char magic[5]; /* First 4 bytes of file, then null. */
int weight_idx; /* 0 if unweighted, otherwise a var index. */
int nominal_case_size; /* Number of var positions. */
static const char *choose_encoding (
struct sfm_reader *,
+ const struct sfm_header_record *,
const struct sfm_extension_record *ext_integer,
const struct sfm_extension_record *ext_encoding);
First, figure out the correct character encoding, because this determines
how the rest of the header data is to be interpreted. */
- dict = dict_create (choose_encoding (r, extensions[EXT_INTEGER],
+ dict = dict_create (choose_encoding (r, &header, extensions[EXT_INTEGER],
extensions[EXT_ENCODING]));
r->encoding = dict_get_encoding (dict);
bool
sfm_detect (FILE *file)
{
- char rec_type[5];
+ char magic[5];
- if (fread (rec_type, 4, 1, file) != 1)
+ if (fread (magic, 4, 1, file) != 1)
return false;
- rec_type[4] = '\0';
+ magic[4] = '\0';
- return !strcmp ("$FL2", rec_type);
+ return !strcmp (ASCII_MAGIC, magic) || !strcmp (EBCDIC_MAGIC, magic);
}
\f
/* Reads the global header of the system file. Initializes *HEADER and *INFO,
read_header (struct sfm_reader *r, struct sfm_read_info *info,
struct sfm_header_record *header)
{
- char rec_type[5];
uint8_t raw_layout_code[4];
uint8_t raw_bias[8];
- read_string (r, rec_type, sizeof rec_type);
+ read_string (r, header->magic, sizeof header->magic);
read_string (r, header->eye_catcher, sizeof header->eye_catcher);
- if (strcmp ("$FL2", rec_type) != 0)
+ if (strcmp (ASCII_MAGIC, header->magic)
+ && strcmp (EBCDIC_MAGIC, header->magic))
sys_error (r, 0, _("This is not an SPSS system file."));
/* Identify integer format. */
static const char *
choose_encoding (struct sfm_reader *r,
+ const struct sfm_header_record *header,
const struct sfm_extension_record *ext_integer,
const struct sfm_extension_record *ext_encoding)
{
}
}
+ /* If the file magic number is EBCDIC then its character data is too. */
+ if (!strcmp (header->magic, EBCDIC_MAGIC))
+ return "EBCDIC-US";
+
return locale_charset ();
}
/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2007, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 1997-2000, 2006-2012 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
static void
write_header (struct sfm_writer *w, const struct dictionary *d)
{
+ const char *dict_encoding = dict_get_encoding (d);
char prod_name[61];
char creation_date[10];
char creation_time[9];
time_t t;
/* Record-type code. */
- write_string (w, "$FL2", 4);
+ if (is_encoding_ebcdic_compatible (dict_encoding))
+ write_string (w, EBCDIC_MAGIC, 4);
+ else
+ write_string (w, ASCII_MAGIC, 4);
/* Product identification. */
snprintf (prod_name, sizeof prod_name, "@(#) SPSS DATA FILE %s - %s",
write_integer_info_record (struct sfm_writer *w,
const struct dictionary *d)
{
+ const char *dict_encoding = dict_get_encoding (d);
int version_component[3];
int float_format;
int codepage;
abort ();
/* Choose codepage. */
- codepage = sys_get_codepage_from_encoding (dict_get_encoding (d));
+ codepage = sys_get_codepage_from_encoding (dict_encoding);
if (codepage == 0)
{
- /* Default to "7-bit ASCII" if the codepage number is unknown, because
+ /* The codepage is unknown. Choose a default.
+
+ For an EBCDIC-compatible encoding, use the value for EBCDIC.
+
+ For an ASCII-compatible encoding, default to "7-bit ASCII", because
many files use this codepage number regardless of their actual
- encoding. */
- codepage = 2;
+ encoding.
+ */
+ if (is_encoding_ascii_compatible (dict_encoding))
+ codepage = 2;
+ else if (is_encoding_ebcdic_compatible (dict_encoding))
+ codepage = 1;
}
/* Write record. */
/* PSPP - a program for statistical analysis.
- Copyright (C) 2006, 2009, 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
e->is_ascii_compatible = ss_equals (in, out);
ss_dealloc (&out);
+ if (!e->is_ascii_compatible && e->unit == 1)
+ {
+ out = recode_substring_pool ("UTF-8", name, ss_cstr ("A"), NULL);
+ e->is_ebcdic_compatible = (out.length == 1
+ && (uint8_t) out.string[0] == 0xc1);
+ ss_dealloc (&out);
+ }
+ else
+ e->is_ebcdic_compatible = false;
+
return ok;
}
return e.is_ascii_compatible;
}
+bool
+is_encoding_ebcdic_compatible (const char *encoding)
+{
+ struct encoding_info e;
+
+ get_encoding_info (&e, encoding);
+ return e.is_ebcdic_compatible;
+}
+
/* Returns true if iconv can convert ENCODING to and from UTF-8,
otherwise false. */
bool
/* PSPP - a program for statistical analysis.
- Copyright (C) 2006, 2010, 2011 Free Software Foundation, Inc.
+ Copyright (C) 2006, 2010, 2011, 2012 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
used in ASCII text files has the same value in this encoding. */
bool is_ascii_compatible;
+ /* True if this encoding has a unit width of 1 byte and appears to be
+ EBCDIC-based. */
+ bool is_ebcdic_compatible;
+
/* Character information. */
int unit; /* Unit width, in bytes. */
char cr[MAX_UNIT]; /* \r in encoding, 'unit' bytes long. */
bool get_encoding_info (struct encoding_info *, const char *name);
bool is_encoding_ascii_compatible (const char *encoding);
+bool is_encoding_ebcdic_compatible (const char *encoding);
bool is_encoding_supported (const char *encoding);
#endif /* i18n.h */