* Variable Display Parameter Record::
* Long Variable Names Record::
* Very Long String Record::
+* Character Encoding Record::
* Data File and Variable Attributes Records::
* Miscellaneous Informational Records::
* Dictionary Termination Record::
Machine endianness. 1 indicates big-endian, 2 indicates little-endian.
@item int32 character_code;
+@anchor{character-code}
Character code. 1 indicates EBCDIC, 2 indicates 7-bit ASCII, 3
indicates 8-bit ASCII, 4 indicates DEC Kanji.
Windows code page numbers are also valid.
+
+Experience has shown that in many files, this field is ignored or incorrect.
+For a more reliable indication of the file's character encoding,
+see @ref{Character Encoding Record}.
@end table
@node Machine Floating-Point Info Record
The total length is @code{count} bytes.
@end table
+@node Character Encoding Record
+@section Character Encoding Record
+
+This record, if present, indicates the character encoding for string data,
+long variable names, variable labels, value labels and other strings in the
+file.
+
+@example
+/* @r{Header.} */
+int32 rec_type;
+int32 subtype;
+int32 size;
+int32 count;
+
+/* @r{Exactly @code{count} bytes of data.} */
+char encoding[];
+@end example
+
+@table @code
+@item int32 rec_type;
+Record type. Always set to 7.
+
+@item int32 subtype;
+Record subtype. Always set to 20.
+
+@item int32 size;
+The size of each element in the @code{encoding} member. Always set to 1.
+
+@item int32 count;
+The total number of bytes in @code{encoding}.
+
+@item char encoding[];
+The name of the character encoding. Normally this will be an official IANA character set name or alias.
+See @url{http://www.iana.org/assignments/character-sets}.
+@end table
+
+This record is not present in files generated by older software.
+See also @ref{character-code}.
+
+
@node Data File and Variable Attributes Records
@section Data File and Variable Attributes Records
/* Create the dictionary and populate it */
*dict = r->dict = dict_create ();
+ {
+ const int enc = PQclientEncoding (r->conn);
+
+ /* According to section 22.2 of the PostgreSQL manual
+ a value of zero (SQL_ASCII) indicates
+ "a declaration of ignorance about the encoding".
+ Accordingly, we don't set the dictionary's encoding
+ if we find this value.
+ */
+ if ( enc != 0 )
+ dict_set_encoding (r->dict, pg_encoding_to_char (enc));
+ }
+
/*
select count (*) from (select * from medium) stupid_sql_standard;
*/
-
ds_init_cstr (&query,
"BEGIN READ ONLY ISOLATION LEVEL SERIALIZABLE; "
"DECLARE pspp BINARY CURSOR FOR ");
size_t size, size_t count);
static void read_variable_attributes (struct sfm_reader *r,
size_t size, size_t count);
+static void read_character_encoding (struct sfm_reader *r,
+ size_t size, size_t count);
+
static struct text_record *open_text_record (
struct sfm_reader *, size_t size);
read_variable_attributes (r, size, count);
return;
+ case 20:
+ read_character_encoding (r, size, count);
+ return;
+
default:
sys_warn (r, _("Unrecognized record type 7, subtype %d."), subtype);
break;
close_text_record (text);
}
+/* Handles a character encoding record (dispatched for record type 7,
+   subtype 20): reads the encoding name from R and prints it, prefixed by
+   the file position observed on entry to this function.
+
+   SIZE and COUNT are the element size and element count passed in by the
+   caller.  The encoding name is only printed; it is not retained. */
+static void
+read_character_encoding (struct sfm_reader *r, size_t size, size_t count)
+{
+  const unsigned long int posn = ftell (r->file);
+
+  /* The buffer is zero-filled and holds one element more than COUNT,
+     presumably so that the string stays null-terminated. */
+  char *encoding = calloc (size, count + 1);
+
+  /* Without this check a failed allocation would hand a null pointer to
+     read_string and printf. */
+  if (encoding == NULL)
+    {
+      fprintf (stderr, "Out of memory reading character encoding record.\n");
+      exit (EXIT_FAILURE);
+    }
+
+  read_string (r, encoding, count + 1);
+
+  printf ("%08lx: Character Encoding: %s\n", posn, encoding);
+
+  /* The name is needed only for the printout above; release it so the
+     buffer is not leaked on every call. */
+  free (encoding);
+}
+
+
static void
read_variable_attributes (struct sfm_reader *r, size_t size, size_t count)
{