From: Ben Pfaff Date: Thu, 16 Jun 2011 06:13:12 +0000 (-0700) Subject: sys-file-reader, sys-file-writer: Use codepage numbers. X-Git-Tag: v0.7.9~278 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c3d6b68809037b48ff51dd693a0c8e75bcc30d7a;p=pspp-builds.git sys-file-reader, sys-file-writer: Use codepage numbers. PSPP has had a library for converting between encoding names and codepage numbers for a while, but the system file reader and writer code has not taken advantage of it. This commit makes them use it. --- diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 0c6aa808..00db5b2f 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -1167,6 +1167,7 @@ choose_encoding (struct sfm_reader *r, if (ext_integer) { int codepage = parse_int (r, ext_integer->data, 7 * 4); + const char *encoding; switch (codepage) { @@ -1184,14 +1185,11 @@ choose_encoding (struct sfm_reader *r, case 4: return "MS_KANJI"; - case 65000: - return "UTF-7"; - - case 65001: - return "UTF-8"; - default: - return pool_asprintf (r->pool, "CP%d", codepage); + encoding = sys_get_encoding_from_codepage (codepage); + if (encoding != NULL) + return encoding; + break; } } diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c index 5ebf3ede..0aeb93a0 100644 --- a/src/data/sys-file-writer.c +++ b/src/data/sys-file-writer.c @@ -100,7 +100,8 @@ static void write_header (struct sfm_writer *, const struct dictionary *); static void write_variable (struct sfm_writer *, const struct variable *); static void write_value_labels (struct sfm_writer *, struct variable *, int idx); -static void write_integer_info_record (struct sfm_writer *); +static void write_integer_info_record (struct sfm_writer *, + const struct dictionary *); static void write_float_info_record (struct sfm_writer *); static void write_longvar_table (struct sfm_writer *w, @@ -248,7 +249,7 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, if 
(dict_get_document_line_cnt (d) > 0) write_documents (w, d); - write_integer_info_record (w); + write_integer_info_record (w, d); write_float_info_record (w); write_mrsets (w, d, true); @@ -885,10 +886,12 @@ write_longvar_table (struct sfm_writer *w, const struct dictionary *dict) /* Write integer information record. */ static void -write_integer_info_record (struct sfm_writer *w) +write_integer_info_record (struct sfm_writer *w, + const struct dictionary *d) { int version_component[3]; int float_format; + int codepage; /* Parse the version string. */ memset (version_component, 0, sizeof version_component); @@ -906,6 +909,16 @@ write_integer_info_record (struct sfm_writer *w) else abort (); + /* Choose codepage. */ + codepage = sys_get_codepage_from_encoding (dict_get_encoding (d)); + if (codepage == 0) + { + /* Default to "7-bit ASCII" if the codepage number is unknown, because + many files use this codepage number regardless of their actual + encoding. */ + codepage = 2; + } + /* Write record. */ write_int (w, 7); /* Record type. */ write_int (w, 3); /* Record subtype. */ @@ -918,7 +931,7 @@ write_integer_info_record (struct sfm_writer *w) write_int (w, float_format); write_int (w, 1); /* Compression code. */ write_int (w, INTEGER_NATIVE == INTEGER_MSB_FIRST ? 1 : 2); - write_int (w, 2); /* 7-bit ASCII. */ + write_int (w, codepage); } /* Write floating-point information record. */