From 9254d30d06a0565c89daccedd93a94c4c6086004 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Sun, 5 Jul 2009 20:45:12 +0800 Subject: [PATCH] Change enum legacy_encoding to const char *. Preparation for i18n of union values. Remove the legacy_encoding enum and substitute it with a const char *. This makes it easier to integrate recoding of union values in the data parsing stage. --- src/data/data-in.c | 8 ++++---- src/data/data-in.h | 2 +- src/data/data-out.c | 4 ++-- src/data/data-out.h | 2 +- src/data/file-handle-def.c | 4 ++-- src/data/file-handle-def.h | 4 ++-- src/language/data-io/data-parser.c | 6 +++--- src/language/data-io/data-reader.c | 2 +- src/language/data-io/data-reader.h | 3 +-- src/language/data-io/data-writer.c | 2 +- src/language/data-io/data-writer.h | 3 +-- src/language/data-io/file-handle.q | 2 +- src/language/data-io/print.c | 4 ++-- src/libpspp/legacy-encoding.c | 12 ++++++------ src/libpspp/legacy-encoding.h | 25 +++++++++---------------- 15 files changed, 37 insertions(+), 46 deletions(-) diff --git a/src/data/data-in.c b/src/data/data-in.c index eda6d125..e7a83f25 100644 --- a/src/data/data-in.c +++ b/src/data/data-in.c @@ -53,7 +53,7 @@ /* Information about parsing one data field. */ struct data_in { - enum legacy_encoding encoding;/* Encoding of source. */ + const char *encoding; /* Encoding of source. */ struct substring input; /* Source. */ enum fmt_type format; /* Input format. */ int implied_decimals; /* Number of implied decimal places. */ @@ -100,7 +100,7 @@ static int hexit_value (int c); FIRST_COLUMN plus the length of the input because of the possibility of escaped quotes in strings, etc.) */ bool -data_in (struct substring input, enum legacy_encoding encoding, +data_in (struct substring input, const char *encoding, enum fmt_type format, int implied_decimals, int first_column, int last_column, union value *output, int width) { @@ -116,7 +116,7 @@ data_in (struct substring input, enum legacy_encoding encoding, assert ((width != 0) == fmt_is_string (format)); - if (encoding == LEGACY_NATIVE + if (0 == strcmp (encoding, LEGACY_NATIVE) || fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING)) { i.input = input; @@ -639,7 +639,7 @@ parse_AHEX (struct data_in *i) return false; } - if (i->encoding != LEGACY_NATIVE) + if (0 != strcmp (i->encoding, LEGACY_NATIVE)) { hi = legacy_to_native (i->encoding, hi); lo = legacy_to_native (i->encoding, lo); diff --git a/src/data/data-in.h b/src/data/data-in.h index 3a8d67cc..5256bb91 100644 --- a/src/data/data-in.h +++ b/src/data/data-in.h @@ -26,7 +26,7 @@ enum fmt_type; union value; -bool data_in (struct substring input, enum legacy_encoding, +bool data_in (struct substring input, const char *encoding, enum fmt_type, int implied_decimals, int first_column, int last_column, union value *output, int width); diff --git a/src/data/data-out.c b/src/data/data-out.c index e7800a8f..fa8d59e7 100644 --- a/src/data/data-out.c +++ b/src/data/data-out.c @@ -86,7 +86,7 @@ static void output_hex (const void *, size_t bytes, char *); /* Same as data_out, and additionally recodes the output from native form into the given legacy character ENCODING. */ void -data_out_legacy (const union value *input, enum legacy_encoding encoding, +data_out_legacy (const union value *input, const char *encoding, const struct fmt_spec *format, char *output) { static data_out_converter_func *const converters[FMT_NUMBER_OF_FORMATS] = @@ -98,7 +98,7 @@ data_out_legacy (const union value *input, enum legacy_encoding encoding, assert (fmt_check_output (format)); converters[format->type] (input, format, output); - if (encoding != LEGACY_NATIVE + if (0 != strcmp (encoding, LEGACY_NATIVE) && fmt_get_category (format->type) != FMT_CAT_BINARY) legacy_recode (LEGACY_NATIVE, output, encoding, output, format->w); } diff --git a/src/data/data-out.h b/src/data/data-out.h index f9f70da9..7972f6a9 100644 --- a/src/data/data-out.h +++ b/src/data/data-out.h @@ -27,7 +27,7 @@ union value; void data_out (const union value *, const struct fmt_spec *, char *); -void data_out_legacy (const union value *, enum legacy_encoding, +void data_out_legacy (const union value *, const char *encoding, const struct fmt_spec *, char *); #endif /* data-out.h */ diff --git a/src/data/file-handle-def.c b/src/data/file-handle-def.c index 0652501f..6ed3f8f9 100644 --- a/src/data/file-handle-def.c +++ b/src/data/file-handle-def.c @@ -49,7 +49,7 @@ struct file_handle /* FH_REF_FILE only. */ char *file_name; /* File name as provided by user. */ enum fh_mode mode; /* File mode. */ - enum legacy_encoding encoding;/* File encoding. */ + const char *encoding; /* File encoding. */ /* FH_REF_FILE and FH_REF_INLINE only. */ size_t record_width; /* Length of fixed-format records. */ @@ -325,7 +325,7 @@ fh_get_tab_width (const struct file_handle *handle) } /* Returns the encoding of characters read from HANDLE. */ -enum legacy_encoding +const char * fh_get_legacy_encoding (const struct file_handle *handle) { assert (handle->referent & (FH_REF_FILE | FH_REF_INLINE)); diff --git a/src/data/file-handle-def.h b/src/data/file-handle-def.h index 73e118cd..b4a6d610 100644 --- a/src/data/file-handle-def.h +++ b/src/data/file-handle-def.h @@ -54,7 +54,7 @@ struct fh_properties enum fh_mode mode; /* File mode. */ size_t record_width; /* Length of fixed-format records. */ size_t tab_width; /* Tab width, 0=do not expand tabs. */ - enum legacy_encoding encoding;/* ASCII or EBCDIC? */ + const char *encoding; /* ASCII or EBCDIC? */ }; void fh_init (void); @@ -89,7 +89,7 @@ enum fh_mode fh_get_mode (const struct file_handle *) ; /* Properties of FH_REF_FILE and FH_REF_INLINE file handles. */ size_t fh_get_record_width (const struct file_handle *); size_t fh_get_tab_width (const struct file_handle *); -enum legacy_encoding fh_get_legacy_encoding (const struct file_handle *); +const char *fh_get_legacy_encoding (const struct file_handle *); /* Properties of FH_REF_SCRATCH file handles. */ struct scratch_handle *fh_get_scratch_handle (const struct file_handle *); diff --git a/src/language/data-io/data-parser.c b/src/language/data-io/data-parser.c index 2f503423..eab32868 100644 --- a/src/language/data-io/data-parser.c +++ b/src/language/data-io/data-parser.c @@ -505,7 +505,7 @@ static bool parse_fixed (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (reader); + const char *encoding = dfm_reader_get_legacy_encoding (reader); struct field *f; int row; @@ -547,7 +547,7 @@ static bool parse_delimited_span (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (reader); + const char *encoding = dfm_reader_get_legacy_encoding (reader); struct string tmp = DS_EMPTY_INITIALIZER; struct field *f; @@ -588,7 +588,7 @@ static bool parse_delimited_no_span (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (reader); + const char *encoding = dfm_reader_get_legacy_encoding (reader); struct string tmp = DS_EMPTY_INITIALIZER; struct substring s; struct field *f; diff --git a/src/language/data-io/data-reader.c b/src/language/data-io/data-reader.c index 24ddcf13..6f620a6a 100644 --- a/src/language/data-io/data-reader.c +++ b/src/language/data-io/data-reader.c @@ -597,7 +597,7 @@ dfm_expand_tabs (struct dfm_reader *r) } /* Returns the legacy character encoding of data read from READER. */ -enum legacy_encoding +const char * dfm_reader_get_legacy_encoding (const struct dfm_reader *reader) { return fh_get_legacy_encoding (reader->fh); diff --git a/src/language/data-io/data-reader.h b/src/language/data-io/data-reader.h index c7fee613..308701c2 100644 --- a/src/language/data-io/data-reader.h +++ b/src/language/data-io/data-reader.h @@ -38,8 +38,7 @@ bool dfm_reader_error (const struct dfm_reader *); unsigned dfm_eof (struct dfm_reader *); struct substring dfm_get_record (struct dfm_reader *); void dfm_expand_tabs (struct dfm_reader *); -enum legacy_encoding dfm_reader_get_legacy_encoding ( - const struct dfm_reader *); +const char *dfm_reader_get_legacy_encoding (const struct dfm_reader *); int dfm_get_percent_read (const struct dfm_reader *); /* Line control. */ diff --git a/src/language/data-io/data-writer.c b/src/language/data-io/data-writer.c index b5df59d4..85b11d4c 100644 --- a/src/language/data-io/data-writer.c +++ b/src/language/data-io/data-writer.c @@ -200,7 +200,7 @@ dfm_close_writer (struct dfm_writer *w) } /* Returns the legacy character encoding of data written to WRITER. */ -enum legacy_encoding +const char * dfm_writer_get_legacy_encoding (const struct dfm_writer *writer) { return fh_get_legacy_encoding (writer->fh); diff --git a/src/language/data-io/data-writer.h b/src/language/data-io/data-writer.h index 2142f215..045db316 100644 --- a/src/language/data-io/data-writer.h +++ b/src/language/data-io/data-writer.h @@ -27,7 +27,6 @@ struct dfm_writer *dfm_open_writer (struct file_handle *); bool dfm_close_writer (struct dfm_writer *); bool dfm_write_error (const struct dfm_writer *); bool dfm_put_record (struct dfm_writer *, const char *rec, size_t len); -enum legacy_encoding dfm_writer_get_legacy_encoding ( - const struct dfm_writer *); +const char *dfm_writer_get_legacy_encoding (const struct dfm_writer *); #endif /* data-writer.h */ diff --git a/src/language/data-io/file-handle.q b/src/language/data-io/file-handle.q index 827dbab4..60eeee7b 100644 --- a/src/language/data-io/file-handle.q +++ b/src/language/data-io/file-handle.q @@ -102,7 +102,7 @@ cmd_file_handle (struct lexer *lexer, struct dataset *ds) properties.mode = FH_MODE_VARIABLE; break; case FH_360: - properties.encoding = LEGACY_EBCDIC; + properties.encoding = "PSPP-LEGACY-EBCDIC"; if (cmd.recform == FH_FIXED || cmd.recform == FH_F) properties.mode = FH_MODE_FIXED; else if (cmd.recform == FH_VARIABLE || cmd.recform == FH_V) diff --git a/src/language/data-io/print.c b/src/language/data-io/print.c index aae8b38a..eac5567a 100644 --- a/src/language/data-io/print.c +++ b/src/language/data-io/print.c @@ -83,7 +83,7 @@ struct print_trns struct pool *pool; /* Stores related data. */ bool eject; /* Eject page before printing? */ bool include_prefix; /* Prefix lines with space? */ - enum legacy_encoding encoding; /* Encoding to use for output. */ + const char *encoding; /* Encoding to use for output. */ struct dfm_writer *writer; /* Output file, NULL=listing file. */ struct ll_list specs; /* List of struct prt_out_specs. */ size_t record_cnt; /* Number of records to write. */ @@ -479,7 +479,7 @@ print_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED) else { ds_put_substring (&trns->line, ds_ss (&spec->string)); - if (trns->encoding != LEGACY_NATIVE) + if (0 != strcmp (trns->encoding, LEGACY_NATIVE)) { size_t length = ds_length (&spec->string); char *data = ss_data (ds_tail (&trns->line, length)); diff --git a/src/libpspp/legacy-encoding.c b/src/libpspp/legacy-encoding.c index 45f0195f..aaa1fb73 100644 --- a/src/libpspp/legacy-encoding.c +++ b/src/libpspp/legacy-encoding.c @@ -24,16 +24,16 @@ static const char ascii_to_ebcdic[256]; static const char ebcdic_to_ascii[256]; void -legacy_recode (enum legacy_encoding from, const char *src, - enum legacy_encoding to, char *dst, +legacy_recode (const char *from, const char *src, + const char *to, char *dst, size_t size) { - if (from != to) + if (0 != strcmp (from, to)) { const char *table; size_t i; - table = from == LEGACY_ASCII ? ascii_to_ebcdic : ebcdic_to_ascii; + table = (0 == strcmp (from, "PSPP-LEGACY-ASCII")) ? ascii_to_ebcdic : ebcdic_to_ascii; for (i = 0; i < size; i++) dst[i] = table[(unsigned char) src[i]]; } @@ -45,14 +45,14 @@ legacy_recode (enum legacy_encoding from, const char *src, } char -legacy_to_native (enum legacy_encoding from, char c) +legacy_to_native (const char *from, char c) { legacy_recode (from, &c, LEGACY_NATIVE, &c, 1); return c; } char -legacy_from_native (enum legacy_encoding to, char c) +legacy_from_native (const char *to, char c) { legacy_recode (LEGACY_NATIVE, &c, to, &c, 1); return c; diff --git a/src/libpspp/legacy-encoding.h b/src/libpspp/legacy-encoding.h index 12afe42b..ec889efb 100644 --- a/src/libpspp/legacy-encoding.h +++ b/src/libpspp/legacy-encoding.h @@ -20,27 +20,20 @@ #include #include -/* A legacy character encoding. - This exists only to handle the specific legacy EBCDIC-to-ASCII - recoding that MODE=360 file handles perform. */ -enum legacy_encoding - { - LEGACY_ASCII, /* ASCII or similar character set. */ - LEGACY_EBCDIC, /* IBM EBCDIC character set. */ - - /* Native character set. */ + #if 'A' == 0x41 - LEGACY_NATIVE = LEGACY_ASCII +#define LEGACY_NATIVE "PSPP-LEGACY-ASCII" #elif 'A' == 0xc1 - LEGACY_NATIVE = LEGACY_EBCDIC +#define LEGACY_NATIVE "PSPP-LEGACY-EBCDIC" #else #error Cannot detect native character set. #endif - }; -void legacy_recode (enum legacy_encoding, const char *src, - enum legacy_encoding, char *dst, size_t); -char legacy_to_native (enum legacy_encoding from, char) PURE_FUNCTION; -char legacy_from_native (enum legacy_encoding to, char) PURE_FUNCTION; + + +void legacy_recode (const char *from, const char *src, + const char *to, char *dst, size_t); +char legacy_to_native (const char *from, char) PURE_FUNCTION; +char legacy_from_native (const char *to, char) PURE_FUNCTION; #endif /* libpspp/legacy-encoding.h */ -- 2.30.2