From c43455db6f3f1191d969c42b8e679e0fddc44e78 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Wed, 7 Oct 2015 19:06:21 +0200 Subject: [PATCH] Ensure all lex_readers have the encoding parameter --- configure.ac | 2 +- perl-module/PSPP.xs | 4 ++-- perl-module/lib/PSPP.pm.in | 6 ++++-- src/data/file-handle-def.c | 2 +- src/data/file-handle-def.h | 2 +- src/language/control/repeat.c | 5 ++--- src/language/data-io/file-handle.q | 4 ++-- src/language/lexer/lexer.c | 29 +++++++++++++++++++++-------- src/language/lexer/lexer.h | 10 ++++++---- src/ui/gui/executor.c | 2 +- src/ui/gui/psppire-data-window.c | 2 +- utilities/pspp-convert.c | 4 ++-- 12 files changed, 44 insertions(+), 28 deletions(-) diff --git a/configure.ac b/configure.ac index 9d38a8f53d..a819d44b18 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ dnl Process this file with autoconf to produce a configure script. dnl Initialize. AC_PREREQ(2.63) -AC_INIT([GNU PSPP], [0.8.5], [bug-gnu-pspp@gnu.org], [pspp]) +AC_INIT([GNU PSPP], [0.8.6], [bug-gnu-pspp@gnu.org], [pspp]) AC_CONFIG_AUX_DIR([build-aux]) AC_CONFIG_HEADERS([config.h]) AC_CONFIG_TESTDIR([tests]) diff --git a/perl-module/PSPP.xs b/perl-module/PSPP.xs index 7577b7ad2c..dbdc73a106 100644 --- a/perl-module/PSPP.xs +++ b/perl-module/PSPP.xs @@ -638,7 +638,7 @@ INIT: } CODE: struct file_handle *fh = - fh_create_file (NULL, name, fh_default_properties () ); + fh_create_file (NULL, name, "UTF-8", fh_default_properties () ); struct syswriter_info *swi = xmalloc (sizeof (*swi)); swi->writer = sfm_open_writer (fh, dict->dict, opts); swi->dict = dict; @@ -752,7 +752,7 @@ CODE: struct casereader *reader; struct sysreader_info *sri = NULL; struct file_handle *fh = - fh_create_file (NULL, name, fh_default_properties () ); + fh_create_file (NULL, name, "UTF-8", fh_default_properties () ); struct dictionary *dict; sri = xmalloc (sizeof (*sri)); diff --git a/perl-module/lib/PSPP.pm.in b/perl-module/lib/PSPP.pm.in index 7d077992bd..be4b02ea3c 100644 --- 
a/perl-module/lib/PSPP.pm.in +++ b/perl-module/lib/PSPP.pm.in @@ -402,7 +402,8 @@ package PSPP::Sysfile; =head3 new ($filename, $dict [,%opts]) Creates a new system file from the dictionary C<$dict>. The file will -be written to the file called C<$filename>. +be written to the file called C<$filename>. The string C<$filename> must +be encoded in UTF-8. C<%opts>, if specified, is a hash containing optional parameters for the system file. Currently, the only supported parameter is C<compress>. If C<compress> is non zero, then the system file written @@ -481,7 +482,8 @@ Opens a system file for reading. Open is used to read data from an existing system file. It creates and returns a PSPP::Reader object which can be used to read -data and dictionary information from C<$filename>. +data and dictionary information from C<$filename>. The string C<$filename> +must be in UTF-8 encoding. =head3 get_case_cnt () diff --git a/src/data/file-handle-def.c b/src/data/file-handle-def.c index 9c853e5e98..db44cf701c 100644 --- a/src/data/file-handle-def.c +++ b/src/data/file-handle-def.c @@ -227,7 +227,7 @@ fh_inline_file (void) existing file identifiers. The new handle is associated with file FILE_NAME and the given PROPERTIES. */ struct file_handle * -fh_create_file (const char *id, const char *file_name, +fh_create_file (const char *id, const char *file_name, const char *file_name_encoding, const struct fh_properties *properties) { char *handle_name; diff --git a/src/data/file-handle-def.h b/src/data/file-handle-def.h index bd1fed7c1a..a57d3d72e5 100644 --- a/src/data/file-handle-def.h +++ b/src/data/file-handle-def.h @@ -75,7 +75,7 @@ void fh_done (void); /* Creating file handles. 
*/ struct file_handle *fh_create_file (const char *handle_name, - const char *file_name, + const char *file_name, const char *file_name_encoding, const struct fh_properties *); struct file_handle *fh_create_dataset (struct dataset *); const struct fh_properties *fh_default_properties (void); diff --git a/src/language/control/repeat.c b/src/language/control/repeat.c index c2e136cf41..0e464420a2 100644 --- a/src/language/control/repeat.c +++ b/src/language/control/repeat.c @@ -305,9 +305,8 @@ parse_commands (struct lexer *lexer, struct hmap *dummies) for (i = 0; i < n_values; i++) { struct string *output = &outputs[n_values - i - 1]; - struct lex_reader *reader; - - reader = lex_reader_for_substring_nocopy (ds_ss (output)); + const char *encoding = lex_get_encoding (lexer); + struct lex_reader *reader = lex_reader_for_substring_nocopy (ds_ss (output), encoding); lex_reader_set_file_name (reader, file_name); reader->line_number = line_number; lex_include (lexer, reader); diff --git a/src/language/data-io/file-handle.q b/src/language/data-io/file-handle.q index 0ac59caa0c..7ac20a080c 100644 --- a/src/language/data-io/file-handle.q +++ b/src/language/data-io/file-handle.q @@ -157,7 +157,7 @@ cmd_file_handle (struct lexer *lexer, struct dataset *ds) if (cmd.s_encoding != NULL) properties.encoding = cmd.s_encoding; - fh_create_file (handle_name, cmd.s_name, &properties); + fh_create_file (handle_name, cmd.s_name, lex_get_encoding (lexer), &properties); result = CMD_SUCCESS; @@ -249,7 +249,7 @@ fh_parse (struct lexer *lexer, enum fh_referent referent_mask, if (lex_token (lexer) == T_ID) handle = fh_from_id (lex_tokcstr (lexer)); if (handle == NULL) - handle = fh_create_file (NULL, lex_tokcstr (lexer), + handle = fh_create_file (NULL, lex_tokcstr (lexer), lex_get_encoding (lexer), fh_default_properties ()); lex_get (lexer); } diff --git a/src/language/lexer/lexer.c b/src/language/lexer/lexer.c index c263537d0d..96d0591e33 100644 --- a/src/language/lexer/lexer.c +++ 
b/src/language/lexer/lexer.c @@ -131,6 +131,7 @@ lex_reader_init (struct lex_reader *reader, reader->syntax = LEX_SYNTAX_AUTO; reader->error = LEX_ERROR_CONTINUE; reader->file_name = NULL; + reader->encoding = NULL; reader->line_number = 0; } @@ -1038,6 +1039,14 @@ lex_get_file_name (const struct lexer *lexer) return src == NULL ? NULL : src->reader->file_name; } +const char * +lex_get_encoding (const struct lexer *lexer) +{ + struct lex_source *src = lex_source__ (lexer); + return src == NULL ? NULL : src->reader->encoding; +} + + /* Returns the syntax mode for the syntax file from which the current drawn is drawn. Returns LEX_SYNTAX_AUTO for a T_STOP token or if the command's source does not have line numbers. @@ -1527,9 +1536,11 @@ static void lex_source_destroy (struct lex_source *src) { char *file_name = src->reader->file_name; + char *encoding = src->reader->encoding; if (src->reader->class->destroy != NULL) src->reader->class->destroy (src->reader); free (file_name); + free (encoding); free (src->buffer); while (!deque_is_empty (&src->deque)) lex_source_pop__ (src); @@ -1575,6 +1586,7 @@ lex_reader_for_file (const char *file_name, const char *encoding, r->reader.syntax = syntax; r->reader.error = error; r->reader.file_name = xstrdup (file_name); + r->reader.encoding = encoding ? xstrdup (encoding) : NULL; r->reader.line_number = 1; r->istream = istream; @@ -1633,16 +1645,17 @@ struct lex_string_reader static struct lex_reader_class lex_string_reader_class; /* Creates and returns a new lex_reader for the contents of S, which must be - encoded in UTF-8. The new reader takes ownership of S and will free it + encoded in the given ENCODING. The new reader takes ownership of S and will free it with ss_dealloc() when it is closed. 
*/ struct lex_reader * -lex_reader_for_substring_nocopy (struct substring s) +lex_reader_for_substring_nocopy (struct substring s, const char *encoding) { struct lex_string_reader *r; r = xmalloc (sizeof *r); lex_reader_init (&r->reader, &lex_string_reader_class); r->reader.syntax = LEX_SYNTAX_AUTO; + r->reader.encoding = encoding ? xstrdup (encoding) : NULL; r->s = s; r->offset = 0; @@ -1650,25 +1663,25 @@ lex_reader_for_substring_nocopy (struct substring s) } /* Creates and returns a new lex_reader for a copy of null-terminated string S, - which must be encoded in UTF-8. The caller retains ownership of S. */ + which must be encoded in ENCODING. The caller retains ownership of S. */ struct lex_reader * -lex_reader_for_string (const char *s) +lex_reader_for_string (const char *s, const char *encoding) { struct substring ss; ss_alloc_substring (&ss, ss_cstr (s)); - return lex_reader_for_substring_nocopy (ss); + return lex_reader_for_substring_nocopy (ss, encoding); } /* Formats FORMAT as a printf()-like format string and creates and returns a new lex_reader for the formatted result. */ struct lex_reader * -lex_reader_for_format (const char *format, ...) +lex_reader_for_format (const char *format, const char *encoding, ...) { struct lex_reader *r; va_list args; - va_start (args, format); - r = lex_reader_for_substring_nocopy (ss_cstr (xvasprintf (format, args))); + va_start (args, encoding); + r = lex_reader_for_substring_nocopy (ss_cstr (xvasprintf (format, args)), encoding); va_end (args); return r; diff --git a/src/language/lexer/lexer.h b/src/language/lexer/lexer.h index 01c4e91501..03202e260b 100644 --- a/src/language/lexer/lexer.h +++ b/src/language/lexer/lexer.h @@ -53,6 +53,7 @@ struct lex_reader const struct lex_reader_class *class; enum lex_syntax_mode syntax; enum lex_error_mode error; + char *encoding; char *file_name; /* NULL if not associated with a file. */ int line_number; /* 1-based initial line number, 0 if none. 
*/ }; @@ -85,10 +86,10 @@ struct lex_reader *lex_reader_for_file (const char *file_name, const char *encoding, enum lex_syntax_mode syntax, enum lex_error_mode error); -struct lex_reader *lex_reader_for_string (const char *); -struct lex_reader *lex_reader_for_format (const char *, ...) - PRINTF_FORMAT (1, 2); -struct lex_reader *lex_reader_for_substring_nocopy (struct substring); +struct lex_reader *lex_reader_for_string (const char *, const char *encoding); +struct lex_reader *lex_reader_for_format (const char *, const char *, ...) + PRINTF_FORMAT (1, 3); +struct lex_reader *lex_reader_for_substring_nocopy (struct substring, const char *encoding); /* Initialization. */ struct lexer *lex_create (void); @@ -150,6 +151,7 @@ int lex_get_last_line_number (const struct lexer *, int n); int lex_get_first_column (const struct lexer *, int n); int lex_get_last_column (const struct lexer *, int n); const char *lex_get_file_name (const struct lexer *); +const char *lex_get_encoding (const struct lexer *); /* Issuing errors. */ void lex_error (struct lexer *, const char *, ...) 
PRINTF_FORMAT (2, 3); diff --git a/src/ui/gui/executor.c b/src/ui/gui/executor.c index e9ef3f07b7..9b4c4c9186 100644 --- a/src/ui/gui/executor.c +++ b/src/ui/gui/executor.c @@ -200,5 +200,5 @@ execute_syntax_string (PsppireDataWindow *window, gchar *syntax) void execute_const_syntax_string (PsppireDataWindow *window, const gchar *syntax) { - execute_syntax (window, lex_reader_for_string (syntax)); + execute_syntax (window, lex_reader_for_string (syntax, "UTF-8")); } diff --git a/src/ui/gui/psppire-data-window.c b/src/ui/gui/psppire-data-window.c index a301dda27d..c68f65e129 100644 --- a/src/ui/gui/psppire-data-window.c +++ b/src/ui/gui/psppire-data-window.c @@ -370,7 +370,7 @@ load_file (PsppireWindow *de, const gchar *file_name, const char *encoding, } ok = execute_syntax (PSPPIRE_DATA_WINDOW (de), - lex_reader_for_string (syntax)); + lex_reader_for_string (syntax, "UTF-8")); g_free (syntax); if (ok && syn == NULL) diff --git a/utilities/pspp-convert.c b/utilities/pspp-convert.c index 264ec7a705..f71109524f 100644 --- a/utilities/pspp-convert.c +++ b/utilities/pspp-convert.c @@ -164,12 +164,12 @@ main (int argc, char *argv[]) goto exit; } - input_fh = fh_create_file (NULL, input_filename, fh_default_properties ()); + input_fh = fh_create_file (NULL, input_filename, NULL, fh_default_properties ()); reader = any_reader_open_and_decode (input_fh, encoding, &dict, NULL); if (reader == NULL) exit (1); - output_fh = fh_create_file (NULL, output_filename, fh_default_properties ()); + output_fh = fh_create_file (NULL, output_filename, NULL, fh_default_properties ()); if (!strcmp (output_format, "csv") || !strcmp (output_format, "txt")) { struct csv_writer_options options; -- 2.30.2