From c9ba3a14c478461aac8305599c070824113299c2 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Tue, 7 Apr 2009 11:31:57 +0800 Subject: [PATCH] Implemented the ENCODING subcommand to DATA LIST. Implemented the ENCODING subcommand which enables syntax authors to tell pspp the encoding of a text file from which data is to be read. --- doc/data-io.texi | 8 +++++--- src/language/data-io/data-list.c | 25 +++++++++++++++++++++---- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/doc/data-io.texi b/doc/data-io.texi index 0b9583b0..b7bfda9c 100644 --- a/doc/data-io.texi +++ b/doc/data-io.texi @@ -178,7 +178,7 @@ situations. @display DATA LIST [FIXED] @{TABLE,NOTABLE@} - [FILE='file-name'] + [FILE='file-name' [ENCODING='encoding']] [RECORDS=record_count] [END=end_var] [SKIP=record_count] @@ -198,6 +198,8 @@ external file. It may be used to specify a file name as a string or a file handle (@pxref{File Handles}). If the FILE subcommand is not used, then input is assumed to be specified within the command file using @cmd{BEGIN DATA}@dots{}@cmd{END DATA} (@pxref{BEGIN DATA}). +The ENCODING subcommand may only be used if the FILE subcommand is also used. +It specifies the character encoding of the file. The optional RECORDS subcommand, which takes a single integer as an argument, is used to specify the number of lines per record. If RECORDS @@ -391,7 +393,7 @@ This example shows keywords abbreviated to their first 3 letters. DATA LIST FREE [(@{TAB,'c'@}, @dots{})] [@{NOTABLE,TABLE@}] - [FILE='file-name'] + [FILE='file-name' [ENCODING='encoding']] [SKIP=record_cnt] /var_spec@dots{} @@ -443,7 +445,7 @@ on field width apply, but they are honored on output. DATA LIST LIST [(@{TAB,'c'@}, @dots{})] [@{NOTABLE,TABLE@}] - [FILE='file-name'] + [FILE='file-name' [ENCODING='encoding']] [SKIP=record_count] /var_spec@dots{} diff --git a/src/language/data-io/data-list.c b/src/language/data-io/data-list.c index d07eae5c..3b091404 100644 --- a/src/language/data-io/data-list.c +++ b/src/language/data-io/data-list.c @@ -75,8 +75,9 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) struct dictionary *dict; struct data_parser *parser; struct dfm_reader *reader; - struct variable *end; - struct file_handle *fh; + struct variable *end = NULL; + struct file_handle *fh = NULL; + struct string encoding = DS_EMPTY_INITIALIZER; int table; enum data_parser_type type; @@ -87,8 +88,6 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) dict = in_input_program () ? dataset_dict (ds) : dict_create (); parser = data_parser_create (); reader = NULL; - end = NULL; - fh = NULL; table = -1; /* Print table if nonzero, -1=undecided. */ has_type = false; @@ -103,6 +102,16 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) if (fh == NULL) goto error; } + else if (lex_match_id (lexer, "ENCODING")) + { + lex_match (lexer, '='); + if (!lex_force_string (lexer)) + goto error; + + ds_init_string (&encoding, lex_tokstr (lexer)); + + lex_get (lexer); + } else if (lex_match_id (lexer, "RECORDS")) { lex_match (lexer, '='); @@ -228,6 +237,14 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) } type = data_parser_get_type (parser); + if (! ds_is_empty (&encoding)) + { + if ( NULL == fh) + msg (MW, _("Encoding should not be specified for inline data. It will be ignored.")); + else + dict_set_encoding (dict, ds_cstr (&encoding)); + } + if (fh == NULL) fh = fh_inline_file (); fh_set_default_handle (fh); -- 2.30.2