From a5097a183f00ab2d2dc538ba7094a4696e2fea04 Mon Sep 17 00:00:00 2001
From: John Darrington <john@darrington.wattle.id.au>
Date: Sun, 12 Jul 2009 22:13:44 +0800
Subject: [PATCH] Added a dict parameter to data_in and dealt with the
 consequences.

The data_in function now takes a pointer to a struct dictionary,
which must be the dictionary with which the output value is
associated.  The function now ensures that the data of string
values is converted to the dictionary's encoding if necessary.
---
 perl-module/PSPP.xs                     |  1 +
 src/data/data-in.c                      | 74 +++++++++++++------------
 src/data/data-in.h                      |  2 +
 src/language/data-io/data-list.c        |  2 +-
 src/language/data-io/data-parser.c      |  7 ++-
 src/language/data-io/data-parser.h      |  2 +-
 src/language/data-io/get-data.c         |  6 +-
 src/language/expressions/operations.def |  2 +-
 src/language/lexer/value-parser.c       |  3 +-
 src/language/xforms/recode.c            |  7 ++-
 src/ui/gui/find-dialog.c                |  1 +
 src/ui/gui/helper.c                     |  2 +
 src/ui/gui/helper.h                     |  1 +
 src/ui/gui/missing-val-dialog.c         |  8 ++-
 src/ui/gui/psppire-data-store.c         |  3 +-
 src/ui/gui/text-data-import-dialog.c    |  2 +
 src/ui/gui/val-labs-dialog.c            |  4 ++
 src/ui/syntax-gen.c                     |  3 +-
 18 files changed, 82 insertions(+), 48 deletions(-)

diff --git a/perl-module/PSPP.xs b/perl-module/PSPP.xs
index 94ca9b0183..b3ac4cdc31 100644
--- a/perl-module/PSPP.xs
+++ b/perl-module/PSPP.xs
@@ -614,6 +614,7 @@ CODE:
       {
 	struct substring ss = ss_cstr (SvPV_nolen (sv));
 	if ( ! data_in (ss, LEGACY_NATIVE, ifmt->type, 0, 0, 0,
+			sfi->dict,
 			case_data_rw (c, v),
 			var_get_width (v)) )
 	  {
diff --git a/src/data/data-in.c b/src/data/data-in.c
index 7e7d087d51..33e369f971 100644
--- a/src/data/data-in.c
+++ b/src/data/data-in.c
@@ -34,6 +34,7 @@
 #include "settings.h"
 #include "value.h"
 #include "format.h"
+#include "dictionary.h"
 
 #include <libpspp/assertion.h>
 #include <libpspp/legacy-encoding.h>
@@ -54,7 +55,7 @@
 /* Information about parsing one data field. */
 struct data_in
   {
-    const char *encoding;       /* Encoding of source. */
+    const char *src_enc;        /* Encoding of source. */
     struct substring input;     /* Source. */
     enum fmt_type format;       /* Input format. */
     int implied_decimals;       /* Number of implied decimal places. */
@@ -89,6 +90,9 @@ static int hexit_value (int c);
    representation in OUTPUT, which the caller must have
    initialized with the given WIDTH (0 for a numeric field,
    otherwise the string width).
+   If FORMAT is a string format, then DICT must be a pointer
+   to the dictionary associated with OUTPUT.  Otherwise, DICT
+   may be null.
 
    If no decimal point is included in a numeric format, then
    IMPLIED_DECIMALS decimal places are implied.  Specify 0 if no
@@ -103,7 +107,9 @@ static int hexit_value (int c);
 bool
 data_in (struct substring input, const char *encoding,
          enum fmt_type format, int implied_decimals,
-         int first_column, int last_column, union value *output, int width)
+         int first_column, int last_column,
+	 const struct dictionary *dict,
+	 union value *output, int width)
 {
   static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] =
     {
@@ -112,28 +118,11 @@ data_in (struct substring input, const char *encoding,
     };
 
   struct data_in i;
-  void *copy = NULL;
+  void *copy = NULL;            /* Recoded input, freed after parsing. */
   bool ok;
 
   assert ((width != 0) == fmt_is_string (format));
 
-  if (0 == strcmp (encoding, LEGACY_NATIVE)
-      || fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING))
-    {
-      i.input = input;
-      i.encoding = encoding;
-    }
-  else
-    {
-      char *s;
-      ss_alloc_uninit (&i.input, ss_length (input));
-
-      s = recode_string (LEGACY_NATIVE, encoding, ss_data (input), ss_length (input));
-      memcpy (ss_data (i.input), s, ss_length (input));
-      free (s);
-      i.encoding = LEGACY_NATIVE;
-      copy = ss_data (i.input);
-    }
   i.format = format;
   i.implied_decimals = implied_decimals;
 
@@ -142,21 +131,39 @@ data_in (struct substring input, const char *encoding,
 
   i.first_column = first_column;
   i.last_column = last_column;
+  i.src_enc = encoding;
 
-  if (!ss_is_empty (i.input))
+  if (ss_is_empty (input))
     {
-      ok = handlers[i.format] (&i);
-      if (!ok)
-        default_result (&i);
+      default_result (&i);
+      return true;
+    }
+
+  if (fmt_get_category (format) & ( FMT_CAT_BINARY | FMT_CAT_HEXADECIMAL | FMT_CAT_LEGACY))
+    {
+      i.input = input;
     }
   else
     {
-      default_result (&i);
-      ok = true;
+      const char *dest_encoding;
+      if ( dict == NULL)
+	{
+	  assert (0 == (fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING)));
+	  dest_encoding = LEGACY_NATIVE;
+	}
+      else
+	dest_encoding = dict_get_encoding (dict);
+
+      /* Parse the recoded text in place; COPY keeps it until parsing is done. */
+      copy = recode_string (dest_encoding, i.src_enc, ss_data (input), ss_length (input));
+      i.input = ss_cstr (copy);
     }
 
-  if (copy)
-    free (copy);
+  ok = handlers[i.format] (&i);
+  if (!ok)
+    default_result (&i);
+
+  free (copy);
 
   return ok;
 }
@@ -617,9 +624,8 @@ parse_A (struct data_in *i)
   const char *src = ss_data (i->input);
   size_t src_size = ss_length (i->input);
 
-  char *s = recode_string (LEGACY_NATIVE, i->encoding, src, MIN (src_size, dst_size));
-  memcpy (dst, s, dst_size);
-  free (s);
+  memcpy (dst, src, MIN (src_size, dst_size));
+
   if (dst_size > src_size)
     memset (&dst[src_size], ' ', dst_size - src_size);
 
@@ -645,10 +651,10 @@ parse_AHEX (struct data_in *i)
           return false;
         }
 
-      if (0 != strcmp (i->encoding, LEGACY_NATIVE))
+      if (0 != strcmp (i->src_enc, LEGACY_NATIVE))
         {
-          hi = legacy_to_native (i->encoding, hi);
-          lo = legacy_to_native (i->encoding, lo);
+          hi = legacy_to_native (i->src_enc, hi);
+          lo = legacy_to_native (i->src_enc, lo);
         }
       if (!c_isxdigit (hi) || !c_isxdigit (lo))
 	{
diff --git a/src/data/data-in.h b/src/data/data-in.h
index 5256bb9132..3ebd5933c4 100644
--- a/src/data/data-in.h
+++ b/src/data/data-in.h
@@ -26,9 +26,11 @@
 
 enum fmt_type;
 union value;
+struct dictionary;
 bool data_in (struct substring input, const char *encoding,
               enum fmt_type, int implied_decimals,
               int first_column, int last_column,
+	      const struct dictionary *dict,
               union value *output, int width);
 
 #endif /* data/data-in.h */
diff --git a/src/language/data-io/data-list.c b/src/language/data-io/data-list.c
index de85748856..d43af347a7 100644
--- a/src/language/data-io/data-list.c
+++ b/src/language/data-io/data-list.c
@@ -86,7 +86,7 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds)
   bool ok;
 
   dict = in_input_program () ? dataset_dict (ds) : dict_create ();
-  parser = data_parser_create ();
+  parser = data_parser_create (dict);
   reader = NULL;
 
   table = -1;                /* Print table if nonzero, -1=undecided. */
diff --git a/src/language/data-io/data-parser.c b/src/language/data-io/data-parser.c
index eab3286843..8f189b1b21 100644
--- a/src/language/data-io/data-parser.c
+++ b/src/language/data-io/data-parser.c
@@ -41,6 +41,7 @@
 /* Data parser for textual data like that read by DATA LIST. */
 struct data_parser
   {
+    const struct dictionary *dict; /*Dictionary of destination */
     enum data_parser_type type; /* Type of data to parse. */
     int skip_records;           /* Records to skip before first real data. */
     casenumber max_cases;       /* Max number of cases to read. */
@@ -79,7 +80,7 @@ static void set_any_sep (struct data_parser *parser);
 
 /* Creates and returns a new data parser. */
 struct data_parser *
-data_parser_create (void)
+data_parser_create (const struct dictionary *dict)
 {
   struct data_parser *parser = xmalloc (sizeof *parser);
 
@@ -91,6 +92,7 @@ data_parser_create (void)
   parser->fields = NULL;
   parser->field_cnt = 0;
   parser->field_allocated = 0;
+  parser->dict = dict;
 
   parser->span = true;
   parser->empty_line_has_field = false;
@@ -531,6 +533,7 @@ parse_fixed (const struct data_parser *parser, struct dfm_reader *reader,
                             f->format.w),
                  encoding, f->format.type, f->format.d,
                  f->first_column, f->first_column + f->format.w,
+		 parser->dict,
                  case_data_rw_idx (c, f->case_idx),
                  fmt_var_width (&f->format));
 
@@ -574,6 +577,7 @@ parse_delimited_span (const struct data_parser *parser,
 
       data_in (s, encoding, f->format.type, 0,
                first_column, last_column,
+	       parser->dict,
                case_data_rw_idx (c, f->case_idx),
                fmt_var_width (&f->format));
     }
@@ -614,6 +618,7 @@ parse_delimited_no_span (const struct data_parser *parser,
 
       data_in (s, encoding, f->format.type, 0,
                first_column, last_column,
+	       parser->dict,
                case_data_rw_idx (c, f->case_idx),
                fmt_var_width (&f->format));
     }
diff --git a/src/language/data-io/data-parser.h b/src/language/data-io/data-parser.h
index b250e91bb5..5a53a2f64d 100644
--- a/src/language/data-io/data-parser.h
+++ b/src/language/data-io/data-parser.h
@@ -38,7 +38,7 @@ enum data_parser_type
   };
 
 /* Creating and configuring any parser. */
-struct data_parser *data_parser_create (void);
+struct data_parser *data_parser_create (const struct dictionary *dict);
 void data_parser_destroy (struct data_parser *);
 
 enum data_parser_type data_parser_get_type (const struct data_parser *);
diff --git a/src/language/data-io/get-data.c b/src/language/data-io/get-data.c
index e4ab76a999..32202babde 100644
--- a/src/language/data-io/get-data.c
+++ b/src/language/data-io/get-data.c
@@ -271,7 +271,7 @@ static int
 parse_get_txt (struct lexer *lexer, struct dataset *ds)
 {
   struct data_parser *parser = NULL;
-  struct dictionary *dict = NULL;
+  struct dictionary *dict = dict_create ();
   struct file_handle *fh = NULL;
   struct dfm_reader *reader = NULL;
 
@@ -288,7 +288,7 @@ parse_get_txt (struct lexer *lexer, struct dataset *ds)
   if (fh == NULL)
     goto error;
 
-  parser = data_parser_create ();
+  parser = data_parser_create (dict);
   has_type = false;
   data_parser_set_type (parser, DP_DELIMITED);
   data_parser_set_span (parser, false);
@@ -465,7 +465,7 @@ parse_get_txt (struct lexer *lexer, struct dataset *ds)
     }
   lex_match (lexer, '=');
 
-  dict = dict_create ();
+
   record = 1;
   type = data_parser_get_type (parser);
   do
diff --git a/src/language/expressions/operations.def b/src/language/expressions/operations.def
index 52d4226dff..d283867214 100644
--- a/src/language/expressions/operations.def
+++ b/src/language/expressions/operations.def
@@ -573,7 +573,7 @@ string function RTRIM (string s, string c)
 function NUMBER (string s, ni_format f)
 {
   union value out;
-  data_in (ss_head (s, f->w), LEGACY_NATIVE, f->type, f->d, 0, 0, &out, 0);
+  data_in (ss_head (s, f->w), LEGACY_NATIVE, f->type, f->d, 0, 0, NULL, &out, 0);
   return out.f;
 }
 
diff --git a/src/language/lexer/value-parser.c b/src/language/lexer/value-parser.c
index 2cf9cf2bb9..c780d86f04 100644
--- a/src/language/lexer/value-parser.c
+++ b/src/language/lexer/value-parser.c
@@ -99,8 +99,9 @@ parse_number (struct lexer *lexer, double *x, const enum fmt_type *format)
   else if (lex_token (lexer) == T_STRING && format != NULL)
     {
       union value v;
+      assert (! (fmt_get_category (*format) & ( FMT_CAT_STRING )));
       data_in (ds_ss (lex_tokstr (lexer)), LEGACY_NATIVE,
-               *format, 0, 0, 0, &v, 0);
+               *format, 0, 0, 0, NULL, &v, 0);
       lex_get (lexer);
       *x = v.f;
       if (*x == SYSMIS)
diff --git a/src/language/xforms/recode.c b/src/language/xforms/recode.c
index e8a382a3d5..62b03ba073 100644
--- a/src/language/xforms/recode.c
+++ b/src/language/xforms/recode.c
@@ -83,6 +83,8 @@ struct recode_trns
   {
     struct pool *pool;
 
+
+
     /* Variable types, for convenience. */
     enum val_type src_type;     /* src_vars[*] type. */
     enum val_type dst_type;     /* dst_vars[*] type. */
@@ -90,6 +92,7 @@ struct recode_trns
     /* Variables. */
     const struct variable **src_vars;	/* Source variables. */
     const struct variable **dst_vars;	/* Destination variables. */
+    const struct dictionary *dst_dict;  /* Dictionary of dst_vars */
     char **dst_names;		/* Name of dest variables, if they're new. */
     size_t var_cnt;             /* Number of variables. */
 
@@ -540,6 +543,8 @@ create_dst_vars (struct recode_trns *trns, struct dictionary *dict)
 {
   size_t i;
 
+  trns->dst_dict = dict;
+
   for (i = 0; i < trns->var_cnt; i++)
     {
       const struct variable **var = &trns->dst_vars[i];
@@ -625,7 +630,7 @@ find_src_string (struct recode_trns *trns, const uint8_t *value,
 
             msg_disable ();
             match = data_in (ss_buffer (value, width), LEGACY_NATIVE,
-                             FMT_F, 0, 0, 0, &uv, 0);
+                             FMT_F, 0, 0, 0, trns->dst_dict,  &uv, 0);
             msg_enable ();
             out->value.f = uv.f;
             break;
diff --git a/src/ui/gui/find-dialog.c b/src/ui/gui/find-dialog.c
index 86d295830b..f24bdbd0d8 100644
--- a/src/ui/gui/find-dialog.c
+++ b/src/ui/gui/find-dialog.c
@@ -599,6 +599,7 @@ value_comparator_create (const struct variable *var, const char *target)
                   LEGACY_NATIVE,
 		  fmt->type,
 		  0, 0, 0,
+		  NULL,
 		  &vc->pattern, width) )
     {
       value_destroy (&vc->pattern, width);
diff --git a/src/ui/gui/helper.c b/src/ui/gui/helper.c
index 4ec4bb0c1a..d670002a46 100644
--- a/src/ui/gui/helper.c
+++ b/src/ui/gui/helper.c
@@ -63,6 +63,7 @@ value_to_text (union value v, const PsppireDict *dict, struct fmt_spec format)
 
 gboolean
 text_to_value (const gchar *text, union value *v,
+	       const PsppireDict *dict,
 	      struct fmt_spec format)
 {
   bool ok;
@@ -86,6 +87,7 @@ text_to_value (const gchar *text, union value *v,
 
   msg_disable ();
   ok = data_in (ss_cstr (text), LEGACY_NATIVE, format.type, 0, 0, 0,
+		dict->dict,
                 v, fmt_var_width (&format));
   msg_enable ();
 
diff --git a/src/ui/gui/helper.h b/src/ui/gui/helper.h
index 2a7be4aa14..f6c084d48e 100644
--- a/src/ui/gui/helper.h
+++ b/src/ui/gui/helper.h
@@ -38,6 +38,7 @@ gchar * value_to_text (union value v, const PsppireDict *dict, struct fmt_spec f
 
 
 gboolean text_to_value (const gchar *text, union value *v,
+			const PsppireDict *dict,
 		       struct fmt_spec format);
 
 GObject *get_object_assert (GtkBuilder *builder, const gchar *name, GType type);
diff --git a/src/ui/gui/missing-val-dialog.c b/src/ui/gui/missing-val-dialog.c
index 9230b8a6e5..16a7f6f34b 100644
--- a/src/ui/gui/missing-val-dialog.c
+++ b/src/ui/gui/missing-val-dialog.c
@@ -100,7 +100,8 @@ missing_val_dialog_accept (GtkWidget *w, gpointer data)
 	      continue;
 	    }
 
-	  if ( text_to_value (text, &v, *write_spec))
+	  if ( text_to_value (text, &v, 
+			      dialog->dict, *write_spec))
 	    {
 	      nvals++;
 	      mv_add_value (&dialog->mvl, &v);
@@ -126,9 +127,9 @@ missing_val_dialog_accept (GtkWidget *w, gpointer data)
       const gchar *low_text = gtk_entry_get_text (GTK_ENTRY (dialog->low));
       const gchar *high_text = gtk_entry_get_text (GTK_ENTRY (dialog->high));
 
-      if ( text_to_value (low_text, &low_val, *write_spec)
+      if ( text_to_value (low_text, &low_val, dialog->dict, *write_spec)
 	   &&
-	   text_to_value (high_text, &high_val, *write_spec) )
+	   text_to_value (high_text, &high_val, dialog->dict, *write_spec) )
 	{
 	  if ( low_val.f > high_val.f )
 	    {
@@ -154,6 +155,7 @@ missing_val_dialog_accept (GtkWidget *w, gpointer data)
 	{
 	  union value discrete_val;
 	  if ( !text_to_value (discrete_text, &discrete_val,
+			       dialog->dict,
 			      *write_spec))
 	    {
 	      err_dialog (_("Incorrect value for variable type"),
diff --git a/src/ui/gui/psppire-data-store.c b/src/ui/gui/psppire-data-store.c
index a97cbbb3e4..9833fb496f 100644
--- a/src/ui/gui/psppire-data-store.c
+++ b/src/ui/gui/psppire-data-store.c
@@ -959,7 +959,8 @@ psppire_data_store_data_in (PsppireDataStore *ds, casenumber casenum, gint idx,
                         FALSE);
   value_init (&value, width);
   ok = (datasheet_get_value (ds->datasheet, casenum, idx, &value)
-        && data_in (input, dict_get_encoding (dict->dict), fmt->type, 0, 0, 0, &value, width)
+        && data_in (input, UTF8, fmt->type, 0, 0, 0,
+		    dict->dict, &value, width)
         && datasheet_put_value (ds->datasheet, casenum, idx, &value));
   value_destroy (&value, width);
 
diff --git a/src/ui/gui/text-data-import-dialog.c b/src/ui/gui/text-data-import-dialog.c
index 330d284741..4baa99e014 100644
--- a/src/ui/gui/text-data-import-dialog.c
+++ b/src/ui/gui/text-data-import-dialog.c
@@ -1748,7 +1748,9 @@ parse_field (struct import_assistant *ia,
   if (field.string != NULL)
     {
       msg_disable ();
+      /* FIXME: NULL should be replaced with the destination dictionary */
       if (!data_in (field, LEGACY_NATIVE, in->type, 0, 0, 0,
+		    NULL,
                     &val, var_get_width (var)))
         {
           char fmt_string[FMT_STRING_LEN_MAX + 1];
diff --git a/src/ui/gui/val-labs-dialog.c b/src/ui/gui/val-labs-dialog.c
index 92a7fe8e0c..4b575d9779 100644
--- a/src/ui/gui/val-labs-dialog.c
+++ b/src/ui/gui/val-labs-dialog.c
@@ -72,6 +72,7 @@ on_label_entry_change (GtkEntry *entry, gpointer data)
   text = gtk_entry_get_text (GTK_ENTRY (dialog->value_entry));
 
   text_to_value (text, &v,
+		 dialog->var_store->dict,
 		*var_get_write_format (dialog->pv));
 
 
@@ -142,6 +143,7 @@ on_value_entry_change (GtkEntry *entry, gpointer data)
 
   union value v;
   text_to_value (text, &v,
+		 dialog->var_store->dict,
 		*var_get_write_format (dialog->pv));
 
 
@@ -268,6 +270,7 @@ on_change (GtkWidget *w, gpointer data)
   union value v;
 
   text_to_value (val_text, &v,
+		 dialog->var_store->dict,
 		*var_get_write_format (dialog->pv));
 
   val_labs_replace (dialog->labs, &v,
@@ -292,6 +295,7 @@ on_add (GtkWidget *w, gpointer data)
   const gchar *text = gtk_entry_get_text (GTK_ENTRY (dialog->value_entry));
 
   text_to_value (text, &v,
+		 dialog->var_store->dict,
 		*var_get_write_format (dialog->pv));
 
 
diff --git a/src/ui/syntax-gen.c b/src/ui/syntax-gen.c
index f063fa3965..22e717ac96 100644
--- a/src/ui/syntax-gen.c
+++ b/src/ui/syntax-gen.c
@@ -152,8 +152,9 @@ syntax_gen_number (struct string *output,
       v_in.f = number;
       s = data_out (&v_in, "FIXME",  format);
       msg_disable ();
+      /* FIXME: UTF8 encoded strings will fail here */
       ok = data_in (ss_cstr (s), LEGACY_NATIVE,
-                    format->type, false, 0, 0, &v_out, 0);
+                    format->type, false, 0, 0, NULL, &v_out, 0);
       msg_enable ();
       if (ok && v_out.f == number)
         {
-- 
2.30.2