dictionary: Make dict_create() take the new dictionary's encoding.

author Ben Pfaff <blp@cs.stanford.edu>

Sun, 24 Apr 2011 04:40:48 +0000 (21:40 -0700)

committer Ben Pfaff <blp@cs.stanford.edu>

Tue, 26 Apr 2011 04:13:53 +0000 (21:13 -0700)
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 24 Apr 2011 04:40:48 +0000 (21:40 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Tue, 26 Apr 2011 04:13:53 +0000 (21:13 -0700)
diff --git a/perl-module/PSPP.xs b/perl-module/PSPP.xs

index 58eac5b762b58080d7ec3dbb9abf79e3ed3b50ca..896d7d88e394ff5a2a0ef0d4e9ede6683a61ac1a 100644 (file)
--- a/perl-module/PSPP.xs
+++ b/perl-module/PSPP.xs
@@ -223,7 +223,7 @@ MODULE = PSPP               PACKAGE = PSPP::Dict
  struct dictionary *
  pxs_dict_new()
  CODE:
- RETVAL = dict_create ();
+ RETVAL = dict_create ("UTF-8");
  OUTPUT:
   RETVAL
  
@@ -592,7 +592,6 @@ CODE:
   struct file_handle *fh =
    fh_create_file (NULL, name, fh_default_properties () );
   struct sysfile_info *sfi = xmalloc (sizeof (*sfi));
- dict_set_encoding (dict, UTF8);
   sfi->writer = sfm_open_writer (fh, dict, opts);
   sfi->dict = dict;
   sfi->opened = true;
diff --git a/src/data/dataset.c b/src/data/dataset.c

index 5d0598e3a6949444c65acfd9b7d71861a7987941..466696c7b9a34fb277d7120bfb1dfbb090434457 100644 (file)
--- a/src/data/dataset.c
+++ b/src/data/dataset.c
@@ -124,9 +124,8 @@ dataset_create (void)
    struct dataset *ds;
  
    ds = xzalloc (sizeof *ds);
-  ds->dict = dict_create ();
+  ds->dict = dict_create (get_default_encoding ());
    dict_set_change_callback (ds->dict, dict_callback, ds);
-  dict_set_encoding (ds->dict, get_default_encoding ());
  
    ds->caseinit = caseinit_create ();
    proc_cancel_all_transformations (ds);
diff --git a/src/data/dictionary.c b/src/data/dictionary.c

index 7d67defbfecfc61fa3f3637ecfbb94c127c4b75a..c8f58516ecc79d1d6a7bc78a29353f838745634c 100644 (file)
--- a/src/data/dictionary.c
+++ b/src/data/dictionary.c
@@ -87,16 +87,6 @@ struct dictionary
  static void dict_unset_split_var (struct dictionary *, struct variable *);
  static void dict_unset_mrset_var (struct dictionary *, struct variable *);
  
-void
-dict_set_encoding (struct dictionary *d, const char *enc)
-{
-  if (enc)
-    {
-      free (d->encoding);
-      d->encoding = xstrdup (enc);
-    }
-}
-
  const char *
  dict_get_encoding (const struct dictionary *d)
  {
@@ -171,14 +161,16 @@ dict_copy_callbacks (struct dictionary *dest,
    dest->cb_data = src->cb_data;
  }
  
-/* Creates and returns a new dictionary. */
+/* Creates and returns a new dictionary with the specified ENCODING. */
  struct dictionary *
-dict_create (void)
+dict_create (const char *encoding)
  {
    struct dictionary *d = xzalloc (sizeof *d);
  
+  d->encoding = xstrdup (encoding);
    hmap_init (&d->name_map);
    attrset_init (&d->attributes);
+
    return d;
  }
  
@@ -196,7 +188,7 @@ dict_clone (const struct dictionary *s)
    struct dictionary *d;
    size_t i;
  
-  d = dict_create ();
+  d = dict_create (s->encoding);
  
    /* Set the new dictionary's encoding early so that string length limitations
       are interpreted correctly. */
@@ -1660,7 +1652,7 @@ struct variable *
  dict_create_internal_var (int case_idx, int width)
  {
    if (internal_dict == NULL)
-    internal_dict = dict_create ();
+    internal_dict = dict_create ("UTF-8");
  
    for (;;)
      {
diff --git a/src/data/dictionary.h b/src/data/dictionary.h

index fa5d0ddeb8fc355d4f48a28e69b24b5e883f0645..2a196950a97a3d6bae5aa86bb3dab83733576df8 100644 (file)
--- a/src/data/dictionary.h
+++ b/src/data/dictionary.h
@@ -26,7 +26,7 @@ struct string;
  struct ccase;
  
  /* Creating dictionaries. */
-struct dictionary *dict_create (void);
+struct dictionary *dict_create (const char *encoding);
  struct dictionary *dict_clone (const struct dictionary *);
  
  
@@ -164,7 +164,6 @@ void dict_set_attributes (struct dictionary *, const struct attrset *);
  bool dict_has_attributes (const struct dictionary *);
  
  /* Data encoding. */
-void dict_set_encoding (struct dictionary *d, const char *enc);
  const char *dict_get_encoding (const struct dictionary *d);
  
  bool dict_id_is_valid (const struct dictionary *, const char *id,
diff --git a/src/data/gnumeric-reader.c b/src/data/gnumeric-reader.c

index 6392f9b805d620ccbea4bf139bbbfc3c0cf3aac9..61fbab899b8bd44eff63633bdb0f90a57a2553d4 100644 (file)
--- a/src/data/gnumeric-reader.c
+++ b/src/data/gnumeric-reader.c
@@ -496,9 +496,8 @@ gnumeric_open_reader (struct gnumeric_read_info *gri, struct dictionary **dict)
  
  
    /* Create the dictionary and populate it */
-  *dict = r->dict = dict_create ();
-
-  dict_set_encoding (r->dict, CHAR_CAST (const char *, xmlTextReaderConstEncoding (r->xtr)));
+  *dict = r->dict = dict_create (
+    CHAR_CAST (const char *, xmlTextReaderConstEncoding (r->xtr)));
  
    for (i = 0 ; i < n_var_specs ; ++i )
      {
diff --git a/src/data/por-file-reader.c b/src/data/por-file-reader.c

index 372d7682136f746110e4cc05d19c04ac30265824..a05f6b3d3a89444cb62e28d99d39fc863b7dba9a 100644 (file)
--- a/src/data/por-file-reader.c
+++ b/src/data/por-file-reader.c
@@ -38,6 +38,7 @@
  #include "data/value-labels.h"
  #include "data/variable.h"
  #include "libpspp/compiler.h"
+#include "libpspp/i18n.h"
  #include "libpspp/message.h"
  #include "libpspp/misc.h"
  #include "libpspp/pool.h"
@@ -250,7 +251,7 @@ pfm_open_reader (struct file_handle *fh, struct dictionary **dict,
    struct pool *volatile pool = NULL;
    struct pfm_reader *volatile r = NULL;
  
-  *dict = dict_create ();
+  *dict = dict_create (get_default_encoding ());
  
    /* Create and initialize reader. */
    pool = pool_create ();
diff --git a/src/data/psql-reader.c b/src/data/psql-reader.c

index 346d214ad31e6d43050dccba941979390cb5cf33..2d9a2678086380725b8cb812305f5682373e8bf0 100644 (file)
--- a/src/data/psql-reader.c
+++ b/src/data/psql-reader.c
@@ -27,6 +27,7 @@
  #include "data/dictionary.h"
  #include "data/format.h"
  #include "data/variable.h"
+#include "libpspp/i18n.h"
  #include "libpspp/message.h"
  #include "libpspp/misc.h"
  #include "libpspp/str.h"
@@ -229,6 +230,7 @@ psql_open_reader (struct psql_read_info *info, struct dictionary **dict)
    int n_fields, n_tuples;
    PGresult *qres = NULL;
    casenumber n_cases = CASENUMBER_MAX;
+  const char *encoding;
  
    struct psql_reader *r = xzalloc (sizeof *r);
    struct string query ;
@@ -285,23 +287,21 @@ psql_open_reader (struct psql_read_info *info, struct dictionary **dict)
  
    r->postgres_epoch = calendar_gregorian_to_offset (2000, 1, 1, NULL);
  
-
-  /* Create the dictionary and populate it */
-  *dict = r->dict = dict_create ();
-
    {
      const int enc = PQclientEncoding (r->conn);
  
      /* According to section 22.2 of the Postgresql manual
         a value of zero (SQL_ASCII) indicates
         "a declaration of ignorance about the encoding".
-       Accordingly, we don't set the dictionary's encoding
+       Accordingly, we use the default encoding
         if we find this value.
      */
-    if ( enc != 0 )
-      dict_set_encoding (r->dict, pg_encoding_to_char (enc));
+    encoding = enc ? pg_encoding_to_char (enc) : get_default_encoding ();
    }
  
+  /* Create the dictionary and populate it */
+  *dict = r->dict = dict_create (encoding);
+
    /*
      select count (*) from (select * from medium) stupid_sql_standard;
    */
diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c

index 6643b85da2848551cac05a352395e0c7bc9907c5..7e5a9e0fe1c6396306060abce27938460cf6869f 100644 (file)
--- a/src/data/sys-file-reader.c
+++ b/src/data/sys-file-reader.c
@@ -428,10 +428,8 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dictp,
  
       First, figure out the correct character encoding, because this determines
       how the rest of the header data is to be interpreted. */
-  dict = dict_create ();
-  r->encoding = choose_encoding (r, extensions[EXT_INTEGER],
-                                 extensions[EXT_ENCODING]);
-  dict_set_encoding (dict, r->encoding);
+  dict = dict_create (choose_encoding (r, extensions[EXT_INTEGER],
+                                       extensions[EXT_ENCODING]));
  
    /* These records don't use variables at all. */
    if (document != NULL)
diff --git a/src/language/data-io/combine-files.c b/src/language/data-io/combine-files.c

index 0f09e735d9eb345a901920636d6850cc8a7071b0..b4f06655ca1ff2de8addf70e915ab051f4a0fa03 100644 (file)
--- a/src/language/data-io/combine-files.c
+++ b/src/language/data-io/combine-files.c
@@ -35,6 +35,7 @@
  #include "language/lexer/variable-parser.h"
  #include "language/stats/sort-criteria.h"
  #include "libpspp/assertion.h"
+#include "libpspp/i18n.h"
  #include "libpspp/message.h"
  #include "libpspp/string-array.h"
  #include "libpspp/taint.h"
@@ -160,7 +161,7 @@ combine_files (enum comb_command_type command,
  
    proc.files = NULL;
    proc.n_files = 0;
-  proc.dict = dict_create ();
+  proc.dict = dict_create (get_default_encoding ());
    proc.output = NULL;
    proc.matcher = NULL;
    subcase_init_empty (&proc.by_vars);
@@ -496,7 +497,6 @@ merge_dictionary (struct dictionary *const m, struct comb_file *f)
    struct dictionary *d = f->dict;
    const struct string_array *d_docs, *m_docs;
    int i;
-  const char *file_encoding;
  
    if (dict_get_label (m) == NULL)
      dict_set_label (m, dict_get_label (d));
@@ -510,17 +510,9 @@ merge_dictionary (struct dictionary *const m, struct comb_file *f)
       The correct thing to do would be to convert to an encoding
       which can cope with all the input files (eg UTF-8).
     */
-  file_encoding = dict_get_encoding (f->dict);
-  if ( file_encoding != NULL)
-    {
-      if ( dict_get_encoding (m) == NULL)
-       dict_set_encoding (m, file_encoding);
-      else if ( 0 != strcmp (file_encoding, dict_get_encoding (m)))
-       {
-         msg (MW,
-              _("Combining files with incompatible encodings. String data may not be represented correctly."));
-       }
-    }
+  if ( 0 != strcmp (dict_get_encoding (f->dict), dict_get_encoding (m)))
+    msg (MW, _("Combining files with incompatible encodings. String data may "
+               "not be represented correctly."));
  
    if (d_docs != NULL)
      {
diff --git a/src/language/data-io/data-list.c b/src/language/data-io/data-list.c

index a171986d061e8d26bb836efcd742ef8d9648f8b6..8ab758849e5b3d0126800741dcd92fe1897fc0c3 100644 (file)
--- a/src/language/data-io/data-list.c
+++ b/src/language/data-io/data-list.c
@@ -42,6 +42,7 @@
  #include "language/lexer/variable-parser.h"
  #include "libpspp/assertion.h"
  #include "libpspp/compiler.h"
+#include "libpspp/i18n.h"
  #include "libpspp/message.h"
  #include "libpspp/misc.h"
  #include "libpspp/pool.h"
@@ -85,7 +86,9 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds)
    struct pool *tmp_pool;
    bool ok;
  
-  dict = in_input_program () ? dataset_dict (ds) : dict_create ();
+  dict = (in_input_program ()
+          ? dataset_dict (ds)
+          : dict_create (get_default_encoding ()));
    parser = data_parser_create (dict);
    reader = NULL;
  
@@ -238,13 +241,9 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds)
      }
    type = data_parser_get_type (parser);
  
-  if (! ds_is_empty (&encoding))
-    {
-      if ( NULL == fh)
-       msg (MW, _("Encoding should not be specified for inline data. It will be ignored."));
-      else
-       dict_set_encoding (dict, ds_cstr (&encoding));
-    }
+  if (! ds_is_empty (&encoding) && NULL == fh)
+    msg (MW, _("Encoding should not be specified for inline data. It will be "
+               "ignored."));
  
    if (fh == NULL)
      fh = fh_inline_file ();
diff --git a/src/language/data-io/get-data.c b/src/language/data-io/get-data.c

index dd55752c7cabbfe114a2b3d1e458376a3719ea2b..d7927527eba98cba9fb43f34169f99a19e4d8d16 100644 (file)
--- a/src/language/data-io/get-data.c
+++ b/src/language/data-io/get-data.c
@@ -279,7 +279,7 @@ static int
  parse_get_txt (struct lexer *lexer, struct dataset *ds)
  {
    struct data_parser *parser = NULL;
-  struct dictionary *dict = dict_create ();
+  struct dictionary *dict = dict_create (get_default_encoding ());
    struct file_handle *fh = NULL;
    struct dfm_reader *reader = NULL;
    char *name = NULL;
diff --git a/src/language/stats/aggregate.c b/src/language/stats/aggregate.c

index d7339d8652c60115a926acfe10ba85ee09dff369..fe6f5eed685993411f035856cc1a0204bf7b3a68 100644 (file)
--- a/src/language/stats/aggregate.c
+++ b/src/language/stats/aggregate.c
@@ -208,7 +208,7 @@ cmd_aggregate (struct lexer *lexer, struct dataset *ds)
    if ( agr.add_variables )
      agr.dict = dict_clone (dict);
    else
-    agr.dict = dict_create ();    
+    agr.dict = dict_create (dict_get_encoding (dict));
  
    dict_set_label (agr.dict, dict_get_label (dict));
    dict_set_documents (agr.dict, dict_get_documents (dict));
diff --git a/src/ui/gui/psppire-data-editor.c b/src/ui/gui/psppire-data-editor.c

index 094a4f6988ada771e840e81b037be42e9b5203db..2aba780e0f9fb96df911202c95e37866b48ee87f 100644 (file)
--- a/src/ui/gui/psppire-data-editor.c
+++ b/src/ui/gui/psppire-data-editor.c
@@ -1,5 +1,5 @@
  /* PSPPIRE - a graphical user interface for PSPP.
-   Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+   Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
  
     This program is free software: you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published by
@@ -1612,8 +1612,7 @@ data_sheet_set_clip (PsppireSheet *sheet)
      }
  
    /* Construct clip dictionary. */
-  clip_dict = dict_create ();
-  dict_set_encoding (clip_dict, dict_get_encoding (ds->dict->dict));
+  clip_dict = dict_create (dict_get_encoding (ds->dict->dict));
    for (i = col0; i <= coli; i++)
      dict_clone_var_assert (clip_dict, dict_get_var (ds->dict->dict, i));
  
diff --git a/src/ui/gui/text-data-import-dialog.c b/src/ui/gui/text-data-import-dialog.c

index 14a23a79846cbfebc9ce7a15c94699ccc7846113..8579ccc94958abcbe8d3aac33800c8eca1de96f4 100644 (file)
--- a/src/ui/gui/text-data-import-dialog.c
+++ b/src/ui/gui/text-data-import-dialog.c
@@ -1252,7 +1252,7 @@ choose_column_names (struct import_assistant *ia)
    struct column *col;
    size_t name_row;
  
-  dict = dict_create ();
+  dict = dict_create (get_default_encoding ());
    name_row = f->variable_names && f->skip_lines ? f->skip_lines : 0;
    for (col = s->columns; col < &s->columns[s->column_cnt]; col++)
      {
@@ -1595,7 +1595,7 @@ prepare_formats_page (struct import_assistant *ia)
  
    push_watch_cursor (ia);
  
-  dict = dict_create ();
+  dict = dict_create (get_default_encoding ());
    fg = fmt_guesser_create ();
    for (column_idx = 0; column_idx < s->column_cnt; column_idx++)
      {
author	Ben Pfaff <blp@cs.stanford.edu>
	Sun, 24 Apr 2011 04:40:48 +0000 (21:40 -0700)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Tue, 26 Apr 2011 04:13:53 +0000 (21:13 -0700)
perl-module/PSPP.xs		patch \| blob \| history
src/data/dataset.c		patch \| blob \| history
src/data/dictionary.c		patch \| blob \| history
src/data/dictionary.h		patch \| blob \| history
src/data/gnumeric-reader.c		patch \| blob \| history
src/data/por-file-reader.c		patch \| blob \| history
src/data/psql-reader.c		patch \| blob \| history
src/data/sys-file-reader.c		patch \| blob \| history
src/language/data-io/combine-files.c		patch \| blob \| history
src/language/data-io/data-list.c		patch \| blob \| history
src/language/data-io/get-data.c		patch \| blob \| history
src/language/stats/aggregate.c		patch \| blob \| history
src/ui/gui/psppire-data-editor.c		patch \| blob \| history
src/ui/gui/text-data-import-dialog.c		patch \| blob \| history