There are several places in the PSPP tree that create dictionaries,
but few of them actually set an encoding. This causes most
dictionaries to be in the default encoding, which is often not
correct.
By making dict_create() take the encoding as a parameter, we force
the caller to think about the encoding issue up front.
14 files changed:
struct dictionary *
pxs_dict_new()
CODE:
struct dictionary *
pxs_dict_new()
CODE:
- RETVAL = dict_create ();
+ RETVAL = dict_create ("UTF-8");
struct file_handle *fh =
fh_create_file (NULL, name, fh_default_properties () );
struct sysfile_info *sfi = xmalloc (sizeof (*sfi));
struct file_handle *fh =
fh_create_file (NULL, name, fh_default_properties () );
struct sysfile_info *sfi = xmalloc (sizeof (*sfi));
- dict_set_encoding (dict, UTF8);
sfi->writer = sfm_open_writer (fh, dict, opts);
sfi->dict = dict;
sfi->opened = true;
sfi->writer = sfm_open_writer (fh, dict, opts);
sfi->dict = dict;
sfi->opened = true;
struct dataset *ds;
ds = xzalloc (sizeof *ds);
struct dataset *ds;
ds = xzalloc (sizeof *ds);
- ds->dict = dict_create ();
+ ds->dict = dict_create (get_default_encoding ());
dict_set_change_callback (ds->dict, dict_callback, ds);
dict_set_change_callback (ds->dict, dict_callback, ds);
- dict_set_encoding (ds->dict, get_default_encoding ());
ds->caseinit = caseinit_create ();
proc_cancel_all_transformations (ds);
ds->caseinit = caseinit_create ();
proc_cancel_all_transformations (ds);
static void dict_unset_split_var (struct dictionary *, struct variable *);
static void dict_unset_mrset_var (struct dictionary *, struct variable *);
static void dict_unset_split_var (struct dictionary *, struct variable *);
static void dict_unset_mrset_var (struct dictionary *, struct variable *);
-void
-dict_set_encoding (struct dictionary *d, const char *enc)
-{
- if (enc)
- {
- free (d->encoding);
- d->encoding = xstrdup (enc);
- }
-}
-
const char *
dict_get_encoding (const struct dictionary *d)
{
const char *
dict_get_encoding (const struct dictionary *d)
{
dest->cb_data = src->cb_data;
}
dest->cb_data = src->cb_data;
}
-/* Creates and returns a new dictionary. */
+/* Creates and returns a new dictionary with the specified ENCODING. */
+dict_create (const char *encoding)
{
struct dictionary *d = xzalloc (sizeof *d);
{
struct dictionary *d = xzalloc (sizeof *d);
+ d->encoding = xstrdup (encoding);
hmap_init (&d->name_map);
attrset_init (&d->attributes);
hmap_init (&d->name_map);
attrset_init (&d->attributes);
struct dictionary *d;
size_t i;
struct dictionary *d;
size_t i;
+ d = dict_create (s->encoding);
/* Set the new dictionary's encoding early so that string length limitations
are interpreted correctly. */
/* Set the new dictionary's encoding early so that string length limitations
are interpreted correctly. */
dict_create_internal_var (int case_idx, int width)
{
if (internal_dict == NULL)
dict_create_internal_var (int case_idx, int width)
{
if (internal_dict == NULL)
- internal_dict = dict_create ();
+ internal_dict = dict_create ("UTF-8");
struct ccase;
/* Creating dictionaries. */
struct ccase;
/* Creating dictionaries. */
-struct dictionary *dict_create (void);
+struct dictionary *dict_create (const char *encoding);
struct dictionary *dict_clone (const struct dictionary *);
struct dictionary *dict_clone (const struct dictionary *);
bool dict_has_attributes (const struct dictionary *);
/* Data encoding. */
bool dict_has_attributes (const struct dictionary *);
/* Data encoding. */
-void dict_set_encoding (struct dictionary *d, const char *enc);
const char *dict_get_encoding (const struct dictionary *d);
bool dict_id_is_valid (const struct dictionary *, const char *id,
const char *dict_get_encoding (const struct dictionary *d);
bool dict_id_is_valid (const struct dictionary *, const char *id,
/* Create the dictionary and populate it */
/* Create the dictionary and populate it */
- *dict = r->dict = dict_create ();
-
- dict_set_encoding (r->dict, CHAR_CAST (const char *, xmlTextReaderConstEncoding (r->xtr)));
+ *dict = r->dict = dict_create (
+ CHAR_CAST (const char *, xmlTextReaderConstEncoding (r->xtr)));
for (i = 0 ; i < n_var_specs ; ++i )
{
for (i = 0 ; i < n_var_specs ; ++i )
{
#include "data/value-labels.h"
#include "data/variable.h"
#include "libpspp/compiler.h"
#include "data/value-labels.h"
#include "data/variable.h"
#include "libpspp/compiler.h"
+#include "libpspp/i18n.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"
#include "libpspp/pool.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"
#include "libpspp/pool.h"
struct pool *volatile pool = NULL;
struct pfm_reader *volatile r = NULL;
struct pool *volatile pool = NULL;
struct pfm_reader *volatile r = NULL;
- *dict = dict_create ();
+ *dict = dict_create (get_default_encoding ());
/* Create and initialize reader. */
pool = pool_create ();
/* Create and initialize reader. */
pool = pool_create ();
#include "data/dictionary.h"
#include "data/format.h"
#include "data/variable.h"
#include "data/dictionary.h"
#include "data/format.h"
#include "data/variable.h"
+#include "libpspp/i18n.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"
#include "libpspp/str.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"
#include "libpspp/str.h"
int n_fields, n_tuples;
PGresult *qres = NULL;
casenumber n_cases = CASENUMBER_MAX;
int n_fields, n_tuples;
PGresult *qres = NULL;
casenumber n_cases = CASENUMBER_MAX;
struct psql_reader *r = xzalloc (sizeof *r);
struct string query ;
struct psql_reader *r = xzalloc (sizeof *r);
struct string query ;
r->postgres_epoch = calendar_gregorian_to_offset (2000, 1, 1, NULL);
r->postgres_epoch = calendar_gregorian_to_offset (2000, 1, 1, NULL);
-
- /* Create the dictionary and populate it */
- *dict = r->dict = dict_create ();
-
{
const int enc = PQclientEncoding (r->conn);
/* According to section 22.2 of the Postgresql manual
a value of zero (SQL_ASCII) indicates
"a declaration of ignorance about the encoding".
{
const int enc = PQclientEncoding (r->conn);
/* According to section 22.2 of the Postgresql manual
a value of zero (SQL_ASCII) indicates
"a declaration of ignorance about the encoding".
- Accordingly, we don't set the dictionary's encoding
+ Accordingly, we use the default encoding
if we find this value.
*/
if we find this value.
*/
- if ( enc != 0 )
- dict_set_encoding (r->dict, pg_encoding_to_char (enc));
+ encoding = enc ? pg_encoding_to_char (enc) : get_default_encoding ();
+ /* Create the dictionary and populate it */
+ *dict = r->dict = dict_create (encoding);
+
/*
select count (*) from (select * from medium) stupid_sql_standard;
*/
/*
select count (*) from (select * from medium) stupid_sql_standard;
*/
First, figure out the correct character encoding, because this determines
how the rest of the header data is to be interpreted. */
First, figure out the correct character encoding, because this determines
how the rest of the header data is to be interpreted. */
- dict = dict_create ();
- r->encoding = choose_encoding (r, extensions[EXT_INTEGER],
- extensions[EXT_ENCODING]);
- dict_set_encoding (dict, r->encoding);
+ dict = dict_create (choose_encoding (r, extensions[EXT_INTEGER],
+ extensions[EXT_ENCODING]));
/* These records don't use variables at all. */
if (document != NULL)
/* These records don't use variables at all. */
if (document != NULL)
#include "language/lexer/variable-parser.h"
#include "language/stats/sort-criteria.h"
#include "libpspp/assertion.h"
#include "language/lexer/variable-parser.h"
#include "language/stats/sort-criteria.h"
#include "libpspp/assertion.h"
+#include "libpspp/i18n.h"
#include "libpspp/message.h"
#include "libpspp/string-array.h"
#include "libpspp/taint.h"
#include "libpspp/message.h"
#include "libpspp/string-array.h"
#include "libpspp/taint.h"
proc.files = NULL;
proc.n_files = 0;
proc.files = NULL;
proc.n_files = 0;
- proc.dict = dict_create ();
+ proc.dict = dict_create (get_default_encoding ());
proc.output = NULL;
proc.matcher = NULL;
subcase_init_empty (&proc.by_vars);
proc.output = NULL;
proc.matcher = NULL;
subcase_init_empty (&proc.by_vars);
struct dictionary *d = f->dict;
const struct string_array *d_docs, *m_docs;
int i;
struct dictionary *d = f->dict;
const struct string_array *d_docs, *m_docs;
int i;
- const char *file_encoding;
if (dict_get_label (m) == NULL)
dict_set_label (m, dict_get_label (d));
if (dict_get_label (m) == NULL)
dict_set_label (m, dict_get_label (d));
The correct thing to do would be to convert to an encoding
which can cope with all the input files (eg UTF-8).
*/
The correct thing to do would be to convert to an encoding
which can cope with all the input files (eg UTF-8).
*/
- file_encoding = dict_get_encoding (f->dict);
- if ( file_encoding != NULL)
- {
- if ( dict_get_encoding (m) == NULL)
- dict_set_encoding (m, file_encoding);
- else if ( 0 != strcmp (file_encoding, dict_get_encoding (m)))
- {
- msg (MW,
- _("Combining files with incompatible encodings. String data may not be represented correctly."));
- }
- }
+ if ( 0 != strcmp (dict_get_encoding (f->dict), dict_get_encoding (m)))
+ msg (MW, _("Combining files with incompatible encodings. String data may "
+ "not be represented correctly."));
#include "language/lexer/variable-parser.h"
#include "libpspp/assertion.h"
#include "libpspp/compiler.h"
#include "language/lexer/variable-parser.h"
#include "libpspp/assertion.h"
#include "libpspp/compiler.h"
+#include "libpspp/i18n.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"
#include "libpspp/pool.h"
#include "libpspp/message.h"
#include "libpspp/misc.h"
#include "libpspp/pool.h"
struct pool *tmp_pool;
bool ok;
struct pool *tmp_pool;
bool ok;
- dict = in_input_program () ? dataset_dict (ds) : dict_create ();
+ dict = (in_input_program ()
+ ? dataset_dict (ds)
+ : dict_create (get_default_encoding ()));
parser = data_parser_create (dict);
reader = NULL;
parser = data_parser_create (dict);
reader = NULL;
}
type = data_parser_get_type (parser);
}
type = data_parser_get_type (parser);
- if (! ds_is_empty (&encoding))
- {
- if ( NULL == fh)
- msg (MW, _("Encoding should not be specified for inline data. It will be ignored."));
- else
- dict_set_encoding (dict, ds_cstr (&encoding));
- }
+ if (! ds_is_empty (&encoding) && NULL == fh)
+ msg (MW, _("Encoding should not be specified for inline data. It will be "
+ "ignored."));
if (fh == NULL)
fh = fh_inline_file ();
if (fh == NULL)
fh = fh_inline_file ();
parse_get_txt (struct lexer *lexer, struct dataset *ds)
{
struct data_parser *parser = NULL;
parse_get_txt (struct lexer *lexer, struct dataset *ds)
{
struct data_parser *parser = NULL;
- struct dictionary *dict = dict_create ();
+ struct dictionary *dict = dict_create (get_default_encoding ());
struct file_handle *fh = NULL;
struct dfm_reader *reader = NULL;
char *name = NULL;
struct file_handle *fh = NULL;
struct dfm_reader *reader = NULL;
char *name = NULL;
if ( agr.add_variables )
agr.dict = dict_clone (dict);
else
if ( agr.add_variables )
agr.dict = dict_clone (dict);
else
- agr.dict = dict_create ();
+ agr.dict = dict_create (dict_get_encoding (dict));
dict_set_label (agr.dict, dict_get_label (dict));
dict_set_documents (agr.dict, dict_get_documents (dict));
dict_set_label (agr.dict, dict_get_label (dict));
dict_set_documents (agr.dict, dict_get_documents (dict));
/* PSPPIRE - a graphical user interface for PSPP.
/* PSPPIRE - a graphical user interface for PSPP.
- Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
+ Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
}
/* Construct clip dictionary. */
}
/* Construct clip dictionary. */
- clip_dict = dict_create ();
- dict_set_encoding (clip_dict, dict_get_encoding (ds->dict->dict));
+ clip_dict = dict_create (dict_get_encoding (ds->dict->dict));
for (i = col0; i <= coli; i++)
dict_clone_var_assert (clip_dict, dict_get_var (ds->dict->dict, i));
for (i = col0; i <= coli; i++)
dict_clone_var_assert (clip_dict, dict_get_var (ds->dict->dict, i));
struct column *col;
size_t name_row;
struct column *col;
size_t name_row;
+ dict = dict_create (get_default_encoding ());
name_row = f->variable_names && f->skip_lines ? f->skip_lines : 0;
for (col = s->columns; col < &s->columns[s->column_cnt]; col++)
{
name_row = f->variable_names && f->skip_lines ? f->skip_lines : 0;
for (col = s->columns; col < &s->columns[s->column_cnt]; col++)
{
+ dict = dict_create (get_default_encoding ());
fg = fmt_guesser_create ();
for (column_idx = 0; column_idx < s->column_cnt; column_idx++)
{
fg = fmt_guesser_create ();
for (column_idx = 0; column_idx < s->column_cnt; column_idx++)
{