trunc \
unilbrk/ulc-width-linebreaks \
unistd \
+ unistr/u8-strlen \
unlocked-io \
vasprintf-posix \
vfprintf-posix \
These functions provide the ability to convert data fields into
@union{value}s and vice versa.
-@deftypefun bool data_in (struct substring @var{input}, enum legacy_encoding @var{legacy_encoding}, enum fmt_type @var{type}, int @var{implied_decimals}, int @var{first_column}, union value *@var{output}, int @var{width})
+@deftypefun bool data_in (struct substring @var{input}, const char *@var{encoding}, enum fmt_type @var{type}, int @var{implied_decimals}, int @var{first_column}, const struct dictionary *@var{dict}, union value *@var{output}, int @var{width})
Parses @var{input} as a field containing data in the given format
@var{type}. The resulting value is stored in @var{output}, which the
caller must have initialized with the given @var{width}. For
consistency, @var{width} must be 0 if
@var{type} is a numeric format type and greater than 0 if @var{type}
is a string format type.
-
-Ordinarily @var{legacy_encoding} should be @code{LEGACY_NATIVE},
-indicating that @var{input} is encoded in the character set
-conventionally used on the host machine. It may be set to
-@code{LEGACY_EBCDIC} to cause @var{input} to be re-encoded from EBCDIC
-during data parsing.
+@var{encoding} should be set to indicate the character
+encoding of @var{input}.
+@var{dict} must be a pointer to the dictionary with which @var{output}
+is associated.
If @var{input} is the empty string (with length 0), @var{output} is
set to the value set on SET BLANKS (@pxref{SET BLANKS,,,pspp, PSPP
This function is declared in @file{data/data-in.h}.
@end deftypefun
-@deftypefun void data_out (const union value *@var{input}, const struct fmt_spec *@var{format}, char *@var{output})
-@deftypefunx void data_out_legacy (const union value *@var{input}, enum legacy_encoding @var{legacy_encoding}, const struct fmt_spec *@var{format}, char *@var{output})
-Converts the data pointed to by @var{input} into a data field in
-@var{output} according to output format specifier @var{format}, which
-must be a valid output format. Exactly @code{@var{format}->w} bytes
-are written to @var{output}. The width of @var{input} is also
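+@deftypefun {char *} data_out (const union value *@var{input}, const char *@var{encoding}, const struct fmt_spec *@var{format})
+@deftypefunx void data_out_legacy (const union value *@var{input}, const char *@var{encoding}, const struct fmt_spec *@var{format}, char *@var{output})
+@func{data_out} converts the data pointed to by @var{input}, which is
+encoded in @var{encoding}, into a dynamically allocated string encoded
+in UTF-8, according to output format specifier @var{format}.  The
+caller must free the returned string.
+@func{data_out_legacy} instead writes the field into the
+caller-supplied buffer @var{output}, which must be at least
+@code{@var{format}->w} bytes long, recoding non-binary fields from the
+native character set into @var{encoding}; no null terminator is
+appended.
+In both cases @var{format}
+must be a valid output format. The width of @var{input} is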
inferred from @var{format} using an algorithm equivalent to
@func{fmt_var_width}.
-If @func{data_out} is called, or @func{data_out_legacy} is called with
-@var{legacy_encoding} set to @code{LEGACY_NATIVE}, @var{output} will
-be encoded in the character set conventionally used on the host
-machine. If @var{legacy_encoding} is set to @code{LEGACY_EBCDIC},
-@var{output} will be re-encoded from EBCDIC during data output.
-
When @var{input} contains data that cannot be represented in the given
@var{format}, @func{data_out} may output a message using @func{msg},
@c (@pxref{msg}),
dictionary's character set.
-
@section System files
@file{*.sav} files contain a field which is supposed to identify the encoding
of the data they contain (@pxref{Machine Integer Info Record}).
longer required.
@end deftypefun
+In order to minimise the number of conversions required, and to simplify
+the design, PSPP attempts to store all internal strings in UTF-8 encoding.
+Thus, when reading system and portable files (or any other data source),
+the following items are immediately converted to UTF-8 encoding:
+@itemize
+@item Variable names
+@item Variable labels
+@item Value labels
+@end itemize
+Conversely, when writing system files, these are converted back to the
+encoding of that system file.
-For example, in order to display a string variable's value in a label widget in the psppire gui one would use code similar to
-@example
-
-struct variable *var = /* assigned from somewhere */
-struct case c = /* from somewhere else */
-
-const union value *val = case_data (&c, var);
-
-char *utf8string = recode_string (UTF8, dict_get_encoding (dict), val->s,
- var_get_width (var));
-
-GtkWidget *entry = gtk_entry_new();
-gtk_entry_set_text (entry, utf8string);
-gtk_widget_show (entry);
-
-free (utf8string);
-
-@end example
+String data stored in union values are left in their original encoding.
+These will be converted by the data_in/data_out functions.
#include <data/sys-file-writer.h>
#include <data/sys-file-reader.h>
#include <data/value.h>
+#include <data/vardict.h>
#include <data/value-labels.h>
#include <data/format.h>
#include <data/data-in.h>
+#include <data/data-out.h>
#include <string.h>
typedef struct fmt_spec input_format ;
const char *ver
CODE:
assert (0 == strcmp (ver, bare_version));
+ i18n_init ();
msg_init (NULL, message_handler);
settings_init (0, 0);
fh_init ();
CODE:
SV *ret;
const struct fmt_spec *fmt = var_get_print_format (var);
+ const struct dictionary *dict = var_get_vardict (var)->dict;
union value uv;
char *s;
make_value_from_scalar (&uv, val, var);
- s = malloc (fmt->w);
- memset (s, '\0', fmt->w);
- data_out (&uv, fmt, s);
+ s = data_out (&uv, dict_get_encoding (dict), fmt);
value_destroy (&uv, var_get_width (var));
ret = newSVpv (s, fmt->w);
free (s);
{
struct substring ss = ss_cstr (SvPV_nolen (sv));
if ( ! data_in (ss, LEGACY_NATIVE, ifmt->type, 0, 0, 0,
+ sfi->dict,
case_data_rw (c, v),
var_get_width (v)) )
{
Like the strings embedded in all "union value"s, the return
value is not null-terminated. */
-const char *
+const uint8_t *
case_str (const struct ccase *c, const struct variable *v)
{
size_t idx = var_get_case_index (v);
Like the strings embedded in all "union value"s, the return
value is not null-terminated. */
-const char *
+const uint8_t *
case_str_idx (const struct ccase *c, size_t idx)
{
assert (idx < c->proto->n_widths);
Like the strings embedded in all "union value"s, the return
value is not null-terminated. */
-char *
+uint8_t *
case_str_rw (struct ccase *c, const struct variable *v)
{
size_t idx = var_get_case_index (v);
Like the strings embedded in all "union value"s, the return
value is not null-terminated. */
-char *
+uint8_t *
case_str_rw_idx (struct ccase *c, size_t idx)
{
assert (idx < c->proto->n_widths);
double case_num (const struct ccase *, const struct variable *);
double case_num_idx (const struct ccase *, size_t idx);
-const char *case_str (const struct ccase *, const struct variable *);
-const char *case_str_idx (const struct ccase *, size_t idx);
-char *case_str_rw (struct ccase *, const struct variable *);
-char *case_str_rw_idx (struct ccase *, size_t idx);
+const uint8_t *case_str (const struct ccase *, const struct variable *);
+const uint8_t *case_str_idx (const struct ccase *, size_t idx);
+uint8_t *case_str_rw (struct ccase *, const struct variable *);
+uint8_t *case_str_rw_idx (struct ccase *, size_t idx);
int case_compare (const struct ccase *, const struct ccase *,
const struct variable *const *, size_t n_vars);
#include "settings.h"
#include "value.h"
#include "format.h"
+#include "dictionary.h"
#include <libpspp/assertion.h>
#include <libpspp/legacy-encoding.h>
+#include <libpspp/i18n.h>
#include <libpspp/compiler.h>
#include <libpspp/integer-format.h>
#include <libpspp/message.h>
/* Information about parsing one data field. */
struct data_in
{
- enum legacy_encoding encoding;/* Encoding of source. */
+ const char *src_enc; /* Encoding of source. */
struct substring input; /* Source. */
enum fmt_type format; /* Input format. */
int implied_decimals; /* Number of implied decimal places. */
representation in OUTPUT, which the caller must have
initialized with the given WIDTH (0 for a numeric field,
otherwise the string width).
+ Iff FORMAT is a string format, then DICT must be a pointer
+ to the dictionary associated with OUTPUT. Otherwise, DICT
+ may be null.
If no decimal point is included in a numeric format, then
IMPLIED_DECIMALS decimal places are implied. Specify 0 if no
FIRST_COLUMN plus the length of the input because of the
possibility of escaped quotes in strings, etc.) */
bool
-data_in (struct substring input, enum legacy_encoding encoding,
+data_in (struct substring input, const char *encoding,
enum fmt_type format, int implied_decimals,
- int first_column, int last_column, union value *output, int width)
+ int first_column, int last_column,
+ const struct dictionary *dict,
+ union value *output, int width)
{
static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] =
{
};
struct data_in i;
- void *copy = NULL;
+
bool ok;
assert ((width != 0) == fmt_is_string (format));
- if (encoding == LEGACY_NATIVE
- || fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING))
- {
- i.input = input;
- i.encoding = encoding;
- }
- else
- {
- ss_alloc_uninit (&i.input, ss_length (input));
- legacy_recode (encoding, ss_data (input), LEGACY_NATIVE,
- ss_data (i.input), ss_length (input));
- i.encoding = LEGACY_NATIVE;
- copy = ss_data (i.input);
- }
i.format = format;
i.implied_decimals = implied_decimals;
i.first_column = first_column;
i.last_column = last_column;
+ i.src_enc = encoding;
- if (!ss_is_empty (i.input))
+ if (ss_is_empty (input))
{
- ok = handlers[i.format] (&i);
- if (!ok)
- default_result (&i);
+ default_result (&i);
+ return true;
+ }
+
+ if (fmt_get_category (format) & ( FMT_CAT_BINARY | FMT_CAT_HEXADECIMAL | FMT_CAT_LEGACY))
+ {
+ i.input = input;
}
else
{
- default_result (&i);
- ok = true;
+ const char *dest_encoding;
+ char *s = NULL;
+ if ( dict == NULL)
+ {
+ assert (0 == (fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING)));
+ dest_encoding = LEGACY_NATIVE;
+ }
+ else
+ dest_encoding = dict_get_encoding (dict);
+
+ s = recode_string (dest_encoding, i.src_enc, ss_data (input), ss_length (input));
+ /* The recoded text may be longer or shorter than INPUT, so both the
+    allocation and the copy must use the recoded length; copying
+    ss_length (input) bytes would over-read S or overflow I.INPUT.
+    NOTE(review): the buffer allocated for I.INPUT here is not released
+    anywhere in the visible part of this function -- confirm. */
+ ss_alloc_uninit (&i.input, strlen (s));
+ memcpy (ss_data (i.input), s, ss_length (i.input));
+ free (s);
}
- if (copy)
- free (copy);
+ ok = handlers[i.format] (&i);
+ if (!ok)
+ default_result (&i);
return ok;
}
{
/* This is equivalent to buf_copy_rpad, except that we posibly
do a character set recoding in the middle. */
- char *dst = value_str_rw (i->output, i->width);
+ uint8_t *dst = value_str_rw (i->output, i->width);
size_t dst_size = i->width;
const char *src = ss_data (i->input);
size_t src_size = ss_length (i->input);
- legacy_recode (i->encoding, src, LEGACY_NATIVE, dst, MIN (src_size, dst_size));
+ memcpy (dst, src, MIN (src_size, dst_size));
+
if (dst_size > src_size)
memset (&dst[src_size], ' ', dst_size - src_size);
static bool
parse_AHEX (struct data_in *i)
{
- char *s = value_str_rw (i->output, i->width);
+ uint8_t *s = value_str_rw (i->output, i->width);
size_t j;
for (j = 0; ; j++)
return false;
}
- if (i->encoding != LEGACY_NATIVE)
+ if (0 != strcmp (i->src_enc, LEGACY_NATIVE))
{
- hi = legacy_to_native (i->encoding, hi);
- lo = legacy_to_native (i->encoding, lo);
+ hi = legacy_to_native (i->src_enc, hi);
+ lo = legacy_to_native (i->src_enc, lo);
}
if (!c_isxdigit (hi) || !c_isxdigit (lo))
{
enum fmt_type;
union value;
-bool data_in (struct substring input, enum legacy_encoding,
+struct dictionary;
+bool data_in (struct substring input, const char *encoding,
enum fmt_type, int implied_decimals,
int first_column, int last_column,
+ const struct dictionary *dict,
union value *output, int width);
#endif /* data/data-in.h */
#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/str.h>
+#include <libpspp/pool.h>
+#include <libpspp/i18n.h>
#include "minmax.h"
char *);
static void output_hex (const void *, size_t bytes, char *);
\f
-/* Same as data_out, and additionally recodes the output from
- native form into the given legacy character ENCODING. */
-void
-data_out_legacy (const union value *input, enum legacy_encoding encoding,
- const struct fmt_spec *format, char *output)
-{
- static data_out_converter_func *const converters[FMT_NUMBER_OF_FORMATS] =
+
+static data_out_converter_func *const converters[FMT_NUMBER_OF_FORMATS] =
{
#define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) output_##METHOD,
#include "format.def"
};
+/* Similar to data_out. Additionally recodes the output from
+ native form into the given legacy character ENCODING.
+ OUTPUT must be provided by the caller and must be at least
+ FORMAT->w long. No null terminator is appended to OUTPUT.
+*/
+void
+data_out_legacy (const union value *input, const char *encoding,
+ const struct fmt_spec *format, char *output)
+{
assert (fmt_check_output (format));
converters[format->type] (input, format, output); /* Format INPUT into OUTPUT with the converter for FORMAT's type. */
- if (encoding != LEGACY_NATIVE
+ if (0 != strcmp (encoding, LEGACY_NATIVE)
&& fmt_get_category (format->type) != FMT_CAT_BINARY) /* Binary fields are never recoded. */
- legacy_recode (LEGACY_NATIVE, output, encoding, output, format->w);
+ {
+ char *s = recode_string (encoding, LEGACY_NATIVE, output, format->w );
+ memcpy (output, s, format->w); /* NOTE(review): copies FORMAT->w bytes, so assumes recoding never shortens the field -- confirm. */
+ free (s);
+ }
}
-/* Converts the INPUT value into printable form in the exactly
- FORMAT->W characters in OUTPUT according to format
- specification FORMAT. No null terminator is appended to the
- buffer.
+/* Converts the INPUT value into a UTF8 encoded string, according
+ to format specification FORMAT.
VALUE must be the correct width for FORMAT, that is, its
- width must be fmt_var_width(FORMAT). */
-void
-data_out (const union value *input, const struct fmt_spec *format,
- char *output)
+ width must be fmt_var_width(FORMAT).
+
+ ENCODING must be the encoding of INPUT. Normally this can
+ be obtained by calling dict_get_encoding on the dictionary
+ with which INPUT is associated.
+
+ The return value is dynamically allocated, and must be freed
+ by the caller. If POOL is non-null, then the return value is
+ allocated on that pool.
+*/
+char *
+data_out_pool (const union value *input, const char *encoding,
+               const struct fmt_spec *format, struct pool *pool)
+{
+  char *native;   /* Field as produced by the converter, FORMAT->w bytes plus a null. */
+  char *utf8;     /* NATIVE recoded from ENCODING to UTF8; this is the result. */
+
+  assert (fmt_check_output (format));
+
+  /* Format the value into a scratch buffer and null-terminate it. */
+  native = xmalloc (format->w + 1);
+  converters[format->type] (input, format, native);
+  native[format->w] = '\0';
+
+  /* Recode into UTF8, allocating the result from POOL. */
+  utf8 = recode_string_pool (UTF8, encoding, native, format->w, pool);
+  free (native);
+  return utf8;
+}
+
+char *
+data_out (const union value *input, const char *encoding, const struct fmt_spec *format)
{
- return data_out_legacy (input, LEGACY_NATIVE, format, output);
+ return data_out_pool (input, encoding, format, NULL); /* No pool is supplied, so per data_out_pool's contract the caller must free the result. */
}
\f
struct fmt_spec;
union value;
-void data_out (const union value *, const struct fmt_spec *, char *);
+char * data_out (const union value *, const char *encoding, const struct fmt_spec *);
-void data_out_legacy (const union value *, enum legacy_encoding,
- const struct fmt_spec *, char *);
+char * data_out_pool (const union value *, const char *encoding, const struct fmt_spec *, struct pool *pool);
+
+void data_out_legacy (const union value *input, const char *encoding,
+ const struct fmt_spec *format, char *output);
#endif /* data-out.h */
/* FH_REF_FILE only. */
char *file_name; /* File name as provided by user. */
enum fh_mode mode; /* File mode. */
- enum legacy_encoding encoding;/* File encoding. */
+ const char *encoding; /* File encoding. */
/* FH_REF_FILE and FH_REF_INLINE only. */
size_t record_width; /* Length of fixed-format records. */
}
/* Returns the encoding of characters read from HANDLE. */
-enum legacy_encoding
+const char *
fh_get_legacy_encoding (const struct file_handle *handle)
{
assert (handle->referent & (FH_REF_FILE | FH_REF_INLINE));
enum fh_mode mode; /* File mode. */
size_t record_width; /* Length of fixed-format records. */
size_t tab_width; /* Tab width, 0=do not expand tabs. */
- enum legacy_encoding encoding;/* ASCII or EBCDIC? */
+ const char *encoding; /* ASCII or EBCDIC? */
};
void fh_init (void);
/* Properties of FH_REF_FILE and FH_REF_INLINE file handles. */
size_t fh_get_record_width (const struct file_handle *);
size_t fh_get_tab_width (const struct file_handle *);
-enum legacy_encoding fh_get_legacy_encoding (const struct file_handle *);
+const char *fh_get_legacy_encoding (const struct file_handle *);
/* Properties of FH_REF_SCRATCH file handles. */
struct scratch_handle *fh_get_scratch_handle (const struct file_handle *);
Returns true if successful, false if MV has no more room for
missing values or if S is not an acceptable missing value. */
bool
-mv_add_str (struct missing_values *mv, const char s[])
+mv_add_str (struct missing_values *mv, const uint8_t s[])
{
union value v;
bool ok;
MV must be a set of string missing values.
S[] must contain exactly as many characters as MV's width. */
static bool
-is_str_user_missing (const struct missing_values *mv, const char s[])
+is_str_user_missing (const struct missing_values *mv, const uint8_t s[])
{
const union value *v = mv->values;
assert (mv->width > 0);
MV must be a set of string missing values.
S[] must contain exactly as many characters as MV's width. */
bool
-mv_is_str_missing (const struct missing_values *mv, const char s[],
+mv_is_str_missing (const struct missing_values *mv, const uint8_t s[],
enum mv_class class)
{
assert (mv->width > 0);
bool mv_is_value_missing (const struct missing_values *, const union value *,
enum mv_class);
bool mv_is_num_missing (const struct missing_values *, double, enum mv_class);
-bool mv_is_str_missing (const struct missing_values *, const char[],
+bool mv_is_str_missing (const struct missing_values *, const uint8_t[],
enum mv_class);
/* Initializing missing value sets. */
/* Adding and modifying discrete values. */
bool mv_add_value (struct missing_values *, const union value *);
-bool mv_add_str (struct missing_values *, const char[]);
+bool mv_add_str (struct missing_values *, const uint8_t[]);
bool mv_add_num (struct missing_values *, double);
void mv_pop_value (struct missing_values *, union value *);
bool mv_replace_value (struct missing_values *, const union value *, int idx);
*buf = '\0';
}
+
+/* Reads a length-prefixed string of at most 255 bytes into BUF,
+   which must have room for 256 bytes.  No null terminator is
+   stored.
+   Returns the number of bytes read.
+*/
+static size_t
+read_bytes (struct pfm_reader *r, uint8_t *buf)
+{
+  int n = read_int (r);
+  int i;
+
+  if (n < 0 || n > 255)
+    error (r, _("Bad string length %d."), n);
+
+  /* Count upward with a separate index so that N still holds the
+     byte count when it is returned; counting N itself down to
+     zero would leave it at -1. */
+  for (i = 0; i < n; i++)
+    {
+      *buf++ = r->cc;
+      advance (r);
+    }
+  return n;
+}
+
+
+
/* Reads a string and returns a copy of it allocated from R's
pool. */
static char *
value_init (v, width);
if (width > 0)
{
- char string[256];
- read_string (r, string);
- value_copy_str_rpad (v, width, string, ' ');
+ uint8_t buf[256];
+ size_t n_bytes = read_bytes (r, buf);
+ value_copy_buf_rpad (v, width, buf, n_bytes, ' ');
}
else
v->f = read_float (r);
case_data_rw_idx (c, i)->f = read_float (r);
else
{
- char string[256];
- read_string (r, string);
- buf_copy_str_rpad (case_str_rw_idx (c, i), width, string, ' ');
+ uint8_t buf[256];
+ size_t n_bytes = read_bytes (r, buf);
+ u8_buf_copy_rpad (case_str_rw_idx (c, i), width, buf, n_bytes, ' ');
}
}
#include <setjmp.h>
#include <stdlib.h>
+#include <libpspp/i18n.h>
#include <libpspp/assertion.h>
#include <libpspp/message.h>
#include <libpspp/compiler.h>
size_t size, size_t count,
struct dictionary *);
+/* Converts the strings held in DICT -- each variable's name, variable
+   label and value labels -- from the dictionary's encoding to UTF8.
+   If DICT reports no encoding, the default encoding is assumed.
+   A variable whose recoded name would collide with another variable
+   already in DICT keeps its original name and a warning is issued. */
+static void
+recode_strings (struct dictionary *dict)
+{
+ int i;
+
+ const char *enc = dict_get_encoding (dict);
+
+ if ( NULL == enc)
+ enc = get_default_encoding ();
+
+ for (i = 0 ; i < dict_get_var_cnt (dict); ++i)
+ {
+ /* Convert the long variable name.  The rename happens only when
+    recoding actually changed the name and the new name is unused. */
+ struct variable *var = dict_get_var (dict, i);
+ const char *native_name = var_get_name (var);
+ char *utf8_name = recode_string (UTF8, enc, native_name, -1); /* presumably -1 means "null-terminated" -- confirm */
+ if ( 0 != strcmp (utf8_name, native_name))
+ {
+ if ( NULL == dict_lookup_var (dict, utf8_name))
+ dict_rename_var (dict, var, utf8_name);
+ else
+ msg (MW,
+ _("Recoded variable name duplicates an existing `%s' within system file."), utf8_name);
+ }
+
+ free (utf8_name);
+
+ /* Convert the variable label, if there is one. */
+ if (var_has_label (var))
+ {
+ char *utf8_label = recode_string (UTF8, enc, var_get_label (var), -1);
+ var_set_label (var, utf8_label);
+ free (utf8_label);
+ }
+
+ if (var_has_value_labels (var)) /* Convert each value label; the labelled values themselves are not recoded. */
+ {
+ const struct val_lab *vl = NULL;
+ const struct val_labs *vlabs = var_get_value_labels (var);
+
+ for (vl = val_labs_first (vlabs); vl != NULL; vl = val_labs_next (vlabs, vl))
+ {
+ const union value *val = val_lab_get_value (vl);
+ const char *label = val_lab_get_label (vl);
+ char *new_label = NULL;
+
+ new_label = recode_string (UTF8, enc, label, -1);
+
+ var_replace_value_label (var, val, new_label); /* NOTE(review): replaces a label in the set being iterated -- confirm this is safe. */
+ free (new_label);
+ }
+ }
+ }
+}
+
/* Opens the system file designated by file handle FH for
reading. Reads the system file's dictionary into *DICT.
If INFO is non-null, then it receives additional info about the
r->has_long_var_names = true;
}
+ recode_strings (*dict);
+
/* Read record 999 data, which is just filler. */
read_int (r);
value_set_missing (&value, mv_width);
for (i = 0; i < missing_value_code; i++)
{
- char *s = value_str_rw (&value, mv_width);
+ uint8_t *s = value_str_rw (&value, mv_width);
read_bytes (r, s, 8);
mv_add_str (&mv, s);
}
struct label
{
- char raw_value[8]; /* Value as uninterpreted bytes. */
+ uint8_t raw_value[8]; /* Value as uninterpreted bytes. */
union value value; /* Value. */
char *label; /* Null-terminated label string. */
};
value_init_pool (subpool, &label->value, max_width);
if (var_is_alpha (var[0]))
- buf_copy_rpad (value_str_rw (&label->value, max_width), max_width,
+ u8_buf_copy_rpad (value_str_rw (&label->value, max_width), max_width,
label->raw_value, sizeof label->raw_value, ' ');
else
label->value.f = float_get_double (r->float_format, label->raw_value);
/* Read value. */
value_length = read_int (r);
if (value_length == width)
- read_string (r, value_str_rw (&value, width), width + 1);
+ read_bytes (r, value_str_rw (&value, width), width);
else
{
sys_warn (r, _("Ignoring long string value %zu for variable %s, "
static void read_error (struct casereader *, const struct sfm_reader *);
static bool read_case_number (struct sfm_reader *, double *);
-static bool read_case_string (struct sfm_reader *, char *, size_t);
+static bool read_case_string (struct sfm_reader *, uint8_t *, size_t);
static int read_opcode (struct sfm_reader *);
static bool read_compressed_number (struct sfm_reader *, double *);
-static bool read_compressed_string (struct sfm_reader *, char *);
-static bool read_whole_strings (struct sfm_reader *, char *, size_t);
+static bool read_compressed_string (struct sfm_reader *, uint8_t *);
+static bool read_whole_strings (struct sfm_reader *, uint8_t *, size_t);
static bool skip_whole_strings (struct sfm_reader *, size_t);
/* Reads and returns one case from READER's file. Returns a null
}
else
{
- char *s = value_str_rw (v, sv->var_width);
+ uint8_t *s = value_str_rw (v, sv->var_width);
if (!read_case_string (r, s + sv->offset, sv->segment_width))
goto eof;
if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8)))
Returns true if successful, false if end of file is
reached immediately. */
static bool
-read_case_string (struct sfm_reader *r, char *s, size_t length)
+read_case_string (struct sfm_reader *r, uint8_t *s, size_t length)
{
size_t whole = ROUND_DOWN (length, 8);
size_t partial = length % 8;
if (partial)
{
- char bounce[8];
+ uint8_t bounce[8];
if (!read_whole_strings (r, bounce, sizeof bounce))
{
if (whole)
Returns true if successful, false if end of file is
reached immediately. */
static bool
-read_compressed_string (struct sfm_reader *r, char *dst)
+read_compressed_string (struct sfm_reader *r, uint8_t *dst)
{
switch (read_opcode (r))
{
Returns true if successful, false if end of file is
reached immediately. */
static bool
-read_whole_strings (struct sfm_reader *r, char *s, size_t length)
+read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length)
{
assert (length % 8 == 0);
if (!r->compressed)
static bool
skip_whole_strings (struct sfm_reader *r, size_t length)
{
- char buffer[1024];
+ uint8_t buffer[1024];
assert (length < sizeof buffer);
return read_whole_strings (r, buffer, length);
}
#include <libpspp/message.h>
#include <libpspp/misc.h>
#include <libpspp/str.h>
+#include <libpspp/i18n.h>
#include <libpspp/version.h>
#include <data/attributes.h>
static const struct casewriter_class sys_file_casewriter_class;
static void write_header (struct sfm_writer *, const struct dictionary *);
-static void write_variable (struct sfm_writer *, const struct variable *);
+static void write_variable (struct sfm_writer *, const struct variable *, const struct dictionary *);
static void write_value_labels (struct sfm_writer *,
- struct variable *, int idx);
+ struct variable *, int idx, const struct dictionary *);
static void write_integer_info_record (struct sfm_writer *);
static void write_float_info_record (struct sfm_writer *);
/* Write basic variable info. */
short_names_assign (d);
for (i = 0; i < dict_get_var_cnt (d); i++)
- write_variable (w, dict_get_var (d, i));
+ write_variable (w, dict_get_var (d, i), d);
/* Write out value labels. */
idx = 0;
{
struct variable *v = dict_get_var (d, i);
- write_value_labels (w, v, idx);
+ write_value_labels (w, v, idx, d);
idx += sfm_width_to_octs (var_get_width (v));
}
/* Write the variable record(s) for variable V to system file
W. */
static void
-write_variable (struct sfm_writer *w, const struct variable *v)
+write_variable (struct sfm_writer *w, const struct variable *v, const struct dictionary *dict)
{
int width = var_get_width (v);
int segment_cnt = sfm_width_to_segments (width);
if (var_has_label (v))
{
const char *label = var_get_label (v);
- size_t padded_len = ROUND_UP (MIN (strlen (label), 255), 4);
+ char *l = recode_string (dict_get_encoding (dict), UTF8, label, -1);
+ size_t padded_len = ROUND_UP (MIN (strlen (l), 255), 4);
write_int (w, padded_len);
- write_string (w, label, padded_len);
+ write_string (w, l, padded_len);
+ free (l);
}
/* Write the missing values, if any, range first. */
Value labels for long string variables are written separately,
by write_long_string_value_labels. */
static void
-write_value_labels (struct sfm_writer *w, struct variable *v, int idx)
+write_value_labels (struct sfm_writer *w, struct variable *v, int idx, const struct dictionary *dict)
{
const struct val_labs *val_labs;
const struct val_lab **labels;
for (i = 0; i < n_labels; i++)
{
const struct val_lab *vl = labels[i];
- const char *label = val_lab_get_label (vl);
+ char *label = recode_string (dict_get_encoding (dict), UTF8, val_lab_get_label (vl), -1);
uint8_t len = MIN (strlen (label), 255);
write_value (w, val_lab_get_value (vl), var_get_width (v));
write_bytes (w, &len, 1);
write_bytes (w, label, len);
write_zeros (w, REM_RND_UP (len + 1, 8));
+ free (label);
}
free (labels);
for (i = 0; i < dict_get_var_cnt (dict); i++)
{
struct variable *v = dict_get_var (dict, i);
+ char *longname = recode_string (dict_get_encoding (dict), UTF8, var_get_name (v), -1);
if (i)
ds_put_char (&map, '\t');
ds_put_format (&map, "%s=%s",
- var_get_short_name (v, 0), var_get_name (v));
+ var_get_short_name (v, 0), longname);
+ free (longname);
}
write_int (w, 7); /* Record type. */
#include <libpspp/hash.h>
#include <libpspp/pool.h>
#include <libpspp/str.h>
+#include <gl/unistr.h>
#include "minmax.h"
#include "xalloc.h"
const union value *src, int src_width,
char pad)
{
- buf_copy_rpad (value_str_rw (dst, dst_width), dst_width,
+ u8_buf_copy_rpad (value_str_rw (dst, dst_width), dst_width,
value_str (src, src_width), src_width,
pad);
}
DST was initialized. Passing, e.g., a smaller value in order
to modify only a prefix of DST will not work in every case. */
void
-value_copy_str_rpad (union value *dst, int dst_width, const char *src,
+value_copy_str_rpad (union value *dst, int dst_width, const uint8_t *src,
char pad)
{
- value_copy_buf_rpad (dst, dst_width, src, strlen (src), pad);
+ value_copy_buf_rpad (dst, dst_width, src, u8_strlen (src), pad);
}
/* Copies the SRC_LEN bytes at SRC to string value DST with width
to modify only a prefix of DST will not work in every case. */
void
value_copy_buf_rpad (union value *dst, int dst_width,
- const char *src, size_t src_len, char pad)
+ const uint8_t *src, size_t src_len, char pad)
{
- buf_copy_rpad (value_str_rw (dst, dst_width), dst_width, src, src_len, pad);
+ u8_buf_copy_rpad (value_str_rw (dst, dst_width), dst_width, src, src_len, pad);
}
/* Sets V to the system-missing value for data of the given
return false;
else
{
- const char *str = value_str (value, old_width);
+ const uint8_t *str = value_str (value, old_width);
int i;
for (i = new_width; i < old_width; i++)
{
if (new_width > MAX_SHORT_STRING)
{
- char *new_long_string = pool_alloc_unaligned (pool, new_width);
+ uint8_t *new_long_string = pool_alloc_unaligned (pool, new_width);
memcpy (new_long_string, value_str (value, old_width), old_width);
value->long_string = new_long_string;
}
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
+#include <stdint.h>
#include <string.h>
#include "xalloc.h"
\f
union value
{
double f;
- char short_string[MAX_SHORT_STRING];
- char *long_string;
+ uint8_t short_string[MAX_SHORT_STRING];
+ uint8_t *long_string;
};
static inline void value_init (union value *, int width);
static inline void value_destroy (union value *, int width);
static inline double value_num (const union value *);
-static inline const char *value_str (const union value *, int width);
-static inline char *value_str_rw (union value *, int width);
+static inline const uint8_t *value_str (const union value *, int width);
+static inline uint8_t *value_str_rw (union value *, int width);
static inline void value_copy (union value *, const union value *, int width);
void value_copy_rpad (union value *, int dst_width,
const union value *, int src_width,
char pad);
-void value_copy_str_rpad (union value *, int dst_width, const char *,
+void value_copy_str_rpad (union value *, int dst_width, const uint8_t *,
char pad);
void value_copy_buf_rpad (union value *dst, int dst_width,
- const char *src, size_t src_len, char pad);
+ const uint8_t *src, size_t src_len, char pad);
void value_set_missing (union value *, int width);
int value_compare_3way (const union value *, const union value *, int width);
bool value_equal (const union value *, const union value *, int width);
It is important that WIDTH be the actual value that was passed
to value_init. Passing, e.g., a smaller value because only
that number of bytes will be accessed will not always work. */
-static inline const char *
+static inline const uint8_t *
value_str (const union value *v, int width)
{
assert (width > 0);
It is important that WIDTH be the actual value that was passed
to value_init. Passing, e.g., a smaller value because only
that number of bytes will be accessed will not always work. */
-static inline char *
+static inline uint8_t *
value_str_rw (union value *v, int width)
{
assert (width > 0);
S[] must contain exactly as many characters as V's width.
V must be a string variable. */
bool
-var_is_str_missing (const struct variable *v, const char s[],
+var_is_str_missing (const struct variable *v, const uint8_t s[],
enum mv_class class)
{
return mv_is_str_missing (&v->miss, s, class);
struct string *str)
{
const char *name = var_lookup_value_label (v, value);
+ const struct dictionary *dict = var_get_vardict (v)->dict;
if (name == NULL)
{
- char *s = ds_put_uninit (str, v->print.w);
- data_out (value, &v->print, s);
+ char *s = data_out (value, dict_get_encoding (dict), &v->print);
+ ds_put_cstr (str, s);
+ free (s);
}
else
ds_put_cstr (str, name);
bool var_is_value_missing (const struct variable *, const union value *,
enum mv_class);
bool var_is_num_missing (const struct variable *, double, enum mv_class);
-bool var_is_str_missing (const struct variable *, const char[], enum mv_class);
+bool var_is_str_missing (const struct variable *, const uint8_t[], enum mv_class);
/* Value labels. */
const char *var_lookup_value_label (const struct variable *,
bool ok;
dict = in_input_program () ? dataset_dict (ds) : dict_create ();
- parser = data_parser_create ();
+ parser = data_parser_create (dict);
reader = NULL;
table = -1; /* Print table if nonzero, -1=undecided. */
/* Data parser for textual data like that read by DATA LIST. */
struct data_parser
{
+ const struct dictionary *dict; /*Dictionary of destination */
enum data_parser_type type; /* Type of data to parse. */
int skip_records; /* Records to skip before first real data. */
casenumber max_cases; /* Max number of cases to read. */
/* Creates and returns a new data parser. */
struct data_parser *
-data_parser_create (void)
+data_parser_create (const struct dictionary *dict)
{
struct data_parser *parser = xmalloc (sizeof *parser);
parser->fields = NULL;
parser->field_cnt = 0;
parser->field_allocated = 0;
+ parser->dict = dict;
parser->span = true;
parser->empty_line_has_field = false;
parse_fixed (const struct data_parser *parser, struct dfm_reader *reader,
struct ccase *c)
{
- enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *encoding = dfm_reader_get_legacy_encoding (reader);
struct field *f;
int row;
f->format.w),
encoding, f->format.type, f->format.d,
f->first_column, f->first_column + f->format.w,
+ parser->dict,
case_data_rw_idx (c, f->case_idx),
fmt_var_width (&f->format));
parse_delimited_span (const struct data_parser *parser,
struct dfm_reader *reader, struct ccase *c)
{
- enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *encoding = dfm_reader_get_legacy_encoding (reader);
struct string tmp = DS_EMPTY_INITIALIZER;
struct field *f;
data_in (s, encoding, f->format.type, 0,
first_column, last_column,
+ parser->dict,
case_data_rw_idx (c, f->case_idx),
fmt_var_width (&f->format));
}
parse_delimited_no_span (const struct data_parser *parser,
struct dfm_reader *reader, struct ccase *c)
{
- enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (reader);
+ const char *encoding = dfm_reader_get_legacy_encoding (reader);
struct string tmp = DS_EMPTY_INITIALIZER;
struct substring s;
struct field *f;
data_in (s, encoding, f->format.type, 0,
first_column, last_column,
+ parser->dict,
case_data_rw_idx (c, f->case_idx),
fmt_var_width (&f->format));
}
};
/* Creating and configuring any parser. */
-struct data_parser *data_parser_create (void);
+struct data_parser *data_parser_create (const struct dictionary *dict);
void data_parser_destroy (struct data_parser *);
enum data_parser_type data_parser_get_type (const struct data_parser *);
}
/* Returns the legacy character encoding of data read from READER. */
-enum legacy_encoding
+const char *
dfm_reader_get_legacy_encoding (const struct dfm_reader *reader)
{
return fh_get_legacy_encoding (reader->fh);
unsigned dfm_eof (struct dfm_reader *);
struct substring dfm_get_record (struct dfm_reader *);
void dfm_expand_tabs (struct dfm_reader *);
-enum legacy_encoding dfm_reader_get_legacy_encoding (
- const struct dfm_reader *);
+const char *dfm_reader_get_legacy_encoding (const struct dfm_reader *);
int dfm_get_percent_read (const struct dfm_reader *);
/* Line control. */
}
/* Returns the legacy character encoding of data written to WRITER. */
-enum legacy_encoding
+const char *
dfm_writer_get_legacy_encoding (const struct dfm_writer *writer)
{
return fh_get_legacy_encoding (writer->fh);
bool dfm_close_writer (struct dfm_writer *);
bool dfm_write_error (const struct dfm_writer *);
bool dfm_put_record (struct dfm_writer *, const char *rec, size_t len);
-enum legacy_encoding dfm_writer_get_legacy_encoding (
- const struct dfm_writer *);
+const char *dfm_writer_get_legacy_encoding (const struct dfm_writer *);
#endif /* data-writer.h */
properties.mode = FH_MODE_VARIABLE;
break;
case FH_360:
- properties.encoding = LEGACY_EBCDIC;
+ properties.encoding = "EBCDIC-US";
if (cmd.recform == FH_FIXED || cmd.recform == FH_F)
properties.mode = FH_MODE_FIXED;
else if (cmd.recform == FH_VARIABLE || cmd.recform == FH_V)
parse_get_txt (struct lexer *lexer, struct dataset *ds)
{
struct data_parser *parser = NULL;
- struct dictionary *dict = NULL;
+ struct dictionary *dict = dict_create ();
struct file_handle *fh = NULL;
struct dfm_reader *reader = NULL;
if (fh == NULL)
goto error;
- parser = data_parser_create ();
+ parser = data_parser_create (dict);
has_type = false;
data_parser_set_type (parser, DP_DELIMITED);
data_parser_set_span (parser, false);
}
lex_match (lexer, '=');
- dict = dict_create ();
+
record = 1;
type = data_parser_get_type (parser);
do
if (fmt_is_string (print->type)
|| dict_contains_var (dict, v))
{
- data_out (case_data (c, v), print,
- ds_put_uninit (&line_buffer, print->w));
+ char *s = data_out (case_data (c, v), dict_get_encoding (dict), print);
+ ds_put_cstr (&line_buffer, s);
+ free (s);
}
else
{
+ char *s;
union value case_idx_value;
case_idx_value.f = case_idx;
- data_out (&case_idx_value, print,
- ds_put_uninit (&line_buffer,print->w));
+ s = data_out (&case_idx_value, dict_get_encoding (dict), print);
+ ds_put_cstr (&line_buffer, s);
+ free (s);
}
- ds_put_char(&line_buffer, ' ');
+ ds_put_char (&line_buffer, ' ');
}
if (!n_lines_remaining (d))
{
const struct variable *v = cmd.v_variables[column];
const struct fmt_spec *print = var_get_print_format (v);
- char buf[256];
+ char *s = NULL;
if (fmt_is_string (print->type)
|| dict_contains_var (dict, v))
- data_out (case_data (c, v), print, buf);
+ s = data_out (case_data (c, v), dict_get_encoding (dict), print);
else
{
union value case_idx_value;
case_idx_value.f = case_idx;
- data_out (&case_idx_value, print, buf);
+ s = data_out (&case_idx_value, dict_get_encoding (dict), print);
}
fputs (" <TD>", x->file);
- html_put_cell_contents (d, TAB_FIX, ss_buffer (buf, print->w));
+ html_put_cell_contents (d, TAB_FIX, ss_buffer (s, print->w));
+ free (s);
fputs ("</TD>\n", x->file);
}
#include <language/lexer/lexer.h>
#include <language/lexer/variable-parser.h>
#include <libpspp/assertion.h>
+#include <libpspp/i18n.h>
#include <libpspp/compiler.h>
#include <libpspp/ll.h>
#include <libpspp/message.h>
struct pool *pool; /* Stores related data. */
bool eject; /* Eject page before printing? */
bool include_prefix; /* Prefix lines with space? */
- enum legacy_encoding encoding; /* Encoding to use for output. */
+ const char *encoding; /* Encoding to use for output. */
struct dfm_writer *writer; /* Output file, NULL=listing file. */
struct ll_list specs; /* List of struct prt_out_specs. */
size_t record_cnt; /* Number of records to write. */
else
{
ds_put_substring (&trns->line, ds_ss (&spec->string));
- if (trns->encoding != LEGACY_NATIVE)
+ if (0 != strcmp (trns->encoding, LEGACY_NATIVE))
{
size_t length = ds_length (&spec->string);
char *data = ss_data (ds_tail (&trns->line, length));
- legacy_recode (LEGACY_NATIVE, data,
- trns->encoding, data, length);
+ char *s = recode_string (trns->encoding, LEGACY_NATIVE, data, length);
+ memcpy (data, s, length);
+ free (s);
}
}
}
mv_init (&mv, MV_MAX_STRING);
while (!lex_match (lexer, ')'))
{
- char value[MV_MAX_STRING];
+ uint8_t value[MV_MAX_STRING];
size_t length;
if (!lex_force_string (lexer))
for (i = 0; i < split_cnt; i++)
{
const struct variable *v = split[i];
- char temp_buf[80];
+ char *s;
const char *val_lab;
const struct fmt_spec *print = var_get_print_format (v);
tab_text_format (t, 0, i + 1, TAB_LEFT, "%s", var_get_name (v));
- data_out (case_data (c, v), print, temp_buf);
- temp_buf[print->w] = 0;
-
- tab_text_format (t, 1, i + 1, 0, "%.*s", print->w, temp_buf);
+ s = data_out (case_data (c, v), dict_get_encoding (dict), print);
+ tab_text_format (t, 1, i + 1, 0, "%.*s", print->w, s);
+ free (s);
+
val_lab = var_lookup_value_label (v, case_data (c, v));
if (val_lab)
tab_text (t, 2, i + 1, TAB_LEFT, val_lab);
function NUMBER (string s, ni_format f)
{
union value out;
- data_in (ss_head (s, f->w), LEGACY_NATIVE, f->type, f->d, 0, 0, &out, 0);
+ data_in (ss_head (s, f->w), LEGACY_NATIVE, f->type, f->d, 0, 0, NULL, &out, 0);
return out.f;
}
{
union value v;
struct substring dst;
+ char *s;
v.f = x;
- dst = alloc_string (e, f->w);
+
assert (!fmt_is_string (f->type));
- data_out (&v, f, dst.string);
+ s = data_out (&v, LEGACY_NATIVE, f);
+ dst = alloc_string (e, strlen (s));
+ strcpy (dst.string, s);
+ free (s);
return dst;
}
else if (lex_token (lexer) == T_STRING && format != NULL)
{
union value v;
+ assert (! (fmt_get_category (*format) & ( FMT_CAT_STRING )));
data_in (ds_ss (lex_tokstr (lexer)), LEGACY_NATIVE,
- *format, 0, 0, 0, &v, 0);
+ *format, 0, 0, 0, NULL, &v, 0);
lex_get (lexer);
*x = v.f;
if (*x == SYSMIS)
struct crosstabs_proc
{
+ const struct dictionary *dict;
enum { INTEGER, GENERAL } mode;
enum mv_class exclude;
bool pivot;
init_proc (struct crosstabs_proc *proc, struct dataset *ds)
{
const struct variable *wv = dict_get_weight (dataset_dict (ds));
+ proc->dict = dataset_dict (ds);
proc->bad_warn = true;
proc->variables = NULL;
proc->n_variables = 0;
{
const struct variable *var = pt->const_vars[i];
size_t ofs;
+ char *s = NULL;
ds_put_format (&title, ", %s=", var_get_name (var));
/* Insert the formatted value of the variable, then trim
leading spaces in what was just inserted. */
ofs = ds_length (&title);
- data_out (&pt->const_values[i], var_get_print_format (var),
- ds_put_uninit (&title, var_get_width (var)));
+ s = data_out (&pt->const_values[i], dict_get_encoding (proc->dict), var_get_print_format (var));
+ ds_put_cstr (&title, s);
+ free (s);
ds_remove (&title, ofs, ss_cspan (ds_substr (&title, ofs, SIZE_MAX),
ss_cstr (" ")));
}
return;
}
- s.string = tab_alloc (table, print->w);
- data_out (v, print, s.string);
- s.length = print->w;
+ s = ss_cstr (data_out_pool (v, dict_get_encoding (proc->dict), print,
+ table->container));
if (proc->exclude == MV_NEVER && var_is_num_missing (var, v->f, MV_USER))
s.string[s.length++] = 'M';
while (s.length && *s.string == ' ')
additionally suffixed with a letter `M'. */
static void
format_cell_entry (struct tab_table *table, int c, int r, double value,
- char suffix, bool mark_missing)
+ char suffix, bool mark_missing, const struct dictionary *dict)
{
const struct fmt_spec f = {FMT_F, 10, 1};
union value v;
struct substring s;
- s.length = 10;
- s.string = tab_alloc (table, 16);
v.f = value;
- data_out (&v, &f, s.string);
+ s = ss_cstr (data_out_pool (&v, dict_get_encoding (dict), &f, table->container));
+
while (*s.string == ' ')
{
s.length--;
default:
NOT_REACHED ();
}
- format_cell_entry (table, c, i, v, suffix, mark_missing);
+ format_cell_entry (table, c, i, v, suffix, mark_missing, proc->dict);
}
mp++;
NOT_REACHED ();
}
- format_cell_entry (table, pt->n_cols, 0, v, suffix, mark_missing);
+ format_cell_entry (table, pt->n_cols, 0, v, suffix, mark_missing, proc->dict);
tab_next_row (table);
}
}
NOT_REACHED ();
}
- format_cell_entry (table, c, i, v, suffix, mark_missing);
+ format_cell_entry (table, c, i, v, suffix, mark_missing, proc->dict);
}
last_row = i;
}
struct hsh_table *data; /* Undifferentiated data. */
struct freq_mutable *valid; /* Valid freqs. */
int n_valid; /* Number of total freqs. */
+ const struct dictionary *dict; /* The dict from whence entries in the table
+ come */
struct freq_mutable *missing; /* Missing freqs. */
int n_missing; /* Number of missing freqs. */
}
vf = var_attach_aux (v, xmalloc (sizeof *vf), var_dtor_free);
vf->tab.valid = vf->tab.missing = NULL;
+ vf->tab.dict = dataset_dict (ds);
vf->n_groups = 0;
vf->groups = NULL;
vf->width = var_get_width (v);
tab_text (t, 0, r, TAB_LEFT, label);
}
- tab_value (t, 0 + lab, r, TAB_NONE, &f->value, &vf->print);
+ tab_value (t, 0 + lab, r, TAB_NONE, &f->value, ft->dict, &vf->print);
tab_double (t, 1 + lab, r, TAB_NONE, f->count, wfmt);
tab_double (t, 2 + lab, r, TAB_NONE, percent, NULL);
tab_double (t, 3 + lab, r, TAB_NONE, valid_percent, NULL);
tab_text (t, 0, r, TAB_LEFT, label);
}
- tab_value (t, 0 + lab, r, TAB_NONE, &f->value, &vf->print);
+ tab_value (t, 0 + lab, r, TAB_NONE, &f->value, ft->dict, &vf->print);
tab_double (t, 1 + lab, r, TAB_NONE, f->count, wfmt);
tab_double (t, 2 + lab, r, TAB_NONE,
f->count / ft->total_cases * 100.0, NULL);
percent = f->count / ft->total_cases * 100.0;
cum_total += f->count / ft->valid_cases * 100.0;
- tab_value (t, 0, r, TAB_NONE, &f->value, &vf->print);
+ tab_value (t, 0, r, TAB_NONE, &f->value, ft->dict, &vf->print);
tab_double (t, 1, r, TAB_NONE, f->count, wfmt);
tab_double (t, 2, r, TAB_NONE, percent, NULL);
tab_double (t, 3, r, TAB_NONE, cum_total, NULL);
}
for (; f < &ft->valid[n_categories]; f++)
{
- tab_value (t, 0, r, TAB_NONE, &f->value, &vf->print);
+ tab_value (t, 0, r, TAB_NONE, &f->value, ft->dict, &vf->print);
tab_double (t, 1, r, TAB_NONE, f->count, wfmt);
tab_double (t, 2, r, TAB_NONE,
f->count / ft->total_cases * 100.0, NULL);
{
struct pool *pool;
+
+
/* Variable types, for convenience. */
enum val_type src_type; /* src_vars[*] type. */
enum val_type dst_type; /* dst_vars[*] type. */
/* Variables. */
const struct variable **src_vars; /* Source variables. */
const struct variable **dst_vars; /* Destination variables. */
+ const struct dictionary *dst_dict; /* Dictionary of dst_vars */
char **dst_names; /* Name of dest variables, if they're new. */
size_t var_cnt; /* Number of variables. */
{
size_t i;
+ trns->dst_dict = dict;
+
for (i = 0; i < trns->var_cnt; i++)
{
const struct variable **var = &trns->dst_vars[i];
/* Returns the output mapping in TRNS for an input of VALUE with
the given WIDTH, or a null pointer if there is no mapping. */
static const struct map_out *
-find_src_string (struct recode_trns *trns, const char *value,
+find_src_string (struct recode_trns *trns, const uint8_t *value,
const struct variable *src_var)
{
struct mapping *m;
msg_disable ();
match = data_in (ss_buffer (value, width), LEGACY_NATIVE,
- FMT_F, 0, 0, 0, &uv, 0);
+ FMT_F, 0, 0, 0, trns->dst_dict, &uv, 0);
msg_enable ();
out->value.f = uv.f;
break;
#include "assertion.h"
#include "hmapx.h"
#include "hash-functions.h"
+#include "pool.h"
#include "i18n.h"
size_t hash;
struct hmapx_node *node;
struct converter *converter;
+ assert (fromcode);
hash = hash_string (tocode, hash_string (fromcode, 0));
HMAPX_FOR_EACH_WITH_HASH (converter, node, hash, &map)
return converter->conv;
}
-/* Return a string based on TEXT converted according to HOW.
+/* Converts TEXT from encoding FROM to encoding TO and returns the
+   converted string.  This is a thin wrapper that forwards all of its
+   arguments to recode_string_pool with a null pool.  Callers
+   elsewhere in this change release the result with free(). */
+char *
+recode_string (const char *to, const char *from,
+ const char *text, int length)
+{
+ return recode_string_pool (to, from, text, length, NULL);
+}
+
+
+/* Return a string based on TEXT which must be encoded using FROM.
+ The returned string will be encoded in TO.
If length is not -1, then it must be the number of bytes in TEXT.
The returned string must be freed when no longer required.
*/
char *
-recode_string (const char *to, const char *from,
- const char *text, int length)
+recode_string_pool (const char *to, const char *from,
+ const char *text, int length, struct pool *pool)
{
char *outbuf = 0;
size_t outbufferlength;
if ( length == -1 )
length = strlen(text);
-
if (to == NULL)
to = default_encoding;
if ( outbufferlength > length)
break;
- outbuf = xmalloc(outbufferlength);
+ outbuf = pool_malloc (pool, outbufferlength);
op = outbuf;
outbytes = outbufferlength;
case E2BIG:
free (outbuf);
outbufferlength <<= 1;
- outbuf = xmalloc (outbufferlength);
+ outbuf = pool_malloc (pool, outbufferlength);
op = outbuf;
outbytes = outbufferlength;
inbytes = length;
if (outbytes == 0 )
{
char *const oldaddr = outbuf;
- outbuf = xrealloc (outbuf, outbufferlength + 1);
+ outbuf = pool_realloc (pool, outbuf, outbufferlength + 1);
op += (outbuf - oldaddr) ;
}
#define UTF8 "UTF-8"
-char * recode_string (const char *to, const char *from,
+struct pool;
+
+char *recode_string_pool (const char *to, const char *from,
+ const char *text, int length, struct pool *pool);
+
+char *recode_string (const char *to, const char *from,
const char *text, int len);
#include <config.h>
#include <libpspp/legacy-encoding.h>
-
-#include "str.h"
-
-static const char ascii_to_ebcdic[256];
-static const char ebcdic_to_ascii[256];
-
-void
-legacy_recode (enum legacy_encoding from, const char *src,
- enum legacy_encoding to, char *dst,
- size_t size)
-{
- if (from != to)
- {
- const char *table;
- size_t i;
-
- table = from == LEGACY_ASCII ? ascii_to_ebcdic : ebcdic_to_ascii;
- for (i = 0; i < size; i++)
- dst[i] = table[(unsigned char) src[i]];
- }
- else
- {
- if (src != dst)
- memcpy (dst, src, size);
- }
-}
+#include <libpspp/i18n.h>
+#include <stdlib.h>
char
-legacy_to_native (enum legacy_encoding from, char c)
+legacy_to_native (const char *from, char c)
{
- legacy_recode (from, &c, LEGACY_NATIVE, &c, 1);
- return c;
+  /* Recode the single byte C from encoding FROM into LEGACY_NATIVE,
+     keep the first byte of the result and release the temporary. */
+  char *recoded = recode_string (LEGACY_NATIVE, from, &c, 1);
+  const char native = recoded[0];
+
+  free (recoded);
+  return native;
}
char
-legacy_from_native (enum legacy_encoding to, char c)
+legacy_from_native (const char *to, char c)
{
- legacy_recode (LEGACY_NATIVE, &c, to, &c, 1);
- return c;
+  /* Recode the single byte C from LEGACY_NATIVE into encoding TO,
+     keep the first byte of the result and release the temporary. */
+  char *recoded = recode_string (to, LEGACY_NATIVE, &c, 1);
+  const char converted = recoded[0];
+
+  free (recoded);
+  return converted;
}
-
-static const char ascii_to_ebcdic[256] =
- {
- 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f,
- 0x16, 0x05, 0x25, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
- 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26,
- 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
- 0x40, 0x5a, 0x7f, 0x7b, 0x5b, 0x6c, 0x50, 0x7d,
- 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
- 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
- 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,
- 0x7c, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
- 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
- 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
- 0xe7, 0xe8, 0xe9, 0xad, 0xe0, 0xbd, 0x9a, 0x6d,
- 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
- 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
- 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6,
- 0xa7, 0xa8, 0xa9, 0xc0, 0x4f, 0xd0, 0x5f, 0x07,
- 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17,
- 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x09, 0x0a, 0x1b,
- 0x30, 0x31, 0x1a, 0x33, 0x34, 0x35, 0x36, 0x08,
- 0x38, 0x39, 0x3a, 0x3b, 0x04, 0x14, 0x3e, 0xe1,
- 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
- 0x49, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
- 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
- 0x68, 0x69, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75,
- 0x76, 0x77, 0x78, 0x80, 0x8a, 0x8b, 0x8c, 0x8d,
- 0x8e, 0x8f, 0x90, 0x6a, 0x9b, 0x9c, 0x9d, 0x9e,
- 0x9f, 0xa0, 0xaa, 0xab, 0xac, 0x4a, 0xae, 0xaf,
- 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
- 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xa1, 0xbe, 0xbf,
- 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xda, 0xdb,
- 0xdc, 0xdd, 0xde, 0xdf, 0xea, 0xeb, 0xec, 0xed,
- 0xee, 0xef, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
- };
-
-static const char ebcdic_to_ascii[256] =
- {
- 0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f,
- 0x97, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
- 0x10, 0x11, 0x12, 0x13, 0x9d, 0x85, 0x08, 0x87,
- 0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d, 0x1e, 0x1f,
- 0x80, 0x81, 0x82, 0x83, 0x84, 0x0a, 0x17, 0x1b,
- 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07,
- 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
- 0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a,
- 0x20, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6,
- 0xa7, 0xa8, 0xd5, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
- 0x26, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
- 0xb0, 0xb1, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x7e,
- 0x2d, 0x2f, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
- 0xb8, 0xb9, 0xcb, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
- 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1,
- 0xc2, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
- 0xc3, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
- 0x68, 0x69, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9,
- 0xca, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70,
- 0x71, 0x72, 0x5e, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
- 0xd1, 0xe5, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
- 0x79, 0x7a, 0xd2, 0xd3, 0xd4, 0x5b, 0xd6, 0xd7,
- 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
- 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0x5d, 0xe6, 0xe7,
- 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
- 0x48, 0x49, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed,
- 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50,
- 0x51, 0x52, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3,
- 0x5c, 0x9f, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
- 0x59, 0x5a, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9,
- 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
- 0x38, 0x39, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
- };
-
#ifndef LIBPSPP_LEGACY_ENCODING
#define LIBPSPP_LEGACY_ENCODING 1
-#include <stddef.h>
#include <libpspp/compiler.h>
-/* A legacy character encoding.
- This exists only to handle the specific legacy EBCDIC-to-ASCII
- recoding that MODE=360 file handles perform. */
-enum legacy_encoding
- {
- LEGACY_ASCII, /* ASCII or similar character set. */
- LEGACY_EBCDIC, /* IBM EBCDIC character set. */
-
- /* Native character set. */
#if 'A' == 0x41
- LEGACY_NATIVE = LEGACY_ASCII
+#define LEGACY_NATIVE "ASCII"
#elif 'A' == 0xc1
- LEGACY_NATIVE = LEGACY_EBCDIC
+#define LEGACY_NATIVE "EBCDIC-US"
#else
#error Cannot detect native character set.
#endif
- };
-void legacy_recode (enum legacy_encoding, const char *src,
- enum legacy_encoding, char *dst, size_t);
-char legacy_to_native (enum legacy_encoding from, char) PURE_FUNCTION;
-char legacy_from_native (enum legacy_encoding to, char) PURE_FUNCTION;
+char legacy_to_native (const char *from, char) PURE_FUNCTION;
+char legacy_from_native (const char *to, char) PURE_FUNCTION;
+
#endif /* libpspp/legacy-encoding.h */
free ((char *) rel);
}
}
+
+
+\f
+
+/* Operations on uint8_t "strings" */
+
+/* Fills DST, a buffer of DST_SIZE bytes, from SRC, a buffer of
+   SRC_SIZE bytes.  At most DST_SIZE bytes of SRC are copied; when
+   SRC is shorter than DST, the remaining bytes of DST are set to
+   PAD.  The copy is done with memmove. */
+void
+u8_buf_copy_rpad (uint8_t *dst, size_t dst_size,
+                  const uint8_t *src, size_t src_size,
+                  char pad)
+{
+  const size_t n_copied = src_size < dst_size ? src_size : dst_size;
+
+  memmove (dst, src, n_copied);
+  if (n_copied < dst_size)
+    memset (dst + n_copied, pad, dst_size - n_copied);
+}
#include <assert.h>
#include <stdarg.h>
#include <stdbool.h>
+#include <stdint.h>
#include <stdio.h>
#include <string.h>
/* calls relocate from gnulib on ST */
void ds_relocate (struct string *st);
+
+void u8_buf_copy_rpad (uint8_t *dst, size_t dst_size,
+ const uint8_t *src, size_t src_size,
+ char pad);
+
+
#endif /* str_h */
if (var != NULL)
{
int val_width = 1;
- char *val;
+ uint8_t *val;
result = xmalloc (sizeof (*result));
result->intr = var;
#include <data/data-out.h>
#include <data/format.h>
#include <data/value.h>
+#include <data/dictionary.h>
#include <libpspp/assertion.h>
#include <libpspp/compiler.h>
#include <libpspp/misc.h>
from V, displayed with format spec F. */
void
tab_value (struct tab_table *table, int c, int r, unsigned char opt,
- const union value *v, const struct fmt_spec *f)
+ const union value *v, const struct dictionary *dict,
+ const struct fmt_spec *f)
{
char *contents;
}
#endif
- contents = pool_alloc (table->container, f->w);
- table->cc[c + r * table->cf] = ss_buffer (contents, f->w);
- table->ct[c + r * table->cf] = opt;
+ contents = data_out_pool (v, dict_get_encoding (dict), f, table->container);
- data_out (v, f, contents);
+ table->cc[c + r * table->cf] = ss_cstr (contents);
+ table->ct[c + r * table->cf] = opt;
}
/* Sets cell (C,R) in TABLE, with options OPT, to have value VAL
tab_fixed (struct tab_table *table, int c, int r, unsigned char opt,
double val, int w, int d)
{
- char *contents;
- char buf[40], *cp;
+ char *s, *cp;
struct fmt_spec f;
union value double_value;
#endif
double_value.f = val;
- data_out (&double_value, &f, buf);
+ s = data_out_pool (&double_value, LEGACY_NATIVE, &f, table->container);
- cp = buf;
- while (isspace ((unsigned char) *cp) && cp < &buf[w])
+ cp = s;
+ while (isspace ((unsigned char) *cp) && cp < &s[w])
cp++;
- f.w = w - (cp - buf);
+ f.w = w - (cp - s);
- contents = pool_alloc (table->container, f.w);
- table->cc[c + r * table->cf] = ss_buffer (contents, f.w);
+ table->cc[c + r * table->cf] = ss_buffer (cp, f.w);
table->ct[c + r * table->cf] = opt;
- memcpy (contents, cp, f.w);
}
/* Sets cell (C,R) in TABLE, with options OPT, to have value VAL as
tab_double (struct tab_table *table, int c, int r, unsigned char opt,
double val, const struct fmt_spec *fmt)
{
- int w;
- char *contents;
- char buf[40], *cp;
-
- union value double_value;
+ struct substring ss;
+ union value double_value ;
assert (table != NULL);
#endif
double_value.f = val;
- data_out (&double_value, fmt, buf);
+ ss = ss_cstr (data_out_pool (&double_value, LEGACY_NATIVE, fmt, table->container));
- cp = buf;
- while (isspace ((unsigned char) *cp) && cp < &buf[fmt->w])
- cp++;
- w = fmt->w - (cp - buf);
+ ss_ltrim (&ss, ss_cstr (" "));
- contents = pool_alloc (table->container, w);
- table->cc[c + r * table->cf] = ss_buffer (contents, w);
+ table->cc[c + r * table->cf] = ss;
table->ct[c + r * table->cf] = opt;
- memcpy (contents, cp, w);
}
/* Cells. */
struct fmt_spec;
+struct dictionary;
union value;
void tab_value (struct tab_table *, int c, int r, unsigned char opt,
- const union value *, const struct fmt_spec *);
+ const union value *, const struct dictionary *dict,
+ const struct fmt_spec *);
void tab_fixed (struct tab_table *, int c, int r, unsigned char opt,
double v, int w, int d);
gint *idx;
struct variable *var;
GtkTreeIter dict_iter;
- gchar *name;
GtkTextBuffer *buffer;
g_return_if_fail (GTK_IS_TEXT_VIEW (dest));
gtk_tree_path_free (path);
- name = recode_string (UTF8, psppire_dict_encoding (dict),
- var_get_name (var),
- -1);
-
buffer = gtk_text_view_get_buffer (GTK_TEXT_VIEW (dest));
erase_selection (buffer);
- gtk_text_buffer_insert_at_cursor (buffer, name, -1);
+ gtk_text_buffer_insert_at_cursor (buffer, var_get_name (var), -1);
- g_free (name);
}
gpointer data)
{
PsppireDict *dict = data;
- struct variable *var;
- gchar *name;
-
- var = get_selected_variable (tree_model, iter, dict);
+ const struct variable *var = get_selected_variable (tree_model, iter, dict);
- name = recode_string (UTF8, psppire_dict_encoding (dict),
- var_get_name (var), -1);
- g_object_set (cell, "text", name, NULL);
- g_free (name);
+ g_object_set (cell, "text", var_get_name (var), NULL);
}
gint *idx;
struct variable *var;
GtkTreeIter dict_iter;
- gchar *name;
g_return_if_fail (GTK_IS_ENTRY(dest));
gtk_tree_path_free (path);
- name = recode_string (UTF8, psppire_dict_encoding (PSPPIRE_DICT (dict)),
- var_get_name (var), -1);
- gtk_entry_set_text (GTK_ENTRY (dest), name);
- g_free (name);
+ gtk_entry_set_text (GTK_ENTRY (dest), var_get_name (var));
}
PsppireSelector *selector)
{
gboolean result;
- gchar *name;
GtkTreeIter dict_iter;
GtkTreeModel *dict;
struct variable *var;
gint dict_index;
gint *indeces;
GtkTreePath *path;
- const gchar *text = gtk_entry_get_text (GTK_ENTRY (selector->dest));
+ const gchar *text = gtk_entry_get_text (GTK_ENTRY (selector->dest));
get_base_model (model, iter, &dict, &dict_iter);
gtk_tree_path_free (path);
- name = recode_string (UTF8, psppire_dict_encoding (PSPPIRE_DICT (dict)),
- var_get_name (var), -1);
- result = ( 0 == strcmp (text, name));
- g_free (name);
+ result = ( 0 == strcmp (text, var_get_name (var) ));
return result;
}
{
const struct variable *var;
enum string_cmp_flags flags;
+ const PsppireDict *dict;
bool (*compare) (const struct comparator *,
const union value *);
string_value_compare (const struct comparator *cmptr,
const union value *val)
{
+ bool found;
+ char *text;
const struct string_comparator *ssc =
(const struct string_comparator *) cmptr;
int width = var_get_width (cmptr->var);
- const char *text = value_str (val, width);
-
+ g_return_val_if_fail (width > 0, false);
assert ( ! (cmptr->flags & STR_CMP_LABELS));
- g_return_val_if_fail (width > 0, false);
+ text = value_to_text (*val, cmptr->dict, *var_get_write_format (cmptr->var));
if ( cmptr->flags & STR_CMP_SUBSTR)
- return (NULL != g_strstr_len (text, width, ssc->pattern));
+ found = (NULL != g_strstr_len (text, width, ssc->pattern));
else
- return (0 == strncmp (text, ssc->pattern, width));
+ found = (0 == strncmp (text, ssc->pattern, width));
+
+ free (text);
+ return found;
}
g_return_val_if_fail (width > 0, false);
+ text = value_to_text (*val, cmptr->dict, *var_get_write_format (cmptr->var));
/* We must remove trailing whitespace, otherwise $ will not match where
one would expect */
- text = g_strndup (value_str (val, width), width);
g_strchomp (text);
retval = (0 == regexec (&rec->re, text, 0, 0, 0));
static struct comparator *
-value_comparator_create (const struct variable *var, const char *target)
+value_comparator_create (const struct variable *var, const PsppireDict *dict, const char *target)
{
const struct fmt_spec *fmt;
int width ;
cmptr->var = var;
cmptr->compare = value_compare ;
cmptr->destroy = cmptr_value_destroy;
+ cmptr->dict = dict;
width = var_get_width (var);
fmt = var_get_write_format (var);
value_init (&vc->pattern, width);
- if ( ! data_in (ss_cstr (target),
- LEGACY_NATIVE,
- fmt->type,
- 0, 0, 0,
- &vc->pattern, width) )
- {
- value_destroy (&vc->pattern, width);
- free (vc);
- return NULL;
- }
+ text_to_value (target, &vc->pattern, dict, *var_get_write_format (var) );
return cmptr;
}
static struct comparator *
-string_comparator_create (const struct variable *var, const char *target,
+string_comparator_create (const struct variable *var, const PsppireDict *dict,
+ const char *target,
enum string_cmp_flags flags)
{
struct string_comparator *ssc = xzalloc (sizeof (*ssc));
cmptr->flags = flags;
cmptr->var = var;
+ cmptr->dict = dict;
if ( flags & STR_CMP_LABELS)
cmptr->compare = string_label_compare;
static struct comparator *
-regexp_comparator_create (const struct variable *var, const char *target,
+regexp_comparator_create (const struct variable *var, const PsppireDict *dict, const char *target,
enum string_cmp_flags flags)
{
int code;
cmptr->flags = flags;
cmptr->var = var;
+ cmptr->dict = dict;
cmptr->compare = (flags & STR_CMP_LABELS)
? regexp_label_compare : regexp_value_compare ;
static struct comparator *
-comparator_factory (const struct variable *var, const char *str,
+comparator_factory (const struct variable *var, const PsppireDict *dict, const char *str,
enum string_cmp_flags flags)
{
+  /* FLAGS selects the comparator: STR_CMP_REGEXP takes precedence,
+     then STR_CMP_SUBSTR or STR_CMP_LABELS; with none of these set a
+     value comparator is built.  VAR, DICT and STR are forwarded to
+     whichever constructor is chosen. */
if ( flags & STR_CMP_REGEXP )
- return regexp_comparator_create (var, str, flags);
+ return regexp_comparator_create (var, dict, str, flags);
if ( flags & (STR_CMP_SUBSTR | STR_CMP_LABELS) )
- return string_comparator_create (var, str, flags);
+ return string_comparator_create (var, dict, str, flags);
- return value_comparator_create (var, str);
+ return value_comparator_create (var, dict, str);
}
casenumber i;
const struct casenum_iterator *ip = get_iteration_params (fd);
struct comparator *cmptr =
- comparator_factory (var, target_string, flags);
+ comparator_factory (var, fd->dict, target_string, flags);
value_init (&val, width);
if ( ! cmptr)
/* Formats a value according to FORMAT
The returned string must be freed when no longer required */
gchar *
-value_to_text (union value v, struct fmt_spec format)
+value_to_text (union value v, const PsppireDict *dict, struct fmt_spec format)
{
gchar *s = 0;
- s = g_new (gchar, format.w + 1);
- data_out (&v, &format, s);
- s[format.w]='\0';
+ s = data_out (&v, dict_get_encoding (dict->dict), &format);
g_strchug (s);
return s;
gboolean
text_to_value (const gchar *text, union value *v,
+ const PsppireDict *dict,
struct fmt_spec format)
{
bool ok;
}
msg_disable ();
- ok = data_in (ss_cstr (text), LEGACY_NATIVE, format.type, 0, 0, 0,
+ ok = data_in (ss_cstr (text), UTF8, format.type, 0, 0, 0,
+ dict->dict,
v, fmt_var_width (&format));
msg_enable ();
#include <gtk/gtk.h>
-
+#include "psppire-dict.h"
void paste_syntax_in_new_window (const gchar *syntax);
struct fmt_spec;
+
/* Formats a value according to FORMAT
The returned string must be freed when no longer required */
-gchar * value_to_text (union value v, struct fmt_spec format);
+gchar * value_to_text (union value v, const PsppireDict *dict, struct fmt_spec format);
gboolean text_to_value (const gchar *text, union value *v,
+ const PsppireDict *dict,
struct fmt_spec format);
GObject *get_object_assert (GtkBuilder *builder, const gchar *name, GType type);
continue;
}
- if ( text_to_value (text, &v, *write_spec))
+ if ( text_to_value (text, &v,
+ dialog->dict, *write_spec))
{
nvals++;
mv_add_value (&dialog->mvl, &v);
const gchar *low_text = gtk_entry_get_text (GTK_ENTRY (dialog->low));
const gchar *high_text = gtk_entry_get_text (GTK_ENTRY (dialog->high));
- if ( text_to_value (low_text, &low_val, *write_spec)
+ if ( text_to_value (low_text, &low_val, dialog->dict, *write_spec)
&&
- text_to_value (high_text, &high_val, *write_spec) )
+ text_to_value (high_text, &high_val, dialog->dict, *write_spec) )
{
if ( low_val.f > high_val.f )
{
{
union value discrete_val;
if ( !text_to_value (discrete_text, &discrete_val,
+ dialog->dict,
*write_spec))
{
err_dialog (_("Incorrect value for variable type"),
gchar *high_text;
mv_get_range (&dialog->mvl, &low.f, &high.f);
- low_text = value_to_text (low, *write_spec);
- high_text = value_to_text (high, *write_spec);
+
+ low_text = value_to_text (low, dialog->dict, *write_spec);
+ high_text = value_to_text (high, dialog->dict, *write_spec);
gtk_entry_set_text (GTK_ENTRY (dialog->low), low_text);
gtk_entry_set_text (GTK_ENTRY (dialog->high), high_text);
if ( mv_has_value (&dialog->mvl))
{
gchar *text;
- text = value_to_text (*mv_get_value (&dialog->mvl, 0), *write_spec);
+ text = value_to_text (*mv_get_value (&dialog->mvl, 0), dialog->dict, *write_spec);
gtk_entry_set_text (GTK_ENTRY (dialog->discrete), text);
g_free (text);
}
{
gchar *text ;
- text = value_to_text (*mv_get_value (&dialog->mvl, i),
+ text = value_to_text (*mv_get_value (&dialog->mvl, i), dialog->dict,
*write_spec);
gtk_entry_set_text (GTK_ENTRY (dialog->mv[i]), text);
g_free (text);
/* The variable whose missing values are to be updated */
struct variable *pv;
+ /* The dictionary to which that variable belongs */
+ PsppireDict *dict;
+
/* local copy */
struct missing_values mvl;
gchar *text = g_strdup_printf ("%d: %s", row + FIRST_CASE_NUMBER,
var_get_name (var));
- gchar *s = recode_string (UTF8,
- psppire_dict_encoding (data_store->dict),
- text, -1);
- g_free (text);
-
- gtk_entry_set_text (GTK_ENTRY (de->cell_ref_entry), s);
+ gtk_entry_set_text (GTK_ENTRY (de->cell_ref_entry), text);
- g_free (s);
+ g_free (text);
}
else
goto blank_entry;
/* Formats the value of variable V in case CC with V's print format and
   DICT's encoding, then appends the formatted text to STRING. */
static void
-data_out_g_string (GString *string, const struct variable *v,
+data_out_g_string (GString *string, const struct dictionary *dict,
+ const struct variable *v,
const struct ccase *cc)
{
- char *buf ;
-
const struct fmt_spec *fs = var_get_print_format (v);
const union value *val = case_data (cc, v);
- buf = xzalloc (fs->w);
- data_out (val, fs, buf);
+ char *s = data_out (val, dict_get_encoding (dict), fs); /* data_out now returns the formatted text */
- g_string_append_len (string, buf, fs->w);
+ g_string_append (string, s);
- g_free (buf);
+ g_free (s); /* NOTE(review): other callers in this change release data_out results with free() - confirm g_free is the matching deallocator */
}
static GString *
for (c = 0 ; c < var_cnt ; ++c)
{
const struct variable *v = dict_get_var (clip_dict, c);
- data_out_g_string (string, v, cc);
+ data_out_g_string (string, clip_dict, v, cc);
if ( c < val_cnt - 1 )
g_string_append (string, "\t");
}
{
const struct variable *v = dict_get_var (clip_dict, c);
g_string_append (string, "<td>");
- data_out_g_string (string, v, cc);
+ data_out_g_string (string, clip_dict, v, cc);
g_string_append (string, "</td>\n");
}
char *text;
const struct fmt_spec *fp ;
const struct variable *pv ;
+ const struct dictionary *dict;
union value v;
int width;
- GString *s;
g_return_val_if_fail (store->dict, NULL);
g_return_val_if_fail (store->datasheet, NULL);
+ dict = store->dict->dict;
+
if (column >= psppire_dict_get_var_cnt (store->dict))
return NULL;
if (label)
{
value_destroy (&v, width);
- return recode_string (UTF8, psppire_dict_encoding (store->dict),
- label, -1);
+ return g_strdup (label);
}
}
fp = var_get_write_format (pv);
- s = g_string_sized_new (fp->w + 1);
- g_string_set_size (s, fp->w);
-
- memset (s->str, 0, fp->w);
-
- g_assert (fp->w == s->len);
-
- /* Converts binary value V into printable form in the exactly
- FP->W character in buffer S according to format specification
- FP. No null terminator is appended to the buffer. */
- data_out (&v, fp, s->str);
-
- text = recode_string (UTF8, psppire_dict_encoding (store->dict),
- s->str, fp->w);
- g_string_free (s, TRUE);
+ text = data_out (&v, dict_get_encoding (dict), fp);
g_strchomp (text);
psppire_data_store_set_string (PsppireDataStore *store,
const gchar *text, glong row, glong col)
{
- gchar *s;
glong n_cases;
const struct variable *pv = psppire_dict_get_variable (store->dict, col);
if ( NULL == pv)
if (row == n_cases)
psppire_data_store_insert_new_case (store, row);
- s = recode_string (psppire_dict_encoding (store->dict), UTF8, text, -1);
-
psppire_data_store_data_in (store, row,
- var_get_case_index (pv), ss_cstr (s),
+ var_get_case_index (pv), ss_cstr (text),
var_get_write_format (pv));
- free (s);
psppire_sheet_model_range_changed (PSPPIRE_SHEET_MODEL (store), row, col, row, col);
static gchar *
get_row_button_label (const PsppireSheetModel *model, gint unit)
{
- PsppireDataStore *ds = PSPPIRE_DATA_STORE (model);
- gchar *s = g_strdup_printf (_("%d"), unit + FIRST_CASE_NUMBER);
-
- gchar *text = recode_string (UTF8, psppire_dict_encoding (ds->dict),
- s, -1);
+ // PsppireDataStore *ds = PSPPIRE_DATA_STORE (model);
- g_free (s);
-
- return text;
+ return g_strdup_printf (_("%d"), unit + FIRST_CASE_NUMBER);
}
static gchar *
get_column_subtitle (const PsppireSheetModel *model, gint col)
{
- gchar *text;
const struct variable *v ;
PsppireDataStore *ds = PSPPIRE_DATA_STORE (model);
if ( ! var_has_label (v))
return NULL;
- text = recode_string (UTF8, psppire_dict_encoding (ds->dict),
- var_get_label (v), -1);
-
- return text;
+ return xstrdup (var_get_label (v));
}
static gchar *
get_column_button_label (const PsppireSheetModel *model, gint col)
{
- gchar *text;
struct variable *pv ;
PsppireDataStore *ds = PSPPIRE_DATA_STORE (model);
pv = psppire_dict_get_variable (ds->dict, col);
- text = recode_string (UTF8, psppire_dict_encoding (ds->dict),
- var_get_name (pv), -1);
+ if (NULL == pv)
+ return NULL;
- return text;
+ return xstrdup (var_get_name (pv));
}
static gboolean
int width;
bool ok;
+ PsppireDict *dict;
+
g_return_val_if_fail (ds, FALSE);
g_return_val_if_fail (ds->datasheet, FALSE);
g_return_val_if_fail (idx < datasheet_get_n_columns (ds->datasheet), FALSE);
+ dict = ds->dict;
+
width = fmt_var_width (fmt);
g_return_val_if_fail (caseproto_get_width (
datasheet_get_proto (ds->datasheet), idx) == width,
FALSE);
value_init (&value, width);
ok = (datasheet_get_value (ds->datasheet, casenum, idx, &value)
- && data_in (input, LEGACY_NATIVE, fmt->type, 0, 0, 0, &value, width)
+ && data_in (input, UTF8, fmt->type, 0, 0, 0,
+ dict->dict, &value, width)
&& datasheet_put_value (ds->datasheet, casenum, idx, &value));
value_destroy (&value, width);
{
case DICT_TVM_COL_NAME:
{
- gchar *name = recode_string (UTF8, psppire_dict_encoding (dict),
- var_get_name (var), -1);
g_value_init (value, G_TYPE_STRING);
- g_value_set_string (value, name);
- g_free (name);
+ g_value_set_string (value, var_get_name (var));
}
break;
case DICT_TVM_COL_VAR:
"<span stretch=\"condensed\">%s</span>",
var_get_label (var));
- char *utf8 = recode_string (UTF8, psppire_dict_encoding (dict),
- text, -1);
-
+ g_object_set (cell, "markup", text, NULL);
g_free (text);
- g_object_set (cell, "markup", utf8, NULL);
- g_free (utf8);
}
else
{
- char *name = recode_string (UTF8, psppire_dict_encoding (dict),
- var_get_name (var), -1);
- g_object_set (cell, "text", name, NULL);
- g_free (name);
+ g_object_set (cell, "text", var_get_name (var), NULL);
}
}
return FALSE;
{
- gchar *tip ;
+ const gchar *tip ;
GtkTreeModel *m;
PsppireDict *dict;
dict = PSPPIRE_DICT (m);
if ( PSPPIRE_DICT_VIEW (treeview)->prefer_labels )
- tip = recode_string (UTF8, psppire_dict_encoding (dict),
- var_get_name (var), -1);
+ tip = var_get_name (var);
else
- tip = recode_string (UTF8, psppire_dict_encoding (dict),
- var_get_label (var), -1);
+ tip = var_get_label (var);
gtk_tooltip_set_text (tooltip, tip);
-
- g_free (tip);
}
return TRUE;
vs->missing_val_dialog->pv =
psppire_var_store_get_var (var_store, row);
+ vs->missing_val_dialog->dict = var_store->dict;
+
g_signal_connect_swapped (customEntry,
"clicked",
G_CALLBACK (missing_val_dialog_show),
GtkWidget *toplevel = gtk_widget_get_toplevel (GTK_WIDGET (vs));
vs->val_labs_dialog = val_labs_dialog_create (GTK_WINDOW (toplevel),
- PSPPIRE_SHEET (vs));
+ PSPPIRE_VAR_STORE (psppire_sheet_get_model (PSPPIRE_SHEET (vs))));
+
vs->missing_val_dialog = missing_val_dialog_create (GTK_WINDOW (toplevel));
vs->var_type_dialog = var_type_dialog_create (GTK_WINDOW (toplevel));
switch (col)
{
case PSPPIRE_VAR_STORE_COL_LABEL:
- var_set_label (pv, 0);
+ var_set_label (pv, NULL);
return TRUE;
break;
}
case PSPPIRE_VAR_STORE_COL_NAME:
{
gboolean ok;
- char *s = recode_string (psppire_dict_encoding (var_store->dict),
- UTF8,
- text, -1);
-
- ok = psppire_dict_rename_var (var_store->dict, pv, s);
-
- free (s);
+ ok = psppire_dict_rename_var (var_store->dict, pv, text);
return ok;
}
case PSPPIRE_VAR_STORE_COL_COLUMNS:
break;
case PSPPIRE_VAR_STORE_COL_LABEL:
{
- gchar *s = recode_string (psppire_dict_encoding (var_store->dict),
- UTF8,
- text, -1);
- var_set_label (pv, s);
- free (s);
+ var_set_label (pv, text);
return TRUE;
}
break;
N_("Custom"),
N_("String")
};
+
enum {VT_NUMERIC, VT_COMMA, VT_DOT, VT_SCIENTIFIC, VT_DATE, VT_DOLLAR,
VT_CUSTOM, VT_STRING};
switch (c)
{
case PSPPIRE_VAR_STORE_COL_NAME:
- return recode_string (UTF8, psppire_dict_encoding (dict),
- var_get_name (pv), -1);
+ return xstrdup (var_get_name (pv));
break;
case PSPPIRE_VAR_STORE_COL_TYPE:
{
}
break;
case PSPPIRE_VAR_STORE_COL_LABEL:
- return recode_string (UTF8, psppire_dict_encoding (dict),
- var_get_label (pv), -1);
+ {
+ const char *label = var_get_label (pv);
+ if (label)
+ return xstrdup (label);
+ return NULL;
+ }
break;
case PSPPIRE_VAR_STORE_COL_MISSING:
return g_locale_to_utf8 (gettext (none), -1, 0, 0, err);
else
{
- gchar *ss;
- GString *gstr = g_string_sized_new (10);
const struct val_labs *vls = var_get_value_labels (pv);
const struct val_lab **labels = val_labs_sorted (vls);
const struct val_lab *vl = labels[0];
g_assert (vl);
{
- gchar *const vstr = value_to_text (vl->value, *write_spec);
+ gchar *const vstr = value_to_text (vl->value, dict, *write_spec);
- g_string_printf (gstr, "{%s,\"%s\"}_",
- vstr, val_lab_get_label (vl));
- g_free (vstr);
+ /* Build the cell text from the first value label, then release the
+    temporaries: VSTR has been copied into TEXT, and the LABELS array
+    is no longer needed once the text is built. */
+ gchar *const text = g_strdup_printf ("{%s,\"%s\"}_", vstr, val_lab_get_label (vl));
+
+ g_free (vstr);
+ free (labels);
+ return text;
}
-
- ss = recode_string (UTF8, psppire_dict_encoding (dict),
- gstr->str, gstr->len);
- g_string_free (gstr, TRUE);
- return ss;
}
}
break;
p->data_tree_view = GTK_TREE_VIEW (get_widget_assert (builder, "data"));
p->modified_vars = NULL;
p->modified_var_cnt = 0;
+ p->dict = NULL;
}
/* Frees IA's formats substructure. */
if (field.string != NULL)
{
msg_disable ();
+
if (!data_in (field, LEGACY_NATIVE, in->type, 0, 0, 0,
+ ia->formats.dict,
&val, var_get_width (var)))
{
char fmt_string[FMT_STRING_LEN_MAX + 1];
}
if (outputp != NULL)
{
- char *output = xmalloc (out.w + 1);
- data_out (&val, &out, output);
- output[out.w] = '\0';
- *outputp = output;
+ *outputp = data_out (&val, dict_get_encoding (ia->formats.dict), &out);
}
value_destroy (&val, var_get_width (var));
{
GtkWidget *window;
- PsppireSheet *vs;
+ PsppireVarStore *var_store;
/* The variable to be updated */
struct variable *pv;
text = gtk_entry_get_text (GTK_ENTRY (dialog->value_entry));
text_to_value (text, &v,
+ dialog->var_store->dict,
*var_get_write_format (dialog->pv));
union value v;
text_to_value (text, &v,
+ dialog->var_store->dict,
*var_get_write_format (dialog->pv));
union value v;
text_to_value (val_text, &v,
+ dialog->var_store->dict,
*var_get_write_format (dialog->pv));
val_labs_replace (dialog->labs, &v,
const gchar *text = gtk_entry_get_text (GTK_ENTRY (dialog->value_entry));
text_to_value (text, &v,
+ dialog->var_store->dict,
*var_get_write_format (dialog->pv));
static void
on_select_row (GtkTreeView *treeview, gpointer data)
{
- gchar *labeltext;
struct val_labs_dialog *dialog = data;
union value value;
- const char *label;
+ const char *label = NULL;
gchar *text;
- PsppireVarStore *var_store =
- PSPPIRE_VAR_STORE (psppire_sheet_get_model (dialog->vs));
-
get_selected_tuple (dialog, &value, &label);
- text = value_to_text (value, *var_get_write_format (dialog->pv));
+ text = value_to_text (value, dialog->var_store->dict, *var_get_write_format (dialog->pv));
g_signal_handler_block (GTK_ENTRY (dialog->value_entry),
dialog->value_handler_id);
dialog->change_handler_id);
- labeltext = recode_string (UTF8, psppire_dict_encoding (var_store->dict),
- label, -1);
-
gtk_entry_set_text (GTK_ENTRY (dialog->label_entry),
- labeltext);
- g_free (labeltext);
+ label);
g_signal_handler_unblock (GTK_ENTRY (dialog->label_entry),
dialog->change_handler_id);
/* Create a new dialog box
(there should normally be only one)*/
struct val_labs_dialog *
-val_labs_dialog_create (GtkWindow *toplevel, PsppireSheet *sheet)
+val_labs_dialog_create (GtkWindow *toplevel, PsppireVarStore *var_store)
{
GtkTreeViewColumn *column;
struct val_labs_dialog *dialog = g_malloc (sizeof (*dialog));
+ dialog->var_store = var_store;
dialog->window = get_widget_assert (xml,"val_labs_dialog");
dialog->value_entry = get_widget_assert (xml,"value_entry");
dialog->label_entry = get_widget_assert (xml,"label_entry");
- dialog->vs = sheet;
gtk_window_set_transient_for
(GTK_WINDOW (dialog->window), toplevel);
GtkTreeIter iter;
- PsppireVarStore *var_store =
- PSPPIRE_VAR_STORE (psppire_sheet_get_model (dialog->vs));
-
GtkListStore *list_store = gtk_list_store_new (2,
G_TYPE_STRING,
G_TYPE_DOUBLE);
const struct val_lab *vl = labels[i];
gchar *const vstr =
- value_to_text (vl->value,
+ value_to_text (vl->value, dialog->var_store->dict,
*var_get_write_format (dialog->pv));
- gchar *labeltext =
- recode_string (UTF8,
- psppire_dict_encoding (var_store->dict),
- val_lab_get_label (vl), -1);
-
gchar *const text = g_strdup_printf ("%s = \"%s\"",
- vstr, labeltext);
+ vstr, val_lab_get_label (vl));
gtk_list_store_append (list_store, &iter);
gtk_list_store_set (list_store, &iter,
1, vl->value.f,
-1);
- g_free (labeltext);
g_free (text);
g_free (vstr);
}
#include <gtk/gtk.h>
#include <data/variable.h>
-#include <gtk-contrib/psppire-sheet.h>
-
+//#include <gtk-contrib/psppire-sheet.h>
+#include "psppire-var-store.h"
struct val_labs;
-struct val_labs_dialog * val_labs_dialog_create (GtkWindow *, PsppireSheet *);
+struct val_labs_dialog * val_labs_dialog_create (GtkWindow *, PsppireVarStore *);
void val_labs_dialog_show (struct val_labs_dialog *);
gint i;
for (i = 0 ; i < n; ++i )
{
- mv[i] = value_to_text (*mv_get_value (miss, i), *fmt);
+ mv[i] = value_to_text (*mv_get_value (miss, i), dict, *fmt);
if ( i > 0 )
g_string_append (gstr, ", ");
g_string_append (gstr, mv[i]);
g_free (mv[i]);
}
- s = recode_string (UTF8, psppire_dict_encoding (dict),
- gstr->str, gstr->len);
- g_string_free (gstr, TRUE);
+ s = gstr->str;
+ g_string_free (gstr, FALSE);
}
else
{
union value low, high;
mv_get_range (miss, &low.f, &high.f);
- l = value_to_text (low, *fmt);
- h = value_to_text (high, *fmt);
+ l = value_to_text (low, dict, *fmt);
+ h = value_to_text (high, dict,*fmt);
g_string_printf (gstr, "%s - %s", l, h);
g_free (l);
{
gchar *ss = 0;
- ss = value_to_text (*mv_get_value (miss, 0), *fmt);
+ ss = value_to_text (*mv_get_value (miss, 0), dict, *fmt);
g_string_append (gstr, ", ");
g_string_append (gstr, ss);
free (ss);
}
- s = recode_string (UTF8, psppire_dict_encoding (dict),
- gstr->str, gstr->len);
- g_string_free (gstr, TRUE);
+ s = gstr->str;
+ g_string_free (gstr, FALSE);
}
return s;
union value v;
v.f = 1234.56;
- sample_text = value_to_text (v, dialog->fmt_l);
+ sample_text = value_to_text (v, NULL, dialog->fmt_l);
gtk_label_set_text (GTK_LABEL (dialog->label_psample), sample_text);
g_free (sample_text);
v.f = -v.f;
- sample_text = value_to_text (v, dialog->fmt_l);
+ sample_text = value_to_text (v, NULL, dialog->fmt_l);
gtk_label_set_text (GTK_LABEL (dialog->label_nsample), sample_text);
g_free (sample_text);
}
static const gchar none[] = N_("None");
-static gchar *
-name_to_string (const struct variable *var, PsppireDict *dict)
-{
- const char *name = var_get_name (var);
- g_assert (name);
-
- return recode_string (UTF8, psppire_dict_encoding (dict),
- name, -1);
-}
-
-
-static gchar *
-label_to_string (const struct variable *var, PsppireDict *dict)
+/* Returns VAR's label, or NONE when VAR has no label.  The result is
+   borrowed: it points either at VAR's own label or at the static NONE
+   string, so the caller must not free it. */
+static const gchar *
+label_to_string (const struct variable *var)
{
const char *label = var_get_label (var);
- if (! label) return g_strdup (none);
+ if (NULL == label) return none;
- return recode_string (UTF8, psppire_dict_encoding (dict),
- label, -1);
+ return label;
}
NULL);
gstring = g_string_sized_new (200);
- text = name_to_string (var, dict);
- g_string_assign (gstring, text);
- g_free (text);
+ g_string_assign (gstring, var_get_name (var));
g_string_append (gstring, "\n");
- text = label_to_string (var, dict);
- g_string_append_printf (gstring, _("Label: %s\n"), text);
- g_free (text);
-
+ g_string_append_printf (gstring, _("Label: %s\n"), label_to_string (var));
{
const struct fmt_spec *fmt = var_get_print_format (var);
char buffer[FMT_STRING_LEN_MAX + 1];
{
const struct val_lab *vl = labels[i];
gchar *const vstr =
- value_to_text (vl->value, *var_get_print_format (var));
-
- text = recode_string (UTF8, psppire_dict_encoding (dict),
- val_lab_get_label (vl), -1);
+ value_to_text (vl->value, dict, *var_get_print_format (var));
- g_string_append_printf (gstring, _("%s %s\n"), vstr, text);
+ g_string_append_printf (gstring, _("%s %s\n"), vstr, val_lab_get_label (vl));
- g_free (text);
g_free (vstr);
}
free (labels);
& (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)))
{
union value v_in, v_out;
- char buffer[FMT_MAX_NUMERIC_WIDTH];
+ char *s;
bool ok;
v_in.f = number;
- data_out (&v_in, format, buffer);
+ s = data_out (&v_in, "FIXME", format);
msg_disable ();
- ok = data_in (ss_buffer (buffer, format->w), LEGACY_NATIVE,
- format->type, false, 0, 0, &v_out, 0);
+ /* FIXME: UTF8 encoded strings will fail here */
+ ok = data_in (ss_cstr (s), LEGACY_NATIVE,
+ format->type, false, 0, 0, NULL, &v_out, 0);
msg_enable ();
if (ok && v_out.f == number)
{
- syntax_gen_string (output, ss_buffer (buffer, format->w));
+ syntax_gen_string (output, ss_cstr (s));
+ free (s);
return;
}
+ free (s);
}
if (number == SYSMIS)
#include <ctype.h>
#include <stdlib.h>
+#include <stdint.h>
#include <string.h>
#include <data/casereader-provider.h>
else
{
unsigned int hash = hash_int (idx, 0);
- char *string = value_str_rw (value, width);
+ uint8_t *string = value_str_rw (value, width);
int offset;
assert (width < 32);