From: John Darrington Date: Sat, 18 Jul 2009 11:20:38 +0000 (+0200) Subject: Merge commit 'origin/data-encoding' X-Git-Tag: build37~53 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp-builds.git;a=commitdiff_plain;h=8f04b0ced35a66cfdebefbcb53c81979add36ca3;hp=2f1db0c762ca29e54b29d9a28bce14c54cf090db Merge commit 'origin/data-encoding' Conflicts: src/language/dictionary/split-file.c --- diff --git a/Smake b/Smake index 78789d4f..201474ce 100644 --- a/Smake +++ b/Smake @@ -76,6 +76,7 @@ GNULIB_MODULES = \ trunc \ unilbrk/ulc-width-linebreaks \ unistd \ + unistr/u8-strlen \ unlocked-io \ vasprintf-posix \ vfprintf-posix \ diff --git a/doc/dev/concepts.texi b/doc/dev/concepts.texi index cc6e7522..06652d62 100644 --- a/doc/dev/concepts.texi +++ b/doc/dev/concepts.texi @@ -654,19 +654,17 @@ Returns the name of the given format @var{type}. These functions provide the ability to convert data fields into @union{value}s and vice versa. -@deftypefun bool data_in (struct substring @var{input}, enum legacy_encoding @var{legacy_encoding}, enum fmt_type @var{type}, int @var{implied_decimals}, int @var{first_column}, union value *@var{output}, int @var{width}) +@deftypefun bool data_in (struct substring @var{input}, const char *@var{encoding}, enum fmt_type @var{type}, int @var{implied_decimals}, int @var{first_column}, const struct dictionary *@var{dict}, union value *@var{output}, int @var{width}) Parses @var{input} as a field containing data in the given format @var{type}. The resulting value is stored in @var{output}, which the caller must have initialized with the given @var{width}. For consistency, @var{width} must be 0 if @var{type} is a numeric format type and greater than 0 if @var{type} is a string format type. - -Ordinarily @var{legacy_encoding} should be @code{LEGACY_NATIVE}, -indicating that @var{input} is encoded in the character set -conventionally used on the host machine. It may be set to -@code{LEGACY_EBCDIC} to cause @var{input} to be re-encoded from EBCDIC -during data parsing. +@var{encoding} should be set to indicate the character +encoding of @var{input}. +@var{dict} must be a pointer to the dictionary with which @var{output} +is associated. If @var{input} is the empty string (with length 0), @var{output} is set to the value set on SET BLANKS (@pxref{SET BLANKS,,,pspp, PSPP @@ -701,21 +699,15 @@ not propagated to the caller as errors. This function is declared in @file{data/data-in.h}. @end deftypefun -@deftypefun void data_out (const union value *@var{input}, const struct fmt_spec *@var{format}, char *@var{output}) -@deftypefunx void data_out_legacy (const union value *@var{input}, enum legacy_encoding @var{legacy_encoding}, const struct fmt_spec *@var{format}, char *@var{output}) -Converts the data pointed to by @var{input} into a data field in -@var{output} according to output format specifier @var{format}, which -must be a valid output format. Exactly @code{@var{format}->w} bytes -are written to @var{output}. The width of @var{input} is also +@deftypefun char * data_out (const union value *@var{input}, const struct fmt_spec *@var{format}) +@deftypefunx char * data_out_legacy (const union value *@var{input}, const char *@var{encoding}, const struct fmt_spec *@var{format}) +Converts the data pointed to by @var{input} into a string value, which +will be encoded in UTF-8, according to output format specifier @var{format}. +Format +must be a valid output format. The width of @var{input} is inferred from @var{format} using an algorithm equivalent to @func{fmt_var_width}. -If @func{data_out} is called, or @func{data_out_legacy} is called with -@var{legacy_encoding} set to @code{LEGACY_NATIVE}, @var{output} will -be encoded in the character set conventionally used on the host -machine. If @var{legacy_encoding} is set to @code{LEGACY_EBCDIC}, -@var{output} will be re-encoded from EBCDIC during data output. - When @var{input} contains data that cannot be represented in the given @var{format}, @func{data_out} may output a message using @func{msg}, @c (@pxref{msg}), diff --git a/doc/dev/i18n.texi b/doc/dev/i18n.texi index 97077d34..3ab86c3d 100644 --- a/doc/dev/i18n.texi +++ b/doc/dev/i18n.texi @@ -53,7 +53,6 @@ Any string data stored in a @union{value} will be encoded in the dictionary's character set. - @section System files @file{*.sav} files contain a field which is supposed to identify the encoding of the data they contain (@pxref{Machine Integer Info Record}). @@ -103,25 +102,20 @@ It is the caller's responsibility to free the returned string when no longer required. @end deftypefun +In order to minimise the number of conversions required, and to simplify +design, PSPP attempts to store all internal strings in UTF8 encoding. +Thus, when reading system and portable files (or any other data source), +the following items are immediately converted to UTF8 encoding: +@itemize +@item Variable names +@item Variable labels +@item Value labels +@end itemize +Conversely, when writing system files, these are converted back to the +encoding of that system file. -For example, in order to display a string variable's value in a label widget in the psppire gui one would use code similar to -@example - -struct variable *var = /* assigned from somewhere */ -struct case c = /* from somewhere else */ - -const union value *val = case_data (&c, var); - -char *utf8string = recode_string (UTF8, dict_get_encoding (dict), val->s, - var_get_width (var)); - -GtkWidget *entry = gtk_entry_new(); -gtk_entry_set_text (entry, utf8string); -gtk_widget_show (entry); - -free (utf8string); - -@end example +String data stored in union values are left in their original encoding. +These will be converted by the data_in/data_out functions. diff --git a/perl-module/PSPP.xs b/perl-module/PSPP.xs index 8179d29a..b3ac4cdc 100644 --- a/perl-module/PSPP.xs +++ b/perl-module/PSPP.xs @@ -38,9 +38,11 @@ #include #include #include +#include #include #include #include +#include #include typedef struct fmt_spec input_format ; @@ -163,6 +165,7 @@ onBoot (ver) const char *ver CODE: assert (0 == strcmp (ver, bare_version)); + i18n_init (); msg_init (NULL, message_handler); settings_init (0, 0); fh_init (); @@ -174,12 +177,11 @@ format_value (val, var) CODE: SV *ret; const struct fmt_spec *fmt = var_get_print_format (var); + const struct dictionary *dict = var_get_vardict (var)->dict; union value uv; char *s; make_value_from_scalar (&uv, val, var); - s = malloc (fmt->w); - memset (s, '\0', fmt->w); - data_out (&uv, fmt, s); + s = data_out (&uv, dict_get_encoding (dict), fmt); value_destroy (&uv, var_get_width (var)); ret = newSVpv (s, fmt->w); free (s); @@ -612,6 +614,7 @@ CODE: { struct substring ss = ss_cstr (SvPV_nolen (sv)); if ( ! data_in (ss, LEGACY_NATIVE, ifmt->type, 0, 0, 0, + sfi->dict, case_data_rw (c, v), var_get_width (v)) ) { diff --git a/src/data/case.c b/src/data/case.c index a4a78dd0..dc402926 100644 --- a/src/data/case.c +++ b/src/data/case.c @@ -308,7 +308,7 @@ case_num_idx (const struct ccase *c, size_t idx) Like the strings embedded in all "union value"s, the return value is not null-terminated. */ -const char * +const uint8_t * case_str (const struct ccase *c, const struct variable *v) { size_t idx = var_get_case_index (v); @@ -321,7 +321,7 @@ case_str (const struct ccase *c, const struct variable *v) Like the strings embedded in all "union value"s, the return value is not null-terminated. */ -const char * +const uint8_t * case_str_idx (const struct ccase *c, size_t idx) { assert (idx < c->proto->n_widths); @@ -336,7 +336,7 @@ case_str_idx (const struct ccase *c, size_t idx) Like the strings embedded in all "union value"s, the return value is not null-terminated. */ -char * +uint8_t * case_str_rw (struct ccase *c, const struct variable *v) { size_t idx = var_get_case_index (v); @@ -352,7 +352,7 @@ case_str_rw (struct ccase *c, const struct variable *v) Like the strings embedded in all "union value"s, the return value is not null-terminated. */ -char * +uint8_t * case_str_rw_idx (struct ccase *c, size_t idx) { assert (idx < c->proto->n_widths); diff --git a/src/data/case.h b/src/data/case.h index 36feb15f..0bfc62cd 100644 --- a/src/data/case.h +++ b/src/data/case.h @@ -94,10 +94,10 @@ union value *case_data_rw_idx (struct ccase *, size_t idx); double case_num (const struct ccase *, const struct variable *); double case_num_idx (const struct ccase *, size_t idx); -const char *case_str (const struct ccase *, const struct variable *); -const char *case_str_idx (const struct ccase *, size_t idx); -char *case_str_rw (struct ccase *, const struct variable *); -char *case_str_rw_idx (struct ccase *, size_t idx); +const uint8_t *case_str (const struct ccase *, const struct variable *); +const uint8_t *case_str_idx (const struct ccase *, size_t idx); +uint8_t *case_str_rw (struct ccase *, const struct variable *); +uint8_t *case_str_rw_idx (struct ccase *, size_t idx); int case_compare (const struct ccase *, const struct ccase *, const struct variable *const *, size_t n_vars); diff --git a/src/data/data-in.c b/src/data/data-in.c index eda6d125..33e369f9 100644 --- a/src/data/data-in.c +++ b/src/data/data-in.c @@ -34,9 +34,11 @@ #include "settings.h" #include "value.h" #include "format.h" +#include "dictionary.h" #include #include +#include #include #include #include @@ -53,7 +55,7 @@ /* Information about parsing one data field. */ struct data_in { - enum legacy_encoding encoding;/* Encoding of source. */ + const char *src_enc; /* Encoding of source. */ struct substring input; /* Source. */ enum fmt_type format; /* Input format. */ int implied_decimals; /* Number of implied decimal places. */ @@ -88,6 +90,9 @@ static int hexit_value (int c); representation in OUTPUT, which the caller must have initialized with the given WIDTH (0 for a numeric field, otherwise the string width). + Iff FORMAT is a string format, then DICT must be a pointer + to the dictionary associated with OUTPUT. Otherwise, DICT + may be null. If no decimal point is included in a numeric format, then IMPLIED_DECIMALS decimal places are implied. Specify 0 if no @@ -100,9 +105,11 @@ static int hexit_value (int c); FIRST_COLUMN plus the length of the input because of the possibility of escaped quotes in strings, etc.) */ bool -data_in (struct substring input, enum legacy_encoding encoding, +data_in (struct substring input, const char *encoding, enum fmt_type format, int implied_decimals, - int first_column, int last_column, union value *output, int width) + int first_column, int last_column, + const struct dictionary *dict, + union value *output, int width) { static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] = { @@ -111,25 +118,11 @@ data_in (struct substring input, enum legacy_encoding encoding, }; struct data_in i; - void *copy = NULL; + bool ok; assert ((width != 0) == fmt_is_string (format)); - if (encoding == LEGACY_NATIVE - || fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING)) - { - i.input = input; - i.encoding = encoding; - } - else - { - ss_alloc_uninit (&i.input, ss_length (input)); - legacy_recode (encoding, ss_data (input), LEGACY_NATIVE, - ss_data (i.input), ss_length (input)); - i.encoding = LEGACY_NATIVE; - copy = ss_data (i.input); - } i.format = format; i.implied_decimals = implied_decimals; @@ -138,21 +131,39 @@ data_in (struct substring input, enum legacy_encoding encoding, i.first_column = first_column; i.last_column = last_column; + i.src_enc = encoding; - if (!ss_is_empty (i.input)) + if (ss_is_empty (input)) { - ok = handlers[i.format] (&i); - if (!ok) - default_result (&i); + default_result (&i); + return true; + } + + if (fmt_get_category (format) & ( FMT_CAT_BINARY | FMT_CAT_HEXADECIMAL | FMT_CAT_LEGACY)) + { + i.input = input; } else { - default_result (&i); - ok = true; + const char *dest_encoding; + char *s = NULL; + if ( dict == NULL) + { + assert (0 == (fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING))); + dest_encoding = LEGACY_NATIVE; + } + else + dest_encoding = dict_get_encoding (dict); + + s = recode_string (dest_encoding, i.src_enc, ss_data (input), ss_length (input)); + ss_alloc_uninit (&i.input, strlen (s)); + memcpy (ss_data (i.input), s, ss_length (input)); + free (s); } - if (copy) - free (copy); + ok = handlers[i.format] (&i); + if (!ok) + default_result (&i); return ok; } @@ -608,12 +619,13 @@ parse_A (struct data_in *i) { /* This is equivalent to buf_copy_rpad, except that we posibly do a character set recoding in the middle. */ - char *dst = value_str_rw (i->output, i->width); + uint8_t *dst = value_str_rw (i->output, i->width); size_t dst_size = i->width; const char *src = ss_data (i->input); size_t src_size = ss_length (i->input); - legacy_recode (i->encoding, src, LEGACY_NATIVE, dst, MIN (src_size, dst_size)); + memcpy (dst, src, MIN (src_size, dst_size)); + if (dst_size > src_size) memset (&dst[src_size], ' ', dst_size - src_size); @@ -624,7 +636,7 @@ parse_A (struct data_in *i) static bool parse_AHEX (struct data_in *i) { - char *s = value_str_rw (i->output, i->width); + uint8_t *s = value_str_rw (i->output, i->width); size_t j; for (j = 0; ; j++) @@ -639,10 +651,10 @@ parse_AHEX (struct data_in *i) return false; } - if (i->encoding != LEGACY_NATIVE) + if (0 != strcmp (i->src_enc, LEGACY_NATIVE)) { - hi = legacy_to_native (i->encoding, hi); - lo = legacy_to_native (i->encoding, lo); + hi = legacy_to_native (i->src_enc, hi); + lo = legacy_to_native (i->src_enc, lo); } if (!c_isxdigit (hi) || !c_isxdigit (lo)) { diff --git a/src/data/data-in.h b/src/data/data-in.h index 3a8d67cc..3ebd5933 100644 --- a/src/data/data-in.h +++ b/src/data/data-in.h @@ -26,9 +26,11 @@ enum fmt_type; union value; -bool data_in (struct substring input, enum legacy_encoding, +struct dictionary; +bool data_in (struct substring input, const char *encoding, enum fmt_type, int implied_decimals, int first_column, int last_column, + const struct dictionary *dict, union value *output, int width); #endif /* data/data-in.h */ diff --git a/src/data/data-out.c b/src/data/data-out.c index e7800a8f..94a6130a 100644 --- a/src/data/data-out.c +++ b/src/data/data-out.c @@ -36,6 +36,8 @@ #include #include #include +#include +#include #include "minmax.h" @@ -83,38 +85,68 @@ static void output_binary_integer (uint64_t, int bytes, enum integer_format, char *); static void output_hex (const void *, size_t bytes, char *); -/* Same as data_out, and additionally recodes the output from - native form into the given legacy character ENCODING. */ -void -data_out_legacy (const union value *input, enum legacy_encoding encoding, - const struct fmt_spec *format, char *output) -{ - static data_out_converter_func *const converters[FMT_NUMBER_OF_FORMATS] = + +static data_out_converter_func *const converters[FMT_NUMBER_OF_FORMATS] = { #define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) output_##METHOD, #include "format.def" }; +/* Similar to data_out. Additionally recodes the output from + native form into the given legacy character ENCODING. + OUTPUT must be provided by the caller and must be at least + FORMAT->w long. No null terminator is appended to OUTPUT. +*/ +void +data_out_legacy (const union value *input, const char *encoding, + const struct fmt_spec *format, char *output) +{ assert (fmt_check_output (format)); converters[format->type] (input, format, output); - if (encoding != LEGACY_NATIVE + if (0 != strcmp (encoding, LEGACY_NATIVE) && fmt_get_category (format->type) != FMT_CAT_BINARY) - legacy_recode (LEGACY_NATIVE, output, encoding, output, format->w); + { + char *s = recode_string (encoding, LEGACY_NATIVE, output, format->w ); + memcpy (output, s, format->w); + free (s); + } } -/* Converts the INPUT value into printable form in the exactly - FORMAT->W characters in OUTPUT according to format - specification FORMAT. No null terminator is appended to the - buffer. +/* Converts the INPUT value into a UTF8 encoded string, according + to format specification FORMAT. VALUE must be the correct width for FORMAT, that is, its - width must be fmt_var_width(FORMAT). */ -void -data_out (const union value *input, const struct fmt_spec *format, - char *output) + width must be fmt_var_width(FORMAT). + + ENCODING must be the encoding of INPUT. Normally this can + be obtained by calling dict_get_encoding on the dictionary + with which INPUT is associated. + + The return value is dynamically allocated, and must be freed + by the caller. If POOL is non-null, then the return value is + allocated on that pool. +*/ +char * +data_out_pool (const union value *input, const char *encoding, + const struct fmt_spec *format, struct pool *pool) +{ + char *output = xmalloc (format->w + 1); + char *t ; + assert (fmt_check_output (format)); + + converters[format->type] (input, format, output); + output[format->w] = '\0'; + + t = recode_string_pool (UTF8, encoding, output, format->w, pool); + free (output); + return t; +} + +char * +data_out (const union value *input, const char *encoding, const struct fmt_spec *format) { - return data_out_legacy (input, LEGACY_NATIVE, format, output); + return data_out_pool (input, encoding, format, NULL); } diff --git a/src/data/data-out.h b/src/data/data-out.h index f9f70da9..735679b4 100644 --- a/src/data/data-out.h +++ b/src/data/data-out.h @@ -25,9 +25,11 @@ struct fmt_spec; union value; -void data_out (const union value *, const struct fmt_spec *, char *); +char * data_out (const union value *, const char *encoding, const struct fmt_spec *); -void data_out_legacy (const union value *, enum legacy_encoding, - const struct fmt_spec *, char *); +char * data_out_pool (const union value *, const char *encoding, const struct fmt_spec *, struct pool *pool); + +void data_out_legacy (const union value *input, const char *encoding, + const struct fmt_spec *format, char *output); #endif /* data-out.h */ diff --git a/src/data/file-handle-def.c b/src/data/file-handle-def.c index 0652501f..6ed3f8f9 100644 --- a/src/data/file-handle-def.c +++ b/src/data/file-handle-def.c @@ -49,7 +49,7 @@ struct file_handle /* FH_REF_FILE only. */ char *file_name; /* File name as provided by user. */ enum fh_mode mode; /* File mode. */ - enum legacy_encoding encoding;/* File encoding. */ + const char *encoding; /* File encoding. */ /* FH_REF_FILE and FH_REF_INLINE only. */ size_t record_width; /* Length of fixed-format records. */ @@ -325,7 +325,7 @@ fh_get_tab_width (const struct file_handle *handle) } /* Returns the encoding of characters read from HANDLE. */ -enum legacy_encoding +const char * fh_get_legacy_encoding (const struct file_handle *handle) { assert (handle->referent & (FH_REF_FILE | FH_REF_INLINE)); diff --git a/src/data/file-handle-def.h b/src/data/file-handle-def.h index 73e118cd..b4a6d610 100644 --- a/src/data/file-handle-def.h +++ b/src/data/file-handle-def.h @@ -54,7 +54,7 @@ struct fh_properties enum fh_mode mode; /* File mode. */ size_t record_width; /* Length of fixed-format records. */ size_t tab_width; /* Tab width, 0=do not expand tabs. */ - enum legacy_encoding encoding;/* ASCII or EBCDIC? */ + const char *encoding; /* ASCII or EBCDIC? */ }; void fh_init (void); @@ -89,7 +89,7 @@ enum fh_mode fh_get_mode (const struct file_handle *) ; /* Properties of FH_REF_FILE and FH_REF_INLINE file handles. */ size_t fh_get_record_width (const struct file_handle *); size_t fh_get_tab_width (const struct file_handle *); -enum legacy_encoding fh_get_legacy_encoding (const struct file_handle *); +const char *fh_get_legacy_encoding (const struct file_handle *); /* Properties of FH_REF_SCRATCH file handles. */ struct scratch_handle *fh_get_scratch_handle (const struct file_handle *); diff --git a/src/data/missing-values.c b/src/data/missing-values.c index c1a74691..61bb9bcb 100644 --- a/src/data/missing-values.c +++ b/src/data/missing-values.c @@ -160,7 +160,7 @@ mv_add_value (struct missing_values *mv, const union value *v) Returns true if successful, false if MV has no more room for missing values or if S is not an acceptable missing value. */ bool -mv_add_str (struct missing_values *mv, const char s[]) +mv_add_str (struct missing_values *mv, const uint8_t s[]) { union value v; bool ok; @@ -404,7 +404,7 @@ is_num_user_missing (const struct missing_values *mv, double d) MV must be a set of string missing values. S[] must contain exactly as many characters as MV's width. */ static bool -is_str_user_missing (const struct missing_values *mv, const char s[]) +is_str_user_missing (const struct missing_values *mv, const uint8_t s[]) { const union value *v = mv->values; assert (mv->width > 0); @@ -456,7 +456,7 @@ mv_is_num_missing (const struct missing_values *mv, double d, MV must be a set of string missing values. S[] must contain exactly as many characters as MV's width. */ bool -mv_is_str_missing (const struct missing_values *mv, const char s[], +mv_is_str_missing (const struct missing_values *mv, const uint8_t s[], enum mv_class class) { assert (mv->width > 0); diff --git a/src/data/missing-values.h b/src/data/missing-values.h index 5576fc6c..4d046fae 100644 --- a/src/data/missing-values.h +++ b/src/data/missing-values.h @@ -64,7 +64,7 @@ enum mv_class bool mv_is_value_missing (const struct missing_values *, const union value *, enum mv_class); bool mv_is_num_missing (const struct missing_values *, double, enum mv_class); -bool mv_is_str_missing (const struct missing_values *, const char[], +bool mv_is_str_missing (const struct missing_values *, const uint8_t[], enum mv_class); /* Initializing missing value sets. */ @@ -94,7 +94,7 @@ void mv_get_range (const struct missing_values *, double *low, double *high); /* Adding and modifying discrete values. */ bool mv_add_value (struct missing_values *, const union value *); -bool mv_add_str (struct missing_values *, const char[]); +bool mv_add_str (struct missing_values *, const uint8_t[]); bool mv_add_num (struct missing_values *, double); void mv_pop_value (struct missing_values *, union value *); bool mv_replace_value (struct missing_values *, const union value *, int idx); diff --git a/src/data/por-file-reader.c b/src/data/por-file-reader.c index 461796bf..cd8b213e 100644 --- a/src/data/por-file-reader.c +++ b/src/data/por-file-reader.c @@ -447,6 +447,28 @@ read_string (struct pfm_reader *r, char *buf) *buf = '\0'; } + +/* Reads a string into BUF, which must have room for 256 + characters. + Returns the number of bytes read. +*/ +static size_t +read_bytes (struct pfm_reader *r, uint8_t *buf) +{ + int n = read_int (r); + if (n < 0 || n > 255) + error (r, _("Bad string length %d."), n); + + while (n-- > 0) + { + *buf++ = r->cc; + advance (r); + } + return n; +} + + + /* Reads a string and returns a copy of it allocated from R's pool. */ static char * @@ -739,9 +761,9 @@ parse_value (struct pfm_reader *r, int width, union value *v) value_init (v, width); if (width > 0) { - char string[256]; - read_string (r, string); - value_copy_str_rpad (v, width, string, ' '); + uint8_t buf[256]; + size_t n_bytes = read_bytes (r, buf); + value_copy_buf_rpad (v, width, buf, n_bytes, ' '); } else v->f = read_float (r); @@ -844,9 +866,9 @@ por_file_casereader_read (struct casereader *reader, void *r_) case_data_rw_idx (c, i)->f = read_float (r); else { - char string[256]; - read_string (r, string); - buf_copy_str_rpad (case_str_rw_idx (c, i), width, string, ' '); + uint8_t buf[256]; + size_t n_bytes = read_bytes (r, buf); + u8_buf_copy_rpad (case_str_rw_idx (c, i), width, buf, n_bytes, ' '); } } diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index cbbbdd15..9978d43a 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -186,6 +187,62 @@ static void read_long_string_value_labels (struct sfm_reader *, size_t size, size_t count, struct dictionary *); +/* Convert all the strings in DICT from the dict encoding to UTF8 */ +static void +recode_strings (struct dictionary *dict) +{ + int i; + + const char *enc = dict_get_encoding (dict); + + if ( NULL == enc) + enc = get_default_encoding (); + + for (i = 0 ; i < dict_get_var_cnt (dict); ++i) + { + /* Convert the long variable name */ + struct variable *var = dict_get_var (dict, i); + const char *native_name = var_get_name (var); + char *utf8_name = recode_string (UTF8, enc, native_name, -1); + if ( 0 != strcmp (utf8_name, native_name)) + { + if ( NULL == dict_lookup_var (dict, utf8_name)) + dict_rename_var (dict, var, utf8_name); + else + msg (MW, + _("Recoded variable name duplicates an existing `%s' within system file."), utf8_name); + } + + free (utf8_name); + + /* Convert the variable label */ + if (var_has_label (var)) + { + char *utf8_label = recode_string (UTF8, enc, var_get_label (var), -1); + var_set_label (var, utf8_label); + free (utf8_label); + } + + if (var_has_value_labels (var)) + { + const struct val_lab *vl = NULL; + const struct val_labs *vlabs = var_get_value_labels (var); + + for (vl = val_labs_first (vlabs); vl != NULL; vl = val_labs_next (vlabs, vl)) + { + const union value *val = val_lab_get_value (vl); + const char *label = val_lab_get_label (vl); + char *new_label = NULL; + + new_label = recode_string (UTF8, enc, label, -1); + + var_replace_value_label (var, val, new_label); + free (new_label); + } + } + } +} + /* Opens the system file designated by file handle FH for reading. Reads the system file's dictionary into *DICT. If INFO is non-null, then it receives additional info about the @@ -303,6 +360,8 @@ sfm_open_reader (struct file_handle *fh, struct dictionary **dict, r->has_long_var_names = true; } + recode_strings (*dict); + /* Read record 999 data, which is just filler. */ read_int (r); @@ -582,7 +641,7 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, value_set_missing (&value, mv_width); for (i = 0; i < missing_value_code; i++) { - char *s = value_str_rw (&value, mv_width); + uint8_t *s = value_str_rw (&value, mv_width); read_bytes (r, s, 8); mv_add_str (&mv, s); } @@ -1138,7 +1197,7 @@ read_value_labels (struct sfm_reader *r, struct label { - char raw_value[8]; /* Value as uninterpreted bytes. */ + uint8_t raw_value[8]; /* Value as uninterpreted bytes. */ union value value; /* Value. */ char *label; /* Null-terminated label string. */ }; @@ -1236,7 +1295,7 @@ read_value_labels (struct sfm_reader *r, value_init_pool (subpool, &label->value, max_width); if (var_is_alpha (var[0])) - buf_copy_rpad (value_str_rw (&label->value, max_width), max_width, + u8_buf_copy_rpad (value_str_rw (&label->value, max_width), max_width, label->raw_value, sizeof label->raw_value, ' '); else label->value.f = float_get_double (r->float_format, label->raw_value); @@ -1416,7 +1475,7 @@ read_long_string_value_labels (struct sfm_reader *r, /* Read value. */ value_length = read_int (r); if (value_length == width) - read_string (r, value_str_rw (&value, width), width + 1); + read_bytes (r, value_str_rw (&value, width), width); else { sys_warn (r, _("Ignoring long string value %zu for variable %s, " @@ -1473,11 +1532,11 @@ static void partial_record (struct sfm_reader *r) static void read_error (struct casereader *, const struct sfm_reader *); static bool read_case_number (struct sfm_reader *, double *); -static bool read_case_string (struct sfm_reader *, char *, size_t); +static bool read_case_string (struct sfm_reader *, uint8_t *, size_t); static int read_opcode (struct sfm_reader *); static bool read_compressed_number (struct sfm_reader *, double *); -static bool read_compressed_string (struct sfm_reader *, char *); -static bool read_whole_strings (struct sfm_reader *, char *, size_t); +static bool read_compressed_string (struct sfm_reader *, uint8_t *); +static bool read_whole_strings (struct sfm_reader *, uint8_t *, size_t); static bool skip_whole_strings (struct sfm_reader *, size_t); /* Reads and returns one case from READER's file. Returns a null @@ -1512,7 +1571,7 @@ sys_file_casereader_read (struct casereader *reader, void *r_) } else { - char *s = value_str_rw (v, sv->var_width); + uint8_t *s = value_str_rw (v, sv->var_width); if (!read_case_string (r, s + sv->offset, sv->segment_width)) goto eof; if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8))) @@ -1574,7 +1633,7 @@ read_case_number (struct sfm_reader *r, double *d) Returns true if successful, false if end of file is reached immediately. */ static bool -read_case_string (struct sfm_reader *r, char *s, size_t length) +read_case_string (struct sfm_reader *r, uint8_t *s, size_t length) { size_t whole = ROUND_DOWN (length, 8); size_t partial = length % 8; @@ -1587,7 +1646,7 @@ read_case_string (struct sfm_reader *r, char *s, size_t length) if (partial) { - char bounce[8]; + uint8_t bounce[8]; if (!read_whole_strings (r, bounce, sizeof bounce)) { if (whole) @@ -1658,7 +1717,7 @@ read_compressed_number (struct sfm_reader *r, double *d) Returns true if successful, false if end of file is reached immediately. */ static bool -read_compressed_string (struct sfm_reader *r, char *dst) +read_compressed_string (struct sfm_reader *r, uint8_t *dst) { switch (read_opcode (r)) { @@ -1687,7 +1746,7 @@ read_compressed_string (struct sfm_reader *r, char *dst) Returns true if successful, false if end of file is reached immediately. */ static bool -read_whole_strings (struct sfm_reader *r, char *s, size_t length) +read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length) { assert (length % 8 == 0); if (!r->compressed) @@ -1715,7 +1774,7 @@ read_whole_strings (struct sfm_reader *r, char *s, size_t length) static bool skip_whole_strings (struct sfm_reader *r, size_t length) { - char buffer[1024]; + uint8_t buffer[1024]; assert (length < sizeof buffer); return read_whole_strings (r, buffer, length); } diff --git a/src/data/sys-file-writer.c b/src/data/sys-file-writer.c index 13dc2de6..3fed2e05 100644 --- a/src/data/sys-file-writer.c +++ b/src/data/sys-file-writer.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -94,9 +95,9 @@ struct sfm_writer static const struct casewriter_class sys_file_casewriter_class; static void write_header (struct sfm_writer *, const struct dictionary *); -static void write_variable (struct sfm_writer *, const struct variable *); +static void write_variable (struct sfm_writer *, const struct variable *, const struct dictionary *); static void write_value_labels (struct sfm_writer *, - struct variable *, int idx); + struct variable *, int idx, const struct dictionary *); static void write_integer_info_record (struct sfm_writer *); static void write_float_info_record (struct sfm_writer *); @@ -222,7 +223,7 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, /* Write basic variable info. */ short_names_assign (d); for (i = 0; i < dict_get_var_cnt (d); i++) - write_variable (w, dict_get_var (d, i)); + write_variable (w, dict_get_var (d, i), d); /* Write out value labels. */ idx = 0; @@ -230,7 +231,7 @@ sfm_open_writer (struct file_handle *fh, struct dictionary *d, { struct variable *v = dict_get_var (d, i); - write_value_labels (w, v, idx); + write_value_labels (w, v, idx, d); idx += sfm_width_to_octs (var_get_width (v)); } @@ -420,7 +421,7 @@ write_variable_continuation_records (struct sfm_writer *w, int width) /* Write the variable record(s) for variable V to system file W. */ static void -write_variable (struct sfm_writer *w, const struct variable *v) +write_variable (struct sfm_writer *w, const struct variable *v, const struct dictionary *dict) { int width = var_get_width (v); int segment_cnt = sfm_width_to_segments (width); @@ -461,9 +462,11 @@ write_variable (struct sfm_writer *w, const struct variable *v) if (var_has_label (v)) { const char *label = var_get_label (v); - size_t padded_len = ROUND_UP (MIN (strlen (label), 255), 4); + char *l = recode_string (dict_get_encoding (dict), UTF8, label, -1); + size_t padded_len = ROUND_UP (MIN (strlen (l), 255), 4); write_int (w, padded_len); - write_string (w, label, padded_len); + write_string (w, l, padded_len); + free (l); } /* Write the missing values, if any, range first. */ @@ -505,7 +508,7 @@ write_variable (struct sfm_writer *w, const struct variable *v) Value labels for long string variables are written separately, by write_long_string_value_labels. */ static void -write_value_labels (struct sfm_writer *w, struct variable *v, int idx) +write_value_labels (struct sfm_writer *w, struct variable *v, int idx, const struct dictionary *dict) { const struct val_labs *val_labs; const struct val_lab **labels; @@ -524,13 +527,14 @@ write_value_labels (struct sfm_writer *w, struct variable *v, int idx) for (i = 0; i < n_labels; i++) { const struct val_lab *vl = labels[i]; - const char *label = val_lab_get_label (vl); + char *label = recode_string (dict_get_encoding (dict), UTF8, val_lab_get_label (vl), -1); uint8_t len = MIN (strlen (label), 255); write_value (w, val_lab_get_value (vl), var_get_width (v)); write_bytes (w, &len, 1); write_bytes (w, label, len); write_zeros (w, REM_RND_UP (len + 1, 8)); + free (label); } free (labels); @@ -774,11 +778,13 @@ write_longvar_table (struct sfm_writer *w, const struct dictionary *dict) for (i = 0; i < dict_get_var_cnt (dict); i++) { struct variable *v = dict_get_var (dict, i); + char *longname = recode_string (dict_get_encoding (dict), UTF8, var_get_name (v), -1); if (i) ds_put_char (&map, '\t'); ds_put_format (&map, "%s=%s", - var_get_short_name (v, 0), var_get_name (v)); + var_get_short_name (v, 0), longname); + free (longname); } write_int (w, 7); /* Record type. */ diff --git a/src/data/value.c b/src/data/value.c index ce050c01..6dbecb11 100644 --- a/src/data/value.c +++ b/src/data/value.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "minmax.h" #include "xalloc.h" @@ -44,7 +45,7 @@ value_copy_rpad (union value *dst, int dst_width, const union value *src, int src_width, char pad) { - buf_copy_rpad (value_str_rw (dst, dst_width), dst_width, + u8_buf_copy_rpad (value_str_rw (dst, dst_width), dst_width, value_str (src, src_width), src_width, pad); } @@ -62,10 +63,10 @@ value_copy_rpad (union value *dst, int dst_width, DST was initialized. Passing, e.g., a smaller value in order to modify only a prefix of DST will not work in every case. */ void -value_copy_str_rpad (union value *dst, int dst_width, const char *src, +value_copy_str_rpad (union value *dst, int dst_width, const uint8_t *src, char pad) { - value_copy_buf_rpad (dst, dst_width, src, strlen (src), pad); + value_copy_buf_rpad (dst, dst_width, src, u8_strlen (src), pad); } /* Copies the SRC_LEN bytes at SRC to string value DST with width @@ -81,9 +82,9 @@ value_copy_str_rpad (union value *dst, int dst_width, const char *src, to modify only a prefix of DST will not work in every case. */ void value_copy_buf_rpad (union value *dst, int dst_width, - const char *src, size_t src_len, char pad) + const uint8_t *src, size_t src_len, char pad) { - buf_copy_rpad (value_str_rw (dst, dst_width), dst_width, src, src_len, pad); + u8_buf_copy_rpad (value_str_rw (dst, dst_width), dst_width, src, src_len, pad); } /* Sets V to the system-missing value for data of the given @@ -145,7 +146,7 @@ value_is_resizable (const union value *value, int old_width, int new_width) return false; else { - const char *str = value_str (value, old_width); + const uint8_t *str = value_str (value, old_width); int i; for (i = new_width; i < old_width; i++) @@ -225,7 +226,7 @@ value_resize_pool (struct pool *pool, union value *value, { if (new_width > MAX_SHORT_STRING) { - char *new_long_string = pool_alloc_unaligned (pool, new_width); + uint8_t *new_long_string = pool_alloc_unaligned (pool, new_width); memcpy (new_long_string, value_str (value, old_width), old_width); value->long_string = new_long_string; } diff --git a/src/data/value.h b/src/data/value.h index 6955eea0..f9782e2d 100644 --- a/src/data/value.h +++ b/src/data/value.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "xalloc.h" @@ -45,8 +46,8 @@ union value { double f; - char short_string[MAX_SHORT_STRING]; - char *long_string; + uint8_t short_string[MAX_SHORT_STRING]; + uint8_t *long_string; }; static inline void value_init (union value *, int width); @@ -55,17 +56,17 @@ static inline bool value_try_init (union value *, int width); static inline void value_destroy (union value *, int width); static inline double value_num (const union value *); -static inline const char *value_str (const union value *, int width); -static inline char *value_str_rw (union value *, int width); +static inline const uint8_t *value_str (const union value *, int width); +static inline uint8_t *value_str_rw (union value *, int width); static inline void value_copy (union value *, const union value *, int width); void value_copy_rpad (union value *, int dst_width, const union value *, int src_width, char pad); -void value_copy_str_rpad (union value *, int dst_width, const char *, +void value_copy_str_rpad (union value *, int dst_width, const uint8_t *, char pad); void value_copy_buf_rpad (union value *dst, int dst_width, - const char *src, size_t src_len, char pad); + const uint8_t *src, size_t src_len, char pad); void value_set_missing (union value *, int width); int value_compare_3way (const union value *, const union value *, int width); bool value_equal (const union value *, const union value *, int width); @@ -147,7 +148,7 @@ value_num (const union value *v) It is important that WIDTH be the actual value that was passed to value_init. Passing, e.g., a smaller value because only that number of bytes will be accessed will not always work. */ -static inline const char * +static inline const uint8_t * value_str (const union value *v, int width) { assert (width > 0); @@ -161,7 +162,7 @@ value_str (const union value *v, int width) It is important that WIDTH be the actual value that was passed to value_init. Passing, e.g., a smaller value because only that number of bytes will be accessed will not always work. */ -static inline char * +static inline uint8_t * value_str_rw (union value *v, int width) { assert (width > 0); diff --git a/src/data/variable.c b/src/data/variable.c index 8d85b518..d1e30864 100644 --- a/src/data/variable.c +++ b/src/data/variable.c @@ -494,7 +494,7 @@ var_is_num_missing (const struct variable *v, double d, enum mv_class class) S[] must contain exactly as many characters as V's width. V must be a string variable. */ bool -var_is_str_missing (const struct variable *v, const char s[], +var_is_str_missing (const struct variable *v, const uint8_t s[], enum mv_class class) { return mv_is_str_missing (&v->miss, s, class); @@ -590,10 +590,12 @@ var_append_value_name (const struct variable *v, const union value *value, struct string *str) { const char *name = var_lookup_value_label (v, value); + const struct dictionary *dict = var_get_vardict (v)->dict; if (name == NULL) { - char *s = ds_put_uninit (str, v->print.w); - data_out (value, &v->print, s); + char *s = data_out (value, dict_get_encoding (dict), &v->print); + ds_put_cstr (str, s); + free (s); } else ds_put_cstr (str, name); diff --git a/src/data/variable.h b/src/data/variable.h index 5d28d5b8..0b619a49 100644 --- a/src/data/variable.h +++ b/src/data/variable.h @@ -69,7 +69,7 @@ bool var_has_missing_values (const struct variable *); bool var_is_value_missing (const struct variable *, const union value *, enum mv_class); bool var_is_num_missing (const struct variable *, double, enum mv_class); -bool var_is_str_missing (const struct variable *, const char[], enum mv_class); +bool var_is_str_missing (const struct variable *, const uint8_t[], enum mv_class); /* Value labels. */ const char *var_lookup_value_label (const struct variable *, diff --git a/src/language/data-io/data-list.c b/src/language/data-io/data-list.c index de857488..d43af347 100644 --- a/src/language/data-io/data-list.c +++ b/src/language/data-io/data-list.c @@ -86,7 +86,7 @@ cmd_data_list (struct lexer *lexer, struct dataset *ds) bool ok; dict = in_input_program () ? dataset_dict (ds) : dict_create (); - parser = data_parser_create (); + parser = data_parser_create (dict); reader = NULL; table = -1; /* Print table if nonzero, -1=undecided. */ diff --git a/src/language/data-io/data-parser.c b/src/language/data-io/data-parser.c index e7e92bf6..6ec3d0d5 100644 --- a/src/language/data-io/data-parser.c +++ b/src/language/data-io/data-parser.c @@ -41,6 +41,7 @@ /* Data parser for textual data like that read by DATA LIST. */ struct data_parser { + const struct dictionary *dict; /*Dictionary of destination */ enum data_parser_type type; /* Type of data to parse. */ int skip_records; /* Records to skip before first real data. */ casenumber max_cases; /* Max number of cases to read. */ @@ -79,7 +80,7 @@ static void set_any_sep (struct data_parser *parser); /* Creates and returns a new data parser. */ struct data_parser * -data_parser_create (void) +data_parser_create (const struct dictionary *dict) { struct data_parser *parser = xmalloc (sizeof *parser); @@ -91,6 +92,7 @@ data_parser_create (void) parser->fields = NULL; parser->field_cnt = 0; parser->field_allocated = 0; + parser->dict = dict; parser->span = true; parser->empty_line_has_field = false; @@ -505,7 +507,7 @@ static bool parse_fixed (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (reader); + const char *encoding = dfm_reader_get_legacy_encoding (reader); struct field *f; int row; @@ -531,6 +533,7 @@ parse_fixed (const struct data_parser *parser, struct dfm_reader *reader, f->format.w), encoding, f->format.type, f->format.d, f->first_column, f->first_column + f->format.w, + parser->dict, case_data_rw_idx (c, f->case_idx), fmt_var_width (&f->format)); @@ -547,7 +550,7 @@ static bool parse_delimited_span (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (reader); + const char *encoding = dfm_reader_get_legacy_encoding (reader); struct string tmp = DS_EMPTY_INITIALIZER; struct field *f; @@ -574,6 +577,7 @@ parse_delimited_span (const struct data_parser *parser, data_in (s, encoding, f->format.type, 0, first_column, last_column, + parser->dict, case_data_rw_idx (c, f->case_idx), fmt_var_width (&f->format)); } @@ -588,7 +592,7 @@ static bool parse_delimited_no_span (const struct data_parser *parser, struct dfm_reader *reader, struct ccase *c) { - enum legacy_encoding encoding = dfm_reader_get_legacy_encoding (reader); + const char *encoding = dfm_reader_get_legacy_encoding (reader); struct string tmp = DS_EMPTY_INITIALIZER; struct substring s; struct field *f; @@ -614,6 +618,7 @@ parse_delimited_no_span (const struct data_parser *parser, data_in (s, encoding, f->format.type, 0, first_column, last_column, + parser->dict, case_data_rw_idx (c, f->case_idx), fmt_var_width (&f->format)); } diff --git a/src/language/data-io/data-parser.h b/src/language/data-io/data-parser.h index b250e91b..5a53a2f6 100644 --- a/src/language/data-io/data-parser.h +++ b/src/language/data-io/data-parser.h @@ -38,7 +38,7 @@ enum data_parser_type }; /* Creating and configuring any parser. */ -struct data_parser *data_parser_create (void); +struct data_parser *data_parser_create (const struct dictionary *dict); void data_parser_destroy (struct data_parser *); enum data_parser_type data_parser_get_type (const struct data_parser *); diff --git a/src/language/data-io/data-reader.c b/src/language/data-io/data-reader.c index 24ddcf13..6f620a6a 100644 --- a/src/language/data-io/data-reader.c +++ b/src/language/data-io/data-reader.c @@ -597,7 +597,7 @@ dfm_expand_tabs (struct dfm_reader *r) } /* Returns the legacy character encoding of data read from READER. */ -enum legacy_encoding +const char * dfm_reader_get_legacy_encoding (const struct dfm_reader *reader) { return fh_get_legacy_encoding (reader->fh); diff --git a/src/language/data-io/data-reader.h b/src/language/data-io/data-reader.h index c7fee613..308701c2 100644 --- a/src/language/data-io/data-reader.h +++ b/src/language/data-io/data-reader.h @@ -38,8 +38,7 @@ bool dfm_reader_error (const struct dfm_reader *); unsigned dfm_eof (struct dfm_reader *); struct substring dfm_get_record (struct dfm_reader *); void dfm_expand_tabs (struct dfm_reader *); -enum legacy_encoding dfm_reader_get_legacy_encoding ( - const struct dfm_reader *); +const char *dfm_reader_get_legacy_encoding (const struct dfm_reader *); int dfm_get_percent_read (const struct dfm_reader *); /* Line control. */ diff --git a/src/language/data-io/data-writer.c b/src/language/data-io/data-writer.c index b5df59d4..85b11d4c 100644 --- a/src/language/data-io/data-writer.c +++ b/src/language/data-io/data-writer.c @@ -200,7 +200,7 @@ dfm_close_writer (struct dfm_writer *w) } /* Returns the legacy character encoding of data written to WRITER. */ -enum legacy_encoding +const char * dfm_writer_get_legacy_encoding (const struct dfm_writer *writer) { return fh_get_legacy_encoding (writer->fh); diff --git a/src/language/data-io/data-writer.h b/src/language/data-io/data-writer.h index 2142f215..045db316 100644 --- a/src/language/data-io/data-writer.h +++ b/src/language/data-io/data-writer.h @@ -27,7 +27,6 @@ struct dfm_writer *dfm_open_writer (struct file_handle *); bool dfm_close_writer (struct dfm_writer *); bool dfm_write_error (const struct dfm_writer *); bool dfm_put_record (struct dfm_writer *, const char *rec, size_t len); -enum legacy_encoding dfm_writer_get_legacy_encoding ( - const struct dfm_writer *); +const char *dfm_writer_get_legacy_encoding (const struct dfm_writer *); #endif /* data-writer.h */ diff --git a/src/language/data-io/file-handle.q b/src/language/data-io/file-handle.q index 827dbab4..3e053ed5 100644 --- a/src/language/data-io/file-handle.q +++ b/src/language/data-io/file-handle.q @@ -102,7 +102,7 @@ cmd_file_handle (struct lexer *lexer, struct dataset *ds) properties.mode = FH_MODE_VARIABLE; break; case FH_360: - properties.encoding = LEGACY_EBCDIC; + properties.encoding = "EBCDIC-US"; if (cmd.recform == FH_FIXED || cmd.recform == FH_F) properties.mode = FH_MODE_FIXED; else if (cmd.recform == FH_VARIABLE || cmd.recform == FH_V) diff --git a/src/language/data-io/get-data.c b/src/language/data-io/get-data.c index e4ab76a9..32202bab 100644 --- a/src/language/data-io/get-data.c +++ b/src/language/data-io/get-data.c @@ -271,7 +271,7 @@ static int parse_get_txt (struct lexer *lexer, struct dataset *ds) { struct data_parser *parser = NULL; - struct dictionary *dict = NULL; + struct dictionary *dict = dict_create (); struct file_handle *fh = NULL; struct dfm_reader *reader = NULL; @@ -288,7 +288,7 @@ parse_get_txt (struct lexer *lexer, struct dataset *ds) if (fh == NULL) goto error; - parser = data_parser_create (); + parser = data_parser_create (dict); has_type = false; data_parser_set_type (parser, DP_DELIMITED); data_parser_set_span (parser, false); @@ -465,7 +465,7 @@ parse_get_txt (struct lexer *lexer, struct dataset *ds) } lex_match (lexer, '='); - dict = dict_create (); + record = 1; type = data_parser_get_type (parser); do diff --git a/src/language/data-io/list.q b/src/language/data-io/list.q index 28820a85..c3f9b088 100644 --- a/src/language/data-io/list.q +++ b/src/language/data-io/list.q @@ -706,18 +706,21 @@ list_case (const struct ccase *c, casenumber case_idx, if (fmt_is_string (print->type) || dict_contains_var (dict, v)) { - data_out (case_data (c, v), print, - ds_put_uninit (&line_buffer, print->w)); + char *s = data_out (case_data (c, v), dict_get_encoding (dict), print); + ds_put_cstr (&line_buffer, s); + free (s); } else { + char *s; union value case_idx_value; case_idx_value.f = case_idx; - data_out (&case_idx_value, print, - ds_put_uninit (&line_buffer,print->w)); + s = data_out (&case_idx_value, dict_get_encoding (dict), print); + ds_put_cstr (&line_buffer, s); + free (s); } - ds_put_char(&line_buffer, ' '); + ds_put_char (&line_buffer, ' '); } if (!n_lines_remaining (d)) @@ -740,20 +743,21 @@ list_case (const struct ccase *c, casenumber case_idx, { const struct variable *v = cmd.v_variables[column]; const struct fmt_spec *print = var_get_print_format (v); - char buf[256]; + char *s = NULL; if (fmt_is_string (print->type) || dict_contains_var (dict, v)) - data_out (case_data (c, v), print, buf); + s = data_out (case_data (c, v), dict_get_encoding (dict), print); else { union value case_idx_value; case_idx_value.f = case_idx; - data_out (&case_idx_value, print, buf); + s = data_out (&case_idx_value, dict_get_encoding (dict), print); } fputs (" ", x->file); - html_put_cell_contents (d, TAB_FIX, ss_buffer (buf, print->w)); + html_put_cell_contents (d, TAB_FIX, ss_buffer (s, print->w)); + free (s); fputs ("\n", x->file); } diff --git a/src/language/data-io/print.c b/src/language/data-io/print.c index 34042aa8..e345477b 100644 --- a/src/language/data-io/print.c +++ b/src/language/data-io/print.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -83,7 +84,7 @@ struct print_trns struct pool *pool; /* Stores related data. */ bool eject; /* Eject page before printing? */ bool include_prefix; /* Prefix lines with space? */ - enum legacy_encoding encoding; /* Encoding to use for output. */ + const char *encoding; /* Encoding to use for output. */ struct dfm_writer *writer; /* Output file, NULL=listing file. */ struct ll_list specs; /* List of struct prt_out_specs. */ size_t record_cnt; /* Number of records to write. */ @@ -480,12 +481,13 @@ print_trns_proc (void *trns_, struct ccase **c, casenumber case_num UNUSED) else { ds_put_substring (&trns->line, ds_ss (&spec->string)); - if (trns->encoding != LEGACY_NATIVE) + if (0 != strcmp (trns->encoding, LEGACY_NATIVE)) { size_t length = ds_length (&spec->string); char *data = ss_data (ds_tail (&trns->line, length)); - legacy_recode (LEGACY_NATIVE, data, - trns->encoding, data, length); + char *s = recode_string (trns->encoding, LEGACY_NATIVE, data, length); + memcpy (data, s, length); + free (s); } } } diff --git a/src/language/dictionary/missing-values.c b/src/language/dictionary/missing-values.c index aa3ce698..819b0a90 100644 --- a/src/language/dictionary/missing-values.c +++ b/src/language/dictionary/missing-values.c @@ -101,7 +101,7 @@ cmd_missing_values (struct lexer *lexer, struct dataset *ds) mv_init (&mv, MV_MAX_STRING); while (!lex_match (lexer, ')')) { - char value[MV_MAX_STRING]; + uint8_t value[MV_MAX_STRING]; size_t length; if (!lex_force_string (lexer)) diff --git a/src/language/dictionary/split-file.c b/src/language/dictionary/split-file.c index d2b79c63..5d2b42d7 100644 --- a/src/language/dictionary/split-file.c +++ b/src/language/dictionary/split-file.c @@ -88,17 +88,17 @@ output_split_file_values (const struct dataset *ds, const struct ccase *c) for (i = 0; i < split_cnt; i++) { const struct variable *v = split[i]; - char temp_buf[80]; + char *s; const char *val_lab; const struct fmt_spec *print = var_get_print_format (v); tab_text_format (t, 0, i + 1, TAB_LEFT, "%s", var_get_name (v)); - data_out (case_data (c, v), print, temp_buf); - temp_buf[print->w] = 0; - - tab_text_format (t, 1, i + 1, 0, "%.*s", print->w, temp_buf); + s = data_out (case_data (c, v), dict_get_encoding (dict), print); + tab_text_format (t, 1, i + 1, 0, "%.*s", print->w, s); + free (s); + val_lab = var_lookup_value_label (v, case_data (c, v)); if (val_lab) tab_text (t, 2, i + 1, TAB_LEFT, val_lab); diff --git a/src/language/expressions/operations.def b/src/language/expressions/operations.def index 2d31bd47..d2838672 100644 --- a/src/language/expressions/operations.def +++ b/src/language/expressions/operations.def @@ -573,7 +573,7 @@ string function RTRIM (string s, string c) function NUMBER (string s, ni_format f) { union value out; - data_in (ss_head (s, f->w), LEGACY_NATIVE, f->type, f->d, 0, 0, &out, 0); + data_in (ss_head (s, f->w), LEGACY_NATIVE, f->type, f->d, 0, 0, NULL, &out, 0); return out.f; } @@ -582,11 +582,15 @@ absorb_miss string function STRING (x, no_format f) { union value v; struct substring dst; + char *s; v.f = x; - dst = alloc_string (e, f->w); + assert (!fmt_is_string (f->type)); - data_out (&v, f, dst.string); + s = data_out (&v, LEGACY_NATIVE, f); + dst = alloc_string (e, strlen (s)); + strcpy (dst.string, s); + free (s); return dst; } diff --git a/src/language/lexer/value-parser.c b/src/language/lexer/value-parser.c index 2cf9cf2b..c780d86f 100644 --- a/src/language/lexer/value-parser.c +++ b/src/language/lexer/value-parser.c @@ -99,8 +99,9 @@ parse_number (struct lexer *lexer, double *x, const enum fmt_type *format) else if (lex_token (lexer) == T_STRING && format != NULL) { union value v; + assert (! (fmt_get_category (*format) & ( FMT_CAT_STRING ))); data_in (ds_ss (lex_tokstr (lexer)), LEGACY_NATIVE, - *format, 0, 0, 0, &v, 0); + *format, 0, 0, 0, NULL, &v, 0); lex_get (lexer); *x = v.f; if (*x == SYSMIS) diff --git a/src/language/stats/crosstabs.q b/src/language/stats/crosstabs.q index 8347bb5f..99fa41d9 100644 --- a/src/language/stats/crosstabs.q +++ b/src/language/stats/crosstabs.q @@ -177,6 +177,7 @@ get_var_range (const struct variable *v) struct crosstabs_proc { + const struct dictionary *dict; enum { INTEGER, GENERAL } mode; enum mv_class exclude; bool pivot; @@ -204,6 +205,7 @@ static void init_proc (struct crosstabs_proc *proc, struct dataset *ds) { const struct variable *wv = dict_get_weight (dataset_dict (ds)); + proc->dict = dataset_dict (ds); proc->bad_warn = true; proc->variables = NULL; proc->n_variables = 0; @@ -1184,14 +1186,16 @@ create_crosstab_table (struct crosstabs_proc *proc, struct pivot_table *pt) { const struct variable *var = pt->const_vars[i]; size_t ofs; + char *s = NULL; ds_put_format (&title, ", %s=", var_get_name (var)); /* Insert the formatted value of the variable, then trim leading spaces in what was just inserted. */ ofs = ds_length (&title); - data_out (&pt->const_values[i], var_get_print_format (var), - ds_put_uninit (&title, var_get_width (var))); + s = data_out (&pt->const_values[i], dict_get_encoding (proc->dict), var_get_print_format (var)); + ds_put_cstr (&title, s); + free (s); ds_remove (&title, ofs, ss_cspan (ds_substr (&title, ofs, SIZE_MAX), ss_cstr (" "))); } @@ -1519,9 +1523,8 @@ table_value_missing (struct crosstabs_proc *proc, return; } - s.string = tab_alloc (table, print->w); - data_out (v, print, s.string); - s.length = print->w; + s = ss_cstr (data_out_pool (v, dict_get_encoding (proc->dict), print, + table->container)); if (proc->exclude == MV_NEVER && var_is_num_missing (var, v->f, MV_USER)) s.string[s.length++] = 'M'; while (s.length && *s.string == ' ') @@ -1554,16 +1557,15 @@ display_dimensions (struct crosstabs_proc *proc, struct pivot_table *pt, additionally suffixed with a letter `M'. */ static void format_cell_entry (struct tab_table *table, int c, int r, double value, - char suffix, bool mark_missing) + char suffix, bool mark_missing, const struct dictionary *dict) { const struct fmt_spec f = {FMT_F, 10, 1}; union value v; struct substring s; - s.length = 10; - s.string = tab_alloc (table, 16); v.f = value; - data_out (&v, &f, s.string); + s = ss_cstr (data_out_pool (&v, dict_get_encoding (dict), &f, table->container)); + while (*s.string == ' ') { s.length--; @@ -1649,7 +1651,7 @@ display_crosstabulation (struct crosstabs_proc *proc, struct pivot_table *pt, default: NOT_REACHED (); } - format_cell_entry (table, c, i, v, suffix, mark_missing); + format_cell_entry (table, c, i, v, suffix, mark_missing, proc->dict); } mp++; @@ -1700,7 +1702,7 @@ display_crosstabulation (struct crosstabs_proc *proc, struct pivot_table *pt, NOT_REACHED (); } - format_cell_entry (table, pt->n_cols, 0, v, suffix, mark_missing); + format_cell_entry (table, pt->n_cols, 0, v, suffix, mark_missing, proc->dict); tab_next_row (table); } } @@ -1750,7 +1752,7 @@ display_crosstabulation (struct crosstabs_proc *proc, struct pivot_table *pt, NOT_REACHED (); } - format_cell_entry (table, c, i, v, suffix, mark_missing); + format_cell_entry (table, c, i, v, suffix, mark_missing, proc->dict); } last_row = i; } diff --git a/src/language/stats/frequencies.q b/src/language/stats/frequencies.q index 8295f51a..83b864c9 100644 --- a/src/language/stats/frequencies.q +++ b/src/language/stats/frequencies.q @@ -207,6 +207,8 @@ struct freq_tab struct hsh_table *data; /* Undifferentiated data. */ struct freq_mutable *valid; /* Valid freqs. */ int n_valid; /* Number of total freqs. */ + const struct dictionary *dict; /* The dict from whence entries in the table + come */ struct freq_mutable *missing; /* Missing freqs. */ int n_missing; /* Number of missing freqs. */ @@ -755,6 +757,7 @@ frq_custom_variables (struct lexer *lexer, struct dataset *ds, struct cmd_freque } vf = var_attach_aux (v, xmalloc (sizeof *vf), var_dtor_free); vf->tab.valid = vf->tab.missing = NULL; + vf->tab.dict = dataset_dict (ds); vf->n_groups = 0; vf->groups = NULL; vf->width = var_get_width (v); @@ -1091,7 +1094,7 @@ dump_full (const struct variable *v, const struct variable *wv) tab_text (t, 0, r, TAB_LEFT, label); } - tab_value (t, 0 + lab, r, TAB_NONE, &f->value, &vf->print); + tab_value (t, 0 + lab, r, TAB_NONE, &f->value, ft->dict, &vf->print); tab_double (t, 1 + lab, r, TAB_NONE, f->count, wfmt); tab_double (t, 2 + lab, r, TAB_NONE, percent, NULL); tab_double (t, 3 + lab, r, TAB_NONE, valid_percent, NULL); @@ -1109,7 +1112,7 @@ dump_full (const struct variable *v, const struct variable *wv) tab_text (t, 0, r, TAB_LEFT, label); } - tab_value (t, 0 + lab, r, TAB_NONE, &f->value, &vf->print); + tab_value (t, 0 + lab, r, TAB_NONE, &f->value, ft->dict, &vf->print); tab_double (t, 1 + lab, r, TAB_NONE, f->count, wfmt); tab_double (t, 2 + lab, r, TAB_NONE, f->count / ft->total_cases * 100.0, NULL); @@ -1185,7 +1188,7 @@ dump_condensed (const struct variable *v, const struct variable *wv) percent = f->count / ft->total_cases * 100.0; cum_total += f->count / ft->valid_cases * 100.0; - tab_value (t, 0, r, TAB_NONE, &f->value, &vf->print); + tab_value (t, 0, r, TAB_NONE, &f->value, ft->dict, &vf->print); tab_double (t, 1, r, TAB_NONE, f->count, wfmt); tab_double (t, 2, r, TAB_NONE, percent, NULL); tab_double (t, 3, r, TAB_NONE, cum_total, NULL); @@ -1193,7 +1196,7 @@ dump_condensed (const struct variable *v, const struct variable *wv) } for (; f < &ft->valid[n_categories]; f++) { - tab_value (t, 0, r, TAB_NONE, &f->value, &vf->print); + tab_value (t, 0, r, TAB_NONE, &f->value, ft->dict, &vf->print); tab_double (t, 1, r, TAB_NONE, f->count, wfmt); tab_double (t, 2, r, TAB_NONE, f->count / ft->total_cases * 100.0, NULL); diff --git a/src/language/xforms/recode.c b/src/language/xforms/recode.c index 60c8f408..62b03ba0 100644 --- a/src/language/xforms/recode.c +++ b/src/language/xforms/recode.c @@ -83,6 +83,8 @@ struct recode_trns { struct pool *pool; + + /* Variable types, for convenience. */ enum val_type src_type; /* src_vars[*] type. */ enum val_type dst_type; /* dst_vars[*] type. */ @@ -90,6 +92,7 @@ struct recode_trns /* Variables. */ const struct variable **src_vars; /* Source variables. */ const struct variable **dst_vars; /* Destination variables. */ + const struct dictionary *dst_dict; /* Dictionary of dst_vars */ char **dst_names; /* Name of dest variables, if they're new. */ size_t var_cnt; /* Number of variables. */ @@ -540,6 +543,8 @@ create_dst_vars (struct recode_trns *trns, struct dictionary *dict) { size_t i; + trns->dst_dict = dict; + for (i = 0; i < trns->var_cnt; i++) { const struct variable **var = &trns->dst_vars[i]; @@ -598,7 +603,7 @@ find_src_numeric (struct recode_trns *trns, double value, const struct variable /* Returns the output mapping in TRNS for an input of VALUE with the given WIDTH, or a null pointer if there is no mapping. */ static const struct map_out * -find_src_string (struct recode_trns *trns, const char *value, +find_src_string (struct recode_trns *trns, const uint8_t *value, const struct variable *src_var) { struct mapping *m; @@ -625,7 +630,7 @@ find_src_string (struct recode_trns *trns, const char *value, msg_disable (); match = data_in (ss_buffer (value, width), LEGACY_NATIVE, - FMT_F, 0, 0, 0, &uv, 0); + FMT_F, 0, 0, 0, trns->dst_dict, &uv, 0); msg_enable (); out->value.f = uv.f; break; diff --git a/src/libpspp/i18n.c b/src/libpspp/i18n.c index 7fd7580e..e08ba280 100644 --- a/src/libpspp/i18n.c +++ b/src/libpspp/i18n.c @@ -28,6 +28,7 @@ #include "assertion.h" #include "hmapx.h" #include "hash-functions.h" +#include "pool.h" #include "i18n.h" @@ -57,6 +58,7 @@ create_iconv (const char* tocode, const char* fromcode) size_t hash; struct hmapx_node *node; struct converter *converter; + assert (fromcode); hash = hash_string (tocode, hash_string (fromcode, 0)); HMAPX_FOR_EACH_WITH_HASH (converter, node, hash, &map) @@ -84,13 +86,22 @@ create_iconv (const char* tocode, const char* fromcode) return converter->conv; } -/* Return a string based on TEXT converted according to HOW. +char * +recode_string (const char *to, const char *from, + const char *text, int length) +{ + return recode_string_pool (to, from, text, length, NULL); +} + + +/* Return a string based on TEXT which must be encoded using FROM. + The returned string will be encoded in TO. If length is not -1, then it must be the number of bytes in TEXT. The returned string must be freed when no longer required. */ char * -recode_string (const char *to, const char *from, - const char *text, int length) +recode_string_pool (const char *to, const char *from, + const char *text, int length, struct pool *pool) { char *outbuf = 0; size_t outbufferlength; @@ -109,7 +120,6 @@ recode_string (const char *to, const char *from, if ( length == -1 ) length = strlen(text); - if (to == NULL) to = default_encoding; @@ -120,7 +130,7 @@ recode_string (const char *to, const char *from, if ( outbufferlength > length) break; - outbuf = xmalloc(outbufferlength); + outbuf = pool_malloc (pool, outbufferlength); op = outbuf; outbytes = outbufferlength; @@ -157,7 +167,7 @@ recode_string (const char *to, const char *from, case E2BIG: free (outbuf); outbufferlength <<= 1; - outbuf = xmalloc (outbufferlength); + outbuf = pool_malloc (pool, outbufferlength); op = outbuf; outbytes = outbufferlength; inbytes = length; @@ -175,7 +185,7 @@ recode_string (const char *to, const char *from, if (outbytes == 0 ) { char *const oldaddr = outbuf; - outbuf = xrealloc (outbuf, outbufferlength + 1); + outbuf = pool_realloc (pool, outbuf, outbufferlength + 1); op += (outbuf - oldaddr) ; } diff --git a/src/libpspp/i18n.h b/src/libpspp/i18n.h index 2c30a700..9c8f7c14 100644 --- a/src/libpspp/i18n.h +++ b/src/libpspp/i18n.h @@ -24,7 +24,12 @@ void i18n_init (void); #define UTF8 "UTF-8" -char * recode_string (const char *to, const char *from, +struct pool; + +char *recode_string_pool (const char *to, const char *from, + const char *text, int length, struct pool *pool); + +char *recode_string (const char *to, const char *from, const char *text, int len); diff --git a/src/libpspp/legacy-encoding.c b/src/libpspp/legacy-encoding.c index 45f0195f..18a62197 100644 --- a/src/libpspp/legacy-encoding.c +++ b/src/libpspp/legacy-encoding.c @@ -17,116 +17,25 @@ #include #include - -#include "str.h" - -static const char ascii_to_ebcdic[256]; -static const char ebcdic_to_ascii[256]; - -void -legacy_recode (enum legacy_encoding from, const char *src, - enum legacy_encoding to, char *dst, - size_t size) -{ - if (from != to) - { - const char *table; - size_t i; - - table = from == LEGACY_ASCII ? ascii_to_ebcdic : ebcdic_to_ascii; - for (i = 0; i < size; i++) - dst[i] = table[(unsigned char) src[i]]; - } - else - { - if (src != dst) - memcpy (dst, src, size); - } -} +#include +#include char -legacy_to_native (enum legacy_encoding from, char c) +legacy_to_native (const char *from, char c) { - legacy_recode (from, &c, LEGACY_NATIVE, &c, 1); - return c; + char x; + char *s = recode_string (LEGACY_NATIVE, from, &c, 1); + x = s[0]; + free (s); + return x; } char -legacy_from_native (enum legacy_encoding to, char c) +legacy_from_native (const char *to, char c) { - legacy_recode (LEGACY_NATIVE, &c, to, &c, 1); - return c; + char x; + char *s = recode_string (to, LEGACY_NATIVE, &c, 1); + x = s[0]; + free (s); + return x; } - -static const char ascii_to_ebcdic[256] = - { - 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, - 0x16, 0x05, 0x25, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, - 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f, - 0x40, 0x5a, 0x7f, 0x7b, 0x5b, 0x6c, 0x50, 0x7d, - 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61, - 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, - 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f, - 0x7c, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, - 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, - 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, - 0xe7, 0xe8, 0xe9, 0xad, 0xe0, 0xbd, 0x9a, 0x6d, - 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, - 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, - 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, - 0xa7, 0xa8, 0xa9, 0xc0, 0x4f, 0xd0, 0x5f, 0x07, - 0x20, 0x21, 0x22, 0x23, 0x24, 0x15, 0x06, 0x17, - 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x09, 0x0a, 0x1b, - 0x30, 0x31, 0x1a, 0x33, 0x34, 0x35, 0x36, 0x08, - 0x38, 0x39, 0x3a, 0x3b, 0x04, 0x14, 0x3e, 0xe1, - 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, - 0x49, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, - 0x58, 0x59, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, - 0x76, 0x77, 0x78, 0x80, 0x8a, 0x8b, 0x8c, 0x8d, - 0x8e, 0x8f, 0x90, 0x6a, 0x9b, 0x9c, 0x9d, 0x9e, - 0x9f, 0xa0, 0xaa, 0xab, 0xac, 0x4a, 0xae, 0xaf, - 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, - 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xa1, 0xbe, 0xbf, - 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xda, 0xdb, - 0xdc, 0xdd, 0xde, 0xdf, 0xea, 0xeb, 0xec, 0xed, - 0xee, 0xef, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, - }; - -static const char ebcdic_to_ascii[256] = - { - 0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f, - 0x97, 0x8d, 0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x10, 0x11, 0x12, 0x13, 0x9d, 0x85, 0x08, 0x87, - 0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d, 0x1e, 0x1f, - 0x80, 0x81, 0x82, 0x83, 0x84, 0x0a, 0x17, 0x1b, - 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07, - 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04, - 0x98, 0x99, 0x9a, 0x9b, 0x14, 0x15, 0x9e, 0x1a, - 0x20, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, - 0xa7, 0xa8, 0xd5, 0x2e, 0x3c, 0x28, 0x2b, 0x7c, - 0x26, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, - 0xb0, 0xb1, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x7e, - 0x2d, 0x2f, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, - 0xb8, 0xb9, 0xcb, 0x2c, 0x25, 0x5f, 0x3e, 0x3f, - 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, - 0xc2, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22, - 0xc3, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, - 0x68, 0x69, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, - 0xca, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, - 0x71, 0x72, 0x5e, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, - 0xd1, 0xe5, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, - 0x79, 0x7a, 0xd2, 0xd3, 0xd4, 0x5b, 0xd6, 0xd7, - 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, - 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0x5d, 0xe6, 0xe7, - 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, - 0x48, 0x49, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, - 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, - 0x51, 0x52, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, - 0x5c, 0x9f, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, - 0x59, 0x5a, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, - 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, - 0x38, 0x39, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, - }; - diff --git a/src/libpspp/legacy-encoding.h b/src/libpspp/legacy-encoding.h index 12afe42b..c6ae0ab4 100644 --- a/src/libpspp/legacy-encoding.h +++ b/src/libpspp/legacy-encoding.h @@ -17,30 +17,18 @@ #ifndef LIBPSPP_LEGACY_ENCODING #define LIBPSPP_LEGACY_ENCODING 1 -#include #include -/* A legacy character encoding. - This exists only to handle the specific legacy EBCDIC-to-ASCII - recoding that MODE=360 file handles perform. */ -enum legacy_encoding - { - LEGACY_ASCII, /* ASCII or similar character set. */ - LEGACY_EBCDIC, /* IBM EBCDIC character set. */ - - /* Native character set. */ #if 'A' == 0x41 - LEGACY_NATIVE = LEGACY_ASCII +#define LEGACY_NATIVE "ASCII" #elif 'A' == 0xc1 - LEGACY_NATIVE = LEGACY_EBCDIC +#define LEGACY_NATIVE "EBCDIC-US" #else #error Cannot detect native character set. #endif - }; -void legacy_recode (enum legacy_encoding, const char *src, - enum legacy_encoding, char *dst, size_t); -char legacy_to_native (enum legacy_encoding from, char) PURE_FUNCTION; -char legacy_from_native (enum legacy_encoding to, char) PURE_FUNCTION; +char legacy_to_native (const char *from, char) PURE_FUNCTION; +char legacy_from_native (const char *to, char) PURE_FUNCTION; + #endif /* libpspp/legacy-encoding.h */ diff --git a/src/libpspp/str.c b/src/libpspp/str.c index ccd7739c..afe32de9 100644 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@ -1441,3 +1441,25 @@ ds_relocate (struct string *st) free ((char *) rel); } } + + + + +/* Operations on uint8_t "strings" */ + +/* Copies buffer SRC, of SRC_SIZE bytes, to DST, of DST_SIZE bytes. + DST is truncated to DST_SIZE bytes or padded on the right with + copies of PAD as needed. */ +void +u8_buf_copy_rpad (uint8_t *dst, size_t dst_size, + const uint8_t *src, size_t src_size, + char pad) +{ + if (src_size >= dst_size) + memmove (dst, src, dst_size); + else + { + memmove (dst, src, src_size); + memset (&dst[src_size], pad, dst_size - src_size); + } +} diff --git a/src/libpspp/str.h b/src/libpspp/str.h index b9be394c..a134079f 100644 --- a/src/libpspp/str.h +++ b/src/libpspp/str.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -223,4 +224,10 @@ char *ds_put_uninit (struct string *st, size_t incr); /* calls relocate from gnulib on ST */ void ds_relocate (struct string *st); + +void u8_buf_copy_rpad (uint8_t *dst, size_t dst_size, + const uint8_t *src, size_t src_size, + char pad); + + #endif /* str_h */ diff --git a/src/math/interaction.c b/src/math/interaction.c index 444edbfa..7911c8e7 100644 --- a/src/math/interaction.c +++ b/src/math/interaction.c @@ -149,7 +149,7 @@ interaction_value_create (const struct interaction_variable *var, const union va if (var != NULL) { int val_width = 1; - char *val; + uint8_t *val; result = xmalloc (sizeof (*result)); result->intr = var; diff --git a/src/output/table.c b/src/output/table.c index 5686e2bf..72edf17f 100644 --- a/src/output/table.c +++ b/src/output/table.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -506,7 +507,8 @@ tab_natural_dimensions (struct tab_table *t, struct outp_driver *d, from V, displayed with format spec F. */ void tab_value (struct tab_table *table, int c, int r, unsigned char opt, - const union value *v, const struct fmt_spec *f) + const union value *v, const struct dictionary *dict, + const struct fmt_spec *f) { char *contents; @@ -525,11 +527,10 @@ tab_value (struct tab_table *table, int c, int r, unsigned char opt, } #endif - contents = pool_alloc (table->container, f->w); - table->cc[c + r * table->cf] = ss_buffer (contents, f->w); - table->ct[c + r * table->cf] = opt; + contents = data_out_pool (v, dict_get_encoding (dict), f, table->container); - data_out (v, f, contents); + table->cc[c + r * table->cf] = ss_cstr (contents); + table->ct[c + r * table->cf] = opt; } /* Sets cell (C,R) in TABLE, with options OPT, to have value VAL @@ -538,8 +539,7 @@ void tab_fixed (struct tab_table *table, int c, int r, unsigned char opt, double val, int w, int d) { - char *contents; - char buf[40], *cp; + char *s, *cp; struct fmt_spec f; union value double_value; @@ -568,17 +568,15 @@ tab_fixed (struct tab_table *table, int c, int r, unsigned char opt, #endif double_value.f = val; - data_out (&double_value, &f, buf); + s = data_out_pool (&double_value, LEGACY_NATIVE, &f, table->container); - cp = buf; - while (isspace ((unsigned char) *cp) && cp < &buf[w]) + cp = s; + while (isspace ((unsigned char) *cp) && cp < &s[w]) cp++; - f.w = w - (cp - buf); + f.w = w - (cp - s); - contents = pool_alloc (table->container, f.w); - table->cc[c + r * table->cf] = ss_buffer (contents, f.w); + table->cc[c + r * table->cf] = ss_buffer (cp, f.w); table->ct[c + r * table->cf] = opt; - memcpy (contents, cp, f.w); } /* Sets cell (C,R) in TABLE, with options OPT, to have value VAL as @@ -589,11 +587,8 @@ void tab_double (struct tab_table *table, int c, int r, unsigned char opt, double val, const struct fmt_spec *fmt) { - int w; - char *contents; - char buf[40], *cp; - - union value double_value; + struct substring ss; + union value double_value ; assert (table != NULL); @@ -622,17 +617,12 @@ tab_double (struct tab_table *table, int c, int r, unsigned char opt, #endif double_value.f = val; - data_out (&double_value, fmt, buf); + ss = ss_cstr (data_out_pool (&double_value, LEGACY_NATIVE, fmt, table->container)); - cp = buf; - while (isspace ((unsigned char) *cp) && cp < &buf[fmt->w]) - cp++; - w = fmt->w - (cp - buf); + ss_ltrim (&ss, ss_cstr (" ")); - contents = pool_alloc (table->container, w); - table->cc[c + r * table->cf] = ss_buffer (contents, w); + table->cc[c + r * table->cf] = ss; table->ct[c + r * table->cf] = opt; - memcpy (contents, cp, w); } diff --git a/src/output/table.h b/src/output/table.h index 21d3b7bf..1748c24c 100644 --- a/src/output/table.h +++ b/src/output/table.h @@ -153,9 +153,11 @@ enum /* Cells. */ struct fmt_spec; +struct dictionary; union value; void tab_value (struct tab_table *, int c, int r, unsigned char opt, - const union value *, const struct fmt_spec *); + const union value *, const struct dictionary *dict, + const struct fmt_spec *); void tab_fixed (struct tab_table *, int c, int r, unsigned char opt, double v, int w, int d); diff --git a/src/ui/gui/compute-dialog.c b/src/ui/gui/compute-dialog.c index 183aa31b..e779fba6 100644 --- a/src/ui/gui/compute-dialog.c +++ b/src/ui/gui/compute-dialog.c @@ -604,7 +604,6 @@ insert_source_row_into_text_view (GtkTreeIter iter, gint *idx; struct variable *var; GtkTreeIter dict_iter; - gchar *name; GtkTextBuffer *buffer; g_return_if_fail (GTK_IS_TEXT_VIEW (dest)); @@ -632,15 +631,10 @@ insert_source_row_into_text_view (GtkTreeIter iter, gtk_tree_path_free (path); - name = recode_string (UTF8, psppire_dict_encoding (dict), - var_get_name (var), - -1); - buffer = gtk_text_view_get_buffer (GTK_TEXT_VIEW (dest)); erase_selection (buffer); - gtk_text_buffer_insert_at_cursor (buffer, name, -1); + gtk_text_buffer_insert_at_cursor (buffer, var_get_name (var), -1); - g_free (name); } diff --git a/src/ui/gui/dialog-common.c b/src/ui/gui/dialog-common.c index 8d03bed1..5d52204c 100644 --- a/src/ui/gui/dialog-common.c +++ b/src/ui/gui/dialog-common.c @@ -114,15 +114,9 @@ cell_var_name (GtkTreeViewColumn *tree_column, gpointer data) { PsppireDict *dict = data; - struct variable *var; - gchar *name; - - var = get_selected_variable (tree_model, iter, dict); + const struct variable *var = get_selected_variable (tree_model, iter, dict); - name = recode_string (UTF8, psppire_dict_encoding (dict), - var_get_name (var), -1); - g_object_set (cell, "text", name, NULL); - g_free (name); + g_object_set (cell, "text", var_get_name (var), NULL); } diff --git a/src/ui/gui/dict-display.c b/src/ui/gui/dict-display.c index d6b1bcd5..1665d7ff 100644 --- a/src/ui/gui/dict-display.c +++ b/src/ui/gui/dict-display.c @@ -67,7 +67,6 @@ insert_source_row_into_entry (GtkTreeIter iter, gint *idx; struct variable *var; GtkTreeIter dict_iter; - gchar *name; g_return_if_fail (GTK_IS_ENTRY(dest)); @@ -81,10 +80,7 @@ insert_source_row_into_entry (GtkTreeIter iter, gtk_tree_path_free (path); - name = recode_string (UTF8, psppire_dict_encoding (PSPPIRE_DICT (dict)), - var_get_name (var), -1); - gtk_entry_set_text (GTK_ENTRY (dest), name); - g_free (name); + gtk_entry_set_text (GTK_ENTRY (dest), var_get_name (var)); } @@ -123,14 +119,13 @@ is_currently_in_entry (GtkTreeModel *model, GtkTreeIter *iter, PsppireSelector *selector) { gboolean result; - gchar *name; GtkTreeIter dict_iter; GtkTreeModel *dict; struct variable *var; gint dict_index; gint *indeces; GtkTreePath *path; - const gchar *text = gtk_entry_get_text (GTK_ENTRY (selector->dest)); + const gchar *text = gtk_entry_get_text (GTK_ENTRY (selector->dest)); get_base_model (model, iter, &dict, &dict_iter); @@ -144,10 +139,7 @@ is_currently_in_entry (GtkTreeModel *model, GtkTreeIter *iter, gtk_tree_path_free (path); - name = recode_string (UTF8, psppire_dict_encoding (PSPPIRE_DICT (dict)), - var_get_name (var), -1); - result = ( 0 == strcmp (text, name)); - g_free (name); + result = ( 0 == strcmp (text, var_get_name (var) )); return result; } diff --git a/src/ui/gui/find-dialog.c b/src/ui/gui/find-dialog.c index 86d29583..faeb8f64 100644 --- a/src/ui/gui/find-dialog.c +++ b/src/ui/gui/find-dialog.c @@ -427,6 +427,7 @@ struct comparator { const struct variable *var; enum string_cmp_flags flags; + const PsppireDict *dict; bool (*compare) (const struct comparator *, const union value *); @@ -492,20 +493,24 @@ static bool string_value_compare (const struct comparator *cmptr, const union value *val) { + bool found; + char *text; const struct string_comparator *ssc = (const struct string_comparator *) cmptr; int width = var_get_width (cmptr->var); - const char *text = value_str (val, width); - + g_return_val_if_fail (width > 0, false); assert ( ! (cmptr->flags & STR_CMP_LABELS)); - g_return_val_if_fail (width > 0, false); + text = value_to_text (*val, cmptr->dict, *var_get_write_format (cmptr->var)); if ( cmptr->flags & STR_CMP_SUBSTR) - return (NULL != g_strstr_len (text, width, ssc->pattern)); + found = (NULL != g_strstr_len (text, width, ssc->pattern)); else - return (0 == strncmp (text, ssc->pattern, width)); + found = (0 == strncmp (text, ssc->pattern, width)); + + free (text); + return found; } @@ -526,9 +531,9 @@ regexp_value_compare (const struct comparator *cmptr, g_return_val_if_fail (width > 0, false); + text = value_to_text (*val, cmptr->dict, *var_get_write_format (cmptr->var)); /* We must remove trailing whitespace, otherwise $ will not match where one would expect */ - text = g_strndup (value_str (val, width), width); g_strchomp (text); retval = (0 == regexec (&rec->re, text, 0, 0, 0)); @@ -578,7 +583,7 @@ cmptr_value_destroy (struct comparator *cmptr) static struct comparator * -value_comparator_create (const struct variable *var, const char *target) +value_comparator_create (const struct variable *var, const PsppireDict *dict, const char *target) { const struct fmt_spec *fmt; int width ; @@ -589,28 +594,21 @@ value_comparator_create (const struct variable *var, const char *target) cmptr->var = var; cmptr->compare = value_compare ; cmptr->destroy = cmptr_value_destroy; + cmptr->dict = dict; width = var_get_width (var); fmt = var_get_write_format (var); value_init (&vc->pattern, width); - if ( ! data_in (ss_cstr (target), - LEGACY_NATIVE, - fmt->type, - 0, 0, 0, - &vc->pattern, width) ) - { - value_destroy (&vc->pattern, width); - free (vc); - return NULL; - } + text_to_value (target, &vc->pattern, dict, *var_get_write_format (var) ); return cmptr; } static struct comparator * -string_comparator_create (const struct variable *var, const char *target, +string_comparator_create (const struct variable *var, const PsppireDict *dict, + const char *target, enum string_cmp_flags flags) { struct string_comparator *ssc = xzalloc (sizeof (*ssc)); @@ -618,6 +616,7 @@ string_comparator_create (const struct variable *var, const char *target, cmptr->flags = flags; cmptr->var = var; + cmptr->dict = dict; if ( flags & STR_CMP_LABELS) cmptr->compare = string_label_compare; @@ -631,7 +630,7 @@ string_comparator_create (const struct variable *var, const char *target, static struct comparator * -regexp_comparator_create (const struct variable *var, const char *target, +regexp_comparator_create (const struct variable *var, const PsppireDict *dict, const char *target, enum string_cmp_flags flags) { int code; @@ -640,6 +639,7 @@ regexp_comparator_create (const struct variable *var, const char *target, cmptr->flags = flags; cmptr->var = var; + cmptr->dict = dict; cmptr->compare = (flags & STR_CMP_LABELS) ? regexp_label_compare : regexp_value_compare ; @@ -689,16 +689,16 @@ comparator_destroy (struct comparator *cmptr) static struct comparator * -comparator_factory (const struct variable *var, const char *str, +comparator_factory (const struct variable *var, const PsppireDict *dict, const char *str, enum string_cmp_flags flags) { if ( flags & STR_CMP_REGEXP ) - return regexp_comparator_create (var, str, flags); + return regexp_comparator_create (var, dict, str, flags); if ( flags & (STR_CMP_SUBSTR | STR_CMP_LABELS) ) - return string_comparator_create (var, str, flags); + return string_comparator_create (var, dict, str, flags); - return value_comparator_create (var, str); + return value_comparator_create (var, dict, str); } @@ -744,7 +744,7 @@ find_value (const struct find_dialog *fd, casenumber current_row, casenumber i; const struct casenum_iterator *ip = get_iteration_params (fd); struct comparator *cmptr = - comparator_factory (var, target_string, flags); + comparator_factory (var, fd->dict, target_string, flags); value_init (&val, width); if ( ! cmptr) diff --git a/src/ui/gui/helper.c b/src/ui/gui/helper.c index a4c07ca4..ff750b25 100644 --- a/src/ui/gui/helper.c +++ b/src/ui/gui/helper.c @@ -49,13 +49,11 @@ /* Formats a value according to FORMAT The returned string must be freed when no longer required */ gchar * -value_to_text (union value v, struct fmt_spec format) +value_to_text (union value v, const PsppireDict *dict, struct fmt_spec format) { gchar *s = 0; - s = g_new (gchar, format.w + 1); - data_out (&v, &format, s); - s[format.w]='\0'; + s = data_out (&v, dict_get_encoding (dict->dict), &format); g_strchug (s); return s; @@ -65,6 +63,7 @@ value_to_text (union value v, struct fmt_spec format) gboolean text_to_value (const gchar *text, union value *v, + const PsppireDict *dict, struct fmt_spec format) { bool ok; @@ -87,7 +86,8 @@ text_to_value (const gchar *text, union value *v, } msg_disable (); - ok = data_in (ss_cstr (text), LEGACY_NATIVE, format.type, 0, 0, 0, + ok = data_in (ss_cstr (text), UTF8, format.type, 0, 0, 0, + dict->dict, v, fmt_var_width (&format)); msg_enable (); diff --git a/src/ui/gui/helper.h b/src/ui/gui/helper.h index a6287dda..f6c084d4 100644 --- a/src/ui/gui/helper.h +++ b/src/ui/gui/helper.h @@ -25,18 +25,20 @@ #include - +#include "psppire-dict.h" void paste_syntax_in_new_window (const gchar *syntax); struct fmt_spec; + /* Formats a value according to FORMAT The returned string must be freed when no longer required */ -gchar * value_to_text (union value v, struct fmt_spec format); +gchar * value_to_text (union value v, const PsppireDict *dict, struct fmt_spec format); gboolean text_to_value (const gchar *text, union value *v, + const PsppireDict *dict, struct fmt_spec format); GObject *get_object_assert (GtkBuilder *builder, const gchar *name, GType type); diff --git a/src/ui/gui/missing-val-dialog.c b/src/ui/gui/missing-val-dialog.c index 8efbaf97..16a7f6f3 100644 --- a/src/ui/gui/missing-val-dialog.c +++ b/src/ui/gui/missing-val-dialog.c @@ -100,7 +100,8 @@ missing_val_dialog_accept (GtkWidget *w, gpointer data) continue; } - if ( text_to_value (text, &v, *write_spec)) + if ( text_to_value (text, &v, + dialog->dict, *write_spec)) { nvals++; mv_add_value (&dialog->mvl, &v); @@ -126,9 +127,9 @@ missing_val_dialog_accept (GtkWidget *w, gpointer data) const gchar *low_text = gtk_entry_get_text (GTK_ENTRY (dialog->low)); const gchar *high_text = gtk_entry_get_text (GTK_ENTRY (dialog->high)); - if ( text_to_value (low_text, &low_val, *write_spec) + if ( text_to_value (low_text, &low_val, dialog->dict, *write_spec) && - text_to_value (high_text, &high_val, *write_spec) ) + text_to_value (high_text, &high_val, dialog->dict, *write_spec) ) { if ( low_val.f > high_val.f ) { @@ -154,6 +155,7 @@ missing_val_dialog_accept (GtkWidget *w, gpointer data) { union value discrete_val; if ( !text_to_value (discrete_text, &discrete_val, + dialog->dict, *write_spec)) { err_dialog (_("Incorrect value for variable type"), @@ -309,8 +311,9 @@ missing_val_dialog_show (struct missing_val_dialog *dialog) gchar *high_text; mv_get_range (&dialog->mvl, &low.f, &high.f); - low_text = value_to_text (low, *write_spec); - high_text = value_to_text (high, *write_spec); + + low_text = value_to_text (low, dialog->dict, *write_spec); + high_text = value_to_text (high, dialog->dict, *write_spec); gtk_entry_set_text (GTK_ENTRY (dialog->low), low_text); gtk_entry_set_text (GTK_ENTRY (dialog->high), high_text); @@ -320,7 +323,7 @@ missing_val_dialog_show (struct missing_val_dialog *dialog) if ( mv_has_value (&dialog->mvl)) { gchar *text; - text = value_to_text (*mv_get_value (&dialog->mvl, 0), *write_spec); + text = value_to_text (*mv_get_value (&dialog->mvl, 0), dialog->dict, *write_spec); gtk_entry_set_text (GTK_ENTRY (dialog->discrete), text); g_free (text); } @@ -341,7 +344,7 @@ missing_val_dialog_show (struct missing_val_dialog *dialog) { gchar *text ; - text = value_to_text (*mv_get_value (&dialog->mvl, i), + text = value_to_text (*mv_get_value (&dialog->mvl, i), dialog->dict, *write_spec); gtk_entry_set_text (GTK_ENTRY (dialog->mv[i]), text); g_free (text); diff --git a/src/ui/gui/missing-val-dialog.h b/src/ui/gui/missing-val-dialog.h index 7dc079d7..82acf975 100644 --- a/src/ui/gui/missing-val-dialog.h +++ b/src/ui/gui/missing-val-dialog.h @@ -32,6 +32,9 @@ struct missing_val_dialog /* The variable whose missing values are to be updated */ struct variable *pv; + /* The dictionary to which that value belongs */ + PsppireDict *dict; + /* local copy */ struct missing_values mvl; diff --git a/src/ui/gui/psppire-data-editor.c b/src/ui/gui/psppire-data-editor.c index 212259f8..38250f7e 100644 --- a/src/ui/gui/psppire-data-editor.c +++ b/src/ui/gui/psppire-data-editor.c @@ -744,15 +744,10 @@ update_data_ref_entry (const PsppireSheet *sheet, gchar *text = g_strdup_printf ("%d: %s", row + FIRST_CASE_NUMBER, var_get_name (var)); - gchar *s = recode_string (UTF8, - psppire_dict_encoding (data_store->dict), - text, -1); - g_free (text); - - gtk_entry_set_text (GTK_ENTRY (de->cell_ref_entry), s); + gtk_entry_set_text (GTK_ENTRY (de->cell_ref_entry), text); - g_free (s); + g_free (text); } else goto blank_entry; @@ -1651,20 +1646,18 @@ enum { /* Perform data_out for case CC, variable V, appending to STRING */ static void -data_out_g_string (GString *string, const struct variable *v, +data_out_g_string (GString *string, const struct dictionary *dict, + const struct variable *v, const struct ccase *cc) { - char *buf ; - const struct fmt_spec *fs = var_get_print_format (v); const union value *val = case_data (cc, v); - buf = xzalloc (fs->w); - data_out (val, fs, buf); + char *s = data_out (val, dict_get_encoding (dict), fs); - g_string_append_len (string, buf, fs->w); + g_string_append (string, s); - g_free (buf); + g_free (s); } static GString * @@ -1694,7 +1687,7 @@ clip_to_text (void) for (c = 0 ; c < var_cnt ; ++c) { const struct variable *v = dict_get_var (clip_dict, c); - data_out_g_string (string, v, cc); + data_out_g_string (string, clip_dict, v, cc); if ( c < val_cnt - 1 ) g_string_append (string, "\t"); } @@ -1739,7 +1732,7 @@ clip_to_html (void) { const struct variable *v = dict_get_var (clip_dict, c); g_string_append (string, ""); - data_out_g_string (string, v, cc); + data_out_g_string (string, clip_dict, v, cc); g_string_append (string, "\n"); } diff --git a/src/ui/gui/psppire-data-store.c b/src/ui/gui/psppire-data-store.c index b67e27c1..9833fb49 100644 --- a/src/ui/gui/psppire-data-store.c +++ b/src/ui/gui/psppire-data-store.c @@ -583,13 +583,15 @@ psppire_data_store_get_string (PsppireDataStore *store, glong row, glong column) char *text; const struct fmt_spec *fp ; const struct variable *pv ; + const struct dictionary *dict; union value v; int width; - GString *s; g_return_val_if_fail (store->dict, NULL); g_return_val_if_fail (store->datasheet, NULL); + dict = store->dict->dict; + if (column >= psppire_dict_get_var_cnt (store->dict)) return NULL; @@ -615,28 +617,13 @@ psppire_data_store_get_string (PsppireDataStore *store, glong row, glong column) if (label) { value_destroy (&v, width); - return recode_string (UTF8, psppire_dict_encoding (store->dict), - label, -1); + return g_strdup (label); } } fp = var_get_write_format (pv); - s = g_string_sized_new (fp->w + 1); - g_string_set_size (s, fp->w); - - memset (s->str, 0, fp->w); - - g_assert (fp->w == s->len); - - /* Converts binary value V into printable form in the exactly - FP->W character in buffer S according to format specification - FP. No null terminator is appended to the buffer. */ - data_out (&v, fp, s->str); - - text = recode_string (UTF8, psppire_dict_encoding (store->dict), - s->str, fp->w); - g_string_free (s, TRUE); + text = data_out (&v, dict_get_encoding (dict), fp); g_strchomp (text); @@ -677,7 +664,6 @@ gboolean psppire_data_store_set_string (PsppireDataStore *store, const gchar *text, glong row, glong col) { - gchar *s; glong n_cases; const struct variable *pv = psppire_dict_get_variable (store->dict, col); if ( NULL == pv) @@ -691,12 +677,9 @@ psppire_data_store_set_string (PsppireDataStore *store, if (row == n_cases) psppire_data_store_insert_new_case (store, row); - s = recode_string (psppire_dict_encoding (store->dict), UTF8, text, -1); - psppire_data_store_data_in (store, row, - var_get_case_index (pv), ss_cstr (s), + var_get_case_index (pv), ss_cstr (text), var_get_write_format (pv)); - free (s); psppire_sheet_model_range_changed (PSPPIRE_SHEET_MODEL (store), row, col, row, col); @@ -767,15 +750,9 @@ static const gchar null_var_name[]=N_("var"); static gchar * get_row_button_label (const PsppireSheetModel *model, gint unit) { - PsppireDataStore *ds = PSPPIRE_DATA_STORE (model); - gchar *s = g_strdup_printf (_("%d"), unit + FIRST_CASE_NUMBER); - - gchar *text = recode_string (UTF8, psppire_dict_encoding (ds->dict), - s, -1); + // PsppireDataStore *ds = PSPPIRE_DATA_STORE (model); - g_free (s); - - return text; + return g_strdup_printf (_("%d"), unit + FIRST_CASE_NUMBER); } @@ -795,7 +772,6 @@ get_row_sensitivity (const PsppireSheetModel *model, gint unit) static gchar * get_column_subtitle (const PsppireSheetModel *model, gint col) { - gchar *text; const struct variable *v ; PsppireDataStore *ds = PSPPIRE_DATA_STORE (model); @@ -807,16 +783,12 @@ get_column_subtitle (const PsppireSheetModel *model, gint col) if ( ! var_has_label (v)) return NULL; - text = recode_string (UTF8, psppire_dict_encoding (ds->dict), - var_get_label (v), -1); - - return text; + return xstrdup (var_get_label (v)); } static gchar * get_column_button_label (const PsppireSheetModel *model, gint col) { - gchar *text; struct variable *pv ; PsppireDataStore *ds = PSPPIRE_DATA_STORE (model); @@ -825,10 +797,10 @@ get_column_button_label (const PsppireSheetModel *model, gint col) pv = psppire_dict_get_variable (ds->dict, col); - text = recode_string (UTF8, psppire_dict_encoding (ds->dict), - var_get_name (pv), -1); + if (NULL == pv) + return NULL; - return text; + return xstrdup (var_get_name (pv)); } static gboolean @@ -972,18 +944,23 @@ psppire_data_store_data_in (PsppireDataStore *ds, casenumber casenum, gint idx, int width; bool ok; + PsppireDict *dict; + g_return_val_if_fail (ds, FALSE); g_return_val_if_fail (ds->datasheet, FALSE); g_return_val_if_fail (idx < datasheet_get_n_columns (ds->datasheet), FALSE); + dict = ds->dict; + width = fmt_var_width (fmt); g_return_val_if_fail (caseproto_get_width ( datasheet_get_proto (ds->datasheet), idx) == width, FALSE); value_init (&value, width); ok = (datasheet_get_value (ds->datasheet, casenum, idx, &value) - && data_in (input, LEGACY_NATIVE, fmt->type, 0, 0, 0, &value, width) + && data_in (input, UTF8, fmt->type, 0, 0, 0, + dict->dict, &value, width) && datasheet_put_value (ds->datasheet, casenum, idx, &value)); value_destroy (&value, width); diff --git a/src/ui/gui/psppire-dict.c b/src/ui/gui/psppire-dict.c index d6d278f5..c82395f2 100644 --- a/src/ui/gui/psppire-dict.c +++ b/src/ui/gui/psppire-dict.c @@ -760,11 +760,8 @@ tree_model_get_value (GtkTreeModel *model, GtkTreeIter *iter, { case DICT_TVM_COL_NAME: { - gchar *name = recode_string (UTF8, psppire_dict_encoding (dict), - var_get_name (var), -1); g_value_init (value, G_TYPE_STRING); - g_value_set_string (value, name); - g_free (name); + g_value_set_string (value, var_get_name (var)); } break; case DICT_TVM_COL_VAR: diff --git a/src/ui/gui/psppire-dictview.c b/src/ui/gui/psppire-dictview.c index f63ea0ba..243d9078 100644 --- a/src/ui/gui/psppire-dictview.c +++ b/src/ui/gui/psppire-dictview.c @@ -340,19 +340,12 @@ var_description_cell_data_func (GtkTreeViewColumn *col, "%s", var_get_label (var)); - char *utf8 = recode_string (UTF8, psppire_dict_encoding (dict), - text, -1); - + g_object_set (cell, "markup", text, NULL); g_free (text); - g_object_set (cell, "markup", utf8, NULL); - g_free (utf8); } else { - char *name = recode_string (UTF8, psppire_dict_encoding (dict), - var_get_name (var), -1); - g_object_set (cell, "text", name, NULL); - g_free (name); + g_object_set (cell, "text", var_get_name (var), NULL); } } @@ -439,7 +432,7 @@ set_tooltip_for_variable (GtkTreeView *treeview, return FALSE; { - gchar *tip ; + const gchar *tip ; GtkTreeModel *m; PsppireDict *dict; @@ -447,15 +440,11 @@ set_tooltip_for_variable (GtkTreeView *treeview, dict = PSPPIRE_DICT (m); if ( PSPPIRE_DICT_VIEW (treeview)->prefer_labels ) - tip = recode_string (UTF8, psppire_dict_encoding (dict), - var_get_name (var), -1); + tip = var_get_name (var); else - tip = recode_string (UTF8, psppire_dict_encoding (dict), - var_get_label (var), -1); + tip = var_get_label (var); gtk_tooltip_set_text (tooltip, tip); - - g_free (tip); } return TRUE; diff --git a/src/ui/gui/psppire-var-sheet.c b/src/ui/gui/psppire-var-sheet.c index 3ebea2ea..dfd62bb6 100644 --- a/src/ui/gui/psppire-var-sheet.c +++ b/src/ui/gui/psppire-var-sheet.c @@ -384,6 +384,8 @@ var_sheet_change_active_cell (PsppireVarSheet *vs, vs->missing_val_dialog->pv = psppire_var_store_get_var (var_store, row); + vs->missing_val_dialog->dict = var_store->dict; + g_signal_connect_swapped (customEntry, "clicked", G_CALLBACK (missing_val_dialog_show), @@ -479,7 +481,8 @@ psppire_var_sheet_realize (GtkWidget *w) GtkWidget *toplevel = gtk_widget_get_toplevel (GTK_WIDGET (vs)); vs->val_labs_dialog = val_labs_dialog_create (GTK_WINDOW (toplevel), - PSPPIRE_SHEET (vs)); + PSPPIRE_VAR_STORE (psppire_sheet_get_model (PSPPIRE_SHEET (vs)))); + vs->missing_val_dialog = missing_val_dialog_create (GTK_WINDOW (toplevel)); vs->var_type_dialog = var_type_dialog_create (GTK_WINDOW (toplevel)); diff --git a/src/ui/gui/psppire-var-store.c b/src/ui/gui/psppire-var-store.c index aadc5855..26c1ee4d 100644 --- a/src/ui/gui/psppire-var-store.c +++ b/src/ui/gui/psppire-var-store.c @@ -435,7 +435,7 @@ psppire_var_store_clear (PsppireSheetModel *model, glong row, glong col) switch (col) { case PSPPIRE_VAR_STORE_COL_LABEL: - var_set_label (pv, 0); + var_set_label (pv, NULL); return TRUE; break; } @@ -468,13 +468,7 @@ psppire_var_store_set_string (PsppireSheetModel *model, case PSPPIRE_VAR_STORE_COL_NAME: { gboolean ok; - char *s = recode_string (psppire_dict_encoding (var_store->dict), - UTF8, - text, -1); - - ok = psppire_dict_rename_var (var_store->dict, pv, s); - - free (s); + ok = psppire_dict_rename_var (var_store->dict, pv, text); return ok; } case PSPPIRE_VAR_STORE_COL_COLUMNS: @@ -540,11 +534,7 @@ psppire_var_store_set_string (PsppireSheetModel *model, break; case PSPPIRE_VAR_STORE_COL_LABEL: { - gchar *s = recode_string (psppire_dict_encoding (var_store->dict), - UTF8, - text, -1); - var_set_label (pv, s); - free (s); + var_set_label (pv, text); return TRUE; } break; @@ -583,6 +573,7 @@ text_for_column (PsppireVarStore *vs, N_("Custom"), N_("String") }; + enum {VT_NUMERIC, VT_COMMA, VT_DOT, VT_SCIENTIFIC, VT_DATE, VT_DOLLAR, VT_CUSTOM, VT_STRING}; @@ -591,8 +582,7 @@ text_for_column (PsppireVarStore *vs, switch (c) { case PSPPIRE_VAR_STORE_COL_NAME: - return recode_string (UTF8, psppire_dict_encoding (dict), - var_get_name (pv), -1); + return xstrdup (var_get_name (pv)); break; case PSPPIRE_VAR_STORE_COL_TYPE: { @@ -679,8 +669,12 @@ text_for_column (PsppireVarStore *vs, } break; case PSPPIRE_VAR_STORE_COL_LABEL: - return recode_string (UTF8, psppire_dict_encoding (dict), - var_get_label (pv), -1); + { + const char *label = var_get_label (pv); + if (label) + return xstrdup (label); + return NULL; + } break; case PSPPIRE_VAR_STORE_COL_MISSING: @@ -694,8 +688,6 @@ text_for_column (PsppireVarStore *vs, return g_locale_to_utf8 (gettext (none), -1, 0, 0, err); else { - gchar *ss; - GString *gstr = g_string_sized_new (10); const struct val_labs *vls = var_get_value_labels (pv); const struct val_lab **labels = val_labs_sorted (vls); const struct val_lab *vl = labels[0]; @@ -704,17 +696,10 @@ text_for_column (PsppireVarStore *vs, g_assert (vl); { - gchar *const vstr = value_to_text (vl->value, *write_spec); + gchar *const vstr = value_to_text (vl->value, dict, *write_spec); - g_string_printf (gstr, "{%s,\"%s\"}_", - vstr, val_lab_get_label (vl)); - g_free (vstr); + return g_strdup_printf ( "{%s,\"%s\"}_", vstr, val_lab_get_label (vl)); } - - ss = recode_string (UTF8, psppire_dict_encoding (dict), - gstr->str, gstr->len); - g_string_free (gstr, TRUE); - return ss; } } break; diff --git a/src/ui/gui/text-data-import-dialog.c b/src/ui/gui/text-data-import-dialog.c index dac8b4c1..6f33ff6d 100644 --- a/src/ui/gui/text-data-import-dialog.c +++ b/src/ui/gui/text-data-import-dialog.c @@ -1543,6 +1543,7 @@ init_formats_page (struct import_assistant *ia) p->data_tree_view = GTK_TREE_VIEW (get_widget_assert (builder, "data")); p->modified_vars = NULL; p->modified_var_cnt = 0; + p->dict = NULL; } /* Frees IA's formats substructure. */ @@ -1748,7 +1749,9 @@ parse_field (struct import_assistant *ia, if (field.string != NULL) { msg_disable (); + if (!data_in (field, LEGACY_NATIVE, in->type, 0, 0, 0, + ia->formats.dict, &val, var_get_width (var))) { char fmt_string[FMT_STRING_LEN_MAX + 1]; @@ -1768,10 +1771,7 @@ parse_field (struct import_assistant *ia, } if (outputp != NULL) { - char *output = xmalloc (out.w + 1); - data_out (&val, &out, output); - output[out.w] = '\0'; - *outputp = output; + *outputp = data_out (&val, dict_get_encoding (ia->formats.dict), &out); } value_destroy (&val, var_get_width (var)); diff --git a/src/ui/gui/val-labs-dialog.c b/src/ui/gui/val-labs-dialog.c index 0af80591..4b575d97 100644 --- a/src/ui/gui/val-labs-dialog.c +++ b/src/ui/gui/val-labs-dialog.c @@ -34,7 +34,7 @@ struct val_labs_dialog { GtkWidget *window; - PsppireSheet *vs; + PsppireVarStore *var_store; /* The variable to be updated */ struct variable *pv; @@ -72,6 +72,7 @@ on_label_entry_change (GtkEntry *entry, gpointer data) text = gtk_entry_get_text (GTK_ENTRY (dialog->value_entry)); text_to_value (text, &v, + dialog->var_store->dict, *var_get_write_format (dialog->pv)); @@ -142,6 +143,7 @@ on_value_entry_change (GtkEntry *entry, gpointer data) union value v; text_to_value (text, &v, + dialog->var_store->dict, *var_get_write_format (dialog->pv)); @@ -268,6 +270,7 @@ on_change (GtkWidget *w, gpointer data) union value v; text_to_value (val_text, &v, + dialog->var_store->dict, *var_get_write_format (dialog->pv)); val_labs_replace (dialog->labs, &v, @@ -292,6 +295,7 @@ on_add (GtkWidget *w, gpointer data) const gchar *text = gtk_entry_get_text (GTK_ENTRY (dialog->value_entry)); text_to_value (text, &v, + dialog->var_store->dict, *var_get_write_format (dialog->pv)); @@ -337,19 +341,15 @@ on_remove (GtkWidget *w, gpointer data) static void on_select_row (GtkTreeView *treeview, gpointer data) { - gchar *labeltext; struct val_labs_dialog *dialog = data; union value value; - const char *label; + const char *label = NULL; gchar *text; - PsppireVarStore *var_store = - PSPPIRE_VAR_STORE (psppire_sheet_get_model (dialog->vs)); - get_selected_tuple (dialog, &value, &label); - text = value_to_text (value, *var_get_write_format (dialog->pv)); + text = value_to_text (value, dialog->var_store->dict, *var_get_write_format (dialog->pv)); g_signal_handler_block (GTK_ENTRY (dialog->value_entry), dialog->value_handler_id); @@ -364,12 +364,8 @@ on_select_row (GtkTreeView *treeview, gpointer data) dialog->change_handler_id); - labeltext = recode_string (UTF8, psppire_dict_encoding (var_store->dict), - label, -1); - gtk_entry_set_text (GTK_ENTRY (dialog->label_entry), - labeltext); - g_free (labeltext); + label); g_signal_handler_unblock (GTK_ENTRY (dialog->label_entry), dialog->change_handler_id); @@ -382,7 +378,7 @@ on_select_row (GtkTreeView *treeview, gpointer data) /* Create a new dialog box (there should normally be only one)*/ struct val_labs_dialog * -val_labs_dialog_create (GtkWindow *toplevel, PsppireSheet *sheet) +val_labs_dialog_create (GtkWindow *toplevel, PsppireVarStore *var_store) { GtkTreeViewColumn *column; @@ -392,10 +388,10 @@ val_labs_dialog_create (GtkWindow *toplevel, PsppireSheet *sheet) struct val_labs_dialog *dialog = g_malloc (sizeof (*dialog)); + dialog->var_store = var_store; dialog->window = get_widget_assert (xml,"val_labs_dialog"); dialog->value_entry = get_widget_assert (xml,"value_entry"); dialog->label_entry = get_widget_assert (xml,"label_entry"); - dialog->vs = sheet; gtk_window_set_transient_for (GTK_WINDOW (dialog->window), toplevel); @@ -481,9 +477,6 @@ repopulate_dialog (struct val_labs_dialog *dialog) GtkTreeIter iter; - PsppireVarStore *var_store = - PSPPIRE_VAR_STORE (psppire_sheet_get_model (dialog->vs)); - GtkListStore *list_store = gtk_list_store_new (2, G_TYPE_STRING, G_TYPE_DOUBLE); @@ -508,16 +501,11 @@ repopulate_dialog (struct val_labs_dialog *dialog) const struct val_lab *vl = labels[i]; gchar *const vstr = - value_to_text (vl->value, + value_to_text (vl->value, dialog->var_store->dict, *var_get_write_format (dialog->pv)); - gchar *labeltext = - recode_string (UTF8, - psppire_dict_encoding (var_store->dict), - val_lab_get_label (vl), -1); - gchar *const text = g_strdup_printf ("%s = \"%s\"", - vstr, labeltext); + vstr, val_lab_get_label (vl)); gtk_list_store_append (list_store, &iter); gtk_list_store_set (list_store, &iter, @@ -525,7 +513,6 @@ repopulate_dialog (struct val_labs_dialog *dialog) 1, vl->value.f, -1); - g_free (labeltext); g_free (text); g_free (vstr); } diff --git a/src/ui/gui/val-labs-dialog.h b/src/ui/gui/val-labs-dialog.h index 3a09f1ca..745e0a0a 100644 --- a/src/ui/gui/val-labs-dialog.h +++ b/src/ui/gui/val-labs-dialog.h @@ -24,13 +24,13 @@ #include #include -#include - +//#include +#include "psppire-var-store.h" struct val_labs; -struct val_labs_dialog * val_labs_dialog_create (GtkWindow *, PsppireSheet *); +struct val_labs_dialog * val_labs_dialog_create (GtkWindow *, PsppireVarStore *); void val_labs_dialog_show (struct val_labs_dialog *); diff --git a/src/ui/gui/var-display.c b/src/ui/gui/var-display.c index b615fe52..7081e9e2 100644 --- a/src/ui/gui/var-display.c +++ b/src/ui/gui/var-display.c @@ -45,15 +45,14 @@ missing_values_to_string (const PsppireDict *dict, const struct variable *pv, GE gint i; for (i = 0 ; i < n; ++i ) { - mv[i] = value_to_text (*mv_get_value (miss, i), *fmt); + mv[i] = value_to_text (*mv_get_value (miss, i), dict, *fmt); if ( i > 0 ) g_string_append (gstr, ", "); g_string_append (gstr, mv[i]); g_free (mv[i]); } - s = recode_string (UTF8, psppire_dict_encoding (dict), - gstr->str, gstr->len); - g_string_free (gstr, TRUE); + s = gstr->str; + g_string_free (gstr, FALSE); } else { @@ -62,8 +61,8 @@ missing_values_to_string (const PsppireDict *dict, const struct variable *pv, GE union value low, high; mv_get_range (miss, &low.f, &high.f); - l = value_to_text (low, *fmt); - h = value_to_text (high, *fmt); + l = value_to_text (low, dict, *fmt); + h = value_to_text (high, dict,*fmt); g_string_printf (gstr, "%s - %s", l, h); g_free (l); @@ -73,15 +72,14 @@ missing_values_to_string (const PsppireDict *dict, const struct variable *pv, GE { gchar *ss = 0; - ss = value_to_text (*mv_get_value (miss, 0), *fmt); + ss = value_to_text (*mv_get_value (miss, 0), dict, *fmt); g_string_append (gstr, ", "); g_string_append (gstr, ss); free (ss); } - s = recode_string (UTF8, psppire_dict_encoding (dict), - gstr->str, gstr->len); - g_string_free (gstr, TRUE); + s = gstr->str; + g_string_free (gstr, FALSE); } return s; diff --git a/src/ui/gui/var-type-dialog.c b/src/ui/gui/var-type-dialog.c index c433bf3b..05ba5d3c 100644 --- a/src/ui/gui/var-type-dialog.c +++ b/src/ui/gui/var-type-dialog.c @@ -262,12 +262,12 @@ preview_custom (GtkWidget *w, gpointer data) union value v; v.f = 1234.56; - sample_text = value_to_text (v, dialog->fmt_l); + sample_text = value_to_text (v, NULL, dialog->fmt_l); gtk_label_set_text (GTK_LABEL (dialog->label_psample), sample_text); g_free (sample_text); v.f = -v.f; - sample_text = value_to_text (v, dialog->fmt_l); + sample_text = value_to_text (v, NULL, dialog->fmt_l); gtk_label_set_text (GTK_LABEL (dialog->label_nsample), sample_text); g_free (sample_text); } diff --git a/src/ui/gui/variable-info-dialog.c b/src/ui/gui/variable-info-dialog.c index fa9d5101..178a834c 100644 --- a/src/ui/gui/variable-info-dialog.c +++ b/src/ui/gui/variable-info-dialog.c @@ -40,26 +40,14 @@ static const gchar none[] = N_("None"); -static gchar * -name_to_string (const struct variable *var, PsppireDict *dict) -{ - const char *name = var_get_name (var); - g_assert (name); - - return recode_string (UTF8, psppire_dict_encoding (dict), - name, -1); -} - - -static gchar * -label_to_string (const struct variable *var, PsppireDict *dict) +static const gchar * +label_to_string (const struct variable *var) { const char *label = var_get_label (var); - if (! label) return g_strdup (none); + if (NULL == label) return g_strdup (none); - return recode_string (UTF8, psppire_dict_encoding (dict), - label, -1); + return label; } @@ -82,16 +70,11 @@ populate_text (PsppireDictView *treeview, gpointer data) NULL); gstring = g_string_sized_new (200); - text = name_to_string (var, dict); - g_string_assign (gstring, text); - g_free (text); + g_string_assign (gstring, var_get_name (var)); g_string_append (gstring, "\n"); - text = label_to_string (var, dict); - g_string_append_printf (gstring, _("Label: %s\n"), text); - g_free (text); - + g_string_append_printf (gstring, _("Label: %s\n"), label_to_string (var)); { const struct fmt_spec *fmt = var_get_print_format (var); char buffer[FMT_STRING_LEN_MAX + 1]; @@ -130,14 +113,10 @@ populate_text (PsppireDictView *treeview, gpointer data) { const struct val_lab *vl = labels[i]; gchar *const vstr = - value_to_text (vl->value, *var_get_print_format (var)); - - text = recode_string (UTF8, psppire_dict_encoding (dict), - val_lab_get_label (vl), -1); + value_to_text (vl->value, dict, *var_get_print_format (var)); - g_string_append_printf (gstring, _("%s %s\n"), vstr, text); + g_string_append_printf (gstring, _("%s %s\n"), vstr, val_lab_get_label (vl)); - g_free (text); g_free (vstr); } free (labels); diff --git a/src/ui/syntax-gen.c b/src/ui/syntax-gen.c index bf1ee12f..22e717ac 100644 --- a/src/ui/syntax-gen.c +++ b/src/ui/syntax-gen.c @@ -146,20 +146,23 @@ syntax_gen_number (struct string *output, & (FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT))) { union value v_in, v_out; - char buffer[FMT_MAX_NUMERIC_WIDTH]; + char *s; bool ok; v_in.f = number; - data_out (&v_in, format, buffer); + s = data_out (&v_in, "FIXME", format); msg_disable (); - ok = data_in (ss_buffer (buffer, format->w), LEGACY_NATIVE, - format->type, false, 0, 0, &v_out, 0); + /* FIXME: UTF8 encoded strings will fail here */ + ok = data_in (ss_cstr (s), LEGACY_NATIVE, + format->type, false, 0, 0, NULL, &v_out, 0); msg_enable (); if (ok && v_out.f == number) { - syntax_gen_string (output, ss_buffer (buffer, format->w)); + syntax_gen_string (output, ss_cstr (s)); + free (s); return; } + free (s); } if (number == SYSMIS) diff --git a/tests/data/datasheet-test.c b/tests/data/datasheet-test.c index a9f4bf2f..c46c2683 100644 --- a/tests/data/datasheet-test.c +++ b/tests/data/datasheet-test.c @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -404,7 +405,7 @@ value_from_param (union value *value, int width, unsigned int idx) else { unsigned int hash = hash_int (idx, 0); - char *string = value_str_rw (value, width); + uint8_t *string = value_str_rw (value, width); int offset; assert (width < 32);