From: John Darrington Date: Tue, 7 Jul 2009 16:50:57 +0000 (+0800) Subject: Change union value type to contain uint8_t types instead of char. X-Git-Tag: build37~53^2~12 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp-builds.git;a=commitdiff_plain;h=8830c95bb9e8d72621787866141a27fc22e8c786 Change union value type to contain uint8_t types instead of char. Make the members of the union value type in src/data/value.h be uint8_t instead of char. This is more logical since the contents of values cannot be considered "strings" until they have been formatted. The unformatted values are merely arrays of bytes. This has the added advantage of provoking compiler warnings when a char * type is being implicitly cast to a uint8_t * or vice versa. When such a warning is encountered, it probably means that the data needs to be re-encoded using recode_string. --- diff --git a/Smake b/Smake index 78789d4f..201474ce 100644 --- a/Smake +++ b/Smake @@ -76,6 +76,7 @@ GNULIB_MODULES = \ trunc \ unilbrk/ulc-width-linebreaks \ unistd \ + unistr/u8-strlen \ unlocked-io \ vasprintf-posix \ vfprintf-posix \ diff --git a/src/data/case.c b/src/data/case.c index a4a78dd0..dc402926 100644 --- a/src/data/case.c +++ b/src/data/case.c @@ -308,7 +308,7 @@ case_num_idx (const struct ccase *c, size_t idx) Like the strings embedded in all "union value"s, the return value is not null-terminated. */ -const char * +const uint8_t * case_str (const struct ccase *c, const struct variable *v) { size_t idx = var_get_case_index (v); @@ -321,7 +321,7 @@ case_str (const struct ccase *c, const struct variable *v) Like the strings embedded in all "union value"s, the return value is not null-terminated. */ -const char * +const uint8_t * case_str_idx (const struct ccase *c, size_t idx) { assert (idx < c->proto->n_widths); @@ -336,7 +336,7 @@ case_str_idx (const struct ccase *c, size_t idx) Like the strings embedded in all "union value"s, the return value is not null-terminated. 
*/ -char * +uint8_t * case_str_rw (struct ccase *c, const struct variable *v) { size_t idx = var_get_case_index (v); @@ -352,7 +352,7 @@ case_str_rw (struct ccase *c, const struct variable *v) Like the strings embedded in all "union value"s, the return value is not null-terminated. */ -char * +uint8_t * case_str_rw_idx (struct ccase *c, size_t idx) { assert (idx < c->proto->n_widths); diff --git a/src/data/case.h b/src/data/case.h index 36feb15f..0bfc62cd 100644 --- a/src/data/case.h +++ b/src/data/case.h @@ -94,10 +94,10 @@ union value *case_data_rw_idx (struct ccase *, size_t idx); double case_num (const struct ccase *, const struct variable *); double case_num_idx (const struct ccase *, size_t idx); -const char *case_str (const struct ccase *, const struct variable *); -const char *case_str_idx (const struct ccase *, size_t idx); -char *case_str_rw (struct ccase *, const struct variable *); -char *case_str_rw_idx (struct ccase *, size_t idx); +const uint8_t *case_str (const struct ccase *, const struct variable *); +const uint8_t *case_str_idx (const struct ccase *, size_t idx); +uint8_t *case_str_rw (struct ccase *, const struct variable *); +uint8_t *case_str_rw_idx (struct ccase *, size_t idx); int case_compare (const struct ccase *, const struct ccase *, const struct variable *const *, size_t n_vars); diff --git a/src/data/data-in.c b/src/data/data-in.c index 4666a173..7e7d087d 100644 --- a/src/data/data-in.c +++ b/src/data/data-in.c @@ -612,7 +612,7 @@ parse_A (struct data_in *i) { /* This is equivalent to buf_copy_rpad, except that we posibly do a character set recoding in the middle. 
*/ - char *dst = value_str_rw (i->output, i->width); + uint8_t *dst = value_str_rw (i->output, i->width); size_t dst_size = i->width; const char *src = ss_data (i->input); size_t src_size = ss_length (i->input); @@ -630,7 +630,7 @@ parse_A (struct data_in *i) static bool parse_AHEX (struct data_in *i) { - char *s = value_str_rw (i->output, i->width); + uint8_t *s = value_str_rw (i->output, i->width); size_t j; for (j = 0; ; j++) diff --git a/src/data/missing-values.c b/src/data/missing-values.c index c1a74691..61bb9bcb 100644 --- a/src/data/missing-values.c +++ b/src/data/missing-values.c @@ -160,7 +160,7 @@ mv_add_value (struct missing_values *mv, const union value *v) Returns true if successful, false if MV has no more room for missing values or if S is not an acceptable missing value. */ bool -mv_add_str (struct missing_values *mv, const char s[]) +mv_add_str (struct missing_values *mv, const uint8_t s[]) { union value v; bool ok; @@ -404,7 +404,7 @@ is_num_user_missing (const struct missing_values *mv, double d) MV must be a set of string missing values. S[] must contain exactly as many characters as MV's width. */ static bool -is_str_user_missing (const struct missing_values *mv, const char s[]) +is_str_user_missing (const struct missing_values *mv, const uint8_t s[]) { const union value *v = mv->values; assert (mv->width > 0); @@ -456,7 +456,7 @@ mv_is_num_missing (const struct missing_values *mv, double d, MV must be a set of string missing values. S[] must contain exactly as many characters as MV's width. 
*/ bool -mv_is_str_missing (const struct missing_values *mv, const char s[], +mv_is_str_missing (const struct missing_values *mv, const uint8_t s[], enum mv_class class) { assert (mv->width > 0); diff --git a/src/data/missing-values.h b/src/data/missing-values.h index 5576fc6c..4d046fae 100644 --- a/src/data/missing-values.h +++ b/src/data/missing-values.h @@ -64,7 +64,7 @@ enum mv_class bool mv_is_value_missing (const struct missing_values *, const union value *, enum mv_class); bool mv_is_num_missing (const struct missing_values *, double, enum mv_class); -bool mv_is_str_missing (const struct missing_values *, const char[], +bool mv_is_str_missing (const struct missing_values *, const uint8_t[], enum mv_class); /* Initializing missing value sets. */ @@ -94,7 +94,7 @@ void mv_get_range (const struct missing_values *, double *low, double *high); /* Adding and modifying discrete values. */ bool mv_add_value (struct missing_values *, const union value *); -bool mv_add_str (struct missing_values *, const char[]); +bool mv_add_str (struct missing_values *, const uint8_t[]); bool mv_add_num (struct missing_values *, double); void mv_pop_value (struct missing_values *, union value *); bool mv_replace_value (struct missing_values *, const union value *, int idx); diff --git a/src/data/por-file-reader.c b/src/data/por-file-reader.c index 461796bf..cd8b213e 100644 --- a/src/data/por-file-reader.c +++ b/src/data/por-file-reader.c @@ -447,6 +447,28 @@ read_string (struct pfm_reader *r, char *buf) *buf = '\0'; } + +/* Reads a string into BUF, which must have room for 256 + characters. + Returns the number of bytes read. +*/ +static size_t +read_bytes (struct pfm_reader *r, uint8_t *buf) +{ + int n = read_int (r); + if (n < 0 || n > 255) + error (r, _("Bad string length %d."), n); + + while (n-- > 0) + { + *buf++ = r->cc; + advance (r); + } + return n; +} + + + /* Reads a string and returns a copy of it allocated from R's pool. 
*/ static char * @@ -739,9 +761,9 @@ parse_value (struct pfm_reader *r, int width, union value *v) value_init (v, width); if (width > 0) { - char string[256]; - read_string (r, string); - value_copy_str_rpad (v, width, string, ' '); + uint8_t buf[256]; + size_t n_bytes = read_bytes (r, buf); + value_copy_buf_rpad (v, width, buf, n_bytes, ' '); } else v->f = read_float (r); @@ -844,9 +866,9 @@ por_file_casereader_read (struct casereader *reader, void *r_) case_data_rw_idx (c, i)->f = read_float (r); else { - char string[256]; - read_string (r, string); - buf_copy_str_rpad (case_str_rw_idx (c, i), width, string, ' '); + uint8_t buf[256]; + size_t n_bytes = read_bytes (r, buf); + u8_buf_copy_rpad (case_str_rw_idx (c, i), width, buf, n_bytes, ' '); } } diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 141fd881..5850887f 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -632,7 +632,7 @@ read_variable_record (struct sfm_reader *r, struct dictionary *dict, value_set_missing (&value, mv_width); for (i = 0; i < missing_value_code; i++) { - char *s = value_str_rw (&value, mv_width); + uint8_t *s = value_str_rw (&value, mv_width); read_bytes (r, s, 8); mv_add_str (&mv, s); } @@ -1183,7 +1183,7 @@ read_value_labels (struct sfm_reader *r, struct label { - char raw_value[8]; /* Value as uninterpreted bytes. */ + uint8_t raw_value[8]; /* Value as uninterpreted bytes. */ union value value; /* Value. */ char *label; /* Null-terminated label string. 
*/ }; @@ -1281,7 +1281,7 @@ read_value_labels (struct sfm_reader *r, value_init_pool (subpool, &label->value, max_width); if (var_is_alpha (var[0])) - buf_copy_rpad (value_str_rw (&label->value, max_width), max_width, + u8_buf_copy_rpad (value_str_rw (&label->value, max_width), max_width, label->raw_value, sizeof label->raw_value, ' '); else label->value.f = float_get_double (r->float_format, label->raw_value); @@ -1461,7 +1461,7 @@ read_long_string_value_labels (struct sfm_reader *r, /* Read value. */ value_length = read_int (r); if (value_length == width) - read_string (r, value_str_rw (&value, width), width + 1); + read_bytes (r, value_str_rw (&value, width), width); else { sys_warn (r, _("Ignoring long string value %zu for variable %s, " @@ -1518,11 +1518,11 @@ static void partial_record (struct sfm_reader *r) static void read_error (struct casereader *, const struct sfm_reader *); static bool read_case_number (struct sfm_reader *, double *); -static bool read_case_string (struct sfm_reader *, char *, size_t); +static bool read_case_string (struct sfm_reader *, uint8_t *, size_t); static int read_opcode (struct sfm_reader *); static bool read_compressed_number (struct sfm_reader *, double *); -static bool read_compressed_string (struct sfm_reader *, char *); -static bool read_whole_strings (struct sfm_reader *, char *, size_t); +static bool read_compressed_string (struct sfm_reader *, uint8_t *); +static bool read_whole_strings (struct sfm_reader *, uint8_t *, size_t); static bool skip_whole_strings (struct sfm_reader *, size_t); /* Reads and returns one case from READER's file. 
Returns a null @@ -1557,7 +1557,7 @@ sys_file_casereader_read (struct casereader *reader, void *r_) } else { - char *s = value_str_rw (v, sv->var_width); + uint8_t *s = value_str_rw (v, sv->var_width); if (!read_case_string (r, s + sv->offset, sv->segment_width)) goto eof; if (!skip_whole_strings (r, ROUND_DOWN (sv->padding, 8))) @@ -1619,7 +1619,7 @@ read_case_number (struct sfm_reader *r, double *d) Returns true if successful, false if end of file is reached immediately. */ static bool -read_case_string (struct sfm_reader *r, char *s, size_t length) +read_case_string (struct sfm_reader *r, uint8_t *s, size_t length) { size_t whole = ROUND_DOWN (length, 8); size_t partial = length % 8; @@ -1632,7 +1632,7 @@ read_case_string (struct sfm_reader *r, char *s, size_t length) if (partial) { - char bounce[8]; + uint8_t bounce[8]; if (!read_whole_strings (r, bounce, sizeof bounce)) { if (whole) @@ -1703,7 +1703,7 @@ read_compressed_number (struct sfm_reader *r, double *d) Returns true if successful, false if end of file is reached immediately. */ static bool -read_compressed_string (struct sfm_reader *r, char *dst) +read_compressed_string (struct sfm_reader *r, uint8_t *dst) { switch (read_opcode (r)) { @@ -1732,7 +1732,7 @@ read_compressed_string (struct sfm_reader *r, char *dst) Returns true if successful, false if end of file is reached immediately. 
*/ static bool -read_whole_strings (struct sfm_reader *r, char *s, size_t length) +read_whole_strings (struct sfm_reader *r, uint8_t *s, size_t length) { assert (length % 8 == 0); if (!r->compressed) @@ -1760,7 +1760,7 @@ read_whole_strings (struct sfm_reader *r, char *s, size_t length) static bool skip_whole_strings (struct sfm_reader *r, size_t length) { - char buffer[1024]; + uint8_t buffer[1024]; assert (length < sizeof buffer); return read_whole_strings (r, buffer, length); } diff --git a/src/data/value.c b/src/data/value.c index ce050c01..6dbecb11 100644 --- a/src/data/value.c +++ b/src/data/value.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "minmax.h" #include "xalloc.h" @@ -44,7 +45,7 @@ value_copy_rpad (union value *dst, int dst_width, const union value *src, int src_width, char pad) { - buf_copy_rpad (value_str_rw (dst, dst_width), dst_width, + u8_buf_copy_rpad (value_str_rw (dst, dst_width), dst_width, value_str (src, src_width), src_width, pad); } @@ -62,10 +63,10 @@ value_copy_rpad (union value *dst, int dst_width, DST was initialized. Passing, e.g., a smaller value in order to modify only a prefix of DST will not work in every case. */ void -value_copy_str_rpad (union value *dst, int dst_width, const char *src, +value_copy_str_rpad (union value *dst, int dst_width, const uint8_t *src, char pad) { - value_copy_buf_rpad (dst, dst_width, src, strlen (src), pad); + value_copy_buf_rpad (dst, dst_width, src, u8_strlen (src), pad); } /* Copies the SRC_LEN bytes at SRC to string value DST with width @@ -81,9 +82,9 @@ value_copy_str_rpad (union value *dst, int dst_width, const char *src, to modify only a prefix of DST will not work in every case. 
*/ void value_copy_buf_rpad (union value *dst, int dst_width, - const char *src, size_t src_len, char pad) + const uint8_t *src, size_t src_len, char pad) { - buf_copy_rpad (value_str_rw (dst, dst_width), dst_width, src, src_len, pad); + u8_buf_copy_rpad (value_str_rw (dst, dst_width), dst_width, src, src_len, pad); } /* Sets V to the system-missing value for data of the given @@ -145,7 +146,7 @@ value_is_resizable (const union value *value, int old_width, int new_width) return false; else { - const char *str = value_str (value, old_width); + const uint8_t *str = value_str (value, old_width); int i; for (i = new_width; i < old_width; i++) @@ -225,7 +226,7 @@ value_resize_pool (struct pool *pool, union value *value, { if (new_width > MAX_SHORT_STRING) { - char *new_long_string = pool_alloc_unaligned (pool, new_width); + uint8_t *new_long_string = pool_alloc_unaligned (pool, new_width); memcpy (new_long_string, value_str (value, old_width), old_width); value->long_string = new_long_string; } diff --git a/src/data/value.h b/src/data/value.h index 905f0823..84f08d87 100644 --- a/src/data/value.h +++ b/src/data/value.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include "xalloc.h" @@ -45,8 +46,8 @@ union value { double f; - char short_string[MAX_SHORT_STRING]; - char *long_string; + uint8_t short_string[MAX_SHORT_STRING]; + uint8_t *long_string; }; static inline void value_init (union value *, int width); @@ -55,8 +56,8 @@ static inline bool value_try_init (union value *, int width); static inline void value_destroy (union value *, int width); static inline double value_num (const union value *); -static inline const char *value_str (const union value *, int width); -static inline char *value_str_rw (union value *, int width); +static inline const uint8_t *value_str (const union value *, int width); +static inline uint8_t *value_str_rw (union value *, int width); int compare_values (const void *, const void *, const void *var); unsigned hash_value 
(const void *, const void *var); @@ -65,10 +66,10 @@ static inline void value_copy (union value *, const union value *, int width); void value_copy_rpad (union value *, int dst_width, const union value *, int src_width, char pad); -void value_copy_str_rpad (union value *, int dst_width, const char *, +void value_copy_str_rpad (union value *, int dst_width, const uint8_t *, char pad); void value_copy_buf_rpad (union value *dst, int dst_width, - const char *src, size_t src_len, char pad); + const uint8_t *src, size_t src_len, char pad); void value_set_missing (union value *, int width); int value_compare_3way (const union value *, const union value *, int width); bool value_equal (const union value *, const union value *, int width); @@ -150,7 +151,7 @@ value_num (const union value *v) It is important that WIDTH be the actual value that was passed to value_init. Passing, e.g., a smaller value because only that number of bytes will be accessed will not always work. */ -static inline const char * +static inline const uint8_t * value_str (const union value *v, int width) { assert (width > 0); @@ -164,7 +165,7 @@ value_str (const union value *v, int width) It is important that WIDTH be the actual value that was passed to value_init. Passing, e.g., a smaller value because only that number of bytes will be accessed will not always work. */ -static inline char * +static inline uint8_t * value_str_rw (union value *v, int width) { assert (width > 0); diff --git a/src/data/variable.c b/src/data/variable.c index 0b5ca26c..d1e30864 100644 --- a/src/data/variable.c +++ b/src/data/variable.c @@ -494,7 +494,7 @@ var_is_num_missing (const struct variable *v, double d, enum mv_class class) S[] must contain exactly as many characters as V's width. V must be a string variable. 
*/ bool -var_is_str_missing (const struct variable *v, const char s[], +var_is_str_missing (const struct variable *v, const uint8_t s[], enum mv_class class) { return mv_is_str_missing (&v->miss, s, class); diff --git a/src/data/variable.h b/src/data/variable.h index 5d28d5b8..0b619a49 100644 --- a/src/data/variable.h +++ b/src/data/variable.h @@ -69,7 +69,7 @@ bool var_has_missing_values (const struct variable *); bool var_is_value_missing (const struct variable *, const union value *, enum mv_class); bool var_is_num_missing (const struct variable *, double, enum mv_class); -bool var_is_str_missing (const struct variable *, const char[], enum mv_class); +bool var_is_str_missing (const struct variable *, const uint8_t[], enum mv_class); /* Value labels. */ const char *var_lookup_value_label (const struct variable *, diff --git a/src/language/dictionary/missing-values.c b/src/language/dictionary/missing-values.c index aa3ce698..819b0a90 100644 --- a/src/language/dictionary/missing-values.c +++ b/src/language/dictionary/missing-values.c @@ -101,7 +101,7 @@ cmd_missing_values (struct lexer *lexer, struct dataset *ds) mv_init (&mv, MV_MAX_STRING); while (!lex_match (lexer, ')')) { - char value[MV_MAX_STRING]; + uint8_t value[MV_MAX_STRING]; size_t length; if (!lex_force_string (lexer)) diff --git a/src/language/xforms/recode.c b/src/language/xforms/recode.c index 60c8f408..e8a382a3 100644 --- a/src/language/xforms/recode.c +++ b/src/language/xforms/recode.c @@ -598,7 +598,7 @@ find_src_numeric (struct recode_trns *trns, double value, const struct variable /* Returns the output mapping in TRNS for an input of VALUE with the given WIDTH, or a null pointer if there is no mapping. 
*/ static const struct map_out * -find_src_string (struct recode_trns *trns, const char *value, +find_src_string (struct recode_trns *trns, const uint8_t *value, const struct variable *src_var) { struct mapping *m; diff --git a/src/libpspp/str.c b/src/libpspp/str.c index ccd7739c..afe32de9 100644 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@ -1441,3 +1441,25 @@ ds_relocate (struct string *st) free ((char *) rel); } } + + + + +/* Operations on uint8_t "strings" */ + +/* Copies buffer SRC, of SRC_SIZE bytes, to DST, of DST_SIZE bytes. + DST is truncated to DST_SIZE bytes or padded on the right with + copies of PAD as needed. */ +void +u8_buf_copy_rpad (uint8_t *dst, size_t dst_size, + const uint8_t *src, size_t src_size, + char pad) +{ + if (src_size >= dst_size) + memmove (dst, src, dst_size); + else + { + memmove (dst, src, src_size); + memset (&dst[src_size], pad, dst_size - src_size); + } +} diff --git a/src/libpspp/str.h b/src/libpspp/str.h index b9be394c..a134079f 100644 --- a/src/libpspp/str.h +++ b/src/libpspp/str.h @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -223,4 +224,10 @@ char *ds_put_uninit (struct string *st, size_t incr); /* calls relocate from gnulib on ST */ void ds_relocate (struct string *st); + +void u8_buf_copy_rpad (uint8_t *dst, size_t dst_size, + const uint8_t *src, size_t src_size, + char pad); + + #endif /* str_h */ diff --git a/src/math/interaction.c b/src/math/interaction.c index 556123d4..cd151097 100644 --- a/src/math/interaction.c +++ b/src/math/interaction.c @@ -149,7 +149,7 @@ interaction_value_create (const struct interaction_variable *var, const union va if (var != NULL) { int val_width = 1; - char *val; + uint8_t *val; result = xmalloc (sizeof (*result)); result->intr = var;