From: Ben Pfaff Date: Mon, 30 Sep 2013 01:55:38 +0000 (-0700) Subject: missing-values: Make mv_add_str() easier to use. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=4aa11074dae28b2026f6633295bd72db4dd51d1b;p=pspp missing-values: Make mv_add_str() easier to use. Both existing callers of mv_add_str() were applying their own padding to the string value. This commit moves the padding into mv_add_str() itself. This commit also fixes a minor to-do item in the MISSING VALUES command whereby truncation of too-long missing values was done bytewise rather than on the basis of full characters. --- diff --git a/src/data/missing-values.c b/src/data/missing-values.c index 0fc9d01af0..9aba57e0e0 100644 --- a/src/data/missing-values.c +++ b/src/data/missing-values.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2005, 2009, 2011 Free Software Foundation, Inc. + Copyright (C) 2005, 2009, 2011, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -23,6 +23,7 @@ #include "data/variable.h" #include "libpspp/assertion.h" +#include "libpspp/cast.h" #include "libpspp/str.h" /* Types of user-missing values. @@ -158,19 +159,23 @@ mv_add_value (struct missing_values *mv, const union value *v) NOT_REACHED (); } -/* Attempts to add S to the set of string missing values MV. S - must contain exactly as many characters as MV's width. - Returns true if successful, false if MV has no more room for +/* Attempts to add S, which is LEN bytes long, to the set of string missing + values MV. Returns true if successful, false if MV has no more room for missing values or if S is not an acceptable missing value. */ bool -mv_add_str (struct missing_values *mv, const uint8_t s[]) +mv_add_str (struct missing_values *mv, const uint8_t s[], size_t len) { union value v; bool ok; assert (mv->width > 0); + while (len > mv->width) + if (s[--len] != ' ') + return false; + value_init (&v, mv->width); - memcpy (value_str_rw (&v, mv->width), s, mv->width); + buf_copy_rpad (CHAR_CAST (char *, value_str_rw (&v, mv->width)), mv->width, + CHAR_CAST (char *, s), len, ' '); ok = mv_add_value (mv, &v); value_destroy (&v, mv->width); diff --git a/src/data/missing-values.h b/src/data/missing-values.h index 4d046faec3..511ebd7ddd 100644 --- a/src/data/missing-values.h +++ b/src/data/missing-values.h @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 2005, 2009 Free Software Foundation, Inc. + Copyright (C) 2005, 2009, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -94,7 +94,7 @@ void mv_get_range (const struct missing_values *, double *low, double *high); /* Adding and modifying discrete values. */ bool mv_add_value (struct missing_values *, const union value *); -bool mv_add_str (struct missing_values *, const uint8_t[]); +bool mv_add_str (struct missing_values *, const uint8_t[], size_t len); bool mv_add_num (struct missing_values *, double); void mv_pop_value (struct missing_values *, union value *); bool mv_replace_value (struct missing_values *, const union value *, int idx); diff --git a/src/data/sys-file-reader.c b/src/data/sys-file-reader.c index 681e6f531b..c416249872 100644 --- a/src/data/sys-file-reader.c +++ b/src/data/sys-file-reader.c @@ -1085,11 +1085,7 @@ parse_variable_records (struct sfm_reader *r, struct dictionary *dict, value_init_pool (r->pool, &value, width); value_set_missing (&value, width); for (i = 0; i < rec->missing_value_code; i++) - { - uint8_t *s = value_str_rw (&value, width); - memcpy (s, rec->missing + 8 * i, MIN (width, 8)); - mv_add_str (&mv, s); - } + mv_add_str (&mv, rec->missing + 8 * i, MIN (width, 8)); } var_set_missing_values (var, &mv); } diff --git a/src/language/dictionary/missing-values.c b/src/language/dictionary/missing-values.c index a906b8a8f3..a369c2ea69 100644 --- a/src/language/dictionary/missing-values.c +++ b/src/language/dictionary/missing-values.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2013 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -96,12 +96,16 @@ cmd_missing_values (struct lexer *lexer, struct dataset *ds) } else { + const char *encoding = dict_get_encoding (dict); + mv_init (&mv, MV_MAX_STRING); while (!lex_match (lexer, T_RPAREN)) { - uint8_t value[MV_MAX_STRING]; - char *dict_mv; - size_t length; + const char *utf8_s; + size_t utf8_trunc_len; + size_t utf8_len; + + char *raw_s; if (!lex_force_string (lexer)) { @@ -109,24 +113,24 @@ cmd_missing_values (struct lexer *lexer, struct dataset *ds) break; } - dict_mv = recode_string (dict_get_encoding (dict), "UTF-8", - lex_tokcstr (lexer), - ss_length (lex_tokss (lexer))); - length = strlen (dict_mv); - if (length > MV_MAX_STRING) - { - /* XXX truncate graphemes not bytes */ - msg (SE, _("Truncating missing value to maximum " - "acceptable length (%d bytes)."), - MV_MAX_STRING); - length = MV_MAX_STRING; - } - memset (value, ' ', MV_MAX_STRING); - memcpy (value, dict_mv, length); - free (dict_mv); - - if (!mv_add_str (&mv, value)) + /* Truncate the string to fit in 8 bytes in the dictionary + encoding. */ + utf8_s = lex_tokcstr (lexer); + utf8_len = ss_length (lex_tokss (lexer)); + utf8_trunc_len = utf8_encoding_trunc_len (utf8_s, encoding, + MV_MAX_STRING); + if (utf8_trunc_len < utf8_len) + msg (SE, _("Truncating missing value to maximum " + "acceptable length (%d bytes)."), + MV_MAX_STRING); + + /* Recode to dictionary encoding and add. */ + raw_s = recode_string (encoding, "UTF-8", + utf8_s, utf8_trunc_len); + if (!mv_add_str (&mv, CHAR_CAST (const uint8_t *, raw_s), + strlen (raw_s))) ok = false; + free (raw_s); lex_get (lexer); lex_match (lexer, T_COMMA);