X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fdata-in.c;h=b8226b37f926601dd1ed04f0ce368a242a5346ff;hb=00129e248ab4ce96af72d380d8a0ccfad2f5d776;hp=2b842b8dcd93eb71fd5324eda0c4d7bd7c8c5c8c;hpb=6549c9aced0949b16abb372257772fc0893e02bb;p=pspp diff --git a/src/data/data-in.c b/src/data/data-in.c index 2b842b8dcd..b8226b37f9 100644 --- a/src/data/data-in.c +++ b/src/data/data-in.c @@ -20,34 +20,34 @@ #include #include +#include #include #include +#include #include #include #include #include -#include -#include #include "calendar.h" +#include "dictionary.h" +#include "format.h" #include "identifier.h" +#include "libpspp/assertion.h" +#include "libpspp/compiler.h" +#include "libpspp/i18n.h" +#include "libpspp/integer-format.h" +#include "libpspp/legacy-encoding.h" +#include "libpspp/message.h" +#include "libpspp/misc.h" +#include "libpspp/str.h" #include "settings.h" #include "value.h" -#include "format.h" -#include "dictionary.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include "c-ctype.h" -#include "c-strtod.h" -#include "minmax.h" -#include "xalloc.h" +#include "gl/c-ctype.h" +#include "gl/c-strtod.h" +#include "gl/minmax.h" +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -55,10 +55,8 @@ /* Information about parsing one data field. */ struct data_in { - const char *src_enc; /* Encoding of source. */ struct substring input; /* Source. */ enum fmt_type format; /* Input format. */ - int implied_decimals; /* Number of implied decimal places. */ union value *output; /* Destination. */ int width; /* Output width. */ @@ -67,8 +65,6 @@ struct data_in int last_column; /* Last column. */ }; - - typedef bool data_in_parser_func (struct data_in *); #define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) \ static data_in_parser_func parse_##METHOD; @@ -77,7 +73,6 @@ typedef bool data_in_parser_func (struct data_in *); static void data_warning (const struct data_in *, const char *, ...) PRINTF_FORMAT (2, 3); -static void apply_implied_decimals (struct data_in *); static void default_result (struct data_in *); static bool trim_spaces_and_check_missing (struct data_in *); @@ -90,24 +85,11 @@ static int hexit_value (int c); otherwise the string width). Iff FORMAT is a string format, then DICT must be a pointer to the dictionary associated with OUTPUT. Otherwise, DICT - may be null. - - If no decimal point is included in a numeric format, then - IMPLIED_DECIMALS decimal places are implied. Specify 0 if no - decimal places should be implied. - - If FIRST_COLUMN and LAST_COLUMN are nonzero, then they should - be the 1-based column number of the first and - one-past-the-last-character in INPUT, for use in error - messages. (LAST_COLUMN cannot always be calculated from - FIRST_COLUMN plus the length of the input because of the - possibility of escaped quotes in strings, etc.) */ + may be null. */ bool data_in (struct substring input, const char *encoding, - enum fmt_type format, int implied_decimals, - int first_column, int last_column, - const struct dictionary *dict, - union value *output, int width) + enum fmt_type format, int first_column, int last_column, + const struct dictionary *dict, union value *output, int width) { static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] = { @@ -117,20 +99,20 @@ data_in (struct substring input, const char *encoding, struct data_in i; - char *s = NULL; + enum fmt_category cat; + const char *dest_encoding; + char *s; bool ok; assert ((width != 0) == fmt_is_string (format)); i.format = format; - i.implied_decimals = implied_decimals; i.output = output; i.width = width; i.first_column = first_column; i.last_column = last_column; - i.src_enc = encoding; if (ss_is_empty (input)) { @@ -138,24 +120,45 @@ data_in (struct substring input, const char *encoding, return true; } - if (fmt_get_category (format) & ( FMT_CAT_BINARY | FMT_CAT_HEXADECIMAL | FMT_CAT_LEGACY)) + cat = fmt_get_category (format); + if (cat & (FMT_CAT_BASIC | FMT_CAT_HEXADECIMAL + | FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)) { - i.input = input; + /* We're going to parse these into numbers. For this purpose we want to + deal with them in the local "C" encoding. Any character not in that + encoding wouldn't be valid anyhow. */ + dest_encoding = LEGACY_NATIVE; + } + else if (cat & (FMT_CAT_BINARY | FMT_CAT_LEGACY)) + { + /* Don't recode these binary formats at all, since they are not text. */ + dest_encoding = NULL; } else { - const char *dest_encoding; - - if ( dict == NULL) - { - assert (0 == (fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING))); - dest_encoding = LEGACY_NATIVE; - } + assert (cat == FMT_CAT_STRING); + if (format == FMT_AHEX) + { + /* We want the hex digits in the local "C" encoding, even though the + result may not be in that encoding. */ + dest_encoding = LEGACY_NATIVE; + } else - dest_encoding = dict_get_encoding (dict); + { + /* Use the final output encoding. */ + dest_encoding = dict_get_encoding (dict); + } + } - s = recode_string (dest_encoding, i.src_enc, ss_data (input), ss_length (input)); - i.input = ss_cstr (s); + if (dest_encoding != NULL) + { + i.input = recode_substring_pool (dest_encoding, encoding, input, NULL); + s = i.input.string; + } + else + { + i.input = input; + s = NULL; } ok = handlers[i.format] (&i); @@ -163,9 +166,102 @@ data_in (struct substring input, const char *encoding, default_result (&i); free (s); + return ok; } +static bool +number_has_implied_decimals (const char *s, enum fmt_type type) +{ + int decimal = settings_get_style (type)->decimal; + bool got_digit = false; + for (;;) + { + switch (*s) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + got_digit = true; + break; + + case '+': case '-': + if (got_digit) + return false; + break; + + case 'e': case 'E': case 'd': case 'D': + return false; + + case '.': case ',': + if (*s == decimal) + return false; + break; + + case '\0': + return true; + + default: + break; + } + + s++; + } +} + +static bool +has_implied_decimals (struct substring input, const char *encoding, + enum fmt_type format) +{ + bool retval; + char *s; + + switch (format) + { + case FMT_F: + case FMT_COMMA: + case FMT_DOT: + case FMT_DOLLAR: + case FMT_PCT: + case FMT_E: + case FMT_Z: + break; + + case FMT_N: + case FMT_IB: + case FMT_PIB: + case FMT_P: + case FMT_PK: + return true; + + default: + return false; + } + + s = recode_string (LEGACY_NATIVE, encoding, + ss_data (input), ss_length (input)); + retval = (format == FMT_Z + ? strchr (s, '.') == NULL + : number_has_implied_decimals (s, format)); + free (s); + + return retval; +} + +/* In some cases, when no decimal point is explicitly included in numeric + input, its position is implied by the number of decimal places in the input + format. In such a case, this function may be called just after data_in(). + Its arguments are a subset of that function's arguments plus D, the number + of decimal places associated with FORMAT. + + If it is appropriate, this function modifies the numeric value in OUTPUT. */ +void +data_in_imply_decimals (struct substring input, const char *encoding, + enum fmt_type format, int d, union value *output) +{ + if (d > 0 && output->f != SYSMIS + && has_implied_decimals (input, encoding, format)) + output->f /= pow (10., d); +} /* Format parsers. */ @@ -303,8 +399,6 @@ parse_number (struct data_in *i) else { errno = save_errno; - if (!explicit_decimals) - apply_implied_decimals (i); } ds_destroy (&tmp); @@ -328,7 +422,6 @@ parse_N (struct data_in *i) i->output->f = i->output->f * 10.0 + (c - '0'); } - apply_implied_decimals (i); return true; } @@ -484,11 +577,7 @@ parse_Z (struct data_in *i) } } else - { - errno = save_errno; - if (!got_dot) - apply_implied_decimals (i); - } + errno = save_errno; ds_destroy (&tmp); return true; @@ -515,8 +604,6 @@ parse_IB (struct data_in *i) i->output->f = -(double) -value; } - apply_implied_decimals (i); - return true; } @@ -527,8 +614,6 @@ parse_PIB (struct data_in *i) i->output->f = integer_get (settings_get_input_integer_format (), ss_data (i->input), MIN (8, ss_length (i->input))); - apply_implied_decimals (i); - return true; } @@ -568,8 +653,6 @@ parse_P (struct data_in *i) else if (low_nibble == 0xb || low_nibble == 0xd) i->output->f = -i->output->f; - apply_implied_decimals (i); - return true; } @@ -591,8 +674,6 @@ parse_PK (struct data_in *i) i->output->f = (100 * i->output->f) + (10 * high_nibble) + low_nibble; } - apply_implied_decimals (i); - return true; } @@ -649,11 +730,6 @@ parse_AHEX (struct data_in *i) return false; } - if (0 != strcmp (i->src_enc, LEGACY_NATIVE)) - { - hi = legacy_to_native (i->src_enc, hi); - lo = legacy_to_native (i->src_enc, lo); - } if (!c_isxdigit (hi) || !c_isxdigit (lo)) { data_warning (i, _("Field must contain only hex digits.")); @@ -1165,6 +1241,7 @@ parse_date (struct data_in *i) return true; } + /* Utility functions. */ @@ -1195,14 +1272,6 @@ data_warning (const struct data_in *i, const char *format, ...) msg_emit (&m); } -/* Apply implied decimal places to output. */ -static void -apply_implied_decimals (struct data_in *i) -{ - if (i->implied_decimals > 0) - i->output->f /= pow (10., i->implied_decimals); -} - /* Sets the default result for I. For a numeric format, this is the value set on SET BLANKS (typically system-missing); for a string format, it is all