X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fdata-in.c;h=33e369f971da8751f78e29e76f23d230e6e0227d;hb=a5097a183f00ab2d2dc538ba7094a4696e2fea04;hp=0027984e39a638065035316a7b1f7920524f7002;hpb=43b1296aafe7582e7dbe6c2b6a8b478d7d9b0fcf;p=pspp-builds.git diff --git a/src/data/data-in.c b/src/data/data-in.c index 0027984e..33e369f9 100644 --- a/src/data/data-in.c +++ b/src/data/data-in.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,23 +27,26 @@ #include #include #include +#include #include "calendar.h" #include "identifier.h" #include "settings.h" #include "value.h" +#include "format.h" +#include "dictionary.h" #include +#include +#include #include #include -#include #include #include #include - #include "c-ctype.h" +#include "c-strtod.h" #include "minmax.h" -#include "size_max.h" #include "xalloc.h" #include "gettext.h" @@ -52,6 +55,7 @@ /* Information about parsing one data field. */ struct data_in { + const char *src_enc; /* Encoding of source. */ struct substring input; /* Source. */ enum fmt_type format; /* Input format. */ int implied_decimals; /* Number of implied decimal places. */ @@ -63,11 +67,7 @@ struct data_in int last_column; /* Last column. */ }; -/* Integer format used for IB and PIB input. */ -static enum integer_format input_integer_format = INTEGER_NATIVE; -/* Floating-point format used for RB and RBHEX input. 
*/ -static enum float_format input_float_format = FLOAT_NATIVE_DOUBLE; typedef bool data_in_parser_func (struct data_in *); #define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) \ @@ -85,21 +85,31 @@ static bool trim_spaces_and_check_missing (struct data_in *); static int hexit_value (int c); -/* Parses the characters in INPUT according to FORMAT. Stores - the parsed representation in OUTPUT, which has the given WIDTH - (0 for a numeric field, otherwise the string width). +/* Parses the characters in INPUT, which are encoded in the given + ENCODING, according to FORMAT. Stores the parsed + representation in OUTPUT, which the caller must have + initialized with the given WIDTH (0 for a numeric field, + otherwise the string width). + Iff FORMAT is a string format, then DICT must be a pointer + to the dictionary associated with OUTPUT. Otherwise, DICT + may be null. If no decimal point is included in a numeric format, then IMPLIED_DECIMALS decimal places are implied. Specify 0 if no decimal places should be implied. - If FIRST_COLUMN is nonzero, then it should be the 1-based - column number of the first character in INPUT, used in error - messages. */ + If FIRST_COLUMN and LAST_COLUMN are nonzero, then they should + be the 1-based column number of the first and + one-past-the-last-character in INPUT, for use in error + messages. (LAST_COLUMN cannot always be calculated from + FIRST_COLUMN plus the length of the input because of the + possibility of escaped quotes in strings, etc.) 
*/ bool -data_in (struct substring input, +data_in (struct substring input, const char *encoding, enum fmt_type format, int implied_decimals, - int first_column, union value *output, int width) + int first_column, int last_column, + const struct dictionary *dict, + union value *output, int width) { static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] = { @@ -108,11 +118,11 @@ data_in (struct substring input, }; struct data_in i; + bool ok; assert ((width != 0) == fmt_is_string (format)); - i.input = input; i.format = format; i.implied_decimals = implied_decimals; @@ -120,53 +130,44 @@ data_in (struct substring input, i.width = width; i.first_column = first_column; - i.last_column = first_column + ss_length (input) - 1; + i.last_column = last_column; + i.src_enc = encoding; - if (!ss_is_empty (i.input)) - { - ok = handlers[i.format] (&i); - if (!ok) - default_result (&i); - } - else + if (ss_is_empty (input)) { default_result (&i); - ok = true; + return true; } - return ok; -} + if (fmt_get_category (format) & ( FMT_CAT_BINARY | FMT_CAT_HEXADECIMAL | FMT_CAT_LEGACY)) + { + i.input = input; + } + else + { + const char *dest_encoding; + char *s = NULL; + if ( dict == NULL) + { + assert (0 == (fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING))); + dest_encoding = LEGACY_NATIVE; + } + else + dest_encoding = dict_get_encoding (dict); -/* Returns the integer format used for IB and PIB input. */ -enum integer_format -data_in_get_integer_format (void) -{ - return input_integer_format; -} + s = recode_string (dest_encoding, i.src_enc, ss_data (input), ss_length (input)); + ss_alloc_uninit (&i.input, strlen (s)); + memcpy (ss_data (i.input), s, ss_length (input)); + free (s); + } -/* Sets the integer format used for IB and PIB input to - FORMAT. 
*/ -void -data_in_set_integer_format (enum integer_format format) -{ - input_integer_format = format; -} + ok = handlers[i.format] (&i); + if (!ok) + default_result (&i); -/* Returns the floating-point format used for RB and RBHEX - input. */ -enum float_format -data_in_get_float_format (void) -{ - return input_float_format; + return ok; } -/* Sets the floating-point format used for RB and RBHEX input to - FORMAT. */ -void -data_in_set_float_format (enum float_format format) -{ - input_float_format = format; -} /* Format parsers. */ @@ -174,7 +175,8 @@ data_in_set_float_format (enum float_format format) static bool parse_number (struct data_in *i) { - const struct fmt_number_style *style = fmt_get_style (i->format); + const struct fmt_number_style *style = + settings_get_style (i->format); struct string tmp; @@ -182,7 +184,10 @@ parse_number (struct data_in *i) int save_errno; char *tail; - assert (fmt_get_category (i->format) != FMT_CAT_CUSTOM); + if (fmt_get_category (i->format) == FMT_CAT_CUSTOM) + { + style = settings_get_style (FMT_F); + } /* Trim spaces and check for missing value representation. */ if (trim_spaces_and_check_missing (i)) @@ -273,10 +278,10 @@ parse_number (struct data_in *i) return false; } - /* Let strtod() do the conversion. */ + /* Let c_strtod() do the conversion. */ save_errno = errno; errno = 0; - i->output->f = strtod (ds_cstr (&tmp), &tail); + i->output->f = c_strtod (ds_cstr (&tmp), &tail); if (*tail != '\0') { data_warning (i, _("Invalid numeric syntax.")); @@ -463,10 +468,10 @@ parse_Z (struct data_in *i) return false; } - /* Let strtod() do the conversion. */ + /* Let c_strtod() do the conversion. 
*/ save_errno = errno; errno = 0; - i->output->f = strtod (ds_cstr (&tmp), NULL); + i->output->f = c_strtod (ds_cstr (&tmp), NULL); if (errno == ERANGE) { if (fabs (i->output->f) > 1) @@ -500,7 +505,7 @@ parse_IB (struct data_in *i) uint64_t sign_bit; bytes = MIN (8, ss_length (i->input)); - value = integer_get (input_integer_format, ss_data (i->input), bytes); + value = integer_get (settings_get_input_integer_format (), ss_data (i->input), bytes); sign_bit = UINT64_C(1) << (8 * bytes - 1); if (!(value & sign_bit)) @@ -521,7 +526,7 @@ parse_IB (struct data_in *i) static bool parse_PIB (struct data_in *i) { - i->output->f = integer_get (input_integer_format, ss_data (i->input), + i->output->f = integer_get (settings_get_input_integer_format (), ss_data (i->input), MIN (8, ss_length (i->input))); apply_implied_decimals (i); @@ -597,9 +602,10 @@ parse_PK (struct data_in *i) static bool parse_RB (struct data_in *i) { - size_t size = float_get_size (input_float_format); + enum float_format ff = settings_get_input_float_format (); + size_t size = float_get_size (ff); if (ss_length (i->input) >= size) - float_convert (input_float_format, ss_data (i->input), + float_convert (ff, ss_data (i->input), FLOAT_NATIVE_DOUBLE, &i->output->f); else i->output->f = SYSMIS; @@ -611,8 +617,18 @@ parse_RB (struct data_in *i) static bool parse_A (struct data_in *i) { - buf_copy_rpad (i->output->s, i->width, - ss_data (i->input), ss_length (i->input)); + /* This is equivalent to buf_copy_rpad, except that we possibly + do a character set recoding in the middle. 
*/ + uint8_t *dst = value_str_rw (i->output, i->width); + size_t dst_size = i->width; + const char *src = ss_data (i->input); + size_t src_size = ss_length (i->input); + + memcpy (dst, src, MIN (src_size, dst_size)); + + if (dst_size > src_size) + memset (&dst[src_size], ' ', dst_size - src_size); + return true; } @@ -620,6 +636,7 @@ parse_A (struct data_in *i) static bool parse_AHEX (struct data_in *i) { + uint8_t *s = value_str_rw (i->output, i->width); size_t j; for (j = 0; ; j++) @@ -634,6 +651,11 @@ parse_AHEX (struct data_in *i) return false; } + if (0 != strcmp (i->src_enc, LEGACY_NATIVE)) + { + hi = legacy_to_native (i->src_enc, hi); + lo = legacy_to_native (i->src_enc, lo); + } if (!c_isxdigit (hi) || !c_isxdigit (lo)) { data_warning (i, _("Field must contain only hex digits.")); @@ -641,10 +663,10 @@ parse_AHEX (struct data_in *i) } if (j < i->width) - i->output->s[j] = hexit_value (hi) * 16 + hexit_value (lo); + s[j] = hexit_value (hi) * 16 + hexit_value (lo); } - memset (i->output->s + j, ' ', i->width - j); + memset (&s[j], ' ', i->width - j); return true; } @@ -763,7 +785,7 @@ parse_name_token (struct data_in *i) exact matches (except for case) are allowed. Returns true if successful, false otherwise. 
*/ static bool -match_name (struct substring token, const char **names, long *output) +match_name (struct substring token, const char *const *names, long *output) { int i; @@ -792,14 +814,14 @@ parse_month (struct data_in *i, long *month) } else { - static const char *english_names[] = + static const char *const english_names[] = { "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec", NULL, }; - static const char *roman_names[] = + static const char *const roman_names[] = { "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii", @@ -828,7 +850,7 @@ parse_year (struct data_in *i, long *year, size_t max_digits) if (*year >= 0 && *year <= 99) { - int epoch = get_epoch (); + int epoch = settings_get_epoch (); int epoch_century = ROUND_DOWN (epoch, 100); int epoch_offset = epoch - epoch_century; if (*year >= epoch_offset) @@ -963,7 +985,7 @@ parse_minute_second (struct data_in *i, double *time) cp = buf; while (c_isdigit (ss_first (i->input))) *cp++ = ss_get_char (&i->input); - if (ss_match_char (&i->input, fmt_decimal_char (FMT_F))) + if (ss_match_char (&i->input, settings_get_decimal_char (FMT_F))) *cp++ = '.'; while (c_isdigit (ss_first (i->input))) *cp++ = ss_get_char (&i->input); @@ -980,7 +1002,7 @@ parse_minute_second (struct data_in *i, double *time) static bool parse_weekday (struct data_in *i, long *weekday) { - static const char *weekday_names[] = + static const char *const weekday_names[] = { "su", "mo", "tu", "we", "th", "fr", "sa", NULL, @@ -1167,11 +1189,11 @@ vdata_warning (const struct data_in *i, const char *format, va_list args) ds_put_char (&text, '('); if (i->first_column != 0) { - if (i->first_column == i->last_column) + if (i->first_column == i->last_column - 1) ds_put_format (&text, _("column %d"), i->first_column); else ds_put_format (&text, _("columns %d-%d"), - i->first_column, i->last_column); + i->first_column, i->last_column - 1); ds_put_cstr (&text, ", "); } ds_put_format (&text, _("%s field) 
"), fmt_name (i->format)); @@ -1212,9 +1234,9 @@ static void default_result (struct data_in *i) { if (fmt_is_string (i->format)) - memset (i->output->s, ' ', i->width); + memset (value_str_rw (i->output, i->width), ' ', i->width); else - i->output->f = get_blanks (); + i->output->f = settings_get_blanks (); } /* Trims leading and trailing spaces from I.