X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp-builds.git;a=blobdiff_plain;f=src%2Fdata%2Fdata-in.c;h=e7a83f25b44a5f7a55d2c46e20aed4dc7785e40e;hp=bcc880e402761a407de0917c562a62a1cfdd2af3;hb=9254d30d06a0565c89daccedd93a94c4c6086004;hpb=f5c108becd49d78f4898cab11352291f5689d24e diff --git a/src/data/data-in.c b/src/data/data-in.c index bcc880e4..e7a83f25 100644 --- a/src/data/data-in.c +++ b/src/data/data-in.c @@ -1,20 +1,18 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ #include @@ -29,23 +27,24 @@ #include #include #include +#include #include "calendar.h" #include "identifier.h" #include "settings.h" #include "value.h" +#include "format.h" #include +#include #include #include -#include #include #include #include - #include "c-ctype.h" +#include "c-strtod.h" #include "minmax.h" -#include "size_max.h" #include "xalloc.h" #include "gettext.h" @@ -54,6 +53,7 @@ /* Information about parsing one data field. */ struct data_in { + const char *encoding; /* Encoding of source. */ struct substring input; /* Source. */ enum fmt_type format; /* Input format. */ int implied_decimals; /* Number of implied decimal places. */ @@ -65,11 +65,7 @@ struct data_in int last_column; /* Last column. */ }; -/* Integer format used for IB and PIB input. */ -static enum integer_format input_integer_format = INTEGER_NATIVE; -/* Floating-point format used for RB and RBHEX input. */ -static enum float_format input_float_format = FLOAT_NATIVE_DOUBLE; typedef bool data_in_parser_func (struct data_in *); #define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) \ @@ -87,21 +83,26 @@ static bool trim_spaces_and_check_missing (struct data_in *); static int hexit_value (int c); -/* Parses the characters in INPUT according to FORMAT. Stores - the parsed representation in OUTPUT, which has the given WIDTH - (0 for a numeric field, otherwise the string width). +/* Parses the characters in INPUT, which are encoded in the given + ENCODING, according to FORMAT. Stores the parsed + representation in OUTPUT, which the caller must have + initialized with the given WIDTH (0 for a numeric field, + otherwise the string width). If no decimal point is included in a numeric format, then IMPLIED_DECIMALS decimal places are implied. Specify 0 if no decimal places should be implied. - If FIRST_COLUMN is nonzero, then it should be the 1-based - column number of the first character in INPUT, used in error - messages. */ + If FIRST_COLUMN and LAST_COLUMN are nonzero, then they should + be the 1-based column number of the first and + one-past-the-last-character in INPUT, for use in error + messages. (LAST_COLUMN cannot always be calculated from + FIRST_COLUMN plus the length of the input because of the + possibility of escaped quotes in strings, etc.) */ bool -data_in (struct substring input, +data_in (struct substring input, const char *encoding, enum fmt_type format, int implied_decimals, - int first_column, union value *output, int width) + int first_column, int last_column, union value *output, int width) { static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] = { @@ -110,11 +111,25 @@ data_in (struct substring input, }; struct data_in i; + void *copy = NULL; bool ok; assert ((width != 0) == fmt_is_string (format)); - i.input = input; + if (0 == strcmp (encoding, LEGACY_NATIVE) + || fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING)) + { + i.input = input; + i.encoding = encoding; + } + else + { + ss_alloc_uninit (&i.input, ss_length (input)); + legacy_recode (encoding, ss_data (input), LEGACY_NATIVE, + ss_data (i.input), ss_length (input)); + i.encoding = LEGACY_NATIVE; + copy = ss_data (i.input); + } i.format = format; i.implied_decimals = implied_decimals; @@ -122,7 +137,7 @@ data_in (struct substring input, i.width = width; i.first_column = first_column; - i.last_column = first_column + ss_length (input) - 1; + i.last_column = last_column; if (!ss_is_empty (i.input)) { @@ -136,39 +151,12 @@ data_in (struct substring input, ok = true; } - return ok; -} - -/* Returns the integer format used for IB and PIB input. */ -enum integer_format -data_in_get_integer_format (void) -{ - return input_integer_format; -} - -/* Sets the integer format used for IB and PIB input to - FORMAT. */ -void -data_in_set_integer_format (enum integer_format format) -{ - input_integer_format = format; -} + if (copy) + free (copy); -/* Returns the floating-point format used for RB and RBHEX - input. */ -enum float_format -data_in_get_float_format (void) -{ - return input_float_format; + return ok; } -/* Sets the floating-point format used for RB and RBHEX input to - FORMAT. */ -void -data_in_set_float_format (enum float_format format) -{ - input_float_format = format; -} /* Format parsers. */ @@ -176,7 +164,8 @@ data_in_set_float_format (enum float_format format) static bool parse_number (struct data_in *i) { - const struct fmt_number_style *style = fmt_get_style (i->format); + const struct fmt_number_style *style = + settings_get_style (i->format); struct string tmp; @@ -184,7 +173,10 @@ parse_number (struct data_in *i) int save_errno; char *tail; - assert (fmt_get_category (i->format) != FMT_CAT_CUSTOM); + if (fmt_get_category (i->format) == FMT_CAT_CUSTOM) + { + style = settings_get_style (FMT_F); + } /* Trim spaces and check for missing value representation. */ if (trim_spaces_and_check_missing (i)) @@ -275,10 +267,10 @@ parse_number (struct data_in *i) return false; } - /* Let strtod() do the conversion. */ + /* Let c_strtod() do the conversion. */ save_errno = errno; errno = 0; - i->output->f = strtod (ds_cstr (&tmp), &tail); + i->output->f = c_strtod (ds_cstr (&tmp), &tail); if (*tail != '\0') { data_warning (i, _("Invalid numeric syntax.")); @@ -465,10 +457,10 @@ parse_Z (struct data_in *i) return false; } - /* Let strtod() do the conversion. */ + /* Let c_strtod() do the conversion. */ save_errno = errno; errno = 0; - i->output->f = strtod (ds_cstr (&tmp), NULL); + i->output->f = c_strtod (ds_cstr (&tmp), NULL); if (errno == ERANGE) { if (fabs (i->output->f) > 1) @@ -502,7 +494,7 @@ parse_IB (struct data_in *i) uint64_t sign_bit; bytes = MIN (8, ss_length (i->input)); - value = integer_get (input_integer_format, ss_data (i->input), bytes); + value = integer_get (settings_get_input_integer_format (), ss_data (i->input), bytes); sign_bit = UINT64_C(1) << (8 * bytes - 1); if (!(value & sign_bit)) @@ -523,7 +515,7 @@ parse_IB (struct data_in *i) static bool parse_PIB (struct data_in *i) { - i->output->f = integer_get (input_integer_format, ss_data (i->input), + i->output->f = integer_get (settings_get_input_integer_format (), ss_data (i->input), MIN (8, ss_length (i->input))); apply_implied_decimals (i); @@ -599,9 +591,10 @@ parse_PK (struct data_in *i) static bool parse_RB (struct data_in *i) { - size_t size = float_get_size (input_float_format); + enum float_format ff = settings_get_input_float_format (); + size_t size = float_get_size (ff); if (ss_length (i->input) >= size) - float_convert (input_float_format, ss_data (i->input), + float_convert (ff, ss_data (i->input), FLOAT_NATIVE_DOUBLE, &i->output->f); else i->output->f = SYSMIS; @@ -613,8 +606,17 @@ parse_RB (struct data_in *i) static bool parse_A (struct data_in *i) { - buf_copy_rpad (i->output->s, i->width, - ss_data (i->input), ss_length (i->input)); + /* This is equivalent to buf_copy_rpad, except that we posibly + do a character set recoding in the middle. */ + char *dst = value_str_rw (i->output, i->width); + size_t dst_size = i->width; + const char *src = ss_data (i->input); + size_t src_size = ss_length (i->input); + + legacy_recode (i->encoding, src, LEGACY_NATIVE, dst, MIN (src_size, dst_size)); + if (dst_size > src_size) + memset (&dst[src_size], ' ', dst_size - src_size); + return true; } @@ -622,6 +624,7 @@ parse_A (struct data_in *i) static bool parse_AHEX (struct data_in *i) { + char *s = value_str_rw (i->output, i->width); size_t j; for (j = 0; ; j++) @@ -636,6 +639,11 @@ parse_AHEX (struct data_in *i) return false; } + if (0 != strcmp (i->encoding, LEGACY_NATIVE)) + { + hi = legacy_to_native (i->encoding, hi); + lo = legacy_to_native (i->encoding, lo); + } if (!c_isxdigit (hi) || !c_isxdigit (lo)) { data_warning (i, _("Field must contain only hex digits.")); @@ -643,10 +651,10 @@ parse_AHEX (struct data_in *i) } if (j < i->width) - i->output->s[j] = hexit_value (hi) * 16 + hexit_value (lo); + s[j] = hexit_value (hi) * 16 + hexit_value (lo); } - memset (i->output->s + j, ' ', i->width - j); + memset (&s[j], ' ', i->width - j); return true; } @@ -765,7 +773,7 @@ parse_name_token (struct data_in *i) exact matches (except for case) are allowed. Returns true if successful, false otherwise. */ static bool -match_name (struct substring token, const char **names, long *output) +match_name (struct substring token, const char *const *names, long *output) { int i; @@ -794,14 +802,14 @@ parse_month (struct data_in *i, long *month) } else { - static const char *english_names[] = + static const char *const english_names[] = { "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec", NULL, }; - static const char *roman_names[] = + static const char *const roman_names[] = { "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii", @@ -830,7 +838,7 @@ parse_year (struct data_in *i, long *year, size_t max_digits) if (*year >= 0 && *year <= 99) { - int epoch = get_epoch (); + int epoch = settings_get_epoch (); int epoch_century = ROUND_DOWN (epoch, 100); int epoch_offset = epoch - epoch_century; if (*year >= epoch_offset) @@ -965,7 +973,7 @@ parse_minute_second (struct data_in *i, double *time) cp = buf; while (c_isdigit (ss_first (i->input))) *cp++ = ss_get_char (&i->input); - if (ss_match_char (&i->input, fmt_decimal_char (FMT_F))) + if (ss_match_char (&i->input, settings_get_decimal_char (FMT_F))) *cp++ = '.'; while (c_isdigit (ss_first (i->input))) *cp++ = ss_get_char (&i->input); @@ -982,7 +990,7 @@ parse_minute_second (struct data_in *i, double *time) static bool parse_weekday (struct data_in *i, long *weekday) { - static const char *weekday_names[] = + static const char *const weekday_names[] = { "su", "mo", "tu", "we", "th", "fr", "sa", NULL, @@ -1169,11 +1177,11 @@ vdata_warning (const struct data_in *i, const char *format, va_list args) ds_put_char (&text, '('); if (i->first_column != 0) { - if (i->first_column == i->last_column) + if (i->first_column == i->last_column - 1) ds_put_format (&text, _("column %d"), i->first_column); else ds_put_format (&text, _("columns %d-%d"), - i->first_column, i->last_column); + i->first_column, i->last_column - 1); ds_put_cstr (&text, ", "); } ds_put_format (&text, _("%s field) "), fmt_name (i->format)); @@ -1214,9 +1222,9 @@ static void default_result (struct data_in *i) { if (fmt_is_string (i->format)) - memset (i->output->s, ' ', i->width); + memset (value_str_rw (i->output, i->width), ' ', i->width); else - i->output->f = get_blanks (); + i->output->f = settings_get_blanks (); } /* Trims leading and trailing spaces from I.