X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fdata-in.c;h=d734eb911da8d910f871fda8d27d57aad8a18e0c;hb=a22af84523eb716b947123186bd4f89a3d92945e;hp=d36aefb5e8144bb1e2207fb613d2ec6d897b63c2;hpb=a258e53c63a08b0ec48aea8f03808eb651729424;p=pspp diff --git a/src/data/data-in.c b/src/data/data-in.c index d36aefb5e8..d734eb911d 100644 --- a/src/data/data-in.c +++ b/src/data/data-in.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,7 +37,6 @@ #include "libpspp/compiler.h" #include "libpspp/i18n.h" #include "libpspp/integer-format.h" -#include "libpspp/legacy-encoding.h" #include "libpspp/misc.h" #include "libpspp/str.h" #include "settings.h" @@ -54,6 +53,8 @@ /* Information about parsing one data field. */ struct data_in { + const struct fmt_settings *settings; + struct substring input; /* Source. */ enum fmt_type format; /* Input format. */ @@ -77,10 +78,14 @@ static int hexit_value (int c); Stores the parsed representation in OUTPUT, which the caller must have initialized with the given WIDTH (0 for a numeric field, otherwise the string width). If FORMAT is FMT_A, then OUTPUT_ENCODING must specify the - correct encoding for OUTPUT (normally obtained via dict_get_encoding()). */ + correct encoding for OUTPUT (normally obtained via dict_get_encoding()). + + If successful NULL is the return value. Otherwise a string describing + the problem is returned. The caller must free this string. + */ char * data_in (struct substring input, const char *input_encoding, - enum fmt_type format, + enum fmt_type format, const struct fmt_settings *settings, union value *output, int width, const char *output_encoding) { static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] = @@ -98,6 +103,8 @@ data_in (struct substring input, const char *input_encoding, assert ((width != 0) == fmt_is_string (format)); + i.settings = settings; + i.format = format; i.output = output; @@ -110,13 +117,13 @@ data_in (struct substring input, const char *input_encoding, } cat = fmt_get_category (format); - if (cat & (FMT_CAT_BASIC | FMT_CAT_HEXADECIMAL + if (cat & (FMT_CAT_BASIC | FMT_CAT_HEXADECIMAL | FMT_CAT_CUSTOM | FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)) { /* We're going to parse these into numbers. For this purpose we want to deal with them in the local "C" encoding. Any character not in that encoding wouldn't be valid anyhow. */ - dest_encoding = LEGACY_NATIVE; + dest_encoding = C_ENCODING; } else if (cat & (FMT_CAT_BINARY | FMT_CAT_LEGACY)) { @@ -130,7 +137,7 @@ data_in (struct substring input, const char *input_encoding, { /* We want the hex digits in the local "C" encoding, even though the result may not be in that encoding. */ - dest_encoding = LEGACY_NATIVE; + dest_encoding = C_ENCODING; } else { @@ -162,14 +169,14 @@ data_in (struct substring input, const char *input_encoding, bool data_in_msg (struct substring input, const char *input_encoding, - enum fmt_type format, + enum fmt_type format, const struct fmt_settings *settings, union value *output, int width, const char *output_encoding) { - char *error = data_in (input, input_encoding, format, + char *error = data_in (input, input_encoding, format, settings, output, width, output_encoding); if (error != NULL) { - msg (SW,_("Data is not valid as format %s: %s"), + msg (SW, _("Data is not valid as format %s: %s"), fmt_name (format), error); free (error); return false; @@ -179,9 +186,10 @@ data_in_msg (struct substring input, const char *input_encoding, } static bool -number_has_implied_decimals (const char *s, enum fmt_type type) +number_has_implied_decimals (const struct fmt_settings *settings, + const char *s, enum fmt_type type) { - int decimal = settings_get_style (type)->decimal; + int decimal = fmt_settings_get_style (settings, type)->decimal; bool got_digit = false; for (;;) { @@ -218,7 +226,8 @@ number_has_implied_decimals (const char *s, enum fmt_type type) static bool has_implied_decimals (struct substring input, const char *input_encoding, - enum fmt_type format) + enum fmt_type format, + const struct fmt_settings *settings) { bool retval; char *s; @@ -245,11 +254,11 @@ has_implied_decimals (struct substring input, const char *input_encoding, return false; } - s = recode_string (LEGACY_NATIVE, input_encoding, + s = recode_string (C_ENCODING, input_encoding, ss_data (input), ss_length (input)); retval = (format == FMT_Z ? strchr (s, '.') == NULL - : number_has_implied_decimals (s, format)); + : number_has_implied_decimals (settings, s, format)); free (s); return retval; @@ -264,10 +273,12 @@ has_implied_decimals (struct substring input, const char *input_encoding, If it is appropriate, this function modifies the numeric value in OUTPUT. */ void data_in_imply_decimals (struct substring input, const char *input_encoding, - enum fmt_type format, int d, union value *output) + enum fmt_type format, int d, + const struct fmt_settings *settings, + union value *output) { if (d > 0 && output->f != SYSMIS - && has_implied_decimals (input, input_encoding, format)) + && has_implied_decimals (input, input_encoding, format, settings)) output->f /= pow (10., d); } @@ -277,20 +288,15 @@ data_in_imply_decimals (struct substring input, const char *input_encoding, static char * parse_number (struct data_in *i) { - const struct fmt_number_style *style = - settings_get_style (i->format); + const struct fmt_number_style *style = fmt_settings_get_style ( + i->settings, + fmt_get_category (i->format) == FMT_CAT_CUSTOM ? FMT_F : i->format); struct string tmp; - bool explicit_decimals = false; int save_errno; char *tail; - if (fmt_get_category (i->format) == FMT_CAT_CUSTOM) - { - style = settings_get_style (FMT_F); - } - /* Trim spaces and check for missing value representation. */ if (trim_spaces_and_check_missing (i)) return NULL; @@ -299,46 +305,45 @@ parse_number (struct data_in *i) ds_extend (&tmp, 64); /* Prefix character may precede sign. */ - if (!ss_is_empty (style->prefix)) + if (style->prefix.s[0] != '\0') { - ss_match_char (&i->input, ss_first (style->prefix)); + ss_match_byte (&i->input, style->prefix.s[0]); ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } /* Sign. */ - if (ss_match_char (&i->input, '-')) + if (ss_match_byte (&i->input, '-')) { - ds_put_char (&tmp, '-'); + ds_put_byte (&tmp, '-'); ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } else { - ss_match_char (&i->input, '+'); + ss_match_byte (&i->input, '+'); ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } /* Prefix character may follow sign. */ - if (!ss_is_empty (style->prefix)) + if (style->prefix.s[0] != '\0') { - ss_match_char (&i->input, ss_first (style->prefix)); + ss_match_byte (&i->input, style->prefix.s[0]); ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } /* Digits before decimal point. */ while (c_isdigit (ss_first (i->input))) { - ds_put_char (&tmp, ss_get_char (&i->input)); + ds_put_byte (&tmp, ss_get_byte (&i->input)); if (style->grouping != 0) - ss_match_char (&i->input, style->grouping); + ss_match_byte (&i->input, style->grouping); } /* Decimal point and following digits. */ - if (ss_match_char (&i->input, style->decimal)) + if (ss_match_byte (&i->input, style->decimal)) { - explicit_decimals = true; - ds_put_char (&tmp, '.'); + ds_put_byte (&tmp, '.'); while (c_isdigit (ss_first (i->input))) - ds_put_char (&tmp, ss_get_char (&i->input)); + ds_put_byte (&tmp, ss_get_byte (&i->input)); } /* Exponent. */ @@ -346,29 +351,28 @@ parse_number (struct data_in *i) && !ss_is_empty (i->input) && strchr ("eEdD-+", ss_first (i->input))) { - explicit_decimals = true; - ds_put_char (&tmp, 'e'); + ds_put_byte (&tmp, 'e'); if (strchr ("eEdD", ss_first (i->input))) { ss_advance (&i->input, 1); - ss_match_char (&i->input, ' '); + ss_match_byte (&i->input, ' '); } if (ss_first (i->input) == '-' || ss_first (i->input) == '+') { - if (ss_get_char (&i->input) == '-') - ds_put_char (&tmp, '-'); - ss_match_char (&i->input, ' '); + if (ss_get_byte (&i->input) == '-') + ds_put_byte (&tmp, '-'); + ss_match_byte (&i->input, ' '); } while (c_isdigit (ss_first (i->input))) - ds_put_char (&tmp, ss_get_char (&i->input)); + ds_put_byte (&tmp, ss_get_byte (&i->input)); } /* Suffix character. */ - if (!ss_is_empty (style->suffix)) - ss_match_char (&i->input, ss_first (style->suffix)); + if (style->suffix.s[0] != '\0') + ss_match_byte (&i->input, style->suffix.s[0]); if (!ss_is_empty (i->input)) { @@ -420,7 +424,7 @@ parse_N (struct data_in *i) int c; i->output->f = 0; - while ((c = ss_get_char (&i->input)) != EOF) + while ((c = ss_get_byte (&i->input)) != EOF) { if (!c_isdigit (c)) return xstrdup (_("All characters in field must be digits.")); @@ -439,7 +443,7 @@ parse_PIBHEX (struct data_in *i) n = 0.0; - while ((c = ss_get_char (&i->input)) != EOF) + while ((c = ss_get_byte (&i->input)) != EOF) { if (!c_isxdigit (c)) return xstrdup (_("Unrecognized character in field.")); @@ -460,8 +464,8 @@ parse_RBHEX (struct data_in *i) memset (&d, 0, sizeof d); for (j = 0; !ss_is_empty (i->input) && j < sizeof d; j++) { - int hi = ss_get_char (&i->input); - int lo = ss_get_char (&i->input); + int hi = ss_get_byte (&i->input); + int lo = ss_get_byte (&i->input); if (lo == EOF) return xstrdup (_("Field must have even length.")); else if (!c_isxdigit (hi) || !c_isxdigit (lo)) @@ -520,22 +524,22 @@ parse_Z (struct data_in *i) ds_init_empty (&tmp); ds_extend (&tmp, 64); - ds_put_char (&tmp, '+'); + ds_put_byte (&tmp, '+'); while (!ss_is_empty (i->input)) { - int c = ss_get_char (&i->input); + int c = ss_get_byte (&i->input); if (c_isdigit (c) && !got_final_digit) - ds_put_char (&tmp, c); + ds_put_byte (&tmp, c); else if (is_z_digit (c) && !got_final_digit) { - ds_put_char (&tmp, z_digit_value (c) + '0'); + ds_put_byte (&tmp, z_digit_value (c) + '0'); if (is_negative_z_digit (c)) ds_data (&tmp)[0] = '-'; got_final_digit = true; } else if (c == '.' && !got_dot) { - ds_put_char (&tmp, '.'); + ds_put_byte (&tmp, '.'); got_dot = true; } else @@ -623,7 +627,7 @@ parse_PIB (struct data_in *i) static void get_nibbles (struct substring *s, int *high_nibble, int *low_nibble) { - int c = ss_get_char (s); + int c = ss_get_byte (s); assert (c != EOF); *high_nibble = (c >> 4) & 15; *low_nibble = c & 15; @@ -699,7 +703,7 @@ parse_A (struct data_in *i) { /* This is equivalent to buf_copy_rpad, except that we posibly do a character set recoding in the middle. */ - uint8_t *dst = value_str_rw (i->output, i->width); + uint8_t *dst = i->output->s; size_t dst_size = i->width; const char *src = ss_data (i->input); size_t src_size = ss_length (i->input); @@ -716,13 +720,13 @@ parse_A (struct data_in *i) static char * parse_AHEX (struct data_in *i) { - uint8_t *s = value_str_rw (i->output, i->width); + uint8_t *s = i->output->s; size_t j; for (j = 0; ; j++) { - int hi = ss_get_char (&i->input); - int lo = ss_get_char (&i->input); + int hi = ss_get_byte (&i->input); + int lo = ss_get_byte (&i->input); if (hi == EOF) break; else if (lo == EOF) @@ -783,30 +787,34 @@ parse_day (struct data_in *i, long *day) return xasprintf (_("Day (%ld) must be between 1 and 31."), *day); } +/* If *TIME_SIGN is SIGN_NO_TIME, allows a sign to precede the + time and sets *TIME_SIGN. Otherwise, does not allow a sign. */ +static void +parse_time_sign (struct data_in *i, enum time_sign *time_sign) +{ + if (*time_sign == SIGN_NO_TIME) + { + if (ss_match_byte (&i->input, '-')) + *time_sign = SIGN_NEGATIVE; + else + { + ss_match_byte (&i->input, '+'); + *time_sign = SIGN_POSITIVE; + } + } +} + /* Parses an integer from the beginning of I. Adds SECONDS_PER_UNIT times the absolute value of the integer to *TIME. - If *TIME_SIGN is SIGN_NO_TIME, allows a sign to precede the - time and sets *TIME_SIGN. Otherwise, does not allow a sign. Returns true if successful, false if no integer was present. */ static char * -parse_time_units (struct data_in *i, double seconds_per_unit, - enum time_sign *time_sign, double *time) +parse_time_units (struct data_in *i, double seconds_per_unit, double *time) { char *error; long units; - if (*time_sign == SIGN_NO_TIME) - { - if (ss_match_char (&i->input, '-')) - *time_sign = SIGN_NEGATIVE; - else - { - ss_match_char (&i->input, '+'); - *time_sign = SIGN_POSITIVE; - } - } error = parse_int (i, &units, SIZE_MAX); if (error != NULL) return error; @@ -839,7 +847,7 @@ static struct substring parse_name_token (struct data_in *i) { struct substring token; - ss_get_chars (&i->input, ss_span (i->input, ss_cstr (CC_LETTERS)), &token); + ss_get_bytes (&i->input, ss_span (i->input, ss_cstr (CC_LETTERS)), &token); return token; } @@ -915,7 +923,7 @@ parse_year (struct data_in *i, long *year, size_t max_digits) if (*year >= 0 && *year <= 99) { - int epoch = settings_get_epoch (); + int epoch = fmt_settings_get_epoch (i->settings); int epoch_century = ROUND_DOWN (epoch, 100); int epoch_offset = epoch - epoch_century; if (*year >= epoch_offset) @@ -923,7 +931,7 @@ parse_year (struct data_in *i, long *year, size_t max_digits) else *year += epoch_century + 100; } - if (*year >= 1582 || *year <= 19999) + if (*year >= 1582 && *year <= 19999) return NULL; return xasprintf (_("Year (%ld) must be between 1582 and 19999."), *year); @@ -949,7 +957,7 @@ parse_yday (struct data_in *i, long *yday) struct substring num_s; long num; - ss_get_chars (&i->input, 3, &num_s); + ss_get_bytes (&i->input, 3, &num_s); if (ss_span (num_s, ss_cstr (CC_DIGITS)) != 3) return xstrdup (_("Julian day must have exactly three digits.")); else if (!ss_get_long (&num_s, &num) || num < 1 || num > 366) @@ -1029,7 +1037,7 @@ parse_minute_second (struct data_in *i, double *time) error = parse_int (i, &minute, SIZE_MAX); if (error != NULL) return error; - if (minute < 0 || minute > 59) + if (i->format != FMT_MTIME && (minute < 0 || minute > 59)) return xasprintf (_("Minute (%ld) must be between 0 and 59."), minute); *time += 60. * minute; @@ -1041,14 +1049,14 @@ parse_minute_second (struct data_in *i, double *time) /* Parse seconds. */ cp = buf; while (c_isdigit (ss_first (i->input))) - *cp++ = ss_get_char (&i->input); - if (ss_match_char (&i->input, settings_get_decimal_char (FMT_F))) + *cp++ = ss_get_byte (&i->input); + if (ss_match_byte (&i->input, i->settings->decimal)) *cp++ = '.'; while (c_isdigit (ss_first (i->input))) - *cp++ = ss_get_char (&i->input); + *cp++ = ss_get_byte (&i->input); *cp = '\0'; - *time += strtod (buf, NULL); + *time += c_strtod (buf, NULL); return NULL; } @@ -1080,7 +1088,7 @@ parse_weekday (struct data_in *i, long *weekday) static char * parse_WKDAY (struct data_in *i) { - long weekday; + long weekday = 0; char *error; if (trim_spaces_and_check_missing (i)) @@ -1113,7 +1121,7 @@ parse_MONTH (struct data_in *i) } /* Parses DATE, ADATE, EDATE, JDATE, SDATE, QYR, MOYR, KWYR, - DATETIME, TIME and DTIME formats. */ + DATETIME, YMDHMS, MTIME, TIME, and DTIME formats. */ static char * parse_date (struct data_in *i) { @@ -1124,7 +1132,7 @@ parse_date (struct data_in *i) double time = 0, date = 0; enum time_sign time_sign = SIGN_NO_TIME; - const char *template = fmt_date_template (i->format); + const char *template = fmt_date_template (i->format, 0); size_t template_width = strlen (template); char *error; @@ -1170,30 +1178,39 @@ parse_date (struct data_in *i) error = parse_week (i, &yday); break; case 'D': - error = parse_time_units (i, 60. * 60. * 24., &time_sign, &time); + parse_time_sign (i, &time_sign); + error = parse_time_units (i, 60. * 60. * 24., &time); break; case 'H': - error = parse_time_units (i, 60. * 60., &time_sign, &time); + parse_time_sign (i, &time_sign); + error = parse_time_units (i, 60. * 60., &time); break; case 'M': + if (i->format == FMT_MTIME) + parse_time_sign (i, &time_sign); error = parse_minute_second (i, &time); break; case '-': case '/': case '.': - case 'X': error = parse_date_delimiter (i); break; case ':': error = parse_time_delimiter (i); + break; case ' ': - parse_spaces (i); - error = NULL; + if (i->format != FMT_MOYR) + { + parse_spaces (i); + error = NULL; + } + else + error = parse_date_delimiter (i); break; default: assert (count == 1); - if (!ss_match_char (&i->input, c_toupper (ch)) - && !ss_match_char (&i->input, c_tolower (ch))) + if (!ss_match_byte (&i->input, c_toupper (ch)) + && !ss_match_byte (&i->input, c_tolower (ch))) error = xasprintf (_("`%c' expected in date field."), ch); else error = NULL; @@ -1211,7 +1228,8 @@ parse_date (struct data_in *i) char *error; double ofs; - ofs = calendar_gregorian_to_offset (year, month, day, &error); + ofs = calendar_gregorian_to_offset ( + year, month, day, settings_get_fmt_settings (), &error); if (ofs == SYSMIS) return error; date = (yday - 1 + ofs) * 60. * 60. * 24.; @@ -1233,7 +1251,7 @@ static void default_result (struct data_in *i) { if (fmt_is_string (i->format)) - memset (value_str_rw (i->output, i->width), ' ', i->width); + memset (i->output->s, ' ', i->width); else i->output->f = settings_get_blanks (); }