X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fdata-in.c;h=33eeb111384971179087e368c3bff10005ce8b54;hb=424aa0c4f6d8ff6dc123e4452f37af7e6cd2d5d3;hp=d36aefb5e8144bb1e2207fb613d2ec6d897b63c2;hpb=3da49359c52cb783db907cc197847bbd5e721c97;p=pspp diff --git a/src/data/data-in.c b/src/data/data-in.c index d36aefb5e8..33eeb11138 100644 --- a/src/data/data-in.c +++ b/src/data/data-in.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006, 2009, 2010 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -37,7 +37,6 @@ #include "libpspp/compiler.h" #include "libpspp/i18n.h" #include "libpspp/integer-format.h" -#include "libpspp/legacy-encoding.h" #include "libpspp/misc.h" #include "libpspp/str.h" #include "settings.h" @@ -77,7 +76,11 @@ static int hexit_value (int c); Stores the parsed representation in OUTPUT, which the caller must have initialized with the given WIDTH (0 for a numeric field, otherwise the string width). If FORMAT is FMT_A, then OUTPUT_ENCODING must specify the - correct encoding for OUTPUT (normally obtained via dict_get_encoding()). */ + correct encoding for OUTPUT (normally obtained via dict_get_encoding()). + + If successful NULL is the return value. Otherwise a string describing + the problem is returned. The caller must free this string. + */ char * data_in (struct substring input, const char *input_encoding, enum fmt_type format, @@ -110,13 +113,13 @@ data_in (struct substring input, const char *input_encoding, } cat = fmt_get_category (format); - if (cat & (FMT_CAT_BASIC | FMT_CAT_HEXADECIMAL + if (cat & (FMT_CAT_BASIC | FMT_CAT_HEXADECIMAL | FMT_CAT_CUSTOM | FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)) { /* We're going to parse these into numbers. For this purpose we want to deal with them in the local "C" encoding. Any character not in that encoding wouldn't be valid anyhow. */ - dest_encoding = LEGACY_NATIVE; + dest_encoding = C_ENCODING; } else if (cat & (FMT_CAT_BINARY | FMT_CAT_LEGACY)) { @@ -130,7 +133,7 @@ data_in (struct substring input, const char *input_encoding, { /* We want the hex digits in the local "C" encoding, even though the result may not be in that encoding. */ - dest_encoding = LEGACY_NATIVE; + dest_encoding = C_ENCODING; } else { @@ -169,7 +172,7 @@ data_in_msg (struct substring input, const char *input_encoding, output, width, output_encoding); if (error != NULL) { - msg (SW,_("Data is not valid as format %s: %s"), + msg (SW, _("Data is not valid as format %s: %s"), fmt_name (format), error); free (error); return false; @@ -245,7 +248,7 @@ has_implied_decimals (struct substring input, const char *input_encoding, return false; } - s = recode_string (LEGACY_NATIVE, input_encoding, + s = recode_string (C_ENCODING, input_encoding, ss_data (input), ss_length (input)); retval = (format == FMT_Z ? strchr (s, '.') == NULL @@ -282,7 +285,6 @@ parse_number (struct data_in *i) struct string tmp; - bool explicit_decimals = false; int save_errno; char *tail; @@ -299,46 +301,45 @@ parse_number (struct data_in *i) ds_extend (&tmp, 64); /* Prefix character may precede sign. */ - if (!ss_is_empty (style->prefix)) + if (style->prefix.s[0] != '\0') { - ss_match_char (&i->input, ss_first (style->prefix)); + ss_match_byte (&i->input, style->prefix.s[0]); ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } /* Sign. */ - if (ss_match_char (&i->input, '-')) + if (ss_match_byte (&i->input, '-')) { - ds_put_char (&tmp, '-'); + ds_put_byte (&tmp, '-'); ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } else { - ss_match_char (&i->input, '+'); + ss_match_byte (&i->input, '+'); ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } /* Prefix character may follow sign. */ - if (!ss_is_empty (style->prefix)) + if (style->prefix.s[0] != '\0') { - ss_match_char (&i->input, ss_first (style->prefix)); + ss_match_byte (&i->input, style->prefix.s[0]); ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } /* Digits before decimal point. */ while (c_isdigit (ss_first (i->input))) { - ds_put_char (&tmp, ss_get_char (&i->input)); + ds_put_byte (&tmp, ss_get_byte (&i->input)); if (style->grouping != 0) - ss_match_char (&i->input, style->grouping); + ss_match_byte (&i->input, style->grouping); } /* Decimal point and following digits. */ - if (ss_match_char (&i->input, style->decimal)) + if (ss_match_byte (&i->input, style->decimal)) { - explicit_decimals = true; - ds_put_char (&tmp, '.'); + ds_put_byte (&tmp, '.'); while (c_isdigit (ss_first (i->input))) - ds_put_char (&tmp, ss_get_char (&i->input)); + ds_put_byte (&tmp, ss_get_byte (&i->input)); } /* Exponent. */ @@ -346,29 +347,28 @@ parse_number (struct data_in *i) && !ss_is_empty (i->input) && strchr ("eEdD-+", ss_first (i->input))) { - explicit_decimals = true; - ds_put_char (&tmp, 'e'); + ds_put_byte (&tmp, 'e'); if (strchr ("eEdD", ss_first (i->input))) { ss_advance (&i->input, 1); - ss_match_char (&i->input, ' '); + ss_match_byte (&i->input, ' '); } if (ss_first (i->input) == '-' || ss_first (i->input) == '+') { - if (ss_get_char (&i->input) == '-') - ds_put_char (&tmp, '-'); - ss_match_char (&i->input, ' '); + if (ss_get_byte (&i->input) == '-') + ds_put_byte (&tmp, '-'); + ss_match_byte (&i->input, ' '); } while (c_isdigit (ss_first (i->input))) - ds_put_char (&tmp, ss_get_char (&i->input)); + ds_put_byte (&tmp, ss_get_byte (&i->input)); } /* Suffix character. */ - if (!ss_is_empty (style->suffix)) - ss_match_char (&i->input, ss_first (style->suffix)); + if (style->suffix.s[0] != '\0') + ss_match_byte (&i->input, style->suffix.s[0]); if (!ss_is_empty (i->input)) { @@ -420,7 +420,7 @@ parse_N (struct data_in *i) int c; i->output->f = 0; - while ((c = ss_get_char (&i->input)) != EOF) + while ((c = ss_get_byte (&i->input)) != EOF) { if (!c_isdigit (c)) return xstrdup (_("All characters in field must be digits.")); @@ -439,7 +439,7 @@ parse_PIBHEX (struct data_in *i) n = 0.0; - while ((c = ss_get_char (&i->input)) != EOF) + while ((c = ss_get_byte (&i->input)) != EOF) { if (!c_isxdigit (c)) return xstrdup (_("Unrecognized character in field.")); @@ -460,8 +460,8 @@ parse_RBHEX (struct data_in *i) memset (&d, 0, sizeof d); for (j = 0; !ss_is_empty (i->input) && j < sizeof d; j++) { - int hi = ss_get_char (&i->input); - int lo = ss_get_char (&i->input); + int hi = ss_get_byte (&i->input); + int lo = ss_get_byte (&i->input); if (lo == EOF) return xstrdup (_("Field must have even length.")); else if (!c_isxdigit (hi) || !c_isxdigit (lo)) @@ -520,22 +520,22 @@ parse_Z (struct data_in *i) ds_init_empty (&tmp); ds_extend (&tmp, 64); - ds_put_char (&tmp, '+'); + ds_put_byte (&tmp, '+'); while (!ss_is_empty (i->input)) { - int c = ss_get_char (&i->input); + int c = ss_get_byte (&i->input); if (c_isdigit (c) && !got_final_digit) - ds_put_char (&tmp, c); + ds_put_byte (&tmp, c); else if (is_z_digit (c) && !got_final_digit) { - ds_put_char (&tmp, z_digit_value (c) + '0'); + ds_put_byte (&tmp, z_digit_value (c) + '0'); if (is_negative_z_digit (c)) ds_data (&tmp)[0] = '-'; got_final_digit = true; } else if (c == '.' && !got_dot) { - ds_put_char (&tmp, '.'); + ds_put_byte (&tmp, '.'); got_dot = true; } else @@ -623,7 +623,7 @@ parse_PIB (struct data_in *i) static void get_nibbles (struct substring *s, int *high_nibble, int *low_nibble) { - int c = ss_get_char (s); + int c = ss_get_byte (s); assert (c != EOF); *high_nibble = (c >> 4) & 15; *low_nibble = c & 15; @@ -699,7 +699,7 @@ parse_A (struct data_in *i) { /* This is equivalent to buf_copy_rpad, except that we posibly do a character set recoding in the middle. */ - uint8_t *dst = value_str_rw (i->output, i->width); + uint8_t *dst = i->output->s; size_t dst_size = i->width; const char *src = ss_data (i->input); size_t src_size = ss_length (i->input); @@ -716,13 +716,13 @@ parse_A (struct data_in *i) static char * parse_AHEX (struct data_in *i) { - uint8_t *s = value_str_rw (i->output, i->width); + uint8_t *s = i->output->s; size_t j; for (j = 0; ; j++) { - int hi = ss_get_char (&i->input); - int lo = ss_get_char (&i->input); + int hi = ss_get_byte (&i->input); + int lo = ss_get_byte (&i->input); if (hi == EOF) break; else if (lo == EOF) @@ -783,30 +783,34 @@ parse_day (struct data_in *i, long *day) return xasprintf (_("Day (%ld) must be between 1 and 31."), *day); } +/* If *TIME_SIGN is SIGN_NO_TIME, allows a sign to precede the + time and sets *TIME_SIGN. Otherwise, does not allow a sign. */ +static void +parse_time_sign (struct data_in *i, enum time_sign *time_sign) +{ + if (*time_sign == SIGN_NO_TIME) + { + if (ss_match_byte (&i->input, '-')) + *time_sign = SIGN_NEGATIVE; + else + { + ss_match_byte (&i->input, '+'); + *time_sign = SIGN_POSITIVE; + } + } +} + /* Parses an integer from the beginning of I. Adds SECONDS_PER_UNIT times the absolute value of the integer to *TIME. - If *TIME_SIGN is SIGN_NO_TIME, allows a sign to precede the - time and sets *TIME_SIGN. Otherwise, does not allow a sign. Returns true if successful, false if no integer was present. */ static char * -parse_time_units (struct data_in *i, double seconds_per_unit, - enum time_sign *time_sign, double *time) +parse_time_units (struct data_in *i, double seconds_per_unit, double *time) { char *error; long units; - if (*time_sign == SIGN_NO_TIME) - { - if (ss_match_char (&i->input, '-')) - *time_sign = SIGN_NEGATIVE; - else - { - ss_match_char (&i->input, '+'); - *time_sign = SIGN_POSITIVE; - } - } error = parse_int (i, &units, SIZE_MAX); if (error != NULL) return error; @@ -839,7 +843,7 @@ static struct substring parse_name_token (struct data_in *i) { struct substring token; - ss_get_chars (&i->input, ss_span (i->input, ss_cstr (CC_LETTERS)), &token); + ss_get_bytes (&i->input, ss_span (i->input, ss_cstr (CC_LETTERS)), &token); return token; } @@ -923,7 +927,7 @@ parse_year (struct data_in *i, long *year, size_t max_digits) else *year += epoch_century + 100; } - if (*year >= 1582 || *year <= 19999) + if (*year >= 1582 && *year <= 19999) return NULL; return xasprintf (_("Year (%ld) must be between 1582 and 19999."), *year); @@ -949,7 +953,7 @@ parse_yday (struct data_in *i, long *yday) struct substring num_s; long num; - ss_get_chars (&i->input, 3, &num_s); + ss_get_bytes (&i->input, 3, &num_s); if (ss_span (num_s, ss_cstr (CC_DIGITS)) != 3) return xstrdup (_("Julian day must have exactly three digits.")); else if (!ss_get_long (&num_s, &num) || num < 1 || num > 366) @@ -1029,7 +1033,7 @@ parse_minute_second (struct data_in *i, double *time) error = parse_int (i, &minute, SIZE_MAX); if (error != NULL) return error; - if (minute < 0 || minute > 59) + if (i->format != FMT_MTIME && (minute < 0 || minute > 59)) return xasprintf (_("Minute (%ld) must be between 0 and 59."), minute); *time += 60. * minute; @@ -1041,14 +1045,14 @@ parse_minute_second (struct data_in *i, double *time) /* Parse seconds. */ cp = buf; while (c_isdigit (ss_first (i->input))) - *cp++ = ss_get_char (&i->input); - if (ss_match_char (&i->input, settings_get_decimal_char (FMT_F))) + *cp++ = ss_get_byte (&i->input); + if (ss_match_byte (&i->input, settings_get_decimal_char (FMT_F))) *cp++ = '.'; while (c_isdigit (ss_first (i->input))) - *cp++ = ss_get_char (&i->input); + *cp++ = ss_get_byte (&i->input); *cp = '\0'; - *time += strtod (buf, NULL); + *time += c_strtod (buf, NULL); return NULL; } @@ -1113,7 +1117,7 @@ parse_MONTH (struct data_in *i) } /* Parses DATE, ADATE, EDATE, JDATE, SDATE, QYR, MOYR, KWYR, - DATETIME, TIME and DTIME formats. */ + DATETIME, YMDHMS, MTIME, TIME, and DTIME formats. */ static char * parse_date (struct data_in *i) { @@ -1124,7 +1128,7 @@ parse_date (struct data_in *i) double time = 0, date = 0; enum time_sign time_sign = SIGN_NO_TIME; - const char *template = fmt_date_template (i->format); + const char *template = fmt_date_template (i->format, 0); size_t template_width = strlen (template); char *error; @@ -1170,30 +1174,39 @@ parse_date (struct data_in *i) error = parse_week (i, &yday); break; case 'D': - error = parse_time_units (i, 60. * 60. * 24., &time_sign, &time); + parse_time_sign (i, &time_sign); + error = parse_time_units (i, 60. * 60. * 24., &time); break; case 'H': - error = parse_time_units (i, 60. * 60., &time_sign, &time); + parse_time_sign (i, &time_sign); + error = parse_time_units (i, 60. * 60., &time); break; case 'M': + if (i->format == FMT_MTIME) + parse_time_sign (i, &time_sign); error = parse_minute_second (i, &time); break; case '-': case '/': case '.': - case 'X': error = parse_date_delimiter (i); break; case ':': error = parse_time_delimiter (i); + break; case ' ': - parse_spaces (i); - error = NULL; + if (i->format != FMT_MOYR) + { + parse_spaces (i); + error = NULL; + } + else + error = parse_date_delimiter (i); break; default: assert (count == 1); - if (!ss_match_char (&i->input, c_toupper (ch)) - && !ss_match_char (&i->input, c_tolower (ch))) + if (!ss_match_byte (&i->input, c_toupper (ch)) + && !ss_match_byte (&i->input, c_tolower (ch))) error = xasprintf (_("`%c' expected in date field."), ch); else error = NULL; @@ -1233,7 +1246,7 @@ static void default_result (struct data_in *i) { if (fmt_is_string (i->format)) - memset (value_str_rw (i->output, i->width), ' ', i->width); + memset (i->output->s, ' ', i->width); else i->output->f = settings_get_blanks (); }