X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fdata-in.c;h=b6fa5167f008a8aaa471b3d3fe9762fed7301fb5;hb=bdebbd4db2d6c539eadb145f726382fe338e4219;hp=28d6da2a42b7b2629779838b28e80000e28f90f0;hpb=c1e28f93006b3151c30c6f5e77d88f177960e6d4;p=pspp-builds.git diff --git a/src/data/data-in.c b/src/data/data-in.c index 28d6da2a..b6fa5167 100644 --- a/src/data/data-in.c +++ b/src/data/data-in.c @@ -1,5 +1,5 @@ /* PSPP - a program for statistical analysis. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,31 +20,32 @@ #include #include +#include #include #include +#include #include #include #include #include -#include #include "calendar.h" +#include "dictionary.h" +#include "format.h" #include "identifier.h" +#include "libpspp/assertion.h" +#include "libpspp/compiler.h" +#include "libpspp/i18n.h" +#include "libpspp/integer-format.h" +#include "libpspp/misc.h" +#include "libpspp/str.h" #include "settings.h" #include "value.h" -#include "format.h" -#include -#include -#include -#include -#include -#include -#include -#include "c-ctype.h" -#include "c-strtod.h" -#include "minmax.h" -#include "xalloc.h" +#include "gl/c-ctype.h" +#include "gl/c-strtod.h" +#include "gl/minmax.h" +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -52,55 +53,34 @@ /* Information about parsing one data field. */ struct data_in { - enum legacy_encoding encoding;/* Encoding of source. */ struct substring input; /* Source. */ enum fmt_type format; /* Input format. */ - int implied_decimals; /* Number of implied decimal places. */ union value *output; /* Destination. */ int width; /* Output width. */ - - int first_column; /* First column of field; 0 if inapplicable. */ - int last_column; /* Last column. */ }; - - -typedef bool data_in_parser_func (struct data_in *); +typedef char *data_in_parser_func (struct data_in *); #define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) \ static data_in_parser_func parse_##METHOD; #include "format.def" -static void vdata_warning (const struct data_in *, const char *, va_list) - PRINTF_FORMAT (2, 0); -static void data_warning (const struct data_in *, const char *, ...) - PRINTF_FORMAT (2, 3); - -static void apply_implied_decimals (struct data_in *); static void default_result (struct data_in *); static bool trim_spaces_and_check_missing (struct data_in *); static int hexit_value (int c); /* Parses the characters in INPUT, which are encoded in the given - ENCODING, according to FORMAT. Stores the parsed - representation in OUTPUT, which has the given WIDTH (0 for - a numeric field, otherwise the string width). - - If no decimal point is included in a numeric format, then - IMPLIED_DECIMALS decimal places are implied. Specify 0 if no - decimal places should be implied. - - If FIRST_COLUMN and LAST_COLUMN are nonzero, then they should - be the 1-based column number of the first and - one-past-the-last-character in INPUT, for use in error - messages. (LAST_COLUMN cannot always be calculated from - FIRST_COLUMN plus the length of the input because of the - possibility of escaped quotes in strings, etc.) */ -bool -data_in (struct substring input, enum legacy_encoding encoding, - enum fmt_type format, int implied_decimals, - int first_column, int last_column, union value *output, int width) + INPUT_ENCODING, according to FORMAT. + + Stores the parsed representation in OUTPUT, which the caller must have + initialized with the given WIDTH (0 for a numeric field, otherwise the + string width). If FORMAT is FMT_A, then OUTPUT_ENCODING must specify the + correct encoding for OUTPUT (normally obtained via dict_get_encoding()). */ +char * +data_in (struct substring input, const char *input_encoding, + enum fmt_type format, + union value *output, int width, const char *output_encoding) { static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] = { @@ -109,57 +89,191 @@ data_in (struct substring input, enum legacy_encoding encoding, }; struct data_in i; - void *copy = NULL; - bool ok; + + enum fmt_category cat; + const char *dest_encoding; + char *s; + char *error; assert ((width != 0) == fmt_is_string (format)); - if (encoding == LEGACY_NATIVE - || fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING)) + i.format = format; + + i.output = output; + i.width = width; + + if (ss_is_empty (input)) { - i.input = input; - i.encoding = encoding; + default_result (&i); + return NULL; + } + + cat = fmt_get_category (format); + if (cat & (FMT_CAT_BASIC | FMT_CAT_HEXADECIMAL + | FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)) + { + /* We're going to parse these into numbers. For this purpose we want to + deal with them in the local "C" encoding. Any character not in that + encoding wouldn't be valid anyhow. */ + dest_encoding = C_ENCODING; + } + else if (cat & (FMT_CAT_BINARY | FMT_CAT_LEGACY)) + { + /* Don't recode these binary formats at all, since they are not text. */ + dest_encoding = NULL; } else { - ss_alloc_uninit (&i.input, ss_length (input)); - legacy_recode (encoding, ss_data (input), LEGACY_NATIVE, - ss_data (i.input), ss_length (input)); - i.encoding = LEGACY_NATIVE; - copy = ss_data (i.input); + assert (cat == FMT_CAT_STRING); + if (format == FMT_AHEX) + { + /* We want the hex digits in the local "C" encoding, even though the + result may not be in that encoding. */ + dest_encoding = C_ENCODING; + } + else + { + /* Use the final output encoding. */ + dest_encoding = output_encoding; + } } - i.format = format; - i.implied_decimals = implied_decimals; - i.output = output; - i.width = width; + if (dest_encoding != NULL) + { + i.input = recode_substring_pool (dest_encoding, input_encoding, input, + NULL); + s = i.input.string; + } + else + { + i.input = input; + s = NULL; + } - i.first_column = first_column; - i.last_column = last_column; + error = handlers[i.format] (&i); + if (error != NULL) + default_result (&i); + + free (s); + + return error; +} - if (!ss_is_empty (i.input)) +bool +data_in_msg (struct substring input, const char *input_encoding, + enum fmt_type format, + union value *output, int width, const char *output_encoding) +{ + char *error = data_in (input, input_encoding, format, + output, width, output_encoding); + if (error != NULL) { - ok = handlers[i.format] (&i); - if (!ok) - default_result (&i); + msg (SW,_("Data is not valid as format %s: %s"), + fmt_name (format), error); + free (error); + return false; } else + return true; +} + +static bool +number_has_implied_decimals (const char *s, enum fmt_type type) +{ + int decimal = settings_get_style (type)->decimal; + bool got_digit = false; + for (;;) { - default_result (&i); - ok = true; + switch (*s) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + got_digit = true; + break; + + case '+': case '-': + if (got_digit) + return false; + break; + + case 'e': case 'E': case 'd': case 'D': + return false; + + case '.': case ',': + if (*s == decimal) + return false; + break; + + case '\0': + return true; + + default: + break; + } + + s++; + } +} + +static bool +has_implied_decimals (struct substring input, const char *input_encoding, + enum fmt_type format) +{ + bool retval; + char *s; + + switch (format) + { + case FMT_F: + case FMT_COMMA: + case FMT_DOT: + case FMT_DOLLAR: + case FMT_PCT: + case FMT_E: + case FMT_Z: + break; + + case FMT_N: + case FMT_IB: + case FMT_PIB: + case FMT_P: + case FMT_PK: + return true; + + default: + return false; } - if (copy) - free (copy); + s = recode_string (C_ENCODING, input_encoding, + ss_data (input), ss_length (input)); + retval = (format == FMT_Z + ? strchr (s, '.') == NULL + : number_has_implied_decimals (s, format)); + free (s); - return ok; + return retval; } +/* In some cases, when no decimal point is explicitly included in numeric + input, its position is implied by the number of decimal places in the input + format. In such a case, this function may be called just after data_in(). + Its arguments are a subset of that function's arguments plus D, the number + of decimal places associated with FORMAT. + + If it is appropriate, this function modifies the numeric value in OUTPUT. */ +void +data_in_imply_decimals (struct substring input, const char *input_encoding, + enum fmt_type format, int d, union value *output) +{ + if (d > 0 && output->f != SYSMIS + && has_implied_decimals (input, input_encoding, format)) + output->f /= pow (10., d); +} /* Format parsers. */ /* Parses F, COMMA, DOT, DOLLAR, PCT, and E input formats. */ -static bool +static char * parse_number (struct data_in *i) { const struct fmt_number_style *style = @@ -178,7 +292,7 @@ parse_number (struct data_in *i) /* Trim spaces and check for missing value representation. */ if (trim_spaces_and_check_missing (i)) - return true; + return NULL; ds_init_empty (&tmp); ds_extend (&tmp, 64); @@ -186,44 +300,44 @@ parse_number (struct data_in *i) /* Prefix character may precede sign. */ if (!ss_is_empty (style->prefix)) { - ss_match_char (&i->input, ss_first (style->prefix)); + ss_match_byte (&i->input, ss_first (style->prefix)); ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } /* Sign. */ - if (ss_match_char (&i->input, '-')) + if (ss_match_byte (&i->input, '-')) { - ds_put_char (&tmp, '-'); + ds_put_byte (&tmp, '-'); ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } else { - ss_match_char (&i->input, '+'); + ss_match_byte (&i->input, '+'); ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } /* Prefix character may follow sign. */ if (!ss_is_empty (style->prefix)) { - ss_match_char (&i->input, ss_first (style->prefix)); + ss_match_byte (&i->input, ss_first (style->prefix)); ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } /* Digits before decimal point. */ while (c_isdigit (ss_first (i->input))) { - ds_put_char (&tmp, ss_get_char (&i->input)); + ds_put_byte (&tmp, ss_get_byte (&i->input)); if (style->grouping != 0) - ss_match_char (&i->input, style->grouping); + ss_match_byte (&i->input, style->grouping); } /* Decimal point and following digits. */ - if (ss_match_char (&i->input, style->decimal)) + if (ss_match_byte (&i->input, style->decimal)) { explicit_decimals = true; - ds_put_char (&tmp, '.'); + ds_put_byte (&tmp, '.'); while (c_isdigit (ss_first (i->input))) - ds_put_char (&tmp, ss_get_char (&i->input)); + ds_put_byte (&tmp, ss_get_byte (&i->input)); } /* Exponent. */ @@ -232,37 +346,38 @@ parse_number (struct data_in *i) && strchr ("eEdD-+", ss_first (i->input))) { explicit_decimals = true; - ds_put_char (&tmp, 'e'); + ds_put_byte (&tmp, 'e'); if (strchr ("eEdD", ss_first (i->input))) { ss_advance (&i->input, 1); - ss_match_char (&i->input, ' '); + ss_match_byte (&i->input, ' '); } if (ss_first (i->input) == '-' || ss_first (i->input) == '+') { - if (ss_get_char (&i->input) == '-') - ds_put_char (&tmp, '-'); - ss_match_char (&i->input, ' '); + if (ss_get_byte (&i->input) == '-') + ds_put_byte (&tmp, '-'); + ss_match_byte (&i->input, ' '); } while (c_isdigit (ss_first (i->input))) - ds_put_char (&tmp, ss_get_char (&i->input)); + ds_put_byte (&tmp, ss_get_byte (&i->input)); } /* Suffix character. */ if (!ss_is_empty (style->suffix)) - ss_match_char (&i->input, ss_first (style->suffix)); + ss_match_byte (&i->input, ss_first (style->suffix)); if (!ss_is_empty (i->input)) { + char *error; if (ds_is_empty (&tmp)) - data_warning (i, _("Field contents are not numeric.")); + error = xstrdup (_("Field contents are not numeric.")); else - data_warning (i, _("Number followed by garbage.")); + error = xstrdup (_("Number followed by garbage.")); ds_destroy (&tmp); - return false; + return error; } /* Let c_strtod() do the conversion. */ @@ -271,58 +386,51 @@ parse_number (struct data_in *i) i->output->f = c_strtod (ds_cstr (&tmp), &tail); if (*tail != '\0') { - data_warning (i, _("Invalid numeric syntax.")); errno = save_errno; ds_destroy (&tmp); - return false; + return xstrdup (_("Invalid numeric syntax.")); } else if (errno == ERANGE) { if (fabs (i->output->f) > 1) { - data_warning (i, _("Too-large number set to system-missing.")); i->output->f = SYSMIS; + ds_destroy (&tmp); + return xstrdup (_("Too-large number set to system-missing.")); } else { - data_warning (i, _("Too-small number set to zero.")); i->output->f = 0.0; + ds_destroy (&tmp); + return xstrdup (_("Too-small number set to zero.")); } } else - { - errno = save_errno; - if (!explicit_decimals) - apply_implied_decimals (i); - } + errno = save_errno; ds_destroy (&tmp); - return true; + return NULL; } /* Parses N format. */ -static bool +static char * parse_N (struct data_in *i) { int c; i->output->f = 0; - while ((c = ss_get_char (&i->input)) != EOF) + while ((c = ss_get_byte (&i->input)) != EOF) { if (!c_isdigit (c)) - { - data_warning (i, _("All characters in field must be digits.")); - return false; - } + return xstrdup (_("All characters in field must be digits.")); i->output->f = i->output->f * 10.0 + (c - '0'); } - apply_implied_decimals (i); - return true; + return NULL; } /* Parses PIBHEX format. */ -static bool +static char * parse_PIBHEX (struct data_in *i) { double n; @@ -330,22 +438,19 @@ parse_PIBHEX (struct data_in *i) n = 0.0; - while ((c = ss_get_char (&i->input)) != EOF) + while ((c = ss_get_byte (&i->input)) != EOF) { if (!c_isxdigit (c)) - { - data_warning (i, _("Unrecognized character in field.")); - return false; - } + return xstrdup (_("Unrecognized character in field.")); n = n * 16.0 + hexit_value (c); } i->output->f = n; - return true; + return NULL; } /* Parses RBHEX format. */ -static bool +static char * parse_RBHEX (struct data_in *i) { double d; @@ -354,24 +459,18 @@ parse_RBHEX (struct data_in *i) memset (&d, 0, sizeof d); for (j = 0; !ss_is_empty (i->input) && j < sizeof d; j++) { - int hi = ss_get_char (&i->input); - int lo = ss_get_char (&i->input); + int hi = ss_get_byte (&i->input); + int lo = ss_get_byte (&i->input); if (lo == EOF) - { - data_warning (i, _("Field must have even length.")); - return false; - } + return xstrdup (_("Field must have even length.")); else if (!c_isxdigit (hi) || !c_isxdigit (lo)) - { - data_warning (i, _("Field must contain only hex digits.")); - return false; - } + return xstrdup (_("Field must contain only hex digits.")); ((unsigned char *) &d)[j] = 16 * hexit_value (hi) + hexit_value (lo); } i->output->f = d; - return true; + return NULL; } /* Digits for Z format. */ @@ -403,7 +502,7 @@ is_negative_z_digit (int c) } /* Parses Z format. */ -static bool +static char * parse_Z (struct data_in *i) { struct string tmp; @@ -415,44 +514,47 @@ parse_Z (struct data_in *i) /* Trim spaces and check for missing value representation. */ if (trim_spaces_and_check_missing (i)) - return true; + return NULL; ds_init_empty (&tmp); ds_extend (&tmp, 64); - ds_put_char (&tmp, '+'); + ds_put_byte (&tmp, '+'); while (!ss_is_empty (i->input)) { - int c = ss_get_char (&i->input); + int c = ss_get_byte (&i->input); if (c_isdigit (c) && !got_final_digit) - ds_put_char (&tmp, c); + ds_put_byte (&tmp, c); else if (is_z_digit (c) && !got_final_digit) { - ds_put_char (&tmp, z_digit_value (c) + '0'); + ds_put_byte (&tmp, z_digit_value (c) + '0'); if (is_negative_z_digit (c)) ds_data (&tmp)[0] = '-'; got_final_digit = true; } else if (c == '.' && !got_dot) { - ds_put_char (&tmp, '.'); + ds_put_byte (&tmp, '.'); got_dot = true; } else { ds_destroy (&tmp); - return false; + return xstrdup (_("Invalid zoned decimal syntax.")); } } if (!ss_is_empty (i->input)) { + char *error; + if (ds_length (&tmp) == 1) - data_warning (i, _("Field contents are not numeric.")); + error = xstrdup (_("Field contents are not numeric.")); else - data_warning (i, _("Number followed by garbage.")); + error = xstrdup (_("Number followed by garbage.")); + ds_destroy (&tmp); - return false; + return error; } /* Let c_strtod() do the conversion. */ @@ -463,28 +565,26 @@ parse_Z (struct data_in *i) { if (fabs (i->output->f) > 1) { - data_warning (i, _("Too-large number set to system-missing.")); i->output->f = SYSMIS; + ds_destroy (&tmp); + return xstrdup (_("Too-large number set to system-missing.")); } else { - data_warning (i, _("Too-small number set to zero.")); i->output->f = 0.0; + ds_destroy (&tmp); + return xstrdup (_("Too-small number set to zero.")); } } else - { - errno = save_errno; - if (!got_dot) - apply_implied_decimals (i); - } + errno = save_errno; ds_destroy (&tmp); - return true; + return NULL; } /* Parses IB format. */ -static bool +static char * parse_IB (struct data_in *i) { size_t bytes; @@ -504,21 +604,17 @@ parse_IB (struct data_in *i) i->output->f = -(double) -value; } - apply_implied_decimals (i); - - return true; + return NULL; } /* Parses PIB format. */ -static bool +static char * parse_PIB (struct data_in *i) { i->output->f = integer_get (settings_get_input_integer_format (), ss_data (i->input), MIN (8, ss_length (i->input))); - apply_implied_decimals (i); - - return true; + return NULL; } /* Consumes the first character of S. Stores its high 4 bits in @@ -526,14 +622,14 @@ parse_PIB (struct data_in *i) static void get_nibbles (struct substring *s, int *high_nibble, int *low_nibble) { - int c = ss_get_char (s); + int c = ss_get_byte (s); assert (c != EOF); *high_nibble = (c >> 4) & 15; *low_nibble = c & 15; } /* Parses P format. */ -static bool +static char * parse_P (struct data_in *i) { int high_nibble, low_nibble; @@ -544,26 +640,24 @@ parse_P (struct data_in *i) { get_nibbles (&i->input, &high_nibble, &low_nibble); if (high_nibble > 9 || low_nibble > 9) - return false; + return xstrdup (_("Invalid syntax for P field.")); i->output->f = (100 * i->output->f) + (10 * high_nibble) + low_nibble; } get_nibbles (&i->input, &high_nibble, &low_nibble); if (high_nibble > 9) - return false; + return xstrdup (_("Invalid syntax for P field.")); i->output->f = (10 * i->output->f) + high_nibble; if (low_nibble < 10) i->output->f = (10 * i->output->f) + low_nibble; else if (low_nibble == 0xb || low_nibble == 0xd) i->output->f = -i->output->f; - apply_implied_decimals (i); - - return true; + return NULL; } /* Parses PK format. */ -static bool +static char * parse_PK (struct data_in *i) { i->output->f = 0.0; @@ -575,18 +669,16 @@ parse_PK (struct data_in *i) if (high_nibble > 9 || low_nibble > 9) { i->output->f = SYSMIS; - return true; + return NULL; } i->output->f = (100 * i->output->f) + (10 * high_nibble) + low_nibble; } - apply_implied_decimals (i); - - return true; + return NULL; } /* Parses RB format. */ -static bool +static char * parse_RB (struct data_in *i) { enum float_format ff = settings_get_input_float_format (); @@ -597,63 +689,54 @@ parse_RB (struct data_in *i) else i->output->f = SYSMIS; - return true; + return NULL; } /* Parses A format. */ -static bool +static char * parse_A (struct data_in *i) { /* This is equivalent to buf_copy_rpad, except that we posibly do a character set recoding in the middle. */ - char *dst = i->output->s; + uint8_t *dst = value_str_rw (i->output, i->width); size_t dst_size = i->width; const char *src = ss_data (i->input); size_t src_size = ss_length (i->input); - legacy_recode (i->encoding, src, LEGACY_NATIVE, dst, MIN (src_size, dst_size)); + memcpy (dst, src, MIN (src_size, dst_size)); + if (dst_size > src_size) memset (&dst[src_size], ' ', dst_size - src_size); - return true; + return NULL; } /* Parses AHEX format. */ -static bool +static char * parse_AHEX (struct data_in *i) { + uint8_t *s = value_str_rw (i->output, i->width); size_t j; for (j = 0; ; j++) { - int hi = ss_get_char (&i->input); - int lo = ss_get_char (&i->input); + int hi = ss_get_byte (&i->input); + int lo = ss_get_byte (&i->input); if (hi == EOF) break; else if (lo == EOF) - { - data_warning (i, _("Field must have even length.")); - return false; - } + return xstrdup (_("Field must have even length.")); - if (i->encoding != LEGACY_NATIVE) - { - hi = legacy_to_native (i->encoding, hi); - lo = legacy_to_native (i->encoding, lo); - } if (!c_isxdigit (hi) || !c_isxdigit (lo)) - { - data_warning (i, _("Field must contain only hex digits.")); - return false; - } + return xstrdup (_("Field must contain only hex digits.")); if (j < i->width) - i->output->s[j] = hexit_value (hi) * 16 + hexit_value (lo); + s[j] = hexit_value (hi) * 16 + hexit_value (lo); } - memset (i->output->s + j, ' ', i->width - j); + memset (&s[j], ' ', i->width - j); - return true; + return NULL; } /* Date & time format components. */ @@ -670,7 +753,7 @@ enum time_sign MAX_DIGITS characters in I, storing the result into *RESULT. Returns true if successful, false if no integer was present. */ -static bool +static char * WARN_UNUSED_RESULT parse_int (struct data_in *i, long *result, size_t max_digits) { struct substring head = ss_head (i->input, max_digits); @@ -678,28 +761,25 @@ parse_int (struct data_in *i, long *result, size_t max_digits) if (n) { ss_advance (&i->input, n); - return true; + return NULL; } else - { - data_warning (i, _("Syntax error in date field.")); - return false; - } + return xstrdup (_("Syntax error in date field.")); } /* Parses a date integer between 1 and 31 from I, storing it into *DAY. Returns true if successful, false if no date was present. */ -static bool +static char * parse_day (struct data_in *i, long *day) { - if (!parse_int (i, day, SIZE_MAX)) - return false; + char *error = parse_int (i, day, SIZE_MAX); + if (error != NULL) + return error; if (*day >= 1 && *day <= 31) - return true; + return NULL; - data_warning (i, _("Day (%ld) must be between 1 and 31."), *day); - return false; + return xasprintf (_("Day (%ld) must be between 1 and 31."), *day); } /* Parses an integer from the beginning of I. @@ -708,45 +788,43 @@ parse_day (struct data_in *i, long *day) If *TIME_SIGN is SIGN_NO_TIME, allows a sign to precede the time and sets *TIME_SIGN. Otherwise, does not allow a sign. Returns true if successful, false if no integer was present. */ -static bool +static char * parse_time_units (struct data_in *i, double seconds_per_unit, enum time_sign *time_sign, double *time) { + char *error; long units; if (*time_sign == SIGN_NO_TIME) { - if (ss_match_char (&i->input, '-')) + if (ss_match_byte (&i->input, '-')) *time_sign = SIGN_NEGATIVE; else { - ss_match_char (&i->input, '+'); + ss_match_byte (&i->input, '+'); *time_sign = SIGN_POSITIVE; } } - if (!parse_int (i, &units, SIZE_MAX)) - return false; + error = parse_int (i, &units, SIZE_MAX); + if (error != NULL) + return error; if (units < 0) - { - data_warning (i, _("Syntax error in date field.")); - return false; - } + return xstrdup (_("Syntax error in date field.")); *time += units * seconds_per_unit; - return true; + return NULL; } /* Parses a data delimiter from the beginning of I. Returns true if successful, false if no delimiter was present. */ -static bool +static char * parse_date_delimiter (struct data_in *i) { if (ss_ltrim (&i->input, ss_cstr ("-/.," CC_SPACES))) - return true; + return NULL; - data_warning (i, _("Delimiter expected between fields in date.")); - return false; + return xstrdup (_("Delimiter expected between fields in date.")); } /* Parses spaces at the beginning of I. */ @@ -760,7 +838,7 @@ static struct substring parse_name_token (struct data_in *i) { struct substring token; - ss_get_chars (&i->input, ss_span (i->input, ss_cstr (CC_LETTERS)), &token); + ss_get_bytes (&i->input, ss_span (i->input, ss_cstr (CC_LETTERS)), &token); return token; } @@ -787,15 +865,16 @@ match_name (struct substring token, const char *const *names, long *output) /* Parses a month name or number from the beginning of I, storing the month (in range 1...12) into *MONTH. Returns true if successful, false if no month was present. */ -static bool +static char * parse_month (struct data_in *i, long *month) { if (c_isdigit (ss_first (i->input))) { - if (!parse_int (i, month, SIZE_MAX)) - return false; + char *error = parse_int (i, month, SIZE_MAX); + if (error != NULL) + return error; if (*month >= 1 && *month <= 12) - return true; + return NULL; } else { @@ -816,22 +895,22 @@ parse_month (struct data_in *i, long *month) struct substring token = parse_name_token (i); if (match_name (ss_head (token, 3), english_names, month) || match_name (ss_head (token, 4), roman_names, month)) - return true; + return NULL; } - data_warning (i, _("Unrecognized month format. Months may be specified " - "as Arabic or Roman numerals or as at least 3 letters " - "of their English names.")); - return false; + return xstrdup (_("Unrecognized month format. Months may be specified " + "as Arabic or Roman numerals or as at least 3 letters " + "of their English names.")); } /* Parses a year of at most MAX_DIGITS from the beginning of I, storing a "4-digit" year into *YEAR. */ -static bool +static char * parse_year (struct data_in *i, long *year, size_t max_digits) { - if (!parse_int (i, year, max_digits)) - return false; + char *error = parse_int (i, year, max_digits); + if (error != NULL) + return error; if (*year >= 0 && *year <= 99) { @@ -844,147 +923,139 @@ parse_year (struct data_in *i, long *year, size_t max_digits) *year += epoch_century + 100; } if (*year >= 1582 || *year <= 19999) - return true; + return NULL; - data_warning (i, _("Year (%ld) must be between 1582 and 19999."), *year); - return false; + return xasprintf (_("Year (%ld) must be between 1582 and 19999."), *year); } /* Returns true if input in I has been exhausted, false otherwise. */ -static bool +static char * parse_trailer (struct data_in *i) { if (ss_is_empty (i->input)) - return true; + return NULL; - data_warning (i, _("Trailing garbage \"%.*s\" following date."), - (int) ss_length (i->input), ss_data (i->input)); - return false; + return xasprintf (_("Trailing garbage `%.*s' following date."), + (int) ss_length (i->input), ss_data (i->input)); } /* Parses a 3-digit Julian day-of-year value from I into *YDAY. Returns true if successful, false on failure. */ -static bool +static char * parse_yday (struct data_in *i, long *yday) { struct substring num_s; long num; - ss_get_chars (&i->input, 3, &num_s); + ss_get_bytes (&i->input, 3, &num_s); if (ss_span (num_s, ss_cstr (CC_DIGITS)) != 3) - { - data_warning (i, _("Julian day must have exactly three digits.")); - return false; - } + return xstrdup (_("Julian day must have exactly three digits.")); else if (!ss_get_long (&num_s, &num) || num < 1 || num > 366) - { - data_warning (i, _("Julian day (%ld) must be between 1 and 366."), num); - return false; - } + return xasprintf (_("Julian day (%ld) must be between 1 and 366."), num); *yday = num; - return true; + return NULL; } /* Parses a quarter-of-year integer between 1 and 4 from I. Stores the corresponding month into *MONTH. Returns true if successful, false if no quarter was present. */ -static bool +static char * parse_quarter (struct data_in *i, long int *month) { long quarter; + char *error; - if (!parse_int (i, &quarter, SIZE_MAX)) - return false; + error = parse_int (i, &quarter, SIZE_MAX); + if (error != NULL) + return error; if (quarter >= 1 && quarter <= 4) { *month = (quarter - 1) * 3 + 1; - return true; + return NULL; } - data_warning (i, _("Quarter (%ld) must be between 1 and 4."), quarter); - return false; + return xasprintf (_("Quarter (%ld) must be between 1 and 4."), quarter); } /* Parses a week-of-year integer between 1 and 53 from I, Stores the corresponding year-of-day into *YDAY. Returns true if successful, false if no week was present. */ -static bool +static char * parse_week (struct data_in *i, long int *yday) { + char *error; long week; - if (!parse_int (i, &week, SIZE_MAX)) - return false; + error = parse_int (i, &week, SIZE_MAX); + if (error != NULL) + return error; if (week >= 1 && week <= 53) { *yday = (week - 1) * 7 + 1; - return true; + return NULL; } - data_warning (i, _("Week (%ld) must be between 1 and 53."), week); - return false; + return xasprintf (_("Week (%ld) must be between 1 and 53."), week); } /* Parses a time delimiter from the beginning of I. Returns true if successful, false if no delimiter was present. */ -static bool +static char * parse_time_delimiter (struct data_in *i) { if (ss_ltrim (&i->input, ss_cstr (":" CC_SPACES)) > 0) - return true; + return NULL; - data_warning (i, _("Delimiter expected between fields in time.")); - return false; + return xstrdup (_("Delimiter expected between fields in time.")); } /* Parses minutes and optional seconds from the beginning of I. The time is converted into seconds, which are added to *TIME. Returns true if successful, false if an error was found. */ -static bool +static char * parse_minute_second (struct data_in *i, double *time) { long minute; char buf[64]; + char *error; char *cp; /* Parse minutes. */ - if (!parse_int (i, &minute, SIZE_MAX)) - return false; + error = parse_int (i, &minute, SIZE_MAX); + if (error != NULL) + return error; if (minute < 0 || minute > 59) - { - data_warning (i, _("Minute (%ld) must be between 0 and 59."), minute); - return false; - } + return xasprintf (_("Minute (%ld) must be between 0 and 59."), minute); *time += 60. * minute; /* Check for seconds. */ if (ss_ltrim (&i->input, ss_cstr (":" CC_SPACES)) == 0 || !c_isdigit (ss_first (i->input))) - return true; + return NULL; /* Parse seconds. */ cp = buf; while (c_isdigit (ss_first (i->input))) - *cp++ = ss_get_char (&i->input); - if (ss_match_char (&i->input, settings_get_decimal_char (FMT_F))) + *cp++ = ss_get_byte (&i->input); + if (ss_match_byte (&i->input, settings_get_decimal_char (FMT_F))) *cp++ = '.'; while (c_isdigit (ss_first (i->input))) - *cp++ = ss_get_char (&i->input); + *cp++ = ss_get_byte (&i->input); *cp = '\0'; *time += strtod (buf, NULL); - return true; + return NULL; } /* Parses a weekday name from the beginning of I, storing a value of 1=Sunday...7=Saturday into *WEEKDAY. Returns true if successful, false if an error was found. */ -static bool +static char * parse_weekday (struct data_in *i, long *weekday) { static const char *const weekday_names[] = @@ -996,64 +1067,53 @@ parse_weekday (struct data_in *i, long *weekday) struct substring token = parse_name_token (i); bool ok = match_name (ss_head (token, 2), weekday_names, weekday); if (!ok) - data_warning (i, _("Unrecognized weekday name. At least the first two " - "letters of an English weekday name must be " - "specified.")); - return ok; + return xstrdup (_("Unrecognized weekday name. At least the first two " + "letters of an English weekday name must be " + "specified.")); + return NULL; } /* Date & time formats. */ -/* Helper function for passing to - calendar_gregorian_to_offset. */ -static void -calendar_error (void *i_, const char *format, ...) -{ - struct data_in *i = i_; - va_list args; - - va_start (args, format); - vdata_warning (i, format, args); - va_end (args); -} - /* Parses WKDAY format. */ -static bool +static char * parse_WKDAY (struct data_in *i) { long weekday; + char *error; if (trim_spaces_and_check_missing (i)) - return true; + return NULL; - if (!parse_weekday (i, &weekday) - || !parse_trailer (i)) - return false; + error = parse_weekday (i, &weekday); + if (error == NULL) + error = parse_trailer (i); i->output->f = weekday; - return true; + return error; } /* Parses MONTH format. */ -static bool +static char * parse_MONTH (struct data_in *i) { long month; + char *error; if (trim_spaces_and_check_missing (i)) - return true; + return NULL; - if (!parse_month (i, &month) - || !parse_trailer (i)) - return false; + error = parse_month (i, &month); + if (error == NULL) + error = parse_trailer (i); i->output->f = month; - return true; + return error; } /* Parses DATE, ADATE, EDATE, JDATE, SDATE, QYR, MOYR, KWYR, DATETIME, TIME and DTIME formats. */ -static bool +static char * parse_date (struct data_in *i) { long int year = INT_MIN; @@ -1065,28 +1125,27 @@ parse_date (struct data_in *i) const char *template = fmt_date_template (i->format); size_t template_width = strlen (template); + char *error; if (trim_spaces_and_check_missing (i)) - return true; + return NULL; while (*template != '\0') { unsigned char ch = *template; int count = 1; - bool ok; while (template[count] == ch) count++; template += count; - ok = true; switch (ch) { case 'd': - ok = count < 3 ? parse_day (i, &day) : parse_yday (i, &yday); + error = count < 3 ? parse_day (i, &day) : parse_yday (i, &yday); break; case 'm': - ok = parse_month (i, &month); + error = parse_month (i, &month); break; case 'y': { @@ -1100,117 +1159,71 @@ parse_date (struct data_in *i) else max_digits = 2; } - ok = parse_year (i, &year, max_digits); + error = parse_year (i, &year, max_digits); } break; case 'q': - ok = parse_quarter (i, &month); + error = parse_quarter (i, &month); break; case 'w': - ok = parse_week (i, &yday); + error = parse_week (i, &yday); break; case 'D': - ok = parse_time_units (i, 60. * 60. * 24., &time_sign, &time); + error = parse_time_units (i, 60. * 60. * 24., &time_sign, &time); break; case 'H': - ok = parse_time_units (i, 60. * 60., &time_sign, &time); + error = parse_time_units (i, 60. * 60., &time_sign, &time); break; case 'M': - ok = parse_minute_second (i, &time); + error = parse_minute_second (i, &time); break; case '-': case '/': case '.': case 'X': - ok = parse_date_delimiter (i); + error = parse_date_delimiter (i); break; case ':': - ok = parse_time_delimiter (i); + error = parse_time_delimiter (i); case ' ': parse_spaces (i); + error = NULL; break; default: assert (count == 1); - if (!ss_match_char (&i->input, c_toupper (ch)) - && !ss_match_char (&i->input, c_tolower (ch))) - { - data_warning (i, _("`%c' expected in date field."), ch); - return false; - } + if (!ss_match_byte (&i->input, c_toupper (ch)) + && !ss_match_byte (&i->input, c_tolower (ch))) + error = xasprintf (_("`%c' expected in date field."), ch); + else + error = NULL; break; } - if (!ok) - return false; + if (error != NULL) + return error; } - if (!parse_trailer (i)) - return false; + error = parse_trailer (i); + if (error != NULL) + return error; if (year != INT_MIN) { - double ofs = calendar_gregorian_to_offset (year, month, day, - calendar_error, i); + char *error; + double ofs; + + ofs = calendar_gregorian_to_offset (year, month, day, &error); if (ofs == SYSMIS) - return false; + return error; date = (yday - 1 + ofs) * 60. * 60. * 24.; } else date = 0.; i->output->f = date + (time_sign == SIGN_NEGATIVE ? -time : time); - return true; + return NULL; } /* Utility functions. */ -/* Outputs FORMAT with the given ARGS as a warning for input - I. */ -static void -vdata_warning (const struct data_in *i, const char *format, va_list args) -{ - struct msg m; - struct string text; - - ds_init_empty (&text); - ds_put_char (&text, '('); - if (i->first_column != 0) - { - if (i->first_column == i->last_column - 1) - ds_put_format (&text, _("column %d"), i->first_column); - else - ds_put_format (&text, _("columns %d-%d"), - i->first_column, i->last_column - 1); - ds_put_cstr (&text, ", "); - } - ds_put_format (&text, _("%s field) "), fmt_name (i->format)); - ds_put_vformat (&text, format, args); - - m.category = MSG_DATA; - m.severity = MSG_WARNING; - m.text = ds_cstr (&text); - - msg_emit (&m); -} - -/* Outputs FORMAT with the given ARGS as a warning for input - I. */ -static void -data_warning (const struct data_in *i, const char *format, ...) -{ - va_list args; - - va_start (args, format); - vdata_warning (i, format, args); - va_end (args); -} - -/* Apply implied decimal places to output. */ -static void -apply_implied_decimals (struct data_in *i) -{ - if (i->implied_decimals > 0) - i->output->f /= pow (10., i->implied_decimals); -} - /* Sets the default result for I. For a numeric format, this is the value set on SET BLANKS (typically system-missing); for a string format, it is all @@ -1219,7 +1232,7 @@ static void default_result (struct data_in *i) { if (fmt_is_string (i->format)) - memset (i->output->s, ' ', i->width); + memset (value_str_rw (i->output, i->width), ' ', i->width); else i->output->f = settings_get_blanks (); }