X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fdata-in.c;h=b6fa5167f008a8aaa471b3d3fe9762fed7301fb5;hb=bdebbd4db2d6c539eadb145f726382fe338e4219;hp=aa8041d8a814b58c9dd11f94f509885a56c0585e;hpb=2322678e8fddbbf158b01b2720db2636404bba3b;p=pspp-builds.git diff --git a/src/data/data-in.c b/src/data/data-in.c index aa8041d8..b6fa5167 100644 --- a/src/data/data-in.c +++ b/src/data/data-in.c @@ -1,1436 +1,1265 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. - Written by Ben Pfaff . +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ #include + #include "data-in.h" -#include "message.h" -#include + #include +#include +#include +#include #include +#include #include +#include #include #include -#include -#include "message.h" + #include "calendar.h" -#include "compiler.h" +#include "dictionary.h" +#include "format.h" #include "identifier.h" -#include "magic.h" -#include "misc.h" +#include "libpspp/assertion.h" +#include "libpspp/compiler.h" +#include "libpspp/i18n.h" +#include "libpspp/integer-format.h" +#include "libpspp/misc.h" +#include "libpspp/str.h" #include "settings.h" -#include "str.h" -#include "variable.h" +#include "value.h" + +#include "gl/c-ctype.h" +#include "gl/c-strtod.h" +#include "gl/minmax.h" +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) - -#include "debug-print.h" -/* Specialized error routine. */ +/* Information about parsing one data field. */ +struct data_in + { + struct substring input; /* Source. */ + enum fmt_type format; /* Input format. */ -static void dls_error (const struct data_in *, const char *format, ...) - PRINTF_FORMAT (2, 3); + union value *output; /* Destination. */ + int width; /* Output width. */ + }; -static void -vdls_error (const struct data_in *i, const char *format, va_list args) +typedef char *data_in_parser_func (struct data_in *); +#define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) \ + static data_in_parser_func parse_##METHOD; +#include "format.def" + +static void default_result (struct data_in *); +static bool trim_spaces_and_check_missing (struct data_in *); + +static int hexit_value (int c); + +/* Parses the characters in INPUT, which are encoded in the given + INPUT_ENCODING, according to FORMAT. + + Stores the parsed representation in OUTPUT, which the caller must have + initialized with the given WIDTH (0 for a numeric field, otherwise the + string width). If FORMAT is FMT_A, then OUTPUT_ENCODING must specify the + correct encoding for OUTPUT (normally obtained via dict_get_encoding()). */ +char * +data_in (struct substring input, const char *input_encoding, + enum fmt_type format, + union value *output, int width, const char *output_encoding) { - struct error e; - struct string title; + static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] = + { +#define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) parse_##METHOD, +#include "format.def" + }; + + struct data_in i; + + enum fmt_category cat; + const char *dest_encoding; + char *s; + char *error; + + assert ((width != 0) == fmt_is_string (format)); + + i.format = format; - if (i->flags & DI_IGNORE_ERROR) - return; + i.output = output; + i.width = width; + + if (ss_is_empty (input)) + { + default_result (&i); + return NULL; + } + + cat = fmt_get_category (format); + if (cat & (FMT_CAT_BASIC | FMT_CAT_HEXADECIMAL + | FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)) + { + /* We're going to parse these into numbers. For this purpose we want to + deal with them in the local "C" encoding. Any character not in that + encoding wouldn't be valid anyhow. */ + dest_encoding = C_ENCODING; + } + else if (cat & (FMT_CAT_BINARY | FMT_CAT_LEGACY)) + { + /* Don't recode these binary formats at all, since they are not text. */ + dest_encoding = NULL; + } + else + { + assert (cat == FMT_CAT_STRING); + if (format == FMT_AHEX) + { + /* We want the hex digits in the local "C" encoding, even though the + result may not be in that encoding. */ + dest_encoding = C_ENCODING; + } + else + { + /* Use the final output encoding. */ + dest_encoding = output_encoding; + } + } - ds_init (&title, 64); - if (i->f1 == i->f2) - ds_printf (&title, _("(column %d"), i->f1); + if (dest_encoding != NULL) + { + i.input = recode_substring_pool (dest_encoding, input_encoding, input, + NULL); + s = i.input.string; + } else - ds_printf (&title, _("(columns %d-%d"), i->f1, i->f2); - ds_printf (&title, _(", field type %s) "), fmt_to_string (&i->format)); - - e.class = DE; - err_location (&e.where); - e.title = ds_c_str (&title); + { + i.input = input; + s = NULL; + } + + error = handlers[i.format] (&i); + if (error != NULL) + default_result (&i); - err_vmsg (&e, format, args); + free (s); - ds_destroy (&title); + return error; } -static void -dls_error (const struct data_in *i, const char *format, ...) +bool +data_in_msg (struct substring input, const char *input_encoding, + enum fmt_type format, + union value *output, int width, const char *output_encoding) { - va_list args; - - va_start (args, format); - vdls_error (i, format, args); - va_end (args); + char *error = data_in (input, input_encoding, format, + output, width, output_encoding); + if (error != NULL) + { + msg (SW,_("Data is not valid as format %s: %s"), + fmt_name (format), error); + free (error); + return false; + } + else + return true; } - -/* Parsing utility functions. */ -/* Excludes leading and trailing whitespace from I by adjusting - pointers. */ -static void -trim_whitespace (struct data_in *i) +static bool +number_has_implied_decimals (const char *s, enum fmt_type type) { - while (i->s < i->e && isspace ((unsigned char) i->s[0])) - i->s++; + int decimal = settings_get_style (type)->decimal; + bool got_digit = false; + for (;;) + { + switch (*s) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + got_digit = true; + break; - while (i->s < i->e && isspace ((unsigned char) i->e[-1])) - i->e--; + case '+': case '-': + if (got_digit) + return false; + break; + + case 'e': case 'E': case 'd': case 'D': + return false; + + case '.': case ',': + if (*s == decimal) + return false; + break; + + case '\0': + return true; + + default: + break; + } + + s++; + } } -/* Returns nonzero if we're not at the end of the string being - parsed. */ -static inline bool -have_char (struct data_in *i) +static bool +has_implied_decimals (struct substring input, const char *input_encoding, + enum fmt_type format) { - return i->s < i->e; + bool retval; + char *s; + + switch (format) + { + case FMT_F: + case FMT_COMMA: + case FMT_DOT: + case FMT_DOLLAR: + case FMT_PCT: + case FMT_E: + case FMT_Z: + break; + + case FMT_N: + case FMT_IB: + case FMT_PIB: + case FMT_P: + case FMT_PK: + return true; + + default: + return false; + } + + s = recode_string (C_ENCODING, input_encoding, + ss_data (input), ss_length (input)); + retval = (format == FMT_Z + ? strchr (s, '.') == NULL + : number_has_implied_decimals (s, format)); + free (s); + + return retval; } -/* If implied decimal places are enabled, apply them to - I->v->f. */ -static void -apply_implied_decimals (struct data_in *i) +/* In some cases, when no decimal point is explicitly included in numeric + input, its position is implied by the number of decimal places in the input + format. In such a case, this function may be called just after data_in(). + Its arguments are a subset of that function's arguments plus D, the number + of decimal places associated with FORMAT. + + If it is appropriate, this function modifies the numeric value in OUTPUT. */ +void +data_in_imply_decimals (struct substring input, const char *input_encoding, + enum fmt_type format, int d, union value *output) { - if ((i->flags & DI_IMPLIED_DECIMALS) && i->format.d > 0) - i->v->f /= pow (10., i->format.d); + if (d > 0 && output->f != SYSMIS + && has_implied_decimals (input, input_encoding, format)) + output->f /= pow (10., d); } -/* Format parsers. */ +/* Format parsers. */ -static bool parse_int (struct data_in *i, long *result); - -/* This function is based on strtod() from the GNU C library. */ -static bool -parse_numeric (struct data_in *i) +/* Parses F, COMMA, DOT, DOLLAR, PCT, and E input formats. */ +static char * +parse_number (struct data_in *i) { - int sign; /* +1 or -1. */ - double num; /* The number so far. */ + const struct fmt_number_style *style = + settings_get_style (i->format); - bool got_dot; /* Found a decimal point. */ - size_t digit_cnt; /* Count of digits. */ + struct string tmp; - int decimal; /* Decimal point character. */ - int grouping; /* Grouping character. */ + bool explicit_decimals = false; + int save_errno; + char *tail; + + if (fmt_get_category (i->format) == FMT_CAT_CUSTOM) + { + style = settings_get_style (FMT_F); + } - long int exponent; /* Number's exponent. */ - int type; /* Usually same as i->format.type. */ + /* Trim spaces and check for missing value representation. */ + if (trim_spaces_and_check_missing (i)) + return NULL; - trim_whitespace (i); + ds_init_empty (&tmp); + ds_extend (&tmp, 64); - type = i->format.type; - if (type == FMT_DOLLAR && have_char (i) && *i->s == '$') + /* Prefix character may precede sign. */ + if (!ss_is_empty (style->prefix)) { - i->s++; - type = FMT_COMMA; + ss_match_byte (&i->input, ss_first (style->prefix)); + ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } - /* Get the sign. */ - if (have_char (i)) + /* Sign. */ + if (ss_match_byte (&i->input, '-')) { - sign = *i->s == '-' ? -1 : 1; - if (*i->s == '-' || *i->s == '+') - i->s++; + ds_put_byte (&tmp, '-'); + ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } else - sign = 1; - - if (type != FMT_DOT) { - decimal = get_decimal(); - grouping = get_grouping(); + ss_match_byte (&i->input, '+'); + ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } - else + + /* Prefix character may follow sign. */ + if (!ss_is_empty (style->prefix)) { - decimal = get_grouping(); - grouping = get_decimal(); + ss_match_byte (&i->input, ss_first (style->prefix)); + ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } - i->v->f = SYSMIS; - num = 0.0; - got_dot = false; - digit_cnt = 0; - exponent = 0; - for (; have_char (i); i->s++) + /* Digits before decimal point. */ + while (c_isdigit (ss_first (i->input))) { - if (isdigit ((unsigned char) *i->s)) - { - digit_cnt++; - - /* Make sure that multiplication by 10 will not overflow. */ - if (num > DBL_MAX * 0.1) - /* The value of the digit doesn't matter, since we have already - gotten as many digits as can be represented in a `double'. - This doesn't necessarily mean the result will overflow. - The exponent may reduce it to within range. - - We just need to record that there was another - digit so that we can multiply by 10 later. */ - ++exponent; - else - num = (num * 10.0) + (*i->s - '0'); - - /* Keep track of the number of digits after the decimal point. - If we just divided by 10 here, we would lose precision. */ - if (got_dot) - --exponent; - } - else if (!got_dot && *i->s == decimal) - /* Record that we have found the decimal point. */ - got_dot = true; - else if ((type != FMT_COMMA && type != FMT_DOT) || *i->s != grouping) - /* Any other character terminates the number. */ - break; + ds_put_byte (&tmp, ss_get_byte (&i->input)); + if (style->grouping != 0) + ss_match_byte (&i->input, style->grouping); } - if (!digit_cnt) + /* Decimal point and following digits. */ + if (ss_match_byte (&i->input, style->decimal)) { - if (got_dot) - { - i->v->f = SYSMIS; - return true; - } - dls_error (i, _("Field does not form a valid floating-point constant.")); - i->v->f = SYSMIS; - return false; + explicit_decimals = true; + ds_put_byte (&tmp, '.'); + while (c_isdigit (ss_first (i->input))) + ds_put_byte (&tmp, ss_get_byte (&i->input)); } - - if (have_char (i) && strchr ("eEdD-+", *i->s)) + + /* Exponent. */ + if (!ds_is_empty (&tmp) + && !ss_is_empty (i->input) + && strchr ("eEdD-+", ss_first (i->input))) { - /* Get the exponent specified after the `e' or `E'. */ - long exp; + explicit_decimals = true; + ds_put_byte (&tmp, 'e'); - if (isalpha ((unsigned char) *i->s)) - i->s++; - if (!parse_int (i, &exp)) + if (strchr ("eEdD", ss_first (i->input))) { - i->v->f = SYSMIS; - return false; + ss_advance (&i->input, 1); + ss_match_byte (&i->input, ' '); + } + + if (ss_first (i->input) == '-' || ss_first (i->input) == '+') + { + if (ss_get_byte (&i->input) == '-') + ds_put_byte (&tmp, '-'); + ss_match_byte (&i->input, ' '); } - exponent += exp; + while (c_isdigit (ss_first (i->input))) + ds_put_byte (&tmp, ss_get_byte (&i->input)); } - else if (!got_dot && (i->flags & DI_IMPLIED_DECIMALS)) - exponent -= i->format.d; - if (type == FMT_PCT && have_char (i) && *i->s == '%') - i->s++; - if (i->s < i->e) + /* Suffix character. */ + if (!ss_is_empty (style->suffix)) + ss_match_byte (&i->input, ss_first (style->suffix)); + + if (!ss_is_empty (i->input)) { - dls_error (i, _("Field contents followed by garbage.")); - i->v->f = SYSMIS; - return false; + char *error; + if (ds_is_empty (&tmp)) + error = xstrdup (_("Field contents are not numeric.")); + else + error = xstrdup (_("Number followed by garbage.")); + ds_destroy (&tmp); + return error; } - if (num == 0.0) + /* Let c_strtod() do the conversion. */ + save_errno = errno; + errno = 0; + i->output->f = c_strtod (ds_cstr (&tmp), &tail); + if (*tail != '\0') { - i->v->f = 0.0; - return true; + errno = save_errno; + ds_destroy (&tmp); + return xstrdup (_("Invalid numeric syntax.")); } - - /* Multiply NUM by 10 to the EXPONENT power, checking for overflow - and underflow. */ - if (exponent < 0) + else if (errno == ERANGE) { - if (-exponent + digit_cnt > -(DBL_MIN_10_EXP) + 5 - || num < DBL_MIN * pow (10.0, (double) -exponent)) + if (fabs (i->output->f) > 1) { - dls_error (i, _("Underflow in floating-point constant.")); - i->v->f = 0.0; - return false; + i->output->f = SYSMIS; + ds_destroy (&tmp); + return xstrdup (_("Too-large number set to system-missing.")); } - - num *= pow (10.0, (double) exponent); - } - else if (exponent > 0) - { - if (num > DBL_MAX * pow (10.0, (double) -exponent)) + else { - dls_error (i, _("Overflow in floating-point constant.")); - i->v->f = SYSMIS; - return false; + i->output->f = 0.0; + ds_destroy (&tmp); + return xstrdup (_("Too-small number set to zero.")); } - - num *= pow (10.0, (double) exponent); } + else + errno = save_errno; - i->v->f = sign > 0 ? num : -num; - return true; + ds_destroy (&tmp); + return NULL; } -/* Returns the integer value of hex digit C. */ -static inline int -hexit_value (int c) -{ - const char s[] = "0123456789abcdef"; - const char *cp = strchr (s, tolower ((unsigned char) c)); - - assert (cp != NULL); - return cp - s; -} - -static inline bool +/* Parses N format. */ +static char * parse_N (struct data_in *i) { - const char *cp; + int c; - i->v->f = 0; - for (cp = i->s; cp < i->e; cp++) + i->output->f = 0; + while ((c = ss_get_byte (&i->input)) != EOF) { - if (!isdigit ((unsigned char) *cp)) - { - dls_error (i, _("All characters in field must be digits.")); - return false; - } - - i->v->f = i->v->f * 10.0 + (*cp - '0'); + if (!c_isdigit (c)) + return xstrdup (_("All characters in field must be digits.")); + i->output->f = i->output->f * 10.0 + (c - '0'); } - apply_implied_decimals (i); - return true; + return NULL; } -static inline bool +/* Parses PIBHEX format. */ +static char * parse_PIBHEX (struct data_in *i) { double n; - const char *cp; - - trim_whitespace (i); + int c; n = 0.0; - for (cp = i->s; cp < i->e; cp++) - { - if (!isxdigit ((unsigned char) *cp)) - { - dls_error (i, _("Unrecognized character in field.")); - return false; - } - n = n * 16.0 + hexit_value (*cp); + while ((c = ss_get_byte (&i->input)) != EOF) + { + if (!c_isxdigit (c)) + return xstrdup (_("Unrecognized character in field.")); + n = n * 16.0 + hexit_value (c); } - - i->v->f = n; - return true; + + i->output->f = n; + return NULL; } -static inline bool +/* Parses RBHEX format. */ +static char * parse_RBHEX (struct data_in *i) { - /* Validate input. */ - trim_whitespace (i); - if ((i->e - i->s) % 2) + double d; + size_t j; + + memset (&d, 0, sizeof d); + for (j = 0; !ss_is_empty (i->input) && j < sizeof d; j++) { - dls_error (i, _("Field must have even length.")); - return false; + int hi = ss_get_byte (&i->input); + int lo = ss_get_byte (&i->input); + if (lo == EOF) + return xstrdup (_("Field must have even length.")); + else if (!c_isxdigit (hi) || !c_isxdigit (lo)) + return xstrdup (_("Field must contain only hex digits.")); + ((unsigned char *) &d)[j] = 16 * hexit_value (hi) + hexit_value (lo); } - - { - const char *cp; - - for (cp = i->s; cp < i->e; cp++) - if (!isxdigit ((unsigned char) *cp)) - { - dls_error (i, _("Field must contain only hex digits.")); - return false; - } - } - - /* Parse input. */ - { - union - { - double d; - unsigned char c[sizeof (double)]; - } - u; - int j; + i->output->f = d; + + return NULL; +} - memset (u.c, 0, sizeof u.c); - for (j = 0; j < min ((i->e - i->s) / 2, sizeof u.d); j++) - u.c[j] = 16 * hexit_value (i->s[j * 2]) + hexit_value (i->s[j * 2 + 1]); +/* Digits for Z format. */ +static const char z_digits[] = "0123456789{ABCDEFGHI}JKLMNOPQR"; - i->v->f = u.d; - } - - return true; +/* Returns true if C is a Z format digit, false otherwise. */ +static bool +is_z_digit (int c) +{ + return c > 0 && strchr (z_digits, c) != NULL; } -static inline bool -parse_Z (struct data_in *i) +/* Returns the (absolute value of the) value of C as a Z format + digit. */ +static int +z_digit_value (int c) { - char buf[64]; - bool got_dot = false; + assert (is_z_digit (c)); + return (strchr (z_digits, c) - z_digits) % 10; +} - /* Warn user that we suck. */ - { - static bool warned; +/* Returns true if Z format digit C represents a negative value, + false otherwise. */ +static bool +is_negative_z_digit (int c) +{ + assert (is_z_digit (c)); + return (strchr (z_digits, c) - z_digits) >= 20; +} - if (!warned) - { - msg (MW, - _("Quality of zoned decimal (Z) input format code is " - "suspect. Check your results three times. Report bugs " - "to %s."),PACKAGE_BUGREPORT); - warned = true; - } - } +/* Parses Z format. */ +static char * +parse_Z (struct data_in *i) +{ + struct string tmp; - /* Validate input. */ - trim_whitespace (i); + int save_errno; - if (i->e - i->s < 2) - { - dls_error (i, _("Zoned decimal field contains fewer than 2 " - "characters.")); - return false; - } + bool got_dot = false; + bool got_final_digit = false; - /* Copy sign into buf[0]. */ - if ((i->e[-1] & 0xc0) != 0xc0) - { - dls_error (i, _("Bad sign byte in zoned decimal number.")); - return false; - } - buf[0] = (i->e[-1] ^ (i->e[-1] >> 1)) & 0x10 ? '-' : '+'; + /* Trim spaces and check for missing value representation. */ + if (trim_spaces_and_check_missing (i)) + return NULL; - /* Copy digits into buf[1 ... len - 1] and terminate string. */ - { - const char *sp; - char *dp; + ds_init_empty (&tmp); + ds_extend (&tmp, 64); - for (sp = i->s, dp = buf + 1; sp < i->e - 1; sp++, dp++) - if (*sp == '.') + ds_put_byte (&tmp, '+'); + while (!ss_is_empty (i->input)) + { + int c = ss_get_byte (&i->input); + if (c_isdigit (c) && !got_final_digit) + ds_put_byte (&tmp, c); + else if (is_z_digit (c) && !got_final_digit) + { + ds_put_byte (&tmp, z_digit_value (c) + '0'); + if (is_negative_z_digit (c)) + ds_data (&tmp)[0] = '-'; + got_final_digit = true; + } + else if (c == '.' && !got_dot) { - *dp = '.'; + ds_put_byte (&tmp, '.'); got_dot = true; } - else if ((*sp & 0xf0) == 0xf0 && (*sp & 0xf) < 10) - *dp = (*sp & 0xf) + '0'; else - { - dls_error (i, _("Format error in zoned decimal number.")); - return false; - } + { + ds_destroy (&tmp); + return xstrdup (_("Invalid zoned decimal syntax.")); + } + } - *dp = '\0'; - } + if (!ss_is_empty (i->input)) + { + char *error; - /* Parse as number. */ - { - char *tail; - - i->v->f = strtod (buf, &tail); - if (tail != i->e) - { - dls_error (i, _("Error in syntax of zoned decimal number.")); - return false; - } - } + if (ds_length (&tmp) == 1) + error = xstrdup (_("Field contents are not numeric.")); + else + error = xstrdup (_("Number followed by garbage.")); - if (!got_dot) - apply_implied_decimals (i); + ds_destroy (&tmp); + return error; + } - return true; + /* Let c_strtod() do the conversion. */ + save_errno = errno; + errno = 0; + i->output->f = c_strtod (ds_cstr (&tmp), NULL); + if (errno == ERANGE) + { + if (fabs (i->output->f) > 1) + { + i->output->f = SYSMIS; + ds_destroy (&tmp); + return xstrdup (_("Too-large number set to system-missing.")); + } + else + { + i->output->f = 0.0; + ds_destroy (&tmp); + return xstrdup (_("Too-small number set to zero.")); + } + } + else + errno = save_errno; + + ds_destroy (&tmp); + return NULL; } -static inline bool +/* Parses IB format. */ +static char * parse_IB (struct data_in *i) { -#ifndef WORDS_BIGENDIAN - char buf[64]; -#endif - const unsigned char *p; - - unsigned char xor; - - /* We want the data to be in big-endian format. If this is a - little-endian machine, reverse the byte order. */ -#ifdef WORDS_BIGENDIAN - p = (const unsigned char *) i->s; -#else - memcpy (buf, i->s, i->e - i->s); - buf_reverse (buf, i->e - i->s); - p = (const unsigned char *) buf; -#endif - - /* If the value is negative, we need to logical-NOT each value - before adding it. */ - if (p[0] & 0x80) - xor = 0xff; - else - xor = 0x00; - - { - int j; + size_t bytes; + uint64_t value; + uint64_t sign_bit; - i->v->f = 0.0; - for (j = 0; j < i->e - i->s; j++) - i->v->f = i->v->f * 256.0 + (p[j] ^ xor); - } + bytes = MIN (8, ss_length (i->input)); + value = integer_get (settings_get_input_integer_format (), ss_data (i->input), bytes); - /* If the value is negative, add 1 and set the sign, to complete a - two's-complement negation. */ - if (p[0] & 0x80) - i->v->f = -(i->v->f + 1.0); - - apply_implied_decimals (i); + sign_bit = UINT64_C(1) << (8 * bytes - 1); + if (!(value & sign_bit)) + i->output->f = value; + else + { + /* Sign-extend to full 64 bits. */ + value -= sign_bit << 1; + i->output->f = -(double) -value; + } - return true; + return NULL; } -static inline bool +/* Parses PIB format. */ +static char * parse_PIB (struct data_in *i) { - int j; - - i->v->f = 0.0; -#if WORDS_BIGENDIAN - for (j = 0; j < i->e - i->s; j++) - i->v->f = i->v->f * 256.0 + (unsigned char) i->s[j]; -#else - for (j = i->e - i->s - 1; j >= 0; j--) - i->v->f = i->v->f * 256.0 + (unsigned char) i->s[j]; -#endif + i->output->f = integer_get (settings_get_input_integer_format (), ss_data (i->input), + MIN (8, ss_length (i->input))); - apply_implied_decimals (i); + return NULL; +} - return true; +/* Consumes the first character of S. Stores its high 4 bits in + HIGH_NIBBLE and its low 4 bits in LOW_NIBBLE. */ +static void +get_nibbles (struct substring *s, int *high_nibble, int *low_nibble) +{ + int c = ss_get_byte (s); + assert (c != EOF); + *high_nibble = (c >> 4) & 15; + *low_nibble = c & 15; } -static inline bool +/* Parses P format. */ +static char * parse_P (struct data_in *i) { - const char *cp; + int high_nibble, low_nibble; + + i->output->f = 0.0; - i->v->f = 0.0; - for (cp = i->s; cp < i->e - 1; cp++) + while (ss_length (i->input) > 1) { - i->v->f = i->v->f * 10 + ((*cp >> 4) & 15); - i->v->f = i->v->f * 10 + (*cp & 15); + get_nibbles (&i->input, &high_nibble, &low_nibble); + if (high_nibble > 9 || low_nibble > 9) + return xstrdup (_("Invalid syntax for P field.")); + i->output->f = (100 * i->output->f) + (10 * high_nibble) + low_nibble; } - i->v->f = i->v->f * 10 + ((*cp >> 4) & 15); - if ((*cp ^ (*cp >> 1)) & 0x10) - i->v->f = -i->v->f; - apply_implied_decimals (i); + get_nibbles (&i->input, &high_nibble, &low_nibble); + if (high_nibble > 9) + return xstrdup (_("Invalid syntax for P field.")); + i->output->f = (10 * i->output->f) + high_nibble; + if (low_nibble < 10) + i->output->f = (10 * i->output->f) + low_nibble; + else if (low_nibble == 0xb || low_nibble == 0xd) + i->output->f = -i->output->f; - return true; + return NULL; } -static inline bool +/* Parses PK format. */ +static char * parse_PK (struct data_in *i) { - const char *cp; - - i->v->f = 0.0; - for (cp = i->s; cp < i->e; cp++) + i->output->f = 0.0; + while (!ss_is_empty (i->input)) { - i->v->f = i->v->f * 10 + ((*cp >> 4) & 15); - i->v->f = i->v->f * 10 + (*cp & 15); - } + int high_nibble, low_nibble; - apply_implied_decimals (i); + get_nibbles (&i->input, &high_nibble, &low_nibble); + if (high_nibble > 9 || low_nibble > 9) + { + i->output->f = SYSMIS; + return NULL; + } + i->output->f = (100 * i->output->f) + (10 * high_nibble) + low_nibble; + } - return true; + return NULL; } -static inline bool +/* Parses RB format. */ +static char * parse_RB (struct data_in *i) { - union - { - double d; - unsigned char c[sizeof (double)]; - } - u; - - memset (u.c, 0, sizeof u.c); - memcpy (u.c, i->s, min (sizeof u.c, (size_t) (i->e - i->s))); - i->v->f = u.d; + enum float_format ff = settings_get_input_float_format (); + size_t size = float_get_size (ff); + if (ss_length (i->input) >= size) + float_convert (ff, ss_data (i->input), + FLOAT_NATIVE_DOUBLE, &i->output->f); + else + i->output->f = SYSMIS; - return true; + return NULL; } -static inline bool +/* Parses A format. */ +static char * parse_A (struct data_in *i) { - buf_copy_rpad (i->v->s, i->format.w, i->s, i->e - i->s); - return true; + /* This is equivalent to buf_copy_rpad, except that we posibly + do a character set recoding in the middle. */ + uint8_t *dst = value_str_rw (i->output, i->width); + size_t dst_size = i->width; + const char *src = ss_data (i->input); + size_t src_size = ss_length (i->input); + + memcpy (dst, src, MIN (src_size, dst_size)); + + if (dst_size > src_size) + memset (&dst[src_size], ' ', dst_size - src_size); + + return NULL; } -static inline bool +/* Parses AHEX format. */ +static char * parse_AHEX (struct data_in *i) { - /* Validate input. */ - trim_whitespace (i); - if ((i->e - i->s) % 2) + uint8_t *s = value_str_rw (i->output, i->width); + size_t j; + + for (j = 0; ; j++) { - dls_error (i, _("Field must have even length.")); - return false; + int hi = ss_get_byte (&i->input); + int lo = ss_get_byte (&i->input); + if (hi == EOF) + break; + else if (lo == EOF) + return xstrdup (_("Field must have even length.")); + + if (!c_isxdigit (hi) || !c_isxdigit (lo)) + return xstrdup (_("Field must contain only hex digits.")); + + if (j < i->width) + s[j] = hexit_value (hi) * 16 + hexit_value (lo); } - { - const char *cp; - - for (cp = i->s; cp < i->e; cp++) - if (!isxdigit ((unsigned char) *cp)) - { - dls_error (i, _("Field must contain only hex digits.")); - return false; - } - } - - { - int j; - - /* Parse input. */ - for (j = 0; j < min (i->e - i->s, i->format.w); j += 2) - i->v->s[j / 2] = hexit_value (i->s[j]) * 16 + hexit_value (i->s[j + 1]); - memset (i->v->s + (i->e - i->s) / 2, ' ', (i->format.w - (i->e - i->s)) / 2); - } - - return true; + memset (&s[j], ' ', i->width - j); + + return NULL; } /* Date & time format components. */ -/* Advances *CP past any whitespace characters. */ -static inline void -skip_whitespace (struct data_in *i) -{ - while (isspace ((unsigned char) *i->s)) - i->s++; -} +/* Sign of a time value. */ +enum time_sign + { + SIGN_NO_TIME, /* No time yet encountered. */ + SIGN_POSITIVE, /* Positive time. */ + SIGN_NEGATIVE /* Negative time. */ + }; -static inline bool -parse_leader (struct data_in *i) +/* Parses a signed decimal integer from at most the first + MAX_DIGITS characters in I, storing the result into *RESULT. + Returns true if successful, false if no integer was + present. */ +static char * WARN_UNUSED_RESULT +parse_int (struct data_in *i, long *result, size_t max_digits) { - skip_whitespace (i); - return true; + struct substring head = ss_head (i->input, max_digits); + size_t n = ss_get_long (&head, result); + if (n) + { + ss_advance (&i->input, n); + return NULL; + } + else + return xstrdup (_("Syntax error in date field.")); } -static inline bool -force_have_char (struct data_in *i) +/* Parses a date integer between 1 and 31 from I, storing it into + *DAY. + Returns true if successful, false if no date was present. */ +static char * +parse_day (struct data_in *i, long *day) { - if (have_char (i)) - return true; + char *error = parse_int (i, day, SIZE_MAX); + if (error != NULL) + return error; + if (*day >= 1 && *day <= 31) + return NULL; - dls_error (i, _("Unexpected end of field.")); - return false; + return xasprintf (_("Day (%ld) must be between 1 and 31."), *day); } -static bool -parse_int (struct data_in *i, long *result) -{ - bool negative = false; - - if (!force_have_char (i)) - return false; +/* Parses an integer from the beginning of I. + Adds SECONDS_PER_UNIT times the absolute value of the integer + to *TIME. + If *TIME_SIGN is SIGN_NO_TIME, allows a sign to precede the + time and sets *TIME_SIGN. Otherwise, does not allow a sign. + Returns true if successful, false if no integer was present. */ +static char * +parse_time_units (struct data_in *i, double seconds_per_unit, + enum time_sign *time_sign, double *time) - if (*i->s == '+') - { - i->s++; - force_have_char (i); - } - else if (*i->s == '-') - { - negative = true; - i->s++; - force_have_char (i); - } - - if (!isdigit ((unsigned char) *i->s)) - { - dls_error (i, _("Digit expected in field.")); - return false; - } +{ + char *error; + long units; - *result = 0; - for (;;) + if (*time_sign == SIGN_NO_TIME) { - *result = *result * 10 + (*i->s++ - '0'); - if (!have_char (i) || !isdigit ((unsigned char) *i->s)) - break; + if (ss_match_byte (&i->input, '-')) + *time_sign = SIGN_NEGATIVE; + else + { + ss_match_byte (&i->input, '+'); + *time_sign = SIGN_POSITIVE; + } } - - if (negative) - *result = -*result; - return true; + error = parse_int (i, &units, SIZE_MAX); + if (error != NULL) + return error; + if (units < 0) + return xstrdup (_("Syntax error in date field.")); + *time += units * seconds_per_unit; + return NULL; } -static bool -parse_day (struct data_in *i, long *day) +/* Parses a data delimiter from the beginning of I. + Returns true if successful, false if no delimiter was + present. */ +static char * +parse_date_delimiter (struct data_in *i) { - if (!parse_int (i, day)) - return false; - if (*day >= 1 && *day <= 31) - return true; + if (ss_ltrim (&i->input, ss_cstr ("-/.," CC_SPACES))) + return NULL; - dls_error (i, _("Day (%ld) must be between 1 and 31."), *day); - return false; + return xstrdup (_("Delimiter expected between fields in date.")); } -static bool -parse_day_count (struct data_in *i, long *day_count) +/* Parses spaces at the beginning of I. */ +static void +parse_spaces (struct data_in *i) { - return parse_int (i, day_count); + ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } -static bool -parse_date_delimiter (struct data_in *i) +static struct substring +parse_name_token (struct data_in *i) { - bool delim = false; - - while (have_char (i) - && (*i->s == '-' || *i->s == '/' || isspace ((unsigned char) *i->s) - || *i->s == '.' || *i->s == ',')) - { - delim = true; - i->s++; - } - if (delim) - return true; - - dls_error (i, _("Delimiter expected between fields in date.")); - return false; + struct substring token; + ss_get_bytes (&i->input, ss_span (i->input, ss_cstr (CC_LETTERS)), &token); + return token; } -/* Association between a name and a value. */ -struct enum_name - { - const char *name; /* Name. */ - bool can_abbreviate; /* True if name may be abbreviated. */ - int value; /* Value associated with name. */ - }; - /* Reads a name from I and sets *OUTPUT to the value associated - with that name. Returns true if successful, false otherwise. */ + with that name. If ALLOW_SUFFIXES is true, then names that + begin with one of the names are accepted; otherwise, only + exact matches (except for case) are allowed. + Returns true if successful, false otherwise. */ static bool -parse_enum (struct data_in *i, const char *what, - const struct enum_name *enum_names, - long *output) +match_name (struct substring token, const char *const *names, long *output) { - const char *name; - size_t length; - const struct enum_name *ep; - - /* Consume alphabetic characters. */ - name = i->s; - length = 0; - while (have_char (i) && isalpha ((unsigned char) *i->s)) - { - length++; - i->s++; - } - if (length == 0) - { - dls_error (i, _("Parse error at `%c' expecting %s."), *i->s, what); - return false; - } + int i; - for (ep = enum_names; ep->name != NULL; ep++) - if ((ep->can_abbreviate - && lex_id_match_len (ep->name, strlen (ep->name), name, length)) - || (!ep->can_abbreviate && length == strlen (ep->name) - && !buf_compare_case (name, ep->name, length))) + for (i = 1; *names != NULL; i++) + if (ss_equals_case (ss_cstr (*names++), token)) { - *output = ep->value; + *output = i; return true; } - dls_error (i, _("Unknown %s `%.*s'."), what, (int) length, name); return false; } -static bool +/* Parses a month name or number from the beginning of I, + storing the month (in range 1...12) into *MONTH. + Returns true if successful, false if no month was present. */ +static char * parse_month (struct data_in *i, long *month) { - static const struct enum_name month_names[] = - { - {"january", true, 1}, - {"february", true, 2}, - {"march", true, 3}, - {"april", true, 4}, - {"may", true, 5}, - {"june", true, 6}, - {"july", true, 7}, - {"august", true, 8}, - {"september", true, 9}, - {"october", true, 10}, - {"november", true, 11}, - {"december", true, 12}, - - {"i", false, 1}, - {"ii", false, 2}, - {"iii", false, 3}, - {"iv", false, 4}, - {"iiii", false, 4}, - {"v", false, 5}, - {"vi", false, 6}, - {"vii", false, 7}, - {"viii", false, 8}, - {"ix", false, 9}, - {"viiii", false, 9}, - {"x", false, 10}, - {"xi", false, 11}, - {"xii", false, 12}, - - {NULL, false, 0}, - }; - - if (!force_have_char (i)) - return false; - - if (isdigit ((unsigned char) *i->s)) + if (c_isdigit (ss_first (i->input))) { - if (!parse_int (i, month)) - return false; + char *error = parse_int (i, month, SIZE_MAX); + if (error != NULL) + return error; if (*month >= 1 && *month <= 12) - return true; - - dls_error (i, _("Month (%ld) must be between 1 and 12."), *month); - return false; + return NULL; } - else - return parse_enum (i, _("month"), month_names, month); -} - -static bool -parse_year (struct data_in *i, long *year) -{ - if (!parse_int (i, year)) - return false; - - if (*year >= 0 && *year <= 199) - *year += 1900; - if (*year >= 1582 || *year <= 19999) - return true; + else + { + static const char *const english_names[] = + { + "jan", "feb", "mar", "apr", "may", "jun", + "jul", "aug", "sep", "oct", "nov", "dec", + NULL, + }; - dls_error (i, _("Year (%ld) must be between 1582 and 19999."), *year); - return false; -} + static const char *const roman_names[] = + { + "i", "ii", "iii", "iv", "v", "vi", + "vii", "viii", "ix", "x", "xi", "xii", + NULL, + }; + + struct substring token = parse_name_token (i); + if (match_name (ss_head (token, 3), english_names, month) + || match_name (ss_head (token, 4), roman_names, month)) + return NULL; + } -static bool -parse_trailer (struct data_in *i) -{ - skip_whitespace (i); - if (!have_char (i)) - return true; - - dls_error (i, _("Trailing garbage \"%s\" following date."), i->s); - return false; + return xstrdup (_("Unrecognized month format. Months may be specified " + "as Arabic or Roman numerals or as at least 3 letters " + "of their English names.")); } -static bool -parse_julian (struct data_in *i, long *julian) +/* Parses a year of at most MAX_DIGITS from the beginning of I, + storing a "4-digit" year into *YEAR. */ +static char * +parse_year (struct data_in *i, long *year, size_t max_digits) { - if (!parse_int (i, julian)) - return false; - - { - int day = *julian % 1000; + char *error = parse_int (i, year, max_digits); + if (error != NULL) + return error; - if (day < 1 || day > 366) - { - dls_error (i, _("Julian day (%d) must be between 1 and 366."), day); - return false; - } - } - - { - int year = *julian / 1000; - - if (year >= 0 && year <= 199) - *julian += 1900000L; - else if (year < 1582 || year > 19999) - { - dls_error (i, _("Year (%d) must be between 1582 and 19999."), year); - return false; - } - } + if (*year >= 0 && *year <= 99) + { + int epoch = settings_get_epoch (); + int epoch_century = ROUND_DOWN (epoch, 100); + int epoch_offset = epoch - epoch_century; + if (*year >= epoch_offset) + *year += epoch_century; + else + *year += epoch_century + 100; + } + if (*year >= 1582 || *year <= 19999) + return NULL; - return true; + return xasprintf (_("Year (%ld) must be between 1582 and 19999."), *year); } -static bool -parse_quarter (struct data_in *i, long *quarter) +/* Returns true if input in I has been exhausted, + false otherwise. */ +static char * +parse_trailer (struct data_in *i) { - if (!parse_int (i, quarter)) - return false; - if (*quarter >= 1 && *quarter <= 4) - return true; + if (ss_is_empty (i->input)) + return NULL; - dls_error (i, _("Quarter (%ld) must be between 1 and 4."), *quarter); - return false; + return xasprintf (_("Trailing garbage `%.*s' following date."), + (int) ss_length (i->input), ss_data (i->input)); } -static bool -parse_q_delimiter (struct data_in *i) +/* Parses a 3-digit Julian day-of-year value from I into *YDAY. + Returns true if successful, false on failure. */ +static char * +parse_yday (struct data_in *i, long *yday) { - skip_whitespace (i); - if (!have_char (i) || tolower ((unsigned char) *i->s) != 'q') - { - dls_error (i, _("`Q' expected between quarter and year.")); - return false; - } - i->s++; - skip_whitespace (i); - return true; -} + struct substring num_s; + long num; -static bool -parse_week (struct data_in *i, long *week) -{ - if (!parse_int (i, week)) - return false; - if (*week >= 1 && *week <= 53) - return true; + ss_get_bytes (&i->input, 3, &num_s); + if (ss_span (num_s, ss_cstr (CC_DIGITS)) != 3) + return xstrdup (_("Julian day must have exactly three digits.")); + else if (!ss_get_long (&num_s, &num) || num < 1 || num > 366) + return xasprintf (_("Julian day (%ld) must be between 1 and 366."), num); - dls_error (i, _("Week (%ld) must be between 1 and 53."), *week); - return false; + *yday = num; + return NULL; } -static bool -parse_wk_delimiter (struct data_in *i) +/* Parses a quarter-of-year integer between 1 and 4 from I. + Stores the corresponding month into *MONTH. + Returns true if successful, false if no quarter was present. */ +static char * +parse_quarter (struct data_in *i, long int *month) { - skip_whitespace (i); - if (i->s + 1 >= i->e - || tolower ((unsigned char) i->s[0]) != 'w' - || tolower ((unsigned char) i->s[1]) != 'k') + long quarter; + char *error; + + error = parse_int (i, &quarter, SIZE_MAX); + if (error != NULL) + return error; + if (quarter >= 1 && quarter <= 4) { - dls_error (i, _("`WK' expected between week and year.")); - return false; + *month = (quarter - 1) * 3 + 1; + return NULL; } - i->s += 2; - skip_whitespace (i); - return true; + + return xasprintf (_("Quarter (%ld) must be between 1 and 4."), quarter); } -static bool -parse_time_delimiter (struct data_in *i) +/* Parses a week-of-year integer between 1 and 53 from I, + Stores the corresponding year-of-day into *YDAY. + Returns true if successful, false if no week was present. */ +static char * +parse_week (struct data_in *i, long int *yday) { - bool delim = false; + char *error; + long week; - while (have_char (i) && (*i->s == ':' || *i->s == '.' - || isspace ((unsigned char) *i->s))) + error = parse_int (i, &week, SIZE_MAX); + if (error != NULL) + return error; + if (week >= 1 && week <= 53) { - delim = true; - i->s++; + *yday = (week - 1) * 7 + 1; + return NULL; } - if (delim) - return true; - - dls_error (i, _("Delimiter expected between fields in time.")); - return false; + return xasprintf (_("Week (%ld) must be between 1 and 53."), week); } -static bool -parse_hour (struct data_in *i, long *hour) +/* Parses a time delimiter from the beginning of I. + Returns true if successful, false if no delimiter was + present. */ +static char * +parse_time_delimiter (struct data_in *i) { - if (!parse_int (i, hour)) - return false; - if (*hour >= 0) - return true; - - dls_error (i, _("Hour (%ld) must be positive."), *hour); - return false; -} + if (ss_ltrim (&i->input, ss_cstr (":" CC_SPACES)) > 0) + return NULL; -static bool -parse_minute (struct data_in *i, long *minute) -{ - if (!parse_int (i, minute)) - return false; - if (*minute >= 0 && *minute <= 59) - return true; - - dls_error (i, _("Minute (%ld) must be between 0 and 59."), *minute); - return false; + return xstrdup (_("Delimiter expected between fields in time.")); } -static bool -parse_opt_second (struct data_in *i, double *second) +/* Parses minutes and optional seconds from the beginning of I. + The time is converted into seconds, which are added to + *TIME. + Returns true if successful, false if an error was found. */ +static char * +parse_minute_second (struct data_in *i, double *time) { - bool delim = false; - + long minute; char buf[64]; + char *error; char *cp; - while (have_char (i) - && (*i->s == ':' || *i->s == '.' || isspace ((unsigned char) *i->s))) - { - delim = true; - i->s++; - } - - if (!delim || !isdigit ((unsigned char) *i->s)) - { - *second = 0.0; - return true; - } + /* Parse minutes. */ + error = parse_int (i, &minute, SIZE_MAX); + if (error != NULL) + return error; + if (minute < 0 || minute > 59) + return xasprintf (_("Minute (%ld) must be between 0 and 59."), minute); + *time += 60. * minute; + + /* Check for seconds. */ + if (ss_ltrim (&i->input, ss_cstr (":" CC_SPACES)) == 0 + || !c_isdigit (ss_first (i->input))) + return NULL; + /* Parse seconds. */ cp = buf; - while (have_char (i) && isdigit ((unsigned char) *i->s)) - *cp++ = *i->s++; - if (have_char (i) && *i->s == '.') - *cp++ = *i->s++; - while (have_char (i) && isdigit ((unsigned char) *i->s)) - *cp++ = *i->s++; + while (c_isdigit (ss_first (i->input))) + *cp++ = ss_get_byte (&i->input); + if (ss_match_byte (&i->input, settings_get_decimal_char (FMT_F))) + *cp++ = '.'; + while (c_isdigit (ss_first (i->input))) + *cp++ = ss_get_byte (&i->input); *cp = '\0'; - - *second = strtod (buf, NULL); - return true; -} + *time += strtod (buf, NULL); -static bool -parse_hour24 (struct data_in *i, long *hour24) -{ - if (!parse_int (i, hour24)) - return false; - if (*hour24 >= 0 && *hour24 <= 23) - return true; - - dls_error (i, _("Hour (%ld) must be between 0 and 23."), *hour24); - return false; + return NULL; } - -static bool +/* Parses a weekday name from the beginning of I, + storing a value of 1=Sunday...7=Saturday into *WEEKDAY. + Returns true if successful, false if an error was found. */ +static char * parse_weekday (struct data_in *i, long *weekday) { - static const struct enum_name weekday_names[] = + static const char *const weekday_names[] = { - {"sunday", true, 1}, - {"su", true, 1}, - {"monday", true, 2}, - {"mo", true, 2}, - {"tuesday", true, 3}, - {"tu", true, 3}, - {"wednesday", true, 4}, - {"we", true, 4}, - {"thursday", true, 5}, - {"th", true, 5}, - {"friday", true, 6}, - {"fr", true, 6}, - {"saturday", true, 7}, - {"sa", true, 7}, - - {NULL, false, 0}, + "su", "mo", "tu", "we", "th", "fr", "sa", + NULL, }; - return parse_enum (i, _("weekday"), weekday_names, weekday); -} - -static bool -parse_spaces (struct data_in *i) -{ - skip_whitespace (i); - return true; -} - -static bool -parse_sign (struct data_in *i, int *sign) -{ - if (!force_have_char (i)) - return false; - - switch (*i->s) - { - case '-': - i->s++; - *sign = -1; - break; - - case '+': - i->s++; - /* fall through */ - - default: - *sign = 1; - break; - } - - return true; + struct substring token = parse_name_token (i); + bool ok = match_name (ss_head (token, 2), weekday_names, weekday); + if (!ok) + return xstrdup (_("Unrecognized weekday name. At least the first two " + "letters of an English weekday name must be " + "specified.")); + return NULL; } /* Date & time formats. */ -static void -calendar_error (void *i_, const char *format, ...) -{ - struct data_in *i = i_; - va_list args; - - va_start (args, format); - vdls_error (i, format, args); - va_end (args); -} - -static bool -ymd_to_ofs (struct data_in *i, int year, int month, int day, double *ofs) +/* Parses WKDAY format. */ +static char * +parse_WKDAY (struct data_in *i) { - *ofs = calendar_gregorian_to_offset (year, month, day, calendar_error, i); - return *ofs != SYSMIS; -} + long weekday; + char *error; -static bool -ymd_to_date (struct data_in *i, int year, int month, int day, double *date) -{ - if (ymd_to_ofs (i, year, month, day, date)) - { - *date *= 60. * 60. * 24.; - return true; - } - else - return false; -} + if (trim_spaces_and_check_missing (i)) + return NULL; -static bool -parse_DATE (struct data_in *i) -{ - long day, month, year; - - return (parse_leader (i) - && parse_day (i, &day) - && parse_date_delimiter (i) - && parse_month (i, &month) - && parse_date_delimiter (i) - && parse_year (i, &year) - && parse_trailer (i) - && ymd_to_date (i, year, month, day, &i->v->f)); -} + error = parse_weekday (i, &weekday); + if (error == NULL) + error = parse_trailer (i); -static bool -parse_ADATE (struct data_in *i) -{ - long month, day, year; - - return (parse_leader (i) - && parse_month (i, &month) - && parse_date_delimiter (i) - && parse_day (i, &day) - && parse_date_delimiter (i) - && parse_year (i, &year) - && parse_trailer (i) - && ymd_to_date (i, year, month, day, &i->v->f)); + i->output->f = weekday; + return error; } -static bool -parse_EDATE (struct data_in *i) +/* Parses MONTH format. */ +static char * +parse_MONTH (struct data_in *i) { - long month, day, year; - - return (parse_leader (i) - && parse_day (i, &day) - && parse_date_delimiter (i) - && parse_month (i, &month) - && parse_date_delimiter (i) - && parse_year (i, &year) - && parse_trailer (i) - && ymd_to_date (i, year, month, day, &i->v->f)); -} + long month; + char *error; -static bool -parse_SDATE (struct data_in *i) -{ - long month, day, year; - - return (parse_leader (i) - && parse_year (i, &year) - && parse_date_delimiter (i) - && parse_month (i, &month) - && parse_date_delimiter (i) - && parse_day (i, &day) - && parse_trailer (i) - && ymd_to_date (i, year, month, day, &i->v->f)); -} + if (trim_spaces_and_check_missing (i)) + return NULL; -static bool -parse_JDATE (struct data_in *i) -{ - long julian; - double ofs; - - if (!parse_leader (i) - || !parse_julian (i, &julian) - || !parse_trailer (i) - || !ymd_to_ofs (i, julian / 1000, 1, 1, &ofs)) - return false; - - i->v->f = (ofs + julian % 1000 - 1) * 60. * 60. * 24.; - return true; -} + error = parse_month (i, &month); + if (error == NULL) + error = parse_trailer (i); -static bool -parse_QYR (struct data_in *i) -{ - long quarter, year; - - return (parse_leader (i) - && parse_quarter (i, &quarter) - && parse_q_delimiter (i) - && parse_year (i, &year) - && parse_trailer (i) - && ymd_to_date (i, year, (quarter - 1) * 3 + 1, 1, &i->v->f)); + i->output->f = month; + return error; } -static bool -parse_MOYR (struct data_in *i) +/* Parses DATE, ADATE, EDATE, JDATE, SDATE, QYR, MOYR, KWYR, + DATETIME, TIME and DTIME formats. */ +static char * +parse_date (struct data_in *i) { - long month, year; - - return (parse_leader (i) - && parse_month (i, &month) - && parse_date_delimiter (i) - && parse_year (i, &year) - && parse_trailer (i) - && ymd_to_date (i, year, month, 1, &i->v->f)); -} + long int year = INT_MIN; + long int month = 1; + long int day = 1; + long int yday = 1; + double time = 0, date = 0; + enum time_sign time_sign = SIGN_NO_TIME; -static bool -parse_WKYR (struct data_in *i) -{ - long week, year; - double ofs; + const char *template = fmt_date_template (i->format); + size_t template_width = strlen (template); + char *error; - if (!parse_leader (i) - || !parse_week (i, &week) - || !parse_wk_delimiter (i) - || !parse_year (i, &year) - || !parse_trailer (i)) - return false; + if (trim_spaces_and_check_missing (i)) + return NULL; - if (year != 1582) + while (*template != '\0') { - if (!ymd_to_ofs (i, year, 1, 1, &ofs)) - return false; - } - else - { - if (ymd_to_ofs (i, 1583, 1, 1, &ofs)) - return false; - ofs -= 365; - } + unsigned char ch = *template; + int count = 1; - i->v->f = (ofs + (week - 1) * 7) * 60. * 60. * 24.; - return true; -} + while (template[count] == ch) + count++; + template += count; -static bool -parse_TIME (struct data_in *i) -{ - int sign; - double second; - long hour, minute; - - if (!parse_leader (i) - || !parse_sign (i, &sign) - || !parse_spaces (i) - || !parse_hour (i, &hour) - || !parse_time_delimiter (i) - || !parse_minute (i, &minute) - || !parse_opt_second (i, &second)) - return false; - - i->v->f = (hour * 60. * 60. + minute * 60. + second) * sign; - return true; -} - -static bool -parse_DTIME (struct data_in *i) -{ - int sign; - long day_count, hour; - double second; - long minute; - - if (!parse_leader (i) - || !parse_sign (i, &sign) - || !parse_spaces (i) - || !parse_day_count (i, &day_count) - || !parse_time_delimiter (i) - || !parse_hour (i, &hour) - || !parse_time_delimiter (i) - || !parse_minute (i, &minute) - || !parse_opt_second (i, &second)) - return false; - - i->v->f = (day_count * 60. * 60. * 24. - + hour * 60. * 60. - + minute * 60. - + second) * sign; - return true; -} - -static bool -parse_DATETIME (struct data_in *i) -{ - long day, month, year; - long hour24; - double second; - long minute; - - if (!parse_leader (i) - || !parse_day (i, &day) - || !parse_date_delimiter (i) - || !parse_month (i, &month) - || !parse_date_delimiter (i) - || !parse_year (i, &year) - || !parse_time_delimiter (i) - || !parse_hour24 (i, &hour24) - || !parse_time_delimiter (i) - || !parse_minute (i, &minute) - || !parse_opt_second (i, &second) - || !ymd_to_date (i, year, month, day, &i->v->f)) - return false; - - i->v->f += hour24 * 60. * 60. + minute * 60. + second; - return true; -} - -static bool -parse_WKDAY (struct data_in *i) -{ - long weekday; - - if (!parse_leader (i) - || !parse_weekday (i, &weekday) - || !parse_trailer (i)) - return false; - - i->v->f = weekday; - return true; -} + switch (ch) + { + case 'd': + error = count < 3 ? parse_day (i, &day) : parse_yday (i, &yday); + break; + case 'm': + error = parse_month (i, &month); + break; + case 'y': + { + size_t max_digits; + if (!c_isalpha (*template)) + max_digits = SIZE_MAX; + else + { + if (ss_length (i->input) >= template_width + 2) + max_digits = 4; + else + max_digits = 2; + } + error = parse_year (i, &year, max_digits); + } + break; + case 'q': + error = parse_quarter (i, &month); + break; + case 'w': + error = parse_week (i, &yday); + break; + case 'D': + error = parse_time_units (i, 60. * 60. * 24., &time_sign, &time); + break; + case 'H': + error = parse_time_units (i, 60. * 60., &time_sign, &time); + break; + case 'M': + error = parse_minute_second (i, &time); + break; + case '-': + case '/': + case '.': + case 'X': + error = parse_date_delimiter (i); + break; + case ':': + error = parse_time_delimiter (i); + case ' ': + parse_spaces (i); + error = NULL; + break; + default: + assert (count == 1); + if (!ss_match_byte (&i->input, c_toupper (ch)) + && !ss_match_byte (&i->input, c_tolower (ch))) + error = xasprintf (_("`%c' expected in date field."), ch); + else + error = NULL; + break; + } + if (error != NULL) + return error; + } + error = parse_trailer (i); + if (error != NULL) + return error; -static bool -parse_MONTH (struct data_in *i) -{ - long month; + if (year != INT_MIN) + { + char *error; + double ofs; - if (!parse_leader (i) - || !parse_month (i, &month) - || !parse_trailer (i)) - return false; + ofs = calendar_gregorian_to_offset (year, month, day, &error); + if (ofs == SYSMIS) + return error; + date = (yday - 1 + ofs) * 60. * 60. * 24.; + } + else + date = 0.; + i->output->f = date + (time_sign == SIGN_NEGATIVE ? -time : time); - i->v->f = month; - return true; + return NULL; } -/* Main dispatcher. */ +/* Utility functions. */ +/* Sets the default result for I. + For a numeric format, this is the value set on SET BLANKS + (typically system-missing); for a string format, it is all + spaces. */ static void default_result (struct data_in *i) { - const struct fmt_desc *const fmt = &formats[i->format.type]; - - /* Default to SYSMIS or blanks. */ - if (fmt->cat & FCAT_STRING) - memset (i->v->s, ' ', i->format.w); + if (fmt_is_string (i->format)) + memset (value_str_rw (i->output, i->width), ' ', i->width); else - i->v->f = get_blanks(); + i->output->f = settings_get_blanks (); } -bool -data_in (struct data_in *i) +/* Trims leading and trailing spaces from I. + If the result is empty, or a single period character, then + sets the default result and returns true; otherwise, returns + false. */ +static bool +trim_spaces_and_check_missing (struct data_in *i) { - const struct fmt_desc *const fmt = &formats[i->format.type]; - - assert (check_input_specifier (&i->format, 0)); - - /* Check that we've got a string to work with. */ - if (i->e == i->s || i->format.w <= 0) + ss_trim (&i->input, ss_cstr (" ")); + if (ss_is_empty (i->input) || ss_equals (i->input, ss_cstr ("."))) { default_result (i); return true; } - - i->f2 = i->f1 + (i->e - i->s) - 1; - - /* Make sure that the string isn't too long. */ - if (i->format.w > fmt->Imax_w) - { - dls_error (i, _("Field too long (%d characters). Truncated after " - "character %d."), - i->format.w, fmt->Imax_w); - i->format.w = fmt->Imax_w; - } - - if (fmt->cat & FCAT_BLANKS_SYSMIS) - { - const char *cp; - - cp = i->s; - for (;;) - { - if (!isspace ((unsigned char) *cp)) - break; - - if (++cp == i->e) - { - i->v->f = get_blanks(); - return true; - } - } - } - - { - static bool (*const handlers[FMT_NUMBER_OF_FORMATS])(struct data_in *) = - { - parse_numeric, parse_N, parse_numeric, parse_numeric, - parse_numeric, parse_numeric, parse_numeric, - parse_Z, parse_A, parse_AHEX, parse_IB, parse_P, parse_PIB, - parse_PIBHEX, parse_PK, parse_RB, parse_RBHEX, - NULL, NULL, NULL, NULL, NULL, - parse_DATE, parse_EDATE, parse_SDATE, parse_ADATE, parse_JDATE, - parse_QYR, parse_MOYR, parse_WKYR, - parse_DATETIME, parse_TIME, parse_DTIME, - parse_WKDAY, parse_MONTH, - }; - - bool (*handler)(struct data_in *); - bool success; - - handler = handlers[i->format.type]; - assert (handler != NULL); - - success = handler (i); - if (!success) - default_result (i); - - return success; - } + return false; } - -/* Utility function. */ -/* Sets DI->{s,e} appropriately given that LINE has length LEN and the - field starts at one-based column FC and ends at one-based column - LC, inclusive. */ -void -data_in_finite_line (struct data_in *di, const char *line, size_t len, - int fc, int lc) +/* Returns the integer value of hex digit C. */ +static int +hexit_value (int c) { - di->s = line + ((size_t) fc <= len ? fc - 1 : len); - di->e = line + ((size_t) lc <= len ? lc : len); + const char s[] = "0123456789abcdef"; + const char *cp = strchr (s, c_tolower ((unsigned char) c)); + + assert (cp != NULL); + return cp - s; }