X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=src%2Fdata%2Fdata-in.c;h=d36aefb5e8144bb1e2207fb613d2ec6d897b63c2;hb=3da49359c52cb783db907cc197847bbd5e721c97;hp=7ecded2f9333f8235488c8f676692c57ba44b5fd;hpb=0fa141762183890ebd139ccd9264f08db9011539;p=pspp diff --git a/src/data/data-in.c b/src/data/data-in.c index 7ecded2f93..d36aefb5e8 100644 --- a/src/data/data-in.c +++ b/src/data/data-in.c @@ -1,21 +1,18 @@ -/* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. - Written by Ben Pfaff . +/* PSPP - a program for statistical analysis. + Copyright (C) 1997-9, 2000, 2006, 2009, 2010 Free Software Foundation, Inc. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA - 02110-1301, USA. */ + along with this program. If not, see . */ #include @@ -23,31 +20,33 @@ #include #include +#include #include #include +#include #include #include #include #include -#include #include "calendar.h" +#include "dictionary.h" +#include "format.h" #include "identifier.h" +#include "libpspp/assertion.h" +#include "libpspp/compiler.h" +#include "libpspp/i18n.h" +#include "libpspp/integer-format.h" +#include "libpspp/legacy-encoding.h" +#include "libpspp/misc.h" +#include "libpspp/str.h" #include "settings.h" -#include "variable.h" +#include "value.h" -#include -#include -#include -#include -#include -#include -#include - -#include "c-ctype.h" -#include "minmax.h" -#include "size_max.h" -#include "xalloc.h" +#include "gl/c-ctype.h" +#include "gl/c-strtod.h" +#include "gl/minmax.h" +#include "gl/xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) @@ -57,127 +56,229 @@ struct data_in { struct substring input; /* Source. */ enum fmt_type format; /* Input format. */ - int implied_decimals; /* Number of implied decimal places. */ union value *output; /* Destination. */ int width; /* Output width. */ - - int first_column; /* First column of field; 0 if inapplicable. */ - int last_column; /* Last column. */ }; -/* Integer format used for IB and PIB input. */ -static enum integer_format input_integer_format = INTEGER_NATIVE; - -/* Floating-point format used for RB and RBHEX input. */ -static enum float_format input_float_format = FLOAT_NATIVE_DOUBLE; - -typedef bool data_in_parser_func (struct data_in *); +typedef char *data_in_parser_func (struct data_in *); #define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) \ static data_in_parser_func parse_##METHOD; #include "format.def" -static void vdata_warning (const struct data_in *, const char *, va_list) - PRINTF_FORMAT (2, 0); -static void data_warning (const struct data_in *, const char *, ...) - PRINTF_FORMAT (2, 3); - -static void apply_implied_decimals (struct data_in *); static void default_result (struct data_in *); static bool trim_spaces_and_check_missing (struct data_in *); static int hexit_value (int c); -/* Parses the characters in INPUT according to FORMAT. Stores - the parsed representation in OUTPUT, which has the given WIDTH - (0 for a numeric field, otherwise the string width). - - If no decimal point is included in a numeric format, then - IMPLIED_DECIMALS decimal places are implied. Specify 0 if no - decimal places should be implied. - - If FIRST_COLUMN is nonzero, then it should be the 1-based - column number of the first character in INPUT, used in error - messages. */ -bool -data_in (struct substring input, - enum fmt_type format, int implied_decimals, - int first_column, union value *output, int width) +/* Parses the characters in INPUT, which are encoded in the given + INPUT_ENCODING, according to FORMAT. + + Stores the parsed representation in OUTPUT, which the caller must have + initialized with the given WIDTH (0 for a numeric field, otherwise the + string width). If FORMAT is FMT_A, then OUTPUT_ENCODING must specify the + correct encoding for OUTPUT (normally obtained via dict_get_encoding()). */ +char * +data_in (struct substring input, const char *input_encoding, + enum fmt_type format, + union value *output, int width, const char *output_encoding) { - static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] = + static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] = { #define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) parse_##METHOD, #include "format.def" }; struct data_in i; - bool ok; + + enum fmt_category cat; + const char *dest_encoding; + char *s; + char *error; assert ((width != 0) == fmt_is_string (format)); - i.input = input; i.format = format; - i.implied_decimals = implied_decimals; i.output = output; i.width = width; - i.first_column = first_column; - i.last_column = first_column + ss_length (input) - 1; + if (ss_is_empty (input)) + { + default_result (&i); + return NULL; + } + + cat = fmt_get_category (format); + if (cat & (FMT_CAT_BASIC | FMT_CAT_HEXADECIMAL + | FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT)) + { + /* We're going to parse these into numbers. For this purpose we want to + deal with them in the local "C" encoding. Any character not in that + encoding wouldn't be valid anyhow. */ + dest_encoding = LEGACY_NATIVE; + } + else if (cat & (FMT_CAT_BINARY | FMT_CAT_LEGACY)) + { + /* Don't recode these binary formats at all, since they are not text. */ + dest_encoding = NULL; + } + else + { + assert (cat == FMT_CAT_STRING); + if (format == FMT_AHEX) + { + /* We want the hex digits in the local "C" encoding, even though the + result may not be in that encoding. */ + dest_encoding = LEGACY_NATIVE; + } + else + { + /* Use the final output encoding. */ + dest_encoding = output_encoding; + } + } - if (!ss_is_empty (i.input)) + if (dest_encoding != NULL) { - ok = handlers[i.format] (&i); - if (!ok) - default_result (&i); + i.input = recode_substring_pool (dest_encoding, input_encoding, input, + NULL); + s = i.input.string; } else { - default_result (&i); - ok = true; + i.input = input; + s = NULL; } - return ok; + error = handlers[i.format] (&i); + if (error != NULL) + default_result (&i); + + free (s); + + return error; } -/* Returns the integer format used for IB and PIB input. */ -enum integer_format -data_in_get_integer_format (void) +bool +data_in_msg (struct substring input, const char *input_encoding, + enum fmt_type format, + union value *output, int width, const char *output_encoding) { - return input_integer_format; + char *error = data_in (input, input_encoding, format, + output, width, output_encoding); + if (error != NULL) + { + msg (SW,_("Data is not valid as format %s: %s"), + fmt_name (format), error); + free (error); + return false; + } + else + return true; } -/* Sets the integer format used for IB and PIB input to - FORMAT. */ -void -data_in_set_integer_format (enum integer_format format) +static bool +number_has_implied_decimals (const char *s, enum fmt_type type) { - input_integer_format = format; + int decimal = settings_get_style (type)->decimal; + bool got_digit = false; + for (;;) + { + switch (*s) + { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + got_digit = true; + break; + + case '+': case '-': + if (got_digit) + return false; + break; + + case 'e': case 'E': case 'd': case 'D': + return false; + + case '.': case ',': + if (*s == decimal) + return false; + break; + + case '\0': + return true; + + default: + break; + } + + s++; + } } -/* Returns the floating-point format used for RB and RBHEX - input. */ -enum float_format -data_in_get_float_format (void) +static bool +has_implied_decimals (struct substring input, const char *input_encoding, + enum fmt_type format) { - return input_float_format; + bool retval; + char *s; + + switch (format) + { + case FMT_F: + case FMT_COMMA: + case FMT_DOT: + case FMT_DOLLAR: + case FMT_PCT: + case FMT_E: + case FMT_Z: + break; + + case FMT_N: + case FMT_IB: + case FMT_PIB: + case FMT_P: + case FMT_PK: + return true; + + default: + return false; + } + + s = recode_string (LEGACY_NATIVE, input_encoding, + ss_data (input), ss_length (input)); + retval = (format == FMT_Z + ? strchr (s, '.') == NULL + : number_has_implied_decimals (s, format)); + free (s); + + return retval; } -/* Sets the floating-point format used for RB and RBHEX input to - FORMAT. */ +/* In some cases, when no decimal point is explicitly included in numeric + input, its position is implied by the number of decimal places in the input + format. In such a case, this function may be called just after data_in(). + Its arguments are a subset of that function's arguments plus D, the number + of decimal places associated with FORMAT. + + If it is appropriate, this function modifies the numeric value in OUTPUT. */ void -data_in_set_float_format (enum float_format format) +data_in_imply_decimals (struct substring input, const char *input_encoding, + enum fmt_type format, int d, union value *output) { - input_float_format = format; + if (d > 0 && output->f != SYSMIS + && has_implied_decimals (input, input_encoding, format)) + output->f /= pow (10., d); } -/* Format parsers. */ +/* Format parsers. */ /* Parses F, COMMA, DOT, DOLLAR, PCT, and E input formats. */ -static bool +static char * parse_number (struct data_in *i) { - const struct fmt_number_style *style = fmt_get_style (i->format); + const struct fmt_number_style *style = + settings_get_style (i->format); struct string tmp; @@ -185,36 +286,39 @@ parse_number (struct data_in *i) int save_errno; char *tail; - assert (fmt_get_category (i->format) != FMT_CAT_CUSTOM); + if (fmt_get_category (i->format) == FMT_CAT_CUSTOM) + { + style = settings_get_style (FMT_F); + } /* Trim spaces and check for missing value representation. */ if (trim_spaces_and_check_missing (i)) - return true; + return NULL; ds_init_empty (&tmp); ds_extend (&tmp, 64); - + /* Prefix character may precede sign. */ - if (!ss_is_empty (style->prefix)) + if (!ss_is_empty (style->prefix)) { ss_match_char (&i->input, ss_first (style->prefix)); ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } /* Sign. */ - if (ss_match_char (&i->input, '-')) + if (ss_match_char (&i->input, '-')) { ds_put_char (&tmp, '-'); ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } - else + else { ss_match_char (&i->input, '+'); ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } /* Prefix character may follow sign. */ - if (!ss_is_empty (style->prefix)) + if (!ss_is_empty (style->prefix)) { ss_match_char (&i->input, ss_first (style->prefix)); ss_ltrim (&i->input, ss_cstr (CC_SPACES)); @@ -229,7 +333,7 @@ parse_number (struct data_in *i) } /* Decimal point and following digits. */ - if (ss_match_char (&i->input, style->decimal)) + if (ss_match_char (&i->input, style->decimal)) { explicit_decimals = true; ds_put_char (&tmp, '.'); @@ -245,7 +349,7 @@ parse_number (struct data_in *i) explicit_decimals = true; ds_put_char (&tmp, 'e'); - if (strchr ("eEdD", ss_first (i->input))) + if (strchr ("eEdD", ss_first (i->input))) { ss_advance (&i->input, 1); ss_match_char (&i->input, ' '); @@ -268,72 +372,66 @@ parse_number (struct data_in *i) if (!ss_is_empty (i->input)) { + char *error; if (ds_is_empty (&tmp)) - data_warning (i, _("Field contents are not numeric.")); + error = xstrdup (_("Field contents are not numeric.")); else - data_warning (i, _("Number followed by garbage.")); + error = xstrdup (_("Number followed by garbage.")); ds_destroy (&tmp); - return false; + return error; } - /* Let strtod() do the conversion. */ + /* Let c_strtod() do the conversion. */ save_errno = errno; errno = 0; - i->output->f = strtod (ds_cstr (&tmp), &tail); + i->output->f = c_strtod (ds_cstr (&tmp), &tail); if (*tail != '\0') { - data_warning (i, _("Invalid numeric syntax.")); errno = save_errno; ds_destroy (&tmp); - return false; + return xstrdup (_("Invalid numeric syntax.")); } - else if (errno == ERANGE) + else if (errno == ERANGE) { if (fabs (i->output->f) > 1) { - data_warning (i, _("Too-large number set to system-missing.")); i->output->f = SYSMIS; + ds_destroy (&tmp); + return xstrdup (_("Too-large number set to system-missing.")); } - else + else { - data_warning (i, _("Too-small number set to zero.")); - i->output->f = 0.0; + i->output->f = 0.0; + ds_destroy (&tmp); + return xstrdup (_("Too-small number set to zero.")); } } - else - { - errno = save_errno; - if (!explicit_decimals) - apply_implied_decimals (i); - } + else + errno = save_errno; ds_destroy (&tmp); - return true; + return NULL; } /* Parses N format. */ -static bool +static char * parse_N (struct data_in *i) { int c; i->output->f = 0; - while ((c = ss_get_char (&i->input)) != EOF) + while ((c = ss_get_char (&i->input)) != EOF) { if (!c_isdigit (c)) - { - data_warning (i, _("All characters in field must be digits.")); - return false; - } - i->output->f = i->output->f * 10.0 + (c - '0'); + return xstrdup (_("All characters in field must be digits.")); + i->output->f = i->output->f * 10.0 + (c - '0'); } - apply_implied_decimals (i); - return true; + return NULL; } /* Parses PIBHEX format. */ -static bool +static char * parse_PIBHEX (struct data_in *i) { double n; @@ -344,45 +442,36 @@ parse_PIBHEX (struct data_in *i) while ((c = ss_get_char (&i->input)) != EOF) { if (!c_isxdigit (c)) - { - data_warning (i, _("Unrecognized character in field.")); - return false; - } + return xstrdup (_("Unrecognized character in field.")); n = n * 16.0 + hexit_value (c); } - + i->output->f = n; - return true; + return NULL; } /* Parses RBHEX format. */ -static bool +static char * parse_RBHEX (struct data_in *i) { double d; size_t j; memset (&d, 0, sizeof d); - for (j = 0; !ss_is_empty (i->input) && j < sizeof d; j++) + for (j = 0; !ss_is_empty (i->input) && j < sizeof d; j++) { int hi = ss_get_char (&i->input); int lo = ss_get_char (&i->input); if (lo == EOF) - { - data_warning (i, _("Field must have even length.")); - return false; - } + return xstrdup (_("Field must have even length.")); else if (!c_isxdigit (hi) || !c_isxdigit (lo)) - { - data_warning (i, _("Field must contain only hex digits.")); - return false; - } - ((unsigned char *) &d)[j] = 16 * hexit_value (hi) + hexit_value (lo); + return xstrdup (_("Field must contain only hex digits.")); + ((unsigned char *) &d)[j] = 16 * hexit_value (hi) + hexit_value (lo); } - + i->output->f = d; - - return true; + + return NULL; } /* Digits for Z format. */ @@ -390,7 +479,7 @@ static const char z_digits[] = "0123456789{ABCDEFGHI}JKLMNOPQR"; /* Returns true if C is a Z format digit, false otherwise. */ static bool -is_z_digit (int c) +is_z_digit (int c) { return c > 0 && strchr (z_digits, c) != NULL; } @@ -407,14 +496,14 @@ z_digit_value (int c) /* Returns true if Z format digit C represents a negative value, false otherwise. */ static bool -is_negative_z_digit (int c) +is_negative_z_digit (int c) { assert (is_z_digit (c)); return (strchr (z_digits, c) - z_digits) >= 20; } /* Parses Z format. */ -static bool +static char * parse_Z (struct data_in *i) { struct string tmp; @@ -423,10 +512,10 @@ parse_Z (struct data_in *i) bool got_dot = false; bool got_final_digit = false; - + /* Trim spaces and check for missing value representation. */ if (trim_spaces_and_check_missing (i)) - return true; + return NULL; ds_init_empty (&tmp); ds_extend (&tmp, 64); @@ -437,65 +526,66 @@ parse_Z (struct data_in *i) int c = ss_get_char (&i->input); if (c_isdigit (c) && !got_final_digit) ds_put_char (&tmp, c); - else if (is_z_digit (c) && !got_final_digit) + else if (is_z_digit (c) && !got_final_digit) { ds_put_char (&tmp, z_digit_value (c) + '0'); if (is_negative_z_digit (c)) ds_data (&tmp)[0] = '-'; got_final_digit = true; } - else if (c == '.' && !got_dot) + else if (c == '.' && !got_dot) { ds_put_char (&tmp, '.'); - got_dot = true; + got_dot = true; } - else + else { ds_destroy (&tmp); - return false; + return xstrdup (_("Invalid zoned decimal syntax.")); } } if (!ss_is_empty (i->input)) { + char *error; + if (ds_length (&tmp) == 1) - data_warning (i, _("Field contents are not numeric.")); + error = xstrdup (_("Field contents are not numeric.")); else - data_warning (i, _("Number followed by garbage.")); + error = xstrdup (_("Number followed by garbage.")); + ds_destroy (&tmp); - return false; + return error; } - /* Let strtod() do the conversion. */ + /* Let c_strtod() do the conversion. */ save_errno = errno; errno = 0; - i->output->f = strtod (ds_cstr (&tmp), NULL); - if (errno == ERANGE) + i->output->f = c_strtod (ds_cstr (&tmp), NULL); + if (errno == ERANGE) { if (fabs (i->output->f) > 1) { - data_warning (i, _("Too-large number set to system-missing.")); i->output->f = SYSMIS; + ds_destroy (&tmp); + return xstrdup (_("Too-large number set to system-missing.")); } - else + else { - data_warning (i, _("Too-small number set to zero.")); - i->output->f = 0.0; + i->output->f = 0.0; + ds_destroy (&tmp); + return xstrdup (_("Too-small number set to zero.")); } } - else - { - errno = save_errno; - if (!got_dot) - apply_implied_decimals (i); - } + else + errno = save_errno; ds_destroy (&tmp); - return true; + return NULL; } /* Parses IB format. */ -static bool +static char * parse_IB (struct data_in *i) { size_t bytes; @@ -503,39 +593,35 @@ parse_IB (struct data_in *i) uint64_t sign_bit; bytes = MIN (8, ss_length (i->input)); - value = integer_get (input_integer_format, ss_data (i->input), bytes); + value = integer_get (settings_get_input_integer_format (), ss_data (i->input), bytes); sign_bit = UINT64_C(1) << (8 * bytes - 1); - if (!(value & sign_bit)) + if (!(value & sign_bit)) i->output->f = value; - else + else { /* Sign-extend to full 64 bits. */ - value -= sign_bit << 1; - i->output->f = -(double) -value; + value -= sign_bit << 1; + i->output->f = -(double) -value; } - apply_implied_decimals (i); - - return true; + return NULL; } /* Parses PIB format. */ -static bool +static char * parse_PIB (struct data_in *i) { - i->output->f = integer_get (input_integer_format, ss_data (i->input), + i->output->f = integer_get (settings_get_input_integer_format (), ss_data (i->input), MIN (8, ss_length (i->input))); - - apply_implied_decimals (i); - return true; + return NULL; } /* Consumes the first character of S. Stores its high 4 bits in HIGH_NIBBLE and its low 4 bits in LOW_NIBBLE. */ static void -get_nibbles (struct substring *s, int *high_nibble, int *low_nibble) +get_nibbles (struct substring *s, int *high_nibble, int *low_nibble) { int c = ss_get_char (s); assert (c != EOF); @@ -544,37 +630,35 @@ get_nibbles (struct substring *s, int *high_nibble, int *low_nibble) } /* Parses P format. */ -static bool +static char * parse_P (struct data_in *i) { int high_nibble, low_nibble; - + i->output->f = 0.0; while (ss_length (i->input) > 1) { get_nibbles (&i->input, &high_nibble, &low_nibble); if (high_nibble > 9 || low_nibble > 9) - return false; + return xstrdup (_("Invalid syntax for P field.")); i->output->f = (100 * i->output->f) + (10 * high_nibble) + low_nibble; } get_nibbles (&i->input, &high_nibble, &low_nibble); if (high_nibble > 9) - return false; + return xstrdup (_("Invalid syntax for P field.")); i->output->f = (10 * i->output->f) + high_nibble; - if (low_nibble < 10) + if (low_nibble < 10) i->output->f = (10 * i->output->f) + low_nibble; else if (low_nibble == 0xb || low_nibble == 0xd) i->output->f = -i->output->f; - apply_implied_decimals (i); - - return true; + return NULL; } /* Parses PK format. */ -static bool +static char * parse_PK (struct data_in *i) { i->output->f = 0.0; @@ -586,76 +670,80 @@ parse_PK (struct data_in *i) if (high_nibble > 9 || low_nibble > 9) { i->output->f = SYSMIS; - return true; + return NULL; } i->output->f = (100 * i->output->f) + (10 * high_nibble) + low_nibble; } - apply_implied_decimals (i); - - return true; + return NULL; } /* Parses RB format. */ -static bool +static char * parse_RB (struct data_in *i) { - size_t size = float_get_size (input_float_format); + enum float_format ff = settings_get_input_float_format (); + size_t size = float_get_size (ff); if (ss_length (i->input) >= size) - float_convert (input_float_format, ss_data (i->input), + float_convert (ff, ss_data (i->input), FLOAT_NATIVE_DOUBLE, &i->output->f); else i->output->f = SYSMIS; - return true; + return NULL; } /* Parses A format. */ -static bool +static char * parse_A (struct data_in *i) { - buf_copy_rpad (i->output->s, i->width, - ss_data (i->input), ss_length (i->input)); - return true; + /* This is equivalent to buf_copy_rpad, except that we posibly + do a character set recoding in the middle. */ + uint8_t *dst = value_str_rw (i->output, i->width); + size_t dst_size = i->width; + const char *src = ss_data (i->input); + size_t src_size = ss_length (i->input); + + memcpy (dst, src, MIN (src_size, dst_size)); + + if (dst_size > src_size) + memset (&dst[src_size], ' ', dst_size - src_size); + + return NULL; } /* Parses AHEX format. */ -static bool +static char * parse_AHEX (struct data_in *i) { + uint8_t *s = value_str_rw (i->output, i->width); size_t j; - - for (j = 0; ; j++) + + for (j = 0; ; j++) { int hi = ss_get_char (&i->input); int lo = ss_get_char (&i->input); if (hi == EOF) break; else if (lo == EOF) - { - data_warning (i, _("Field must have even length.")); - return false; - } + return xstrdup (_("Field must have even length.")); if (!c_isxdigit (hi) || !c_isxdigit (lo)) - { - data_warning (i, _("Field must contain only hex digits.")); - return false; - } - + return xstrdup (_("Field must contain only hex digits.")); + if (j < i->width) - i->output->s[j] = hexit_value (hi) * 16 + hexit_value (lo); + s[j] = hexit_value (hi) * 16 + hexit_value (lo); } - memset (i->output->s + j, ' ', i->width - j); - - return true; + memset (&s[j], ' ', i->width - j); + + return NULL; } /* Date & time format components. */ /* Sign of a time value. */ -enum time_sign +enum time_sign { SIGN_NO_TIME, /* No time yet encountered. */ SIGN_POSITIVE, /* Positive time. */ @@ -666,36 +754,33 @@ enum time_sign MAX_DIGITS characters in I, storing the result into *RESULT. Returns true if successful, false if no integer was present. */ -static bool +static char * WARN_UNUSED_RESULT parse_int (struct data_in *i, long *result, size_t max_digits) { struct substring head = ss_head (i->input, max_digits); size_t n = ss_get_long (&head, result); - if (n) + if (n) { ss_advance (&i->input, n); - return true; - } - else - { - data_warning (i, _("Syntax error in date field.")); - return false; + return NULL; } + else + return xstrdup (_("Syntax error in date field.")); } /* Parses a date integer between 1 and 31 from I, storing it into *DAY. Returns true if successful, false if no date was present. */ -static bool +static char * parse_day (struct data_in *i, long *day) { - if (!parse_int (i, day, SIZE_MAX)) - return false; + char *error = parse_int (i, day, SIZE_MAX); + if (error != NULL) + return error; if (*day >= 1 && *day <= 31) - return true; + return NULL; - data_warning (i, _("Day (%ld) must be between 1 and 31."), *day); - return false; + return xasprintf (_("Day (%ld) must be between 1 and 31."), *day); } /* Parses an integer from the beginning of I. @@ -704,45 +789,43 @@ parse_day (struct data_in *i, long *day) If *TIME_SIGN is SIGN_NO_TIME, allows a sign to precede the time and sets *TIME_SIGN. Otherwise, does not allow a sign. Returns true if successful, false if no integer was present. */ -static bool +static char * parse_time_units (struct data_in *i, double seconds_per_unit, enum time_sign *time_sign, double *time) { + char *error; long units; - if (*time_sign == SIGN_NO_TIME) + if (*time_sign == SIGN_NO_TIME) { if (ss_match_char (&i->input, '-')) *time_sign = SIGN_NEGATIVE; else { ss_match_char (&i->input, '+'); - *time_sign = SIGN_POSITIVE; + *time_sign = SIGN_POSITIVE; } } - if (!parse_int (i, &units, SIZE_MAX)) - return false; - if (units < 0) - { - data_warning (i, _("Syntax error in date field.")); - return false; - } + error = parse_int (i, &units, SIZE_MAX); + if (error != NULL) + return error; + if (units < 0) + return xstrdup (_("Syntax error in date field.")); *time += units * seconds_per_unit; - return true; + return NULL; } /* Parses a data delimiter from the beginning of I. Returns true if successful, false if no delimiter was present. */ -static bool +static char * parse_date_delimiter (struct data_in *i) { if (ss_ltrim (&i->input, ss_cstr ("-/.," CC_SPACES))) - return true; + return NULL; - data_warning (i, _("Delimiter expected between fields in date.")); - return false; + return xstrdup (_("Delimiter expected between fields in date.")); } /* Parses spaces at the beginning of I. */ @@ -753,7 +836,7 @@ parse_spaces (struct data_in *i) } static struct substring -parse_name_token (struct data_in *i) +parse_name_token (struct data_in *i) { struct substring token; ss_get_chars (&i->input, ss_span (i->input, ss_cstr (CC_LETTERS)), &token); @@ -766,43 +849,44 @@ parse_name_token (struct data_in *i) exact matches (except for case) are allowed. Returns true if successful, false otherwise. */ static bool -match_name (struct substring token, const char **names, long *output) +match_name (struct substring token, const char *const *names, long *output) { int i; - for (i = 1; *names != NULL; i++) + for (i = 1; *names != NULL; i++) if (ss_equals_case (ss_cstr (*names++), token)) { *output = i; return true; } - + return false; } /* Parses a month name or number from the beginning of I, storing the month (in range 1...12) into *MONTH. Returns true if successful, false if no month was present. */ -static bool +static char * parse_month (struct data_in *i, long *month) { if (c_isdigit (ss_first (i->input))) { - if (!parse_int (i, month, SIZE_MAX)) - return false; + char *error = parse_int (i, month, SIZE_MAX); + if (error != NULL) + return error; if (*month >= 1 && *month <= 12) - return true; + return NULL; } - else + else { - static const char *english_names[] = + static const char *const english_names[] = { "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec", NULL, }; - - static const char *roman_names[] = + + static const char *const roman_names[] = { "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x", "xi", "xii", @@ -812,56 +896,54 @@ parse_month (struct data_in *i, long *month) struct substring token = parse_name_token (i); if (match_name (ss_head (token, 3), english_names, month) || match_name (ss_head (token, 4), roman_names, month)) - return true; + return NULL; } - data_warning (i, _("Unrecognized month format. Months may be specified " - "as Arabic or Roman numerals or as at least 3 letters " - "of their English names.")); - return false; + return xstrdup (_("Unrecognized month format. Months may be specified " + "as Arabic or Roman numerals or as at least 3 letters " + "of their English names.")); } /* Parses a year of at most MAX_DIGITS from the beginning of I, storing a "4-digit" year into *YEAR. */ -static bool +static char * parse_year (struct data_in *i, long *year, size_t max_digits) { - if (!parse_int (i, year, max_digits)) - return false; - - if (*year >= 0 && *year <= 99) + char *error = parse_int (i, year, max_digits); + if (error != NULL) + return error; + + if (*year >= 0 && *year <= 99) { - int epoch = get_epoch (); + int epoch = settings_get_epoch (); int epoch_century = ROUND_DOWN (epoch, 100); int epoch_offset = epoch - epoch_century; if (*year >= epoch_offset) *year += epoch_century; else - *year += epoch_century + 100; + *year += epoch_century + 100; } if (*year >= 1582 || *year <= 19999) - return true; + return NULL; - data_warning (i, _("Year (%ld) must be between 1582 and 19999."), *year); - return false; + return xasprintf (_("Year (%ld) must be between 1582 and 19999."), *year); } /* Returns true if input in I has been exhausted, false otherwise. */ -static bool +static char * parse_trailer (struct data_in *i) { if (ss_is_empty (i->input)) - return true; - - data_warning (i, _("Trailing garbage \"%.*s\" following date."), - (int) ss_length (i->input), ss_data (i->input)); - return false; + return NULL; + + return xasprintf (_("Trailing garbage `%.*s' following date."), + (int) ss_length (i->input), ss_data (i->input)); } /* Parses a 3-digit Julian day-of-year value from I into *YDAY. Returns true if successful, false on failure. */ -static bool +static char * parse_yday (struct data_in *i, long *yday) { struct substring num_s; @@ -869,121 +951,115 @@ parse_yday (struct data_in *i, long *yday) ss_get_chars (&i->input, 3, &num_s); if (ss_span (num_s, ss_cstr (CC_DIGITS)) != 3) - { - data_warning (i, _("Julian day must have exactly three digits.")); - return false; - } + return xstrdup (_("Julian day must have exactly three digits.")); else if (!ss_get_long (&num_s, &num) || num < 1 || num > 366) - { - data_warning (i, _("Julian day (%ld) must be between 1 and 366."), num); - return false; - } + return xasprintf (_("Julian day (%ld) must be between 1 and 366."), num); *yday = num; - return true; + return NULL; } /* Parses a quarter-of-year integer between 1 and 4 from I. Stores the corresponding month into *MONTH. Returns true if successful, false if no quarter was present. */ -static bool +static char * parse_quarter (struct data_in *i, long int *month) { long quarter; - - if (!parse_int (i, &quarter, SIZE_MAX)) - return false; - if (quarter >= 1 && quarter <= 4) + char *error; + + error = parse_int (i, &quarter, SIZE_MAX); + if (error != NULL) + return error; + if (quarter >= 1 && quarter <= 4) { *month = (quarter - 1) * 3 + 1; - return true; + return NULL; } - data_warning (i, _("Quarter (%ld) must be between 1 and 4."), quarter); - return false; + return xasprintf (_("Quarter (%ld) must be between 1 and 4."), quarter); } /* Parses a week-of-year integer between 1 and 53 from I, Stores the corresponding year-of-day into *YDAY. Returns true if successful, false if no week was present. */ -static bool +static char * parse_week (struct data_in *i, long int *yday) { + char *error; long week; - - if (!parse_int (i, &week, SIZE_MAX)) - return false; - if (week >= 1 && week <= 53) + + error = parse_int (i, &week, SIZE_MAX); + if (error != NULL) + return error; + if (week >= 1 && week <= 53) { *yday = (week - 1) * 7 + 1; - return true; + return NULL; } - data_warning (i, _("Week (%ld) must be between 1 and 53."), week); - return false; + return xasprintf (_("Week (%ld) must be between 1 and 53."), week); } /* Parses a time delimiter from the beginning of I. Returns true if successful, false if no delimiter was present. */ -static bool +static char * parse_time_delimiter (struct data_in *i) { if (ss_ltrim (&i->input, ss_cstr (":" CC_SPACES)) > 0) - return true; - - data_warning (i, _("Delimiter expected between fields in time.")); - return false; + return NULL; + + return xstrdup (_("Delimiter expected between fields in time.")); } /* Parses minutes and optional seconds from the beginning of I. The time is converted into seconds, which are added to - *TIME. + *TIME. Returns true if successful, false if an error was found. */ -static bool +static char * parse_minute_second (struct data_in *i, double *time) { long minute; char buf[64]; + char *error; char *cp; /* Parse minutes. */ - if (!parse_int (i, &minute, SIZE_MAX)) - return false; - if (minute < 0 || minute > 59) - { - data_warning (i, _("Minute (%ld) must be between 0 and 59."), minute); - return false; - } + error = parse_int (i, &minute, SIZE_MAX); + if (error != NULL) + return error; + if (minute < 0 || minute > 59) + return xasprintf (_("Minute (%ld) must be between 0 and 59."), minute); *time += 60. * minute; /* Check for seconds. */ if (ss_ltrim (&i->input, ss_cstr (":" CC_SPACES)) == 0 || !c_isdigit (ss_first (i->input))) - return true; + return NULL; /* Parse seconds. */ cp = buf; while (c_isdigit (ss_first (i->input))) *cp++ = ss_get_char (&i->input); - if (ss_match_char (&i->input, fmt_decimal_char (FMT_F))) + if (ss_match_char (&i->input, settings_get_decimal_char (FMT_F))) *cp++ = '.'; while (c_isdigit (ss_first (i->input))) *cp++ = ss_get_char (&i->input); *cp = '\0'; - + *time += strtod (buf, NULL); - return true; + return NULL; } /* Parses a weekday name from the beginning of I, storing a value of 1=Sunday...7=Saturday into *WEEKDAY. Returns true if successful, false if an error was found. */ -static bool +static char * parse_weekday (struct data_in *i, long *weekday) { - static const char *weekday_names[] = + static const char *const weekday_names[] = { "su", "mo", "tu", "we", "th", "fr", "sa", NULL, @@ -992,64 +1068,53 @@ parse_weekday (struct data_in *i, long *weekday) struct substring token = parse_name_token (i); bool ok = match_name (ss_head (token, 2), weekday_names, weekday); if (!ok) - data_warning (i, _("Unrecognized weekday name. At least the first two " - "letters of an English weekday name must be " - "specified.")); - return ok; + return xstrdup (_("Unrecognized weekday name. At least the first two " + "letters of an English weekday name must be " + "specified.")); + return NULL; } /* Date & time formats. */ -/* Helper function for passing to - calendar_gregorian_to_offset. */ -static void -calendar_error (void *i_, const char *format, ...) -{ - struct data_in *i = i_; - va_list args; - - va_start (args, format); - vdata_warning (i, format, args); - va_end (args); -} - /* Parses WKDAY format. */ -static bool +static char * parse_WKDAY (struct data_in *i) { long weekday; + char *error; if (trim_spaces_and_check_missing (i)) - return true; + return NULL; - if (!parse_weekday (i, &weekday) - || !parse_trailer (i)) - return false; + error = parse_weekday (i, &weekday); + if (error == NULL) + error = parse_trailer (i); i->output->f = weekday; - return true; + return error; } /* Parses MONTH format. */ -static bool +static char * parse_MONTH (struct data_in *i) { long month; + char *error; if (trim_spaces_and_check_missing (i)) - return true; + return NULL; - if (!parse_month (i, &month) - || !parse_trailer (i)) - return false; + error = parse_month (i, &month); + if (error == NULL) + error = parse_trailer (i); i->output->f = month; - return true; + return error; } /* Parses DATE, ADATE, EDATE, JDATE, SDATE, QYR, MOYR, KWYR, DATETIME, TIME and DTIME formats. */ -static bool +static char * parse_date (struct data_in *i) { long int year = INT_MIN; @@ -1061,152 +1126,105 @@ parse_date (struct data_in *i) const char *template = fmt_date_template (i->format); size_t template_width = strlen (template); + char *error; if (trim_spaces_and_check_missing (i)) - return true; + return NULL; while (*template != '\0') { unsigned char ch = *template; int count = 1; - bool ok; - - while (template[count] == ch) + + while (template[count] == ch) count++; template += count; - - ok = true; + switch (ch) { case 'd': - ok = count < 3 ? parse_day (i, &day) : parse_yday (i, &yday); + error = count < 3 ? parse_day (i, &day) : parse_yday (i, &yday); break; case 'm': - ok = parse_month (i, &month); + error = parse_month (i, &month); break; - case 'y': + case 'y': { size_t max_digits; if (!c_isalpha (*template)) max_digits = SIZE_MAX; - else + else { if (ss_length (i->input) >= template_width + 2) max_digits = 4; else - max_digits = 2; + max_digits = 2; } - ok = parse_year (i, &year, max_digits); + error = parse_year (i, &year, max_digits); } break; case 'q': - ok = parse_quarter (i, &month); + error = parse_quarter (i, &month); break; case 'w': - ok = parse_week (i, &yday); + error = parse_week (i, &yday); break; case 'D': - ok = parse_time_units (i, 60. * 60. * 24., &time_sign, &time); + error = parse_time_units (i, 60. * 60. * 24., &time_sign, &time); break; case 'H': - ok = parse_time_units (i, 60. * 60., &time_sign, &time); + error = parse_time_units (i, 60. * 60., &time_sign, &time); break; case 'M': - ok = parse_minute_second (i, &time); + error = parse_minute_second (i, &time); break; case '-': case '/': case '.': case 'X': - ok = parse_date_delimiter (i); + error = parse_date_delimiter (i); break; case ':': - ok = parse_time_delimiter (i); + error = parse_time_delimiter (i); case ' ': parse_spaces (i); + error = NULL; break; default: assert (count == 1); if (!ss_match_char (&i->input, c_toupper (ch)) && !ss_match_char (&i->input, c_tolower (ch))) - { - data_warning (i, _("`%c' expected in date field."), ch); - return false; - } + error = xasprintf (_("`%c' expected in date field."), ch); + else + error = NULL; break; } - if (!ok) - return false; + if (error != NULL) + return error; } - if (!parse_trailer (i)) - return false; + error = parse_trailer (i); + if (error != NULL) + return error; - if (year != INT_MIN) + if (year != INT_MIN) { - double ofs = calendar_gregorian_to_offset (year, month, day, - calendar_error, i); + char *error; + double ofs; + + ofs = calendar_gregorian_to_offset (year, month, day, &error); if (ofs == SYSMIS) - return false; + return error; date = (yday - 1 + ofs) * 60. * 60. * 24.; } else date = 0.; i->output->f = date + (time_sign == SIGN_NEGATIVE ? -time : time); - return true; + return NULL; } /* Utility functions. */ -/* Outputs FORMAT with the given ARGS as a warning for input - I. */ -static void -vdata_warning (const struct data_in *i, const char *format, va_list args) -{ - struct msg m; - struct string text; - - ds_init_empty (&text); - ds_put_char (&text, '('); - if (i->first_column != 0) - { - if (i->first_column == i->last_column) - ds_put_format (&text, _("column %d"), i->first_column); - else - ds_put_format (&text, _("columns %d-%d"), - i->first_column, i->last_column); - ds_put_cstr (&text, ", "); - } - ds_put_format (&text, _("%s field) "), fmt_name (i->format)); - ds_put_vformat (&text, format, args); - - m.category = MSG_DATA; - m.severity = MSG_WARNING; - m.text = ds_cstr (&text); - - msg_emit (&m); -} - -/* Outputs FORMAT with the given ARGS as a warning for input - I. */ -static void -data_warning (const struct data_in *i, const char *format, ...) -{ - va_list args; - - va_start (args, format); - vdata_warning (i, format, args); - va_end (args); -} - -/* Apply implied decimal places to output. */ -static void -apply_implied_decimals (struct data_in *i) -{ - if (i->implied_decimals > 0) - i->output->f /= pow (10., i->implied_decimals); -} - /* Sets the default result for I. For a numeric format, this is the value set on SET BLANKS (typically system-missing); for a string format, it is all @@ -1215,9 +1233,9 @@ static void default_result (struct data_in *i) { if (fmt_is_string (i->format)) - memset (i->output->s, ' ', i->width); + memset (value_str_rw (i->output, i->width), ' ', i->width); else - i->output->f = get_blanks (); + i->output->f = settings_get_blanks (); } /* Trims leading and trailing spaces from I. @@ -1225,7 +1243,7 @@ default_result (struct data_in *i) sets the default result and returns true; otherwise, returns false. */ static bool -trim_spaces_and_check_missing (struct data_in *i) +trim_spaces_and_check_missing (struct data_in *i) { ss_trim (&i->input, ss_cstr (" ")); if (ss_is_empty (i->input) || ss_equals (i->input, ss_cstr (".")))