/* PSPP - a program for statistical analysis.
- Copyright (C) 1997-9, 2000, 2006, 2009 Free Software Foundation, Inc.
+ Copyright (C) 1997-9, 2000, 2006, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include <ctype.h>
#include <errno.h>
+#include <limits.h>
#include <math.h>
#include <stdarg.h>
+#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
-#include <stdbool.h>
-#include <limits.h>
#include "calendar.h"
+#include "dictionary.h"
+#include "format.h"
#include "identifier.h"
+#include "libpspp/assertion.h"
+#include "libpspp/compiler.h"
+#include "libpspp/i18n.h"
+#include "libpspp/integer-format.h"
+#include "libpspp/misc.h"
+#include "libpspp/str.h"
#include "settings.h"
#include "value.h"
-#include "format.h"
-#include "dictionary.h"
-#include <libpspp/assertion.h>
-#include <libpspp/legacy-encoding.h>
-#include <libpspp/i18n.h>
-#include <libpspp/compiler.h>
-#include <libpspp/integer-format.h>
-#include <libpspp/message.h>
-#include <libpspp/misc.h>
-#include <libpspp/str.h>
-#include "c-ctype.h"
-#include "c-strtod.h"
-#include "minmax.h"
-#include "xalloc.h"
+#include "gl/c-ctype.h"
+#include "gl/c-strtod.h"
+#include "gl/minmax.h"
+#include "gl/xalloc.h"
#include "gettext.h"
#define _(msgid) gettext (msgid)
/* Information about parsing one data field. */
struct data_in
{
- const char *src_enc; /* Encoding of source. */
struct substring input; /* Source. */
enum fmt_type format; /* Input format. */
- int implied_decimals; /* Number of implied decimal places. */
union value *output; /* Destination. */
int width; /* Output width. */
-
- int first_column; /* First column of field; 0 if inapplicable. */
- int last_column; /* Last column. */
};
-
-
-typedef bool data_in_parser_func (struct data_in *);
+typedef char *data_in_parser_func (struct data_in *);
#define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) \
static data_in_parser_func parse_##METHOD;
#include "format.def"
-static void vdata_warning (const struct data_in *, const char *, va_list)
- PRINTF_FORMAT (2, 0);
-static void data_warning (const struct data_in *, const char *, ...)
- PRINTF_FORMAT (2, 3);
-
-static void apply_implied_decimals (struct data_in *);
static void default_result (struct data_in *);
static bool trim_spaces_and_check_missing (struct data_in *);
static int hexit_value (int c);
\f
/* Parses the characters in INPUT, which are encoded in the given
- ENCODING, according to FORMAT. Stores the parsed
- representation in OUTPUT, which the caller must have
- initialized with the given WIDTH (0 for a numeric field,
- otherwise the string width).
- Iff FORMAT is a string format, then DICT must be a pointer
- to the dictionary associated with OUTPUT. Otherwise, DICT
- may be null.
-
- If no decimal point is included in a numeric format, then
- IMPLIED_DECIMALS decimal places are implied. Specify 0 if no
- decimal places should be implied.
-
- If FIRST_COLUMN and LAST_COLUMN are nonzero, then they should
- be the 1-based column number of the first and
- one-past-the-last-character in INPUT, for use in error
- messages. (LAST_COLUMN cannot always be calculated from
- FIRST_COLUMN plus the length of the input because of the
- possibility of escaped quotes in strings, etc.) */
-bool
-data_in (struct substring input, const char *encoding,
- enum fmt_type format, int implied_decimals,
- int first_column, int last_column,
- const struct dictionary *dict,
- union value *output, int width)
+ INPUT_ENCODING, according to FORMAT.
+
+ Stores the parsed representation in OUTPUT, which the caller must have
+ initialized with the given WIDTH (0 for a numeric field, otherwise the
+ string width). If FORMAT is FMT_A, then OUTPUT_ENCODING must specify the
+ correct encoding for OUTPUT (normally obtained via dict_get_encoding()). */
+char *
+data_in (struct substring input, const char *input_encoding,
+ enum fmt_type format,
+ union value *output, int width, const char *output_encoding)
{
static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] =
{
struct data_in i;
- bool ok;
+ enum fmt_category cat;
+ const char *dest_encoding;
+ char *s;
+ char *error;
assert ((width != 0) == fmt_is_string (format));
i.format = format;
- i.implied_decimals = implied_decimals;
i.output = output;
i.width = width;
- i.first_column = first_column;
- i.last_column = last_column;
- i.src_enc = encoding;
-
if (ss_is_empty (input))
{
default_result (&i);
- return true;
+ return NULL;
}
- if (fmt_get_category (format) & ( FMT_CAT_BINARY | FMT_CAT_HEXADECIMAL | FMT_CAT_LEGACY))
+ cat = fmt_get_category (format);
+ if (cat & (FMT_CAT_BASIC | FMT_CAT_HEXADECIMAL
+ | FMT_CAT_DATE | FMT_CAT_TIME | FMT_CAT_DATE_COMPONENT))
{
- i.input = input;
+ /* We're going to parse these into numbers. For this purpose we want to
+ deal with them in the local "C" encoding. Any character not in that
+ encoding wouldn't be valid anyhow. */
+ dest_encoding = C_ENCODING;
+ }
+ else if (cat & (FMT_CAT_BINARY | FMT_CAT_LEGACY))
+ {
+ /* Don't recode these binary formats at all, since they are not text. */
+ dest_encoding = NULL;
}
else
{
- const char *dest_encoding;
- char *s = NULL;
- if ( dict == NULL)
- {
- assert (0 == (fmt_get_category (format) & (FMT_CAT_BINARY | FMT_CAT_STRING)));
- dest_encoding = LEGACY_NATIVE;
- }
+ assert (cat == FMT_CAT_STRING);
+ if (format == FMT_AHEX)
+ {
+ /* We want the hex digits in the local "C" encoding, even though the
+ result may not be in that encoding. */
+ dest_encoding = C_ENCODING;
+ }
else
- dest_encoding = dict_get_encoding (dict);
+ {
+ /* Use the final output encoding. */
+ dest_encoding = output_encoding;
+ }
+ }
- s = recode_string (dest_encoding, i.src_enc, ss_data (input), ss_length (input));
- ss_alloc_uninit (&i.input, strlen (s));
- memcpy (ss_data (i.input), s, ss_length (input));
- free (s);
+ if (dest_encoding != NULL)
+ {
+ i.input = recode_substring_pool (dest_encoding, input_encoding, input,
+ NULL);
+ s = i.input.string;
+ }
+ else
+ {
+ i.input = input;
+ s = NULL;
}
- ok = handlers[i.format] (&i);
- if (!ok)
+ error = handlers[i.format] (&i);
+ if (error != NULL)
default_result (&i);
- return ok;
+ free (s);
+
+ return error;
+}
+
+bool
+data_in_msg (struct substring input, const char *input_encoding,
+ enum fmt_type format,
+ union value *output, int width, const char *output_encoding)
+{
+ char *error = data_in (input, input_encoding, format,
+ output, width, output_encoding);
+ if (error != NULL)
+ {
+ msg (SW,_("Data is not valid as format %s: %s"),
+ fmt_name (format), error);
+ free (error);
+ return false;
+ }
+ else
+ return true;
}
+static bool
+number_has_implied_decimals (const char *s, enum fmt_type type)
+{
+ int decimal = settings_get_style (type)->decimal;
+ bool got_digit = false;
+ for (;;)
+ {
+ switch (*s)
+ {
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ got_digit = true;
+ break;
+
+ case '+': case '-':
+ if (got_digit)
+ return false;
+ break;
+
+ case 'e': case 'E': case 'd': case 'D':
+ return false;
+
+ case '.': case ',':
+ if (*s == decimal)
+ return false;
+ break;
+
+ case '\0':
+ return true;
+
+ default:
+ break;
+ }
+
+ s++;
+ }
+}
+
+static bool
+has_implied_decimals (struct substring input, const char *input_encoding,
+ enum fmt_type format)
+{
+ bool retval;
+ char *s;
+
+ switch (format)
+ {
+ case FMT_F:
+ case FMT_COMMA:
+ case FMT_DOT:
+ case FMT_DOLLAR:
+ case FMT_PCT:
+ case FMT_E:
+ case FMT_Z:
+ break;
+
+ case FMT_N:
+ case FMT_IB:
+ case FMT_PIB:
+ case FMT_P:
+ case FMT_PK:
+ return true;
+
+ default:
+ return false;
+ }
+
+ s = recode_string (C_ENCODING, input_encoding,
+ ss_data (input), ss_length (input));
+ retval = (format == FMT_Z
+ ? strchr (s, '.') == NULL
+ : number_has_implied_decimals (s, format));
+ free (s);
+
+ return retval;
+}
+
+/* In some cases, when no decimal point is explicitly included in numeric
+ input, its position is implied by the number of decimal places in the input
+ format. In such a case, this function may be called just after data_in().
+ Its arguments are a subset of that function's arguments plus D, the number
+ of decimal places associated with FORMAT.
+
+ If it is appropriate, this function modifies the numeric value in OUTPUT. */
+void
+data_in_imply_decimals (struct substring input, const char *input_encoding,
+ enum fmt_type format, int d, union value *output)
+{
+ if (d > 0 && output->f != SYSMIS
+ && has_implied_decimals (input, input_encoding, format))
+ output->f /= pow (10., d);
+}
\f
/* Format parsers. */
/* Parses F, COMMA, DOT, DOLLAR, PCT, and E input formats. */
-static bool
+static char *
parse_number (struct data_in *i)
{
const struct fmt_number_style *style =
struct string tmp;
- bool explicit_decimals = false;
int save_errno;
char *tail;
/* Trim spaces and check for missing value representation. */
if (trim_spaces_and_check_missing (i))
- return true;
+ return NULL;
ds_init_empty (&tmp);
ds_extend (&tmp, 64);
/* Prefix character may precede sign. */
- if (!ss_is_empty (style->prefix))
+ if (style->prefix.s[0] != '\0')
{
- ss_match_char (&i->input, ss_first (style->prefix));
+ ss_match_byte (&i->input, style->prefix.s[0]);
ss_ltrim (&i->input, ss_cstr (CC_SPACES));
}
/* Sign. */
- if (ss_match_char (&i->input, '-'))
+ if (ss_match_byte (&i->input, '-'))
{
- ds_put_char (&tmp, '-');
+ ds_put_byte (&tmp, '-');
ss_ltrim (&i->input, ss_cstr (CC_SPACES));
}
else
{
- ss_match_char (&i->input, '+');
+ ss_match_byte (&i->input, '+');
ss_ltrim (&i->input, ss_cstr (CC_SPACES));
}
/* Prefix character may follow sign. */
- if (!ss_is_empty (style->prefix))
+ if (style->prefix.s[0] != '\0')
{
- ss_match_char (&i->input, ss_first (style->prefix));
+ ss_match_byte (&i->input, style->prefix.s[0]);
ss_ltrim (&i->input, ss_cstr (CC_SPACES));
}
/* Digits before decimal point. */
while (c_isdigit (ss_first (i->input)))
{
- ds_put_char (&tmp, ss_get_char (&i->input));
+ ds_put_byte (&tmp, ss_get_byte (&i->input));
if (style->grouping != 0)
- ss_match_char (&i->input, style->grouping);
+ ss_match_byte (&i->input, style->grouping);
}
/* Decimal point and following digits. */
- if (ss_match_char (&i->input, style->decimal))
+ if (ss_match_byte (&i->input, style->decimal))
{
- explicit_decimals = true;
- ds_put_char (&tmp, '.');
+ ds_put_byte (&tmp, '.');
while (c_isdigit (ss_first (i->input)))
- ds_put_char (&tmp, ss_get_char (&i->input));
+ ds_put_byte (&tmp, ss_get_byte (&i->input));
}
/* Exponent. */
&& !ss_is_empty (i->input)
&& strchr ("eEdD-+", ss_first (i->input)))
{
- explicit_decimals = true;
- ds_put_char (&tmp, 'e');
+ ds_put_byte (&tmp, 'e');
if (strchr ("eEdD", ss_first (i->input)))
{
ss_advance (&i->input, 1);
- ss_match_char (&i->input, ' ');
+ ss_match_byte (&i->input, ' ');
}
if (ss_first (i->input) == '-' || ss_first (i->input) == '+')
{
- if (ss_get_char (&i->input) == '-')
- ds_put_char (&tmp, '-');
- ss_match_char (&i->input, ' ');
+ if (ss_get_byte (&i->input) == '-')
+ ds_put_byte (&tmp, '-');
+ ss_match_byte (&i->input, ' ');
}
while (c_isdigit (ss_first (i->input)))
- ds_put_char (&tmp, ss_get_char (&i->input));
+ ds_put_byte (&tmp, ss_get_byte (&i->input));
}
/* Suffix character. */
- if (!ss_is_empty (style->suffix))
- ss_match_char (&i->input, ss_first (style->suffix));
+ if (style->suffix.s[0] != '\0')
+ ss_match_byte (&i->input, style->suffix.s[0]);
if (!ss_is_empty (i->input))
{
+ char *error;
if (ds_is_empty (&tmp))
- data_warning (i, _("Field contents are not numeric."));
+ error = xstrdup (_("Field contents are not numeric."));
else
- data_warning (i, _("Number followed by garbage."));
+ error = xstrdup (_("Number followed by garbage."));
ds_destroy (&tmp);
- return false;
+ return error;
}
/* Let c_strtod() do the conversion. */
i->output->f = c_strtod (ds_cstr (&tmp), &tail);
if (*tail != '\0')
{
- data_warning (i, _("Invalid numeric syntax."));
errno = save_errno;
ds_destroy (&tmp);
- return false;
+ return xstrdup (_("Invalid numeric syntax."));
}
else if (errno == ERANGE)
{
if (fabs (i->output->f) > 1)
{
- data_warning (i, _("Too-large number set to system-missing."));
i->output->f = SYSMIS;
+ ds_destroy (&tmp);
+ return xstrdup (_("Too-large number set to system-missing."));
}
else
{
- data_warning (i, _("Too-small number set to zero."));
i->output->f = 0.0;
+ ds_destroy (&tmp);
+ return xstrdup (_("Too-small number set to zero."));
}
}
else
- {
- errno = save_errno;
- if (!explicit_decimals)
- apply_implied_decimals (i);
- }
+ errno = save_errno;
ds_destroy (&tmp);
- return true;
+ return NULL;
}
/* Parses N format. */
-static bool
+static char *
parse_N (struct data_in *i)
{
int c;
i->output->f = 0;
- while ((c = ss_get_char (&i->input)) != EOF)
+ while ((c = ss_get_byte (&i->input)) != EOF)
{
if (!c_isdigit (c))
- {
- data_warning (i, _("All characters in field must be digits."));
- return false;
- }
+ return xstrdup (_("All characters in field must be digits."));
i->output->f = i->output->f * 10.0 + (c - '0');
}
- apply_implied_decimals (i);
- return true;
+ return NULL;
}
/* Parses PIBHEX format. */
-static bool
+static char *
parse_PIBHEX (struct data_in *i)
{
double n;
n = 0.0;
- while ((c = ss_get_char (&i->input)) != EOF)
+ while ((c = ss_get_byte (&i->input)) != EOF)
{
if (!c_isxdigit (c))
- {
- data_warning (i, _("Unrecognized character in field."));
- return false;
- }
+ return xstrdup (_("Unrecognized character in field."));
n = n * 16.0 + hexit_value (c);
}
i->output->f = n;
- return true;
+ return NULL;
}
/* Parses RBHEX format. */
-static bool
+static char *
parse_RBHEX (struct data_in *i)
{
double d;
memset (&d, 0, sizeof d);
for (j = 0; !ss_is_empty (i->input) && j < sizeof d; j++)
{
- int hi = ss_get_char (&i->input);
- int lo = ss_get_char (&i->input);
+ int hi = ss_get_byte (&i->input);
+ int lo = ss_get_byte (&i->input);
if (lo == EOF)
- {
- data_warning (i, _("Field must have even length."));
- return false;
- }
+ return xstrdup (_("Field must have even length."));
else if (!c_isxdigit (hi) || !c_isxdigit (lo))
- {
- data_warning (i, _("Field must contain only hex digits."));
- return false;
- }
+ return xstrdup (_("Field must contain only hex digits."));
((unsigned char *) &d)[j] = 16 * hexit_value (hi) + hexit_value (lo);
}
i->output->f = d;
- return true;
+ return NULL;
}
/* Digits for Z format. */
}
/* Parses Z format. */
-static bool
+static char *
parse_Z (struct data_in *i)
{
struct string tmp;
/* Trim spaces and check for missing value representation. */
if (trim_spaces_and_check_missing (i))
- return true;
+ return NULL;
ds_init_empty (&tmp);
ds_extend (&tmp, 64);
- ds_put_char (&tmp, '+');
+ ds_put_byte (&tmp, '+');
while (!ss_is_empty (i->input))
{
- int c = ss_get_char (&i->input);
+ int c = ss_get_byte (&i->input);
if (c_isdigit (c) && !got_final_digit)
- ds_put_char (&tmp, c);
+ ds_put_byte (&tmp, c);
else if (is_z_digit (c) && !got_final_digit)
{
- ds_put_char (&tmp, z_digit_value (c) + '0');
+ ds_put_byte (&tmp, z_digit_value (c) + '0');
if (is_negative_z_digit (c))
ds_data (&tmp)[0] = '-';
got_final_digit = true;
}
else if (c == '.' && !got_dot)
{
- ds_put_char (&tmp, '.');
+ ds_put_byte (&tmp, '.');
got_dot = true;
}
else
{
ds_destroy (&tmp);
- return false;
+ return xstrdup (_("Invalid zoned decimal syntax."));
}
}
if (!ss_is_empty (i->input))
{
+ char *error;
+
if (ds_length (&tmp) == 1)
- data_warning (i, _("Field contents are not numeric."));
+ error = xstrdup (_("Field contents are not numeric."));
else
- data_warning (i, _("Number followed by garbage."));
+ error = xstrdup (_("Number followed by garbage."));
+
ds_destroy (&tmp);
- return false;
+ return error;
}
/* Let c_strtod() do the conversion. */
{
if (fabs (i->output->f) > 1)
{
- data_warning (i, _("Too-large number set to system-missing."));
i->output->f = SYSMIS;
+ ds_destroy (&tmp);
+ return xstrdup (_("Too-large number set to system-missing."));
}
else
{
- data_warning (i, _("Too-small number set to zero."));
i->output->f = 0.0;
+ ds_destroy (&tmp);
+ return xstrdup (_("Too-small number set to zero."));
}
}
else
- {
- errno = save_errno;
- if (!got_dot)
- apply_implied_decimals (i);
- }
+ errno = save_errno;
ds_destroy (&tmp);
- return true;
+ return NULL;
}
/* Parses IB format. */
-static bool
+static char *
parse_IB (struct data_in *i)
{
size_t bytes;
i->output->f = -(double) -value;
}
- apply_implied_decimals (i);
-
- return true;
+ return NULL;
}
/* Parses PIB format. */
-static bool
+static char *
parse_PIB (struct data_in *i)
{
i->output->f = integer_get (settings_get_input_integer_format (), ss_data (i->input),
MIN (8, ss_length (i->input)));
- apply_implied_decimals (i);
-
- return true;
+ return NULL;
}
/* Consumes the first character of S. Stores its high 4 bits in
static void
get_nibbles (struct substring *s, int *high_nibble, int *low_nibble)
{
- int c = ss_get_char (s);
+ int c = ss_get_byte (s);
assert (c != EOF);
*high_nibble = (c >> 4) & 15;
*low_nibble = c & 15;
}
/* Parses P format. */
-static bool
+static char *
parse_P (struct data_in *i)
{
int high_nibble, low_nibble;
{
get_nibbles (&i->input, &high_nibble, &low_nibble);
if (high_nibble > 9 || low_nibble > 9)
- return false;
+ return xstrdup (_("Invalid syntax for P field."));
i->output->f = (100 * i->output->f) + (10 * high_nibble) + low_nibble;
}
get_nibbles (&i->input, &high_nibble, &low_nibble);
if (high_nibble > 9)
- return false;
+ return xstrdup (_("Invalid syntax for P field."));
i->output->f = (10 * i->output->f) + high_nibble;
if (low_nibble < 10)
i->output->f = (10 * i->output->f) + low_nibble;
else if (low_nibble == 0xb || low_nibble == 0xd)
i->output->f = -i->output->f;
- apply_implied_decimals (i);
-
- return true;
+ return NULL;
}
/* Parses PK format. */
-static bool
+static char *
parse_PK (struct data_in *i)
{
i->output->f = 0.0;
if (high_nibble > 9 || low_nibble > 9)
{
i->output->f = SYSMIS;
- return true;
+ return NULL;
}
i->output->f = (100 * i->output->f) + (10 * high_nibble) + low_nibble;
}
- apply_implied_decimals (i);
-
- return true;
+ return NULL;
}
/* Parses RB format. */
-static bool
+static char *
parse_RB (struct data_in *i)
{
enum float_format ff = settings_get_input_float_format ();
else
i->output->f = SYSMIS;
- return true;
+ return NULL;
}
/* Parses A format. */
-static bool
+static char *
parse_A (struct data_in *i)
{
/* This is equivalent to buf_copy_rpad, except that we posibly
if (dst_size > src_size)
memset (&dst[src_size], ' ', dst_size - src_size);
- return true;
+ return NULL;
}
/* Parses AHEX format. */
-static bool
+static char *
parse_AHEX (struct data_in *i)
{
uint8_t *s = value_str_rw (i->output, i->width);
for (j = 0; ; j++)
{
- int hi = ss_get_char (&i->input);
- int lo = ss_get_char (&i->input);
+ int hi = ss_get_byte (&i->input);
+ int lo = ss_get_byte (&i->input);
if (hi == EOF)
break;
else if (lo == EOF)
- {
- data_warning (i, _("Field must have even length."));
- return false;
- }
+ return xstrdup (_("Field must have even length."));
- if (0 != strcmp (i->src_enc, LEGACY_NATIVE))
- {
- hi = legacy_to_native (i->src_enc, hi);
- lo = legacy_to_native (i->src_enc, lo);
- }
if (!c_isxdigit (hi) || !c_isxdigit (lo))
- {
- data_warning (i, _("Field must contain only hex digits."));
- return false;
- }
+ return xstrdup (_("Field must contain only hex digits."));
if (j < i->width)
s[j] = hexit_value (hi) * 16 + hexit_value (lo);
memset (&s[j], ' ', i->width - j);
- return true;
+ return NULL;
}
\f
/* Date & time format components. */
MAX_DIGITS characters in I, storing the result into *RESULT.
Returns true if successful, false if no integer was
present. */
-static bool
+static char * WARN_UNUSED_RESULT
parse_int (struct data_in *i, long *result, size_t max_digits)
{
struct substring head = ss_head (i->input, max_digits);
if (n)
{
ss_advance (&i->input, n);
- return true;
+ return NULL;
}
else
- {
- data_warning (i, _("Syntax error in date field."));
- return false;
- }
+ return xstrdup (_("Syntax error in date field."));
}
/* Parses a date integer between 1 and 31 from I, storing it into
*DAY.
Returns true if successful, false if no date was present. */
-static bool
+static char *
parse_day (struct data_in *i, long *day)
{
- if (!parse_int (i, day, SIZE_MAX))
- return false;
+ char *error = parse_int (i, day, SIZE_MAX);
+ if (error != NULL)
+ return error;
if (*day >= 1 && *day <= 31)
- return true;
+ return NULL;
- data_warning (i, _("Day (%ld) must be between 1 and 31."), *day);
- return false;
+ return xasprintf (_("Day (%ld) must be between 1 and 31."), *day);
}
/* Parses an integer from the beginning of I.
If *TIME_SIGN is SIGN_NO_TIME, allows a sign to precede the
time and sets *TIME_SIGN. Otherwise, does not allow a sign.
Returns true if successful, false if no integer was present. */
-static bool
+static char *
parse_time_units (struct data_in *i, double seconds_per_unit,
enum time_sign *time_sign, double *time)
{
+ char *error;
long units;
if (*time_sign == SIGN_NO_TIME)
{
- if (ss_match_char (&i->input, '-'))
+ if (ss_match_byte (&i->input, '-'))
*time_sign = SIGN_NEGATIVE;
else
{
- ss_match_char (&i->input, '+');
+ ss_match_byte (&i->input, '+');
*time_sign = SIGN_POSITIVE;
}
}
- if (!parse_int (i, &units, SIZE_MAX))
- return false;
+ error = parse_int (i, &units, SIZE_MAX);
+ if (error != NULL)
+ return error;
if (units < 0)
- {
- data_warning (i, _("Syntax error in date field."));
- return false;
- }
+ return xstrdup (_("Syntax error in date field."));
*time += units * seconds_per_unit;
- return true;
+ return NULL;
}
/* Parses a data delimiter from the beginning of I.
Returns true if successful, false if no delimiter was
present. */
-static bool
+static char *
parse_date_delimiter (struct data_in *i)
{
if (ss_ltrim (&i->input, ss_cstr ("-/.," CC_SPACES)))
- return true;
+ return NULL;
- data_warning (i, _("Delimiter expected between fields in date."));
- return false;
+ return xstrdup (_("Delimiter expected between fields in date."));
}
/* Parses spaces at the beginning of I. */
parse_name_token (struct data_in *i)
{
struct substring token;
- ss_get_chars (&i->input, ss_span (i->input, ss_cstr (CC_LETTERS)), &token);
+ ss_get_bytes (&i->input, ss_span (i->input, ss_cstr (CC_LETTERS)), &token);
return token;
}
/* Parses a month name or number from the beginning of I,
storing the month (in range 1...12) into *MONTH.
Returns true if successful, false if no month was present. */
-static bool
+static char *
parse_month (struct data_in *i, long *month)
{
if (c_isdigit (ss_first (i->input)))
{
- if (!parse_int (i, month, SIZE_MAX))
- return false;
+ char *error = parse_int (i, month, SIZE_MAX);
+ if (error != NULL)
+ return error;
if (*month >= 1 && *month <= 12)
- return true;
+ return NULL;
}
else
{
struct substring token = parse_name_token (i);
if (match_name (ss_head (token, 3), english_names, month)
|| match_name (ss_head (token, 4), roman_names, month))
- return true;
+ return NULL;
}
- data_warning (i, _("Unrecognized month format. Months may be specified "
- "as Arabic or Roman numerals or as at least 3 letters "
- "of their English names."));
- return false;
+ return xstrdup (_("Unrecognized month format. Months may be specified "
+ "as Arabic or Roman numerals or as at least 3 letters "
+ "of their English names."));
}
/* Parses a year of at most MAX_DIGITS from the beginning of I,
storing a "4-digit" year into *YEAR. */
-static bool
+static char *
parse_year (struct data_in *i, long *year, size_t max_digits)
{
- if (!parse_int (i, year, max_digits))
- return false;
+ char *error = parse_int (i, year, max_digits);
+ if (error != NULL)
+ return error;
if (*year >= 0 && *year <= 99)
{
*year += epoch_century + 100;
}
if (*year >= 1582 || *year <= 19999)
- return true;
+ return NULL;
- data_warning (i, _("Year (%ld) must be between 1582 and 19999."), *year);
- return false;
+ return xasprintf (_("Year (%ld) must be between 1582 and 19999."), *year);
}
/* Returns true if input in I has been exhausted,
false otherwise. */
-static bool
+static char *
parse_trailer (struct data_in *i)
{
if (ss_is_empty (i->input))
- return true;
+ return NULL;
- data_warning (i, _("Trailing garbage \"%.*s\" following date."),
- (int) ss_length (i->input), ss_data (i->input));
- return false;
+ return xasprintf (_("Trailing garbage `%.*s' following date."),
+ (int) ss_length (i->input), ss_data (i->input));
}
/* Parses a 3-digit Julian day-of-year value from I into *YDAY.
Returns true if successful, false on failure. */
-static bool
+static char *
parse_yday (struct data_in *i, long *yday)
{
struct substring num_s;
long num;
- ss_get_chars (&i->input, 3, &num_s);
+ ss_get_bytes (&i->input, 3, &num_s);
if (ss_span (num_s, ss_cstr (CC_DIGITS)) != 3)
- {
- data_warning (i, _("Julian day must have exactly three digits."));
- return false;
- }
+ return xstrdup (_("Julian day must have exactly three digits."));
else if (!ss_get_long (&num_s, &num) || num < 1 || num > 366)
- {
- data_warning (i, _("Julian day (%ld) must be between 1 and 366."), num);
- return false;
- }
+ return xasprintf (_("Julian day (%ld) must be between 1 and 366."), num);
*yday = num;
- return true;
+ return NULL;
}
/* Parses a quarter-of-year integer between 1 and 4 from I.
Stores the corresponding month into *MONTH.
Returns true if successful, false if no quarter was present. */
-static bool
+static char *
parse_quarter (struct data_in *i, long int *month)
{
long quarter;
+ char *error;
- if (!parse_int (i, &quarter, SIZE_MAX))
- return false;
+ error = parse_int (i, &quarter, SIZE_MAX);
+ if (error != NULL)
+ return error;
if (quarter >= 1 && quarter <= 4)
{
*month = (quarter - 1) * 3 + 1;
- return true;
+ return NULL;
}
- data_warning (i, _("Quarter (%ld) must be between 1 and 4."), quarter);
- return false;
+ return xasprintf (_("Quarter (%ld) must be between 1 and 4."), quarter);
}
/* Parses a week-of-year integer between 1 and 53 from I,
Stores the corresponding year-of-day into *YDAY.
Returns true if successful, false if no week was present. */
-static bool
+static char *
parse_week (struct data_in *i, long int *yday)
{
+ char *error;
long week;
- if (!parse_int (i, &week, SIZE_MAX))
- return false;
+ error = parse_int (i, &week, SIZE_MAX);
+ if (error != NULL)
+ return error;
if (week >= 1 && week <= 53)
{
*yday = (week - 1) * 7 + 1;
- return true;
+ return NULL;
}
- data_warning (i, _("Week (%ld) must be between 1 and 53."), week);
- return false;
+ return xasprintf (_("Week (%ld) must be between 1 and 53."), week);
}
/* Parses a time delimiter from the beginning of I.
Returns true if successful, false if no delimiter was
present. */
-static bool
+static char *
parse_time_delimiter (struct data_in *i)
{
if (ss_ltrim (&i->input, ss_cstr (":" CC_SPACES)) > 0)
- return true;
+ return NULL;
- data_warning (i, _("Delimiter expected between fields in time."));
- return false;
+ return xstrdup (_("Delimiter expected between fields in time."));
}
/* Parses minutes and optional seconds from the beginning of I.
The time is converted into seconds, which are added to
*TIME.
Returns true if successful, false if an error was found. */
-static bool
+static char *
parse_minute_second (struct data_in *i, double *time)
{
long minute;
char buf[64];
+ char *error;
char *cp;
/* Parse minutes. */
- if (!parse_int (i, &minute, SIZE_MAX))
- return false;
+ error = parse_int (i, &minute, SIZE_MAX);
+ if (error != NULL)
+ return error;
if (minute < 0 || minute > 59)
- {
- data_warning (i, _("Minute (%ld) must be between 0 and 59."), minute);
- return false;
- }
+ return xasprintf (_("Minute (%ld) must be between 0 and 59."), minute);
*time += 60. * minute;
/* Check for seconds. */
if (ss_ltrim (&i->input, ss_cstr (":" CC_SPACES)) == 0
|| !c_isdigit (ss_first (i->input)))
- return true;
+ return NULL;
/* Parse seconds. */
cp = buf;
while (c_isdigit (ss_first (i->input)))
- *cp++ = ss_get_char (&i->input);
- if (ss_match_char (&i->input, settings_get_decimal_char (FMT_F)))
+ *cp++ = ss_get_byte (&i->input);
+ if (ss_match_byte (&i->input, settings_get_decimal_char (FMT_F)))
*cp++ = '.';
while (c_isdigit (ss_first (i->input)))
- *cp++ = ss_get_char (&i->input);
+ *cp++ = ss_get_byte (&i->input);
*cp = '\0';
- *time += strtod (buf, NULL);
+ *time += c_strtod (buf, NULL);
- return true;
+ return NULL;
}
/* Parses a weekday name from the beginning of I,
storing a value of 1=Sunday...7=Saturday into *WEEKDAY.
Returns true if successful, false if an error was found. */
-static bool
+static char *
parse_weekday (struct data_in *i, long *weekday)
{
static const char *const weekday_names[] =
struct substring token = parse_name_token (i);
bool ok = match_name (ss_head (token, 2), weekday_names, weekday);
if (!ok)
- data_warning (i, _("Unrecognized weekday name. At least the first two "
- "letters of an English weekday name must be "
- "specified."));
- return ok;
+ return xstrdup (_("Unrecognized weekday name. At least the first two "
+ "letters of an English weekday name must be "
+ "specified."));
+ return NULL;
}
\f
/* Date & time formats. */
-/* Helper function for passing to
- calendar_gregorian_to_offset. */
-static void
-calendar_error (void *i_, const char *format, ...)
-{
- struct data_in *i = i_;
- va_list args;
-
- va_start (args, format);
- vdata_warning (i, format, args);
- va_end (args);
-}
-
/* Parses WKDAY format. */
-static bool
+static char *
parse_WKDAY (struct data_in *i)
{
long weekday;
+ char *error;
if (trim_spaces_and_check_missing (i))
- return true;
+ return NULL;
- if (!parse_weekday (i, &weekday)
- || !parse_trailer (i))
- return false;
+ error = parse_weekday (i, &weekday);
+ if (error == NULL)
+ error = parse_trailer (i);
i->output->f = weekday;
- return true;
+ return error;
}
/* Parses MONTH format. */
-static bool
+static char *
parse_MONTH (struct data_in *i)
{
long month;
+ char *error;
if (trim_spaces_and_check_missing (i))
- return true;
+ return NULL;
- if (!parse_month (i, &month)
- || !parse_trailer (i))
- return false;
+ error = parse_month (i, &month);
+ if (error == NULL)
+ error = parse_trailer (i);
i->output->f = month;
- return true;
+ return error;
}
/* Parses DATE, ADATE, EDATE, JDATE, SDATE, QYR, MOYR, KWYR,
DATETIME, TIME and DTIME formats. */
-static bool
+static char *
parse_date (struct data_in *i)
{
long int year = INT_MIN;
double time = 0, date = 0;
enum time_sign time_sign = SIGN_NO_TIME;
- const char *template = fmt_date_template (i->format);
+ const char *template = fmt_date_template (i->format, 0);
size_t template_width = strlen (template);
+ char *error;
if (trim_spaces_and_check_missing (i))
- return true;
+ return NULL;
while (*template != '\0')
{
unsigned char ch = *template;
int count = 1;
- bool ok;
while (template[count] == ch)
count++;
template += count;
- ok = true;
switch (ch)
{
case 'd':
- ok = count < 3 ? parse_day (i, &day) : parse_yday (i, &yday);
+ error = count < 3 ? parse_day (i, &day) : parse_yday (i, &yday);
break;
case 'm':
- ok = parse_month (i, &month);
+ error = parse_month (i, &month);
break;
case 'y':
{
else
max_digits = 2;
}
- ok = parse_year (i, &year, max_digits);
+ error = parse_year (i, &year, max_digits);
}
break;
case 'q':
- ok = parse_quarter (i, &month);
+ error = parse_quarter (i, &month);
break;
case 'w':
- ok = parse_week (i, &yday);
+ error = parse_week (i, &yday);
break;
case 'D':
- ok = parse_time_units (i, 60. * 60. * 24., &time_sign, &time);
+ error = parse_time_units (i, 60. * 60. * 24., &time_sign, &time);
break;
case 'H':
- ok = parse_time_units (i, 60. * 60., &time_sign, &time);
+ error = parse_time_units (i, 60. * 60., &time_sign, &time);
break;
case 'M':
- ok = parse_minute_second (i, &time);
+ error = parse_minute_second (i, &time);
break;
case '-':
case '/':
case '.':
- case 'X':
- ok = parse_date_delimiter (i);
+ error = parse_date_delimiter (i);
break;
case ':':
- ok = parse_time_delimiter (i);
+ error = parse_time_delimiter (i);
case ' ':
- parse_spaces (i);
+ if (i->format != FMT_MOYR)
+ {
+ parse_spaces (i);
+ error = NULL;
+ }
+ else
+ error = parse_date_delimiter (i);
break;
default:
assert (count == 1);
- if (!ss_match_char (&i->input, c_toupper (ch))
- && !ss_match_char (&i->input, c_tolower (ch)))
- {
- data_warning (i, _("`%c' expected in date field."), ch);
- return false;
- }
+ if (!ss_match_byte (&i->input, c_toupper (ch))
+ && !ss_match_byte (&i->input, c_tolower (ch)))
+ error = xasprintf (_("`%c' expected in date field."), ch);
+ else
+ error = NULL;
break;
}
- if (!ok)
- return false;
+ if (error != NULL)
+ return error;
}
- if (!parse_trailer (i))
- return false;
+ error = parse_trailer (i);
+ if (error != NULL)
+ return error;
if (year != INT_MIN)
{
- double ofs = calendar_gregorian_to_offset (year, month, day,
- calendar_error, i);
+ char *error;
+ double ofs;
+
+ ofs = calendar_gregorian_to_offset (year, month, day, &error);
if (ofs == SYSMIS)
- return false;
+ return error;
date = (yday - 1 + ofs) * 60. * 60. * 24.;
}
else
date = 0.;
i->output->f = date + (time_sign == SIGN_NEGATIVE ? -time : time);
- return true;
+ return NULL;
}
\f
/* Utility functions. */
-/* Outputs FORMAT with the given ARGS as a warning for input
- I. */
-static void
-vdata_warning (const struct data_in *i, const char *format, va_list args)
-{
- struct msg m;
- struct string text;
-
- ds_init_empty (&text);
- ds_put_char (&text, '(');
- if (i->first_column != 0)
- {
- if (i->first_column == i->last_column - 1)
- ds_put_format (&text, _("column %d"), i->first_column);
- else
- ds_put_format (&text, _("columns %d-%d"),
- i->first_column, i->last_column - 1);
- ds_put_cstr (&text, ", ");
- }
- ds_put_format (&text, _("%s field) "), fmt_name (i->format));
- ds_put_vformat (&text, format, args);
-
- m.category = MSG_C_DATA;
- m.severity = MSG_S_WARNING;
- m.text = ds_cstr (&text);
- m.where.file_name = NULL;
- m.where.line_number = -1;
-
- msg_emit (&m);
-}
-
-/* Outputs FORMAT with the given ARGS as a warning for input
- I. */
-static void
-data_warning (const struct data_in *i, const char *format, ...)
-{
- va_list args;
-
- va_start (args, format);
- vdata_warning (i, format, args);
- va_end (args);
-}
-
-/* Apply implied decimal places to output. */
-static void
-apply_implied_decimals (struct data_in *i)
-{
- if (i->implied_decimals > 0)
- i->output->f /= pow (10., i->implied_decimals);
-}
-
/* Sets the default result for I.
For a numeric format, this is the value set on SET BLANKS
(typically system-missing); for a string format, it is all