From: Ben Pfaff Date: Wed, 22 Nov 2006 04:24:02 +0000 (+0000) Subject: Rewrite formatted data input routines to conform to SPSS data formats X-Git-Tag: v0.6.0~691 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0fa141762183890ebd139ccd9264f08db9011539;p=pspp-builds.git Rewrite formatted data input routines to conform to SPSS data formats as closely as possible --- diff --git a/src/data/ChangeLog b/src/data/ChangeLog index c77770c4..9bd3d429 100644 --- a/src/data/ChangeLog +++ b/src/data/ChangeLog @@ -1,3 +1,35 @@ +Sat Nov 18 20:46:35 2006 Ben Pfaff + + * format.c: (fmt_date_template) Distinguish characters for which a + space is output and any date delimiter is allowed on input, from + those for which a space is output and only a space is allowed on + input. The former is represented by X, the latter by a space. + Also, drop distinction between h and H, changing the former to the + latter. + + * data-in.c: Completely rewrite internals to conform to SPSS input + formats as closely as possible. + (data_in) Changed external interface by replacing the structure + that was used as a single argument by a set of arguments. Updated + all callers. + (data_in_finite_line) Removed. Converted all callers to use plain + data_in. + (data_in_get_integer_format) New function. + (data_in_set_integer_format) New function. + (data_in_get_float_format) New function. + (data_in_set_float_format) New function. + + * data-in.h: (enums DI_IGNORE_ERROR, DI_IMPLIED_DECIMALS) Removed. + (struct data_in) Removed. + + * data-out.c: (output_date) Drop each component from the input as + it is output, to allow us to drop the distinction between h (a + count of hours) and H (the hour of day) template characters. + Also, handle new X template character. + (output_scientific) Follow more rational rule on when to drop + fraction introduced between SPSS 13 and 15. Updated test case to + match new behavior. + Sat Nov 11 11:41:26 2006 Ben Pfaff Fix buffer overflow reported by John Darrington. diff --git a/src/data/data-in.c b/src/data/data-in.c index 3cf53a28..7ecded2f 100644 --- a/src/data/data-in.c +++ b/src/data/data-in.c @@ -18,525 +18,577 @@ 02110-1301, USA. */ #include + #include "data-in.h" -#include -#include + #include +#include +#include #include #include +#include #include #include #include -#include + #include "calendar.h" -#include #include "identifier.h" +#include "settings.h" +#include "variable.h" + +#include +#include +#include #include +#include #include -#include "settings.h" #include -#include "variable.h" + +#include "c-ctype.h" +#include "minmax.h" +#include "size_max.h" +#include "xalloc.h" #include "gettext.h" #define _(msgid) gettext (msgid) -/* Specialized error routine. */ +/* Information about parsing one data field. */ +struct data_in + { + struct substring input; /* Source. */ + enum fmt_type format; /* Input format. */ + int implied_decimals; /* Number of implied decimal places. */ + + union value *output; /* Destination. */ + int width; /* Output width. */ + + int first_column; /* First column of field; 0 if inapplicable. */ + int last_column; /* Last column. */ + }; + +/* Integer format used for IB and PIB input. */ +static enum integer_format input_integer_format = INTEGER_NATIVE; + +/* Floating-point format used for RB and RBHEX input. */ +static enum float_format input_float_format = FLOAT_NATIVE_DOUBLE; + +typedef bool data_in_parser_func (struct data_in *); +#define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) \ + static data_in_parser_func parse_##METHOD; +#include "format.def" -static void dls_error (const struct data_in *, const char *format, ...) +static void vdata_warning (const struct data_in *, const char *, va_list) + PRINTF_FORMAT (2, 0); +static void data_warning (const struct data_in *, const char *, ...) PRINTF_FORMAT (2, 3); -static void -vdls_error (const struct data_in *i, const char *format, va_list args) +static void apply_implied_decimals (struct data_in *); +static void default_result (struct data_in *); +static bool trim_spaces_and_check_missing (struct data_in *); + +static int hexit_value (int c); + +/* Parses the characters in INPUT according to FORMAT. Stores + the parsed representation in OUTPUT, which has the given WIDTH + (0 for a numeric field, otherwise the string width). + + If no decimal point is included in a numeric format, then + IMPLIED_DECIMALS decimal places are implied. Specify 0 if no + decimal places should be implied. + + If FIRST_COLUMN is nonzero, then it should be the 1-based + column number of the first character in INPUT, used in error + messages. */ +bool +data_in (struct substring input, + enum fmt_type format, int implied_decimals, + int first_column, union value *output, int width) { - struct msg m; - struct string text; - char format_string[FMT_STRING_LEN_MAX + 1]; + static data_in_parser_func *const handlers[FMT_NUMBER_OF_FORMATS] = + { +#define FMT(NAME, METHOD, IMIN, OMIN, IO, CATEGORY) parse_##METHOD, +#include "format.def" + }; - if (i->flags & DI_IGNORE_ERROR) - return; + struct data_in i; + bool ok; - ds_init_empty (&text); - if (i->f1 == i->f2) - ds_put_format (&text, _("(column %d"), i->f1); - else - ds_put_format (&text, _("(columns %d-%d"), i->f1, i->f2); - ds_put_format (&text, _(", field type %s) "), - fmt_to_string (&i->format, format_string)); - ds_put_vformat (&text, format, args); + assert ((width != 0) == fmt_is_string (format)); - m.category = MSG_DATA; - m.severity = MSG_ERROR; - m.text = ds_cstr (&text); + i.input = input; + i.format = format; + i.implied_decimals = implied_decimals; - msg_emit (&m); -} + i.output = output; + i.width = width; -static void -dls_error (const struct data_in *i, const char *format, ...) -{ - va_list args; + i.first_column = first_column; + i.last_column = first_column + ss_length (input) - 1; - va_start (args, format); - vdls_error (i, format, args); - va_end (args); + if (!ss_is_empty (i.input)) + { + ok = handlers[i.format] (&i); + if (!ok) + default_result (&i); + } + else + { + default_result (&i); + ok = true; + } + + return ok; } - -/* Parsing utility functions. */ -/* Excludes leading and trailing whitespace from I by adjusting - pointers. */ -static void -trim_whitespace (struct data_in *i) +/* Returns the integer format used for IB and PIB input. */ +enum integer_format +data_in_get_integer_format (void) { - while (i->s < i->e && isspace ((unsigned char) i->s[0])) - i->s++; + return input_integer_format; +} - while (i->s < i->e && isspace ((unsigned char) i->e[-1])) - i->e--; +/* Sets the integer format used for IB and PIB input to + FORMAT. */ +void +data_in_set_integer_format (enum integer_format format) +{ + input_integer_format = format; } -/* Returns true if we're not at the end of the string being - parsed. */ -static inline bool -have_char (struct data_in *i) +/* Returns the floating-point format used for RB and RBHEX + input. */ +enum float_format +data_in_get_float_format (void) { - return i->s < i->e; + return input_float_format; } -/* If implied decimal places are enabled, apply them to - I->v->f. */ -static void -apply_implied_decimals (struct data_in *i) +/* Sets the floating-point format used for RB and RBHEX input to + FORMAT. */ +void +data_in_set_float_format (enum float_format format) { - if ((i->flags & DI_IMPLIED_DECIMALS) && i->format.d > 0) - i->v->f /= pow (10., i->format.d); + input_float_format = format; } /* Format parsers. */ -static bool parse_int (struct data_in *i, long *result); - -/* This function is based on strtod() from the GNU C library. */ +/* Parses F, COMMA, DOT, DOLLAR, PCT, and E input formats. */ static bool -parse_numeric (struct data_in *i) +parse_number (struct data_in *i) { - int sign; /* +1 or -1. */ - double num; /* The number so far. */ + const struct fmt_number_style *style = fmt_get_style (i->format); - bool got_dot; /* Found a decimal point. */ - size_t digit_cnt; /* Count of digits. */ + struct string tmp; - int decimal; /* Decimal point character. */ - int grouping; /* Grouping character. */ + bool explicit_decimals = false; + int save_errno; + char *tail; - long int exponent; /* Number's exponent. */ - int type; /* Usually same as i->format.type. */ + assert (fmt_get_category (i->format) != FMT_CAT_CUSTOM); - trim_whitespace (i); + /* Trim spaces and check for missing value representation. */ + if (trim_spaces_and_check_missing (i)) + return true; - type = i->format.type; - if (type == FMT_DOLLAR && have_char (i) && *i->s == '$') + ds_init_empty (&tmp); + ds_extend (&tmp, 64); + + /* Prefix character may precede sign. */ + if (!ss_is_empty (style->prefix)) { - i->s++; - type = FMT_COMMA; + ss_match_char (&i->input, ss_first (style->prefix)); + ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } - /* Get the sign. */ - if (have_char (i)) + /* Sign. */ + if (ss_match_char (&i->input, '-')) { - sign = *i->s == '-' ? -1 : 1; - if (*i->s == '-' || *i->s == '+') - i->s++; + ds_put_char (&tmp, '-'); + ss_ltrim (&i->input, ss_cstr (CC_SPACES)); + } + else + { + ss_match_char (&i->input, '+'); + ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } - else - sign = 1; - decimal = fmt_decimal_char (type); - grouping = fmt_grouping_char (type); + /* Prefix character may follow sign. */ + if (!ss_is_empty (style->prefix)) + { + ss_match_char (&i->input, ss_first (style->prefix)); + ss_ltrim (&i->input, ss_cstr (CC_SPACES)); + } - i->v->f = SYSMIS; - num = 0.0; - got_dot = false; - digit_cnt = 0; - exponent = 0; - for (; have_char (i); i->s++) + /* Digits before decimal point. */ + while (c_isdigit (ss_first (i->input))) { - if (isdigit ((unsigned char) *i->s)) - { - digit_cnt++; - - /* Make sure that multiplication by 10 will not overflow. */ - if (num > DBL_MAX * 0.1) - /* The value of the digit doesn't matter, since we have already - gotten as many digits as can be represented in a `double'. - This doesn't necessarily mean the result will overflow. - The exponent may reduce it to within range. - - We just need to record that there was another - digit so that we can multiply by 10 later. */ - ++exponent; - else - num = (num * 10.0) + (*i->s - '0'); - - /* Keep track of the number of digits after the decimal point. - If we just divided by 10 here, we would lose precision. */ - if (got_dot) - --exponent; - } - else if (!got_dot && *i->s == decimal) - /* Record that we have found the decimal point. */ - got_dot = true; - else if ((type != FMT_COMMA && type != FMT_DOT) || *i->s != grouping) - /* Any other character terminates the number. */ - break; + ds_put_char (&tmp, ss_get_char (&i->input)); + if (style->grouping != 0) + ss_match_char (&i->input, style->grouping); } - if (!digit_cnt) + /* Decimal point and following digits. */ + if (ss_match_char (&i->input, style->decimal)) { - if (got_dot) - { - i->v->f = SYSMIS; - return true; - } - dls_error (i, _("Field does not form a valid floating-point constant.")); - i->v->f = SYSMIS; - return false; + explicit_decimals = true; + ds_put_char (&tmp, '.'); + while (c_isdigit (ss_first (i->input))) + ds_put_char (&tmp, ss_get_char (&i->input)); } - - if (have_char (i) && strchr ("eEdD-+", *i->s)) + + /* Exponent. */ + if (!ds_is_empty (&tmp) + && !ss_is_empty (i->input) + && strchr ("eEdD-+", ss_first (i->input))) { - /* Get the exponent specified after the `e' or `E'. */ - long exp; + explicit_decimals = true; + ds_put_char (&tmp, 'e'); - if (isalpha ((unsigned char) *i->s)) - i->s++; - if (!parse_int (i, &exp)) + if (strchr ("eEdD", ss_first (i->input))) { - i->v->f = SYSMIS; - return false; + ss_advance (&i->input, 1); + ss_match_char (&i->input, ' '); + } + + if (ss_first (i->input) == '-' || ss_first (i->input) == '+') + { + if (ss_get_char (&i->input) == '-') + ds_put_char (&tmp, '-'); + ss_match_char (&i->input, ' '); } - exponent += exp; + while (c_isdigit (ss_first (i->input))) + ds_put_char (&tmp, ss_get_char (&i->input)); } - else if (!got_dot && (i->flags & DI_IMPLIED_DECIMALS)) - exponent -= i->format.d; - if (type == FMT_PCT && have_char (i) && *i->s == '%') - i->s++; - if (i->s < i->e) + /* Suffix character. */ + if (!ss_is_empty (style->suffix)) + ss_match_char (&i->input, ss_first (style->suffix)); + + if (!ss_is_empty (i->input)) { - dls_error (i, _("Field contents followed by garbage.")); - i->v->f = SYSMIS; + if (ds_is_empty (&tmp)) + data_warning (i, _("Field contents are not numeric.")); + else + data_warning (i, _("Number followed by garbage.")); + ds_destroy (&tmp); return false; } - if (num == 0.0) + /* Let strtod() do the conversion. */ + save_errno = errno; + errno = 0; + i->output->f = strtod (ds_cstr (&tmp), &tail); + if (*tail != '\0') { - i->v->f = 0.0; - return true; + data_warning (i, _("Invalid numeric syntax.")); + errno = save_errno; + ds_destroy (&tmp); + return false; } - - /* Multiply NUM by 10 to the EXPONENT power, checking for overflow - and underflow. */ - if (exponent < 0) + else if (errno == ERANGE) { - if (-exponent + digit_cnt > -(DBL_MIN_10_EXP) + 5 - || num < DBL_MIN * pow (10.0, (double) -exponent)) + if (fabs (i->output->f) > 1) { - dls_error (i, _("Underflow in floating-point constant.")); - i->v->f = 0.0; - return false; + data_warning (i, _("Too-large number set to system-missing.")); + i->output->f = SYSMIS; } - - num *= pow (10.0, (double) exponent); - } - else if (exponent > 0) - { - if (num > DBL_MAX * pow (10.0, (double) -exponent)) + else { - dls_error (i, _("Overflow in floating-point constant.")); - i->v->f = SYSMIS; - return false; + data_warning (i, _("Too-small number set to zero.")); + i->output->f = 0.0; } - - num *= pow (10.0, (double) exponent); + } + else + { + errno = save_errno; + if (!explicit_decimals) + apply_implied_decimals (i); } - i->v->f = sign > 0 ? num : -num; + ds_destroy (&tmp); return true; } -/* Returns the integer value of hex digit C. */ -static inline int -hexit_value (int c) -{ - const char s[] = "0123456789abcdef"; - const char *cp = strchr (s, tolower ((unsigned char) c)); - - assert (cp != NULL); - return cp - s; -} - -static inline bool +/* Parses N format. */ +static bool parse_N (struct data_in *i) { - const char *cp; + int c; - i->v->f = 0; - for (cp = i->s; cp < i->e; cp++) + i->output->f = 0; + while ((c = ss_get_char (&i->input)) != EOF) { - if (!isdigit ((unsigned char) *cp)) - { - dls_error (i, _("All characters in field must be digits.")); - return false; - } - - i->v->f = i->v->f * 10.0 + (*cp - '0'); + if (!c_isdigit (c)) + { + data_warning (i, _("All characters in field must be digits.")); + return false; + } + i->output->f = i->output->f * 10.0 + (c - '0'); } apply_implied_decimals (i); return true; } -static inline bool +/* Parses PIBHEX format. */ +static bool parse_PIBHEX (struct data_in *i) { double n; - const char *cp; - - trim_whitespace (i); + int c; n = 0.0; - for (cp = i->s; cp < i->e; cp++) - { - if (!isxdigit ((unsigned char) *cp)) - { - dls_error (i, _("Unrecognized character in field.")); - return false; - } - n = n * 16.0 + hexit_value (*cp); + while ((c = ss_get_char (&i->input)) != EOF) + { + if (!c_isxdigit (c)) + { + data_warning (i, _("Unrecognized character in field.")); + return false; + } + n = n * 16.0 + hexit_value (c); } - i->v->f = n; + i->output->f = n; return true; } -static inline bool +/* Parses RBHEX format. */ +static bool parse_RBHEX (struct data_in *i) { - /* Validate input. */ - trim_whitespace (i); - if ((i->e - i->s) % 2) + double d; + size_t j; + + memset (&d, 0, sizeof d); + for (j = 0; !ss_is_empty (i->input) && j < sizeof d; j++) { - dls_error (i, _("Field must have even length.")); - return false; - } - - { - const char *cp; - - for (cp = i->s; cp < i->e; cp++) - if (!isxdigit ((unsigned char) *cp)) + int hi = ss_get_char (&i->input); + int lo = ss_get_char (&i->input); + if (lo == EOF) + { + data_warning (i, _("Field must have even length.")); + return false; + } + else if (!c_isxdigit (hi) || !c_isxdigit (lo)) { - dls_error (i, _("Field must contain only hex digits.")); + data_warning (i, _("Field must contain only hex digits.")); return false; } - } + ((unsigned char *) &d)[j] = 16 * hexit_value (hi) + hexit_value (lo); + } - /* Parse input. */ - { - union - { - double d; - unsigned char c[sizeof (double)]; - } - u; + i->output->f = d; + + return true; +} - int j; +/* Digits for Z format. */ +static const char z_digits[] = "0123456789{ABCDEFGHI}JKLMNOPQR"; - memset (u.c, 0, sizeof u.c); - for (j = 0; j < min ((i->e - i->s) / 2, sizeof u.d); j++) - u.c[j] = 16 * hexit_value (i->s[j * 2]) + hexit_value (i->s[j * 2 + 1]); +/* Returns true if C is a Z format digit, false otherwise. */ +static bool +is_z_digit (int c) +{ + return c > 0 && strchr (z_digits, c) != NULL; +} - i->v->f = u.d; - } - - return true; +/* Returns the (absolute value of the) value of C as a Z format + digit. */ +static int +z_digit_value (int c) +{ + assert (is_z_digit (c)); + return (strchr (z_digits, c) - z_digits) % 10; +} + +/* Returns true if Z format digit C represents a negative value, + false otherwise. */ +static bool +is_negative_z_digit (int c) +{ + assert (is_z_digit (c)); + return (strchr (z_digits, c) - z_digits) >= 20; } -static inline bool +/* Parses Z format. */ +static bool parse_Z (struct data_in *i) { - char buf[64]; - bool got_dot = false; + struct string tmp; - /* Warn user that we suck. */ - { - static bool warned; + int save_errno; - if (!warned) - { - msg (MW, - _("Quality of zoned decimal (Z) input format code is " - "suspect. Check your results three times. Report bugs " - "to %s."),PACKAGE_BUGREPORT); - warned = true; - } - } + bool got_dot = false; + bool got_final_digit = false; + + /* Trim spaces and check for missing value representation. */ + if (trim_spaces_and_check_missing (i)) + return true; - /* Validate input. */ - trim_whitespace (i); + ds_init_empty (&tmp); + ds_extend (&tmp, 64); - if (i->e - i->s < 2) + ds_put_char (&tmp, '+'); + while (!ss_is_empty (i->input)) { - dls_error (i, _("Zoned decimal field contains fewer than 2 " - "characters.")); - return false; + int c = ss_get_char (&i->input); + if (c_isdigit (c) && !got_final_digit) + ds_put_char (&tmp, c); + else if (is_z_digit (c) && !got_final_digit) + { + ds_put_char (&tmp, z_digit_value (c) + '0'); + if (is_negative_z_digit (c)) + ds_data (&tmp)[0] = '-'; + got_final_digit = true; + } + else if (c == '.' && !got_dot) + { + ds_put_char (&tmp, '.'); + got_dot = true; + } + else + { + ds_destroy (&tmp); + return false; + } } - /* Copy sign into buf[0]. */ - if ((i->e[-1] & 0xc0) != 0xc0) + if (!ss_is_empty (i->input)) { - dls_error (i, _("Bad sign byte in zoned decimal number.")); + if (ds_length (&tmp) == 1) + data_warning (i, _("Field contents are not numeric.")); + else + data_warning (i, _("Number followed by garbage.")); + ds_destroy (&tmp); return false; } - buf[0] = (i->e[-1] ^ (i->e[-1] >> 1)) & 0x10 ? '-' : '+'; - - /* Copy digits into buf[1 ... len - 1] and terminate string. */ - { - const char *sp; - char *dp; - for (sp = i->s, dp = buf + 1; sp < i->e - 1; sp++, dp++) - if (*sp == '.') + /* Let strtod() do the conversion. */ + save_errno = errno; + errno = 0; + i->output->f = strtod (ds_cstr (&tmp), NULL); + if (errno == ERANGE) + { + if (fabs (i->output->f) > 1) { - *dp = '.'; - got_dot = true; + data_warning (i, _("Too-large number set to system-missing.")); + i->output->f = SYSMIS; } - else if ((*sp & 0xf0) == 0xf0 && (*sp & 0xf) < 10) - *dp = (*sp & 0xf) + '0'; - else - { - dls_error (i, _("Format error in zoned decimal number.")); - return false; - } - - *dp = '\0'; - } - - /* Parse as number. */ - { - char *tail; - - i->v->f = strtod (buf, &tail); - if (tail != i->e) - { - dls_error (i, _("Error in syntax of zoned decimal number.")); - return false; - } - } - - if (!got_dot) - apply_implied_decimals (i); + else + { + data_warning (i, _("Too-small number set to zero.")); + i->output->f = 0.0; + } + } + else + { + errno = save_errno; + if (!got_dot) + apply_implied_decimals (i); + } + ds_destroy (&tmp); return true; } -static inline bool +/* Parses IB format. */ +static bool parse_IB (struct data_in *i) { -#ifndef WORDS_BIGENDIAN - char buf[64]; -#endif - const unsigned char *p; - - unsigned char xor; - - /* We want the data to be in big-endian format. If this is a - little-endian machine, reverse the byte order. */ -#ifdef WORDS_BIGENDIAN - p = (const unsigned char *) i->s; -#else - memcpy (buf, i->s, i->e - i->s); - buf_reverse (buf, i->e - i->s); - p = (const unsigned char *) buf; -#endif - - /* If the value is negative, we need to logical-NOT each value - before adding it. */ - if (p[0] & 0x80) - xor = 0xff; - else - xor = 0x00; - - { - int j; + size_t bytes; + uint64_t value; + uint64_t sign_bit; - i->v->f = 0.0; - for (j = 0; j < i->e - i->s; j++) - i->v->f = i->v->f * 256.0 + (p[j] ^ xor); - } + bytes = MIN (8, ss_length (i->input)); + value = integer_get (input_integer_format, ss_data (i->input), bytes); - /* If the value is negative, add 1 and set the sign, to complete a - two's-complement negation. */ - if (p[0] & 0x80) - i->v->f = -(i->v->f + 1.0); + sign_bit = UINT64_C(1) << (8 * bytes - 1); + if (!(value & sign_bit)) + i->output->f = value; + else + { + /* Sign-extend to full 64 bits. */ + value -= sign_bit << 1; + i->output->f = -(double) -value; + } apply_implied_decimals (i); return true; } -static inline bool +/* Parses PIB format. */ +static bool parse_PIB (struct data_in *i) { - int j; - - i->v->f = 0.0; -#if WORDS_BIGENDIAN - for (j = 0; j < i->e - i->s; j++) - i->v->f = i->v->f * 256.0 + (unsigned char) i->s[j]; -#else - for (j = i->e - i->s - 1; j >= 0; j--) - i->v->f = i->v->f * 256.0 + (unsigned char) i->s[j]; -#endif - + i->output->f = integer_get (input_integer_format, ss_data (i->input), + MIN (8, ss_length (i->input))); + apply_implied_decimals (i); return true; } -static inline bool +/* Consumes the first character of S. Stores its high 4 bits in + HIGH_NIBBLE and its low 4 bits in LOW_NIBBLE. */ +static void +get_nibbles (struct substring *s, int *high_nibble, int *low_nibble) +{ + int c = ss_get_char (s); + assert (c != EOF); + *high_nibble = (c >> 4) & 15; + *low_nibble = c & 15; +} + +/* Parses P format. */ +static bool parse_P (struct data_in *i) { - const char *cp; + int high_nibble, low_nibble; + + i->output->f = 0.0; - i->v->f = 0.0; - for (cp = i->s; cp < i->e - 1; cp++) + while (ss_length (i->input) > 1) { - i->v->f = i->v->f * 10 + ((*cp >> 4) & 15); - i->v->f = i->v->f * 10 + (*cp & 15); + get_nibbles (&i->input, &high_nibble, &low_nibble); + if (high_nibble > 9 || low_nibble > 9) + return false; + i->output->f = (100 * i->output->f) + (10 * high_nibble) + low_nibble; } - i->v->f = i->v->f * 10 + ((*cp >> 4) & 15); - if ((*cp ^ (*cp >> 1)) & 0x10) - i->v->f = -i->v->f; + + get_nibbles (&i->input, &high_nibble, &low_nibble); + if (high_nibble > 9) + return false; + i->output->f = (10 * i->output->f) + high_nibble; + if (low_nibble < 10) + i->output->f = (10 * i->output->f) + low_nibble; + else if (low_nibble == 0xb || low_nibble == 0xd) + i->output->f = -i->output->f; apply_implied_decimals (i); return true; } -static inline bool +/* Parses PK format. */ +static bool parse_PK (struct data_in *i) { - const char *cp; - - i->v->f = 0.0; - for (cp = i->s; cp < i->e; cp++) + i->output->f = 0.0; + while (!ss_is_empty (i->input)) { - i->v->f = i->v->f * 10 + ((*cp >> 4) & 15); - i->v->f = i->v->f * 10 + (*cp & 15); + int high_nibble, low_nibble; + + get_nibbles (&i->input, &high_nibble, &low_nibble); + if (high_nibble > 9 || low_nibble > 9) + { + i->output->f = SYSMIS; + return true; + } + i->output->f = (100 * i->output->f) + (10 * high_nibble) + low_nibble; } apply_implied_decimals (i); @@ -544,532 +596,412 @@ parse_PK (struct data_in *i) return true; } -static inline bool +/* Parses RB format. */ +static bool parse_RB (struct data_in *i) { - union - { - double d; - unsigned char c[sizeof (double)]; - } - u; - - memset (u.c, 0, sizeof u.c); - memcpy (u.c, i->s, min (sizeof u.c, (size_t) (i->e - i->s))); - i->v->f = u.d; + size_t size = float_get_size (input_float_format); + if (ss_length (i->input) >= size) + float_convert (input_float_format, ss_data (i->input), + FLOAT_NATIVE_DOUBLE, &i->output->f); + else + i->output->f = SYSMIS; return true; } - -static inline bool +/* Parses A format. */ +static bool parse_A (struct data_in *i) { - buf_copy_rpad (i->v->s, i->format.w, i->s, i->e - i->s); - + buf_copy_rpad (i->output->s, i->width, + ss_data (i->input), ss_length (i->input)); return true; } -static inline bool +/* Parses AHEX format. */ +static bool parse_AHEX (struct data_in *i) { - /* Validate input. */ - trim_whitespace (i); - if ((i->e - i->s) % 2) + size_t j; + + for (j = 0; ; j++) { - dls_error (i, _("Field must have even length.")); - return false; - } + int hi = ss_get_char (&i->input); + int lo = ss_get_char (&i->input); + if (hi == EOF) + break; + else if (lo == EOF) + { + data_warning (i, _("Field must have even length.")); + return false; + } - { - const char *cp; - - for (cp = i->s; cp < i->e; cp++) - if (!isxdigit ((unsigned char) *cp)) + if (!c_isxdigit (hi) || !c_isxdigit (lo)) { - dls_error (i, _("Field must contain only hex digits.")); + data_warning (i, _("Field must contain only hex digits.")); return false; } - } - - { - int j; - - /* Parse input. */ - for (j = 0; j < min (i->e - i->s, i->format.w); j += 2) - i->v->s[j / 2] = hexit_value (i->s[j]) * 16 + hexit_value (i->s[j + 1]); - memset (i->v->s + (i->e - i->s) / 2, ' ', (i->format.w - (i->e - i->s)) / 2); - } + + if (j < i->width) + i->output->s[j] = hexit_value (hi) * 16 + hexit_value (lo); + } + + memset (i->output->s + j, ' ', i->width - j); return true; } /* Date & time format components. */ -/* Advances *CP past any whitespace characters. */ -static inline void -skip_whitespace (struct data_in *i) -{ - while (isspace ((unsigned char) *i->s)) - i->s++; -} +/* Sign of a time value. */ +enum time_sign + { + SIGN_NO_TIME, /* No time yet encountered. */ + SIGN_POSITIVE, /* Positive time. */ + SIGN_NEGATIVE /* Negative time. */ + }; -static inline bool -parse_leader (struct data_in *i) +/* Parses a signed decimal integer from at most the first + MAX_DIGITS characters in I, storing the result into *RESULT. + Returns true if successful, false if no integer was + present. */ +static bool +parse_int (struct data_in *i, long *result, size_t max_digits) { - skip_whitespace (i); - return true; + struct substring head = ss_head (i->input, max_digits); + size_t n = ss_get_long (&head, result); + if (n) + { + ss_advance (&i->input, n); + return true; + } + else + { + data_warning (i, _("Syntax error in date field.")); + return false; + } } -static inline bool -force_have_char (struct data_in *i) +/* Parses a date integer between 1 and 31 from I, storing it into + *DAY. + Returns true if successful, false if no date was present. */ +static bool +parse_day (struct data_in *i, long *day) { - if (have_char (i)) + if (!parse_int (i, day, SIZE_MAX)) + return false; + if (*day >= 1 && *day <= 31) return true; - dls_error (i, _("Unexpected end of field.")); + data_warning (i, _("Day (%ld) must be between 1 and 31."), *day); return false; } +/* Parses an integer from the beginning of I. + Adds SECONDS_PER_UNIT times the absolute value of the integer + to *TIME. + If *TIME_SIGN is SIGN_NO_TIME, allows a sign to precede the + time and sets *TIME_SIGN. Otherwise, does not allow a sign. + Returns true if successful, false if no integer was present. */ static bool -parse_int (struct data_in *i, long *result) +parse_time_units (struct data_in *i, double seconds_per_unit, + enum time_sign *time_sign, double *time) + { - bool negative = false; - - if (!force_have_char (i)) - return false; + long units; - if (*i->s == '+') + if (*time_sign == SIGN_NO_TIME) { - i->s++; - force_have_char (i); + if (ss_match_char (&i->input, '-')) + *time_sign = SIGN_NEGATIVE; + else + { + ss_match_char (&i->input, '+'); + *time_sign = SIGN_POSITIVE; + } } - else if (*i->s == '-') + if (!parse_int (i, &units, SIZE_MAX)) + return false; + if (units < 0) { - negative = true; - i->s++; - force_have_char (i); + data_warning (i, _("Syntax error in date field.")); + return false; } - - if (!isdigit ((unsigned char) *i->s)) - { - dls_error (i, _("Digit expected in field.")); - return false; - } - - *result = 0; - for (;;) - { - *result = *result * 10 + (*i->s++ - '0'); - if (!have_char (i) || !isdigit ((unsigned char) *i->s)) - break; - } - - if (negative) - *result = -*result; + *time += units * seconds_per_unit; return true; } +/* Parses a data delimiter from the beginning of I. + Returns true if successful, false if no delimiter was + present. */ static bool -parse_day (struct data_in *i, long *day) +parse_date_delimiter (struct data_in *i) { - if (!parse_int (i, day)) - return false; - if (*day >= 1 && *day <= 31) + if (ss_ltrim (&i->input, ss_cstr ("-/.," CC_SPACES))) return true; - dls_error (i, _("Day (%ld) must be between 1 and 31."), *day); + data_warning (i, _("Delimiter expected between fields in date.")); return false; } -static bool -parse_day_count (struct data_in *i, long *day_count) +/* Parses spaces at the beginning of I. */ +static void +parse_spaces (struct data_in *i) { - return parse_int (i, day_count); + ss_ltrim (&i->input, ss_cstr (CC_SPACES)); } -static bool -parse_date_delimiter (struct data_in *i) +static struct substring +parse_name_token (struct data_in *i) { - bool delim = false; - - while (have_char (i) - && (*i->s == '-' || *i->s == '/' || isspace ((unsigned char) *i->s) - || *i->s == '.' || *i->s == ',')) - { - delim = true; - i->s++; - } - if (delim) - return true; - - dls_error (i, _("Delimiter expected between fields in date.")); - return false; + struct substring token; + ss_get_chars (&i->input, ss_span (i->input, ss_cstr (CC_LETTERS)), &token); + return token; } -/* Association between a name and a value. */ -struct enum_name - { - const char *name; /* Name. */ - bool can_abbreviate; /* True if name may be abbreviated. */ - int value; /* Value associated with name. */ - }; - /* Reads a name from I and sets *OUTPUT to the value associated - with that name. Returns true if successful, false otherwise. */ + with that name. If ALLOW_SUFFIXES is true, then names that + begin with one of the names are accepted; otherwise, only + exact matches (except for case) are allowed. + Returns true if successful, false otherwise. */ static bool -parse_enum (struct data_in *i, const char *what, - const struct enum_name *enum_names, - long *output) +match_name (struct substring token, const char **names, long *output) { - const char *name; - size_t length; - const struct enum_name *ep; - - /* Consume alphabetic characters. */ - name = i->s; - length = 0; - while (have_char (i) && isalpha ((unsigned char) *i->s)) - { - length++; - i->s++; - } - if (length == 0) - { - dls_error (i, _("Parse error at `%c' expecting %s."), *i->s, what); - return false; - } + int i; - for (ep = enum_names; ep->name != NULL; ep++) - if ((ep->can_abbreviate - && lex_id_match_len (ep->name, strlen (ep->name), name, length)) - || (!ep->can_abbreviate && length == strlen (ep->name) - && !buf_compare_case (name, ep->name, length))) + for (i = 1; *names != NULL; i++) + if (ss_equals_case (ss_cstr (*names++), token)) { - *output = ep->value; + *output = i; return true; } - - dls_error (i, _("Unknown %s `%.*s'."), what, (int) length, name); + return false; } +/* Parses a month name or number from the beginning of I, + storing the month (in range 1...12) into *MONTH. + Returns true if successful, false if no month was present. */ static bool parse_month (struct data_in *i, long *month) { - static const struct enum_name month_names[] = - { - {"january", true, 1}, - {"february", true, 2}, - {"march", true, 3}, - {"april", true, 4}, - {"may", true, 5}, - {"june", true, 6}, - {"july", true, 7}, - {"august", true, 8}, - {"september", true, 9}, - {"october", true, 10}, - {"november", true, 11}, - {"december", true, 12}, - - {"i", false, 1}, - {"ii", false, 2}, - {"iii", false, 3}, - {"iv", false, 4}, - {"iiii", false, 4}, - {"v", false, 5}, - {"vi", false, 6}, - {"vii", false, 7}, - {"viii", false, 8}, - {"ix", false, 9}, - {"viiii", false, 9}, - {"x", false, 10}, - {"xi", false, 11}, - {"xii", false, 12}, - - {NULL, false, 0}, - }; - - if (!force_have_char (i)) - return false; - - if (isdigit ((unsigned char) *i->s)) + if (c_isdigit (ss_first (i->input))) { - if (!parse_int (i, month)) + if (!parse_int (i, month, SIZE_MAX)) return false; if (*month >= 1 && *month <= 12) - return true; - - dls_error (i, _("Month (%ld) must be between 1 and 12."), *month); - return false; + return true; } else - return parse_enum (i, _("month"), month_names, month); + { + static const char *english_names[] = + { + "jan", "feb", "mar", "apr", "may", "jun", + "jul", "aug", "sep", "oct", "nov", "dec", + NULL, + }; + + static const char *roman_names[] = + { + "i", "ii", "iii", "iv", "v", "vi", + "vii", "viii", "ix", "x", "xi", "xii", + NULL, + }; + + struct substring token = parse_name_token (i); + if (match_name (ss_head (token, 3), english_names, month) + || match_name (ss_head (token, 4), roman_names, month)) + return true; + } + + data_warning (i, _("Unrecognized month format. Months may be specified " + "as Arabic or Roman numerals or as at least 3 letters " + "of their English names.")); + return false; } +/* Parses a year of at most MAX_DIGITS from the beginning of I, + storing a "4-digit" year into *YEAR. */ static bool -parse_year (struct data_in *i, long *year) +parse_year (struct data_in *i, long *year, size_t max_digits) { - if (!parse_int (i, year)) + if (!parse_int (i, year, max_digits)) return false; - if (*year >= 0 && *year <= 199) - *year += 1900; + if (*year >= 0 && *year <= 99) + { + int epoch = get_epoch (); + int epoch_century = ROUND_DOWN (epoch, 100); + int epoch_offset = epoch - epoch_century; + if (*year >= epoch_offset) + *year += epoch_century; + else + *year += epoch_century + 100; + } if (*year >= 1582 || *year <= 19999) return true; - dls_error (i, _("Year (%ld) must be between 1582 and 19999."), *year); + data_warning (i, _("Year (%ld) must be between 1582 and 19999."), *year); return false; } +/* Returns true if input in I has been exhausted, + false otherwise. */ static bool parse_trailer (struct data_in *i) { - skip_whitespace (i); - if (!have_char (i)) + if (ss_is_empty (i->input)) return true; - dls_error (i, _("Trailing garbage \"%.*s\" following date."), - (int) (i->e - i->s), i->s); + data_warning (i, _("Trailing garbage \"%.*s\" following date."), + (int) ss_length (i->input), ss_data (i->input)); return false; } +/* Parses a 3-digit Julian day-of-year value from I into *YDAY. + Returns true if successful, false on failure. */ static bool -parse_julian (struct data_in *i, long *julian) +parse_yday (struct data_in *i, long *yday) { - if (!parse_int (i, julian)) - return false; - - { - int day = *julian % 1000; + struct substring num_s; + long num; - if (day < 1 || day > 366) - { - dls_error (i, _("Julian day (%d) must be between 1 and 366."), day); - return false; - } - } - - { - int year = *julian / 1000; - - if (year >= 0 && year <= 199) - *julian += 1900000L; - else if (year < 1582 || year > 19999) - { - dls_error (i, _("Year (%d) must be between 1582 and 19999."), year); - return false; - } - } - - return true; -} - -static bool -parse_quarter (struct data_in *i, long *quarter) -{ - if (!parse_int (i, quarter)) - return false; - if (*quarter >= 1 && *quarter <= 4) - return true; - - dls_error (i, _("Quarter (%ld) must be between 1 and 4."), *quarter); - return false; -} - -static bool -parse_q_delimiter (struct data_in *i) -{ - skip_whitespace (i); - if (!have_char (i) || tolower ((unsigned char) *i->s) != 'q') + ss_get_chars (&i->input, 3, &num_s); + if (ss_span (num_s, ss_cstr (CC_DIGITS)) != 3) { - dls_error (i, _("`Q' expected between quarter and year.")); + data_warning (i, _("Julian day must have exactly three digits.")); return false; } - i->s++; - skip_whitespace (i); - return true; -} - -static bool -parse_week (struct data_in *i, long *week) -{ - if (!parse_int (i, week)) - return false; - if (*week >= 1 && *week <= 53) - return true; - - dls_error (i, _("Week (%ld) must be between 1 and 53."), *week); - return false; -} - -static bool -parse_wk_delimiter (struct data_in *i) -{ - skip_whitespace (i); - if (i->s + 1 >= i->e - || tolower ((unsigned char) i->s[0]) != 'w' - || tolower ((unsigned char) i->s[1]) != 'k') + else if (!ss_get_long (&num_s, &num) || num < 1 || num > 366) { - dls_error (i, _("`WK' expected between week and year.")); + data_warning (i, _("Julian day (%ld) must be between 1 and 366."), num); return false; } - i->s += 2; - skip_whitespace (i); + + *yday = num; return true; } +/* Parses a quarter-of-year integer between 1 and 4 from I. + Stores the corresponding month into *MONTH. + Returns true if successful, false if no quarter was present. */ static bool -parse_time_delimiter (struct data_in *i) +parse_quarter (struct data_in *i, long int *month) { - bool delim = false; - - while (have_char (i) && (*i->s == ':' || *i->s == '.' - || isspace ((unsigned char) *i->s))) + long quarter; + + if (!parse_int (i, &quarter, SIZE_MAX)) + return false; + if (quarter >= 1 && quarter <= 4) { - delim = true; - i->s++; + *month = (quarter - 1) * 3 + 1; + return true; } - if (delim) - return true; - - dls_error (i, _("Delimiter expected between fields in time.")); + data_warning (i, _("Quarter (%ld) must be between 1 and 4."), quarter); return false; } +/* Parses a week-of-year integer between 1 and 53 from I, + Stores the corresponding year-of-day into *YDAY. + Returns true if successful, false if no week was present. */ static bool -parse_hour (struct data_in *i, long *hour) +parse_week (struct data_in *i, long int *yday) { - if (!parse_int (i, hour)) - return false; - if (*hour >= 0) - return true; + long week; - dls_error (i, _("Hour (%ld) must be positive."), *hour); + if (!parse_int (i, &week, SIZE_MAX)) + return false; + if (week >= 1 && week <= 53) + { + *yday = (week - 1) * 7 + 1; + return true; + } + + data_warning (i, _("Week (%ld) must be between 1 and 53."), week); return false; } +/* Parses a time delimiter from the beginning of I. + Returns true if successful, false if no delimiter was + present. */ static bool -parse_minute (struct data_in *i, long *minute) +parse_time_delimiter (struct data_in *i) { - if (!parse_int (i, minute)) - return false; - if (*minute >= 0 && *minute <= 59) + if (ss_ltrim (&i->input, ss_cstr (":" CC_SPACES)) > 0) return true; - dls_error (i, _("Minute (%ld) must be between 0 and 59."), *minute); + data_warning (i, _("Delimiter expected between fields in time.")); return false; } +/* Parses minutes and optional seconds from the beginning of I. + The time is converted into seconds, which are added to + *TIME. + Returns true if successful, false if an error was found. */ static bool -parse_opt_second (struct data_in *i, double *second) +parse_minute_second (struct data_in *i, double *time) { - bool delim = false; - + long minute; char buf[64]; char *cp; - while (have_char (i) - && (*i->s == ':' || *i->s == '.' || isspace ((unsigned char) *i->s))) - { - delim = true; - i->s++; - } - - if (!delim || !isdigit ((unsigned char) *i->s)) + /* Parse minutes. */ + if (!parse_int (i, &minute, SIZE_MAX)) + return false; + if (minute < 0 || minute > 59) { - *second = 0.0; - return true; + data_warning (i, _("Minute (%ld) must be between 0 and 59."), minute); + return false; } + *time += 60. * minute; + /* Check for seconds. */ + if (ss_ltrim (&i->input, ss_cstr (":" CC_SPACES)) == 0 + || !c_isdigit (ss_first (i->input))) + return true; + + /* Parse seconds. */ cp = buf; - while (have_char (i) && isdigit ((unsigned char) *i->s)) - *cp++ = *i->s++; - if (have_char (i) && *i->s == '.') - *cp++ = *i->s++; - while (have_char (i) && isdigit ((unsigned char) *i->s)) - *cp++ = *i->s++; + while (c_isdigit (ss_first (i->input))) + *cp++ = ss_get_char (&i->input); + if (ss_match_char (&i->input, fmt_decimal_char (FMT_F))) + *cp++ = '.'; + while (c_isdigit (ss_first (i->input))) + *cp++ = ss_get_char (&i->input); *cp = '\0'; - *second = strtod (buf, NULL); + *time += strtod (buf, NULL); return true; } -static bool -parse_hour24 (struct data_in *i, long *hour24) -{ - if (!parse_int (i, hour24)) - return false; - if (*hour24 >= 0 && *hour24 <= 23) - return true; - - dls_error (i, _("Hour (%ld) must be between 0 and 23."), *hour24); - return false; -} - - +/* Parses a weekday name from the beginning of I, + storing a value of 1=Sunday...7=Saturday into *WEEKDAY. + Returns true if successful, false if an error was found. */ static bool parse_weekday (struct data_in *i, long *weekday) { - static const struct enum_name weekday_names[] = + static const char *weekday_names[] = { - {"sunday", true, 1}, - {"su", true, 1}, - {"monday", true, 2}, - {"mo", true, 2}, - {"tuesday", true, 3}, - {"tu", true, 3}, - {"wednesday", true, 4}, - {"we", true, 4}, - {"thursday", true, 5}, - {"th", true, 5}, - {"friday", true, 6}, - {"fr", true, 6}, - {"saturday", true, 7}, - {"sa", true, 7}, - - {NULL, false, 0}, + "su", "mo", "tu", "we", "th", "fr", "sa", + NULL, }; - return parse_enum (i, _("weekday"), weekday_names, weekday); -} - -static bool -parse_spaces (struct data_in *i) -{ - skip_whitespace (i); - return true; -} - -static bool -parse_sign (struct data_in *i, int *sign) -{ - if (!force_have_char (i)) - return false; - - switch (*i->s) - { - case '-': - i->s++; - *sign = -1; - break; - - case '+': - i->s++; - /* fall through */ - - default: - *sign = 1; - break; - } - - return true; + struct substring token = parse_name_token (i); + bool ok = match_name (ss_head (token, 2), weekday_names, weekday); + if (!ok) + data_warning (i, _("Unrecognized weekday name. At least the first two " + "letters of an English weekday name must be " + "specified.")); + return ok; } /* Date & time formats. */ +/* Helper function for passing to + calendar_gregorian_to_offset. */ static void calendar_error (void *i_, const char *format, ...) { @@ -1077,423 +1009,240 @@ calendar_error (void *i_, const char *format, ...) va_list args; va_start (args, format); - vdls_error (i, format, args); + vdata_warning (i, format, args); va_end (args); } +/* Parses WKDAY format. */ static bool -ymd_to_ofs (struct data_in *i, int year, int month, int day, double *ofs) +parse_WKDAY (struct data_in *i) { - *ofs = calendar_gregorian_to_offset (year, month, day, calendar_error, i); - return *ofs != SYSMIS; -} + long weekday; -static bool -ymd_to_date (struct data_in *i, int year, int month, int day, double *date) -{ - if (ymd_to_ofs (i, year, month, day, date)) - { - *date *= 60. * 60. * 24.; - return true; - } - else - return false; -} + if (trim_spaces_and_check_missing (i)) + return true; -static bool -parse_DATE (struct data_in *i) -{ - long day, month, year; - - return (parse_leader (i) - && parse_day (i, &day) - && parse_date_delimiter (i) - && parse_month (i, &month) - && parse_date_delimiter (i) - && parse_year (i, &year) - && parse_trailer (i) - && ymd_to_date (i, year, month, day, &i->v->f)); -} + if (!parse_weekday (i, &weekday) + || !parse_trailer (i)) + return false; -static bool -parse_ADATE (struct data_in *i) -{ - long month, day, year; - - return (parse_leader (i) - && parse_month (i, &month) - && parse_date_delimiter (i) - && parse_day (i, &day) - && parse_date_delimiter (i) - && parse_year (i, &year) - && parse_trailer (i) - && ymd_to_date (i, year, month, day, &i->v->f)); + i->output->f = weekday; + return true; } +/* Parses MONTH format. */ static bool -parse_EDATE (struct data_in *i) +parse_MONTH (struct data_in *i) { - long month, day, year; - - return (parse_leader (i) - && parse_day (i, &day) - && parse_date_delimiter (i) - && parse_month (i, &month) - && parse_date_delimiter (i) - && parse_year (i, &year) - && parse_trailer (i) - && ymd_to_date (i, year, month, day, &i->v->f)); -} + long month; -static bool -parse_SDATE (struct data_in *i) -{ - long month, day, year; - - return (parse_leader (i) - && parse_year (i, &year) - && parse_date_delimiter (i) - && parse_month (i, &month) - && parse_date_delimiter (i) - && parse_day (i, &day) - && parse_trailer (i) - && ymd_to_date (i, year, month, day, &i->v->f)); -} + if (trim_spaces_and_check_missing (i)) + return true; -static bool -parse_JDATE (struct data_in *i) -{ - long julian; - double ofs; - - if (!parse_leader (i) - || !parse_julian (i, &julian) - || !parse_trailer (i) - || !ymd_to_ofs (i, julian / 1000, 1, 1, &ofs)) + if (!parse_month (i, &month) + || !parse_trailer (i)) return false; - i->v->f = (ofs + julian % 1000 - 1) * 60. * 60. * 24.; + i->output->f = month; return true; } +/* Parses DATE, ADATE, EDATE, JDATE, SDATE, QYR, MOYR, KWYR, + DATETIME, TIME and DTIME formats. */ static bool -parse_QYR (struct data_in *i) +parse_date (struct data_in *i) { - long quarter, year; - - return (parse_leader (i) - && parse_quarter (i, &quarter) - && parse_q_delimiter (i) - && parse_year (i, &year) - && parse_trailer (i) - && ymd_to_date (i, year, (quarter - 1) * 3 + 1, 1, &i->v->f)); -} + long int year = INT_MIN; + long int month = 1; + long int day = 1; + long int yday = 1; + double time = 0, date = 0; + enum time_sign time_sign = SIGN_NO_TIME; -static bool -parse_MOYR (struct data_in *i) -{ - long month, year; - - return (parse_leader (i) - && parse_month (i, &month) - && parse_date_delimiter (i) - && parse_year (i, &year) - && parse_trailer (i) - && ymd_to_date (i, year, month, 1, &i->v->f)); -} + const char *template = fmt_date_template (i->format); + size_t template_width = strlen (template); -static bool -parse_WKYR (struct data_in *i) -{ - long week, year; - double ofs; - - if (!parse_leader (i) - || !parse_week (i, &week) - || !parse_wk_delimiter (i) - || !parse_year (i, &year) - || !parse_trailer (i)) - return false; + if (trim_spaces_and_check_missing (i)) + return true; - if (year != 1582) + while (*template != '\0') { - if (!ymd_to_ofs (i, year, 1, 1, &ofs)) + unsigned char ch = *template; + int count = 1; + bool ok; + + while (template[count] == ch) + count++; + template += count; + + ok = true; + switch (ch) + { + case 'd': + ok = count < 3 ? parse_day (i, &day) : parse_yday (i, &yday); + break; + case 'm': + ok = parse_month (i, &month); + break; + case 'y': + { + size_t max_digits; + if (!c_isalpha (*template)) + max_digits = SIZE_MAX; + else + { + if (ss_length (i->input) >= template_width + 2) + max_digits = 4; + else + max_digits = 2; + } + ok = parse_year (i, &year, max_digits); + } + break; + case 'q': + ok = parse_quarter (i, &month); + break; + case 'w': + ok = parse_week (i, &yday); + break; + case 'D': + ok = parse_time_units (i, 60. * 60. * 24., &time_sign, &time); + break; + case 'H': + ok = parse_time_units (i, 60. * 60., &time_sign, &time); + break; + case 'M': + ok = parse_minute_second (i, &time); + break; + case '-': + case '/': + case '.': + case 'X': + ok = parse_date_delimiter (i); + break; + case ':': + ok = parse_time_delimiter (i); + case ' ': + parse_spaces (i); + break; + default: + assert (count == 1); + if (!ss_match_char (&i->input, c_toupper (ch)) + && !ss_match_char (&i->input, c_tolower (ch))) + { + data_warning (i, _("`%c' expected in date field."), ch); + return false; + } + break; + } + if (!ok) return false; } - else + if (!parse_trailer (i)) + return false; + + if (year != INT_MIN) { - if (ymd_to_ofs (i, 1583, 1, 1, &ofs)) + double ofs = calendar_gregorian_to_offset (year, month, day, + calendar_error, i); + if (ofs == SYSMIS) return false; - ofs -= 365; + date = (yday - 1 + ofs) * 60. * 60. * 24.; } + else + date = 0.; + i->output->f = date + (time_sign == SIGN_NEGATIVE ? -time : time); - i->v->f = (ofs + (week - 1) * 7) * 60. * 60. * 24.; - return true; -} - -static bool -parse_TIME (struct data_in *i) -{ - int sign; - double second; - long hour, minute; - - if (!parse_leader (i) - || !parse_sign (i, &sign) - || !parse_spaces (i) - || !parse_hour (i, &hour) - || !parse_time_delimiter (i) - || !parse_minute (i, &minute) - || !parse_opt_second (i, &second)) - return false; - - i->v->f = (hour * 60. * 60. + minute * 60. + second) * sign; return true; } + +/* Utility functions. */ -static bool -parse_DTIME (struct data_in *i) +/* Outputs FORMAT with the given ARGS as a warning for input + I. */ +static void +vdata_warning (const struct data_in *i, const char *format, va_list args) { - int sign; - long day_count, hour; - double second; - long minute; - - if (!parse_leader (i) - || !parse_sign (i, &sign) - || !parse_spaces (i) - || !parse_day_count (i, &day_count) - || !parse_time_delimiter (i) - || !parse_hour (i, &hour) - || !parse_time_delimiter (i) - || !parse_minute (i, &minute) - || !parse_opt_second (i, &second)) - return false; - - i->v->f = (day_count * 60. * 60. * 24. - + hour * 60. * 60. - + minute * 60. - + second) * sign; - return true; -} + struct msg m; + struct string text; -static bool -parse_DATETIME (struct data_in *i) -{ - long day, month, year; - long hour24; - double second; - long minute; + ds_init_empty (&text); + ds_put_char (&text, '('); + if (i->first_column != 0) + { + if (i->first_column == i->last_column) + ds_put_format (&text, _("column %d"), i->first_column); + else + ds_put_format (&text, _("columns %d-%d"), + i->first_column, i->last_column); + ds_put_cstr (&text, ", "); + } + ds_put_format (&text, _("%s field) "), fmt_name (i->format)); + ds_put_vformat (&text, format, args); - if (!parse_leader (i) - || !parse_day (i, &day) - || !parse_date_delimiter (i) - || !parse_month (i, &month) - || !parse_date_delimiter (i) - || !parse_year (i, &year) - || !parse_time_delimiter (i) - || !parse_hour24 (i, &hour24) - || !parse_time_delimiter (i) - || !parse_minute (i, &minute) - || !parse_opt_second (i, &second) - || !ymd_to_date (i, year, month, day, &i->v->f)) - return false; + m.category = MSG_DATA; + m.severity = MSG_WARNING; + m.text = ds_cstr (&text); - i->v->f += hour24 * 60. * 60. + minute * 60. + second; - return true; + msg_emit (&m); } -static bool -parse_WKDAY (struct data_in *i) +/* Outputs FORMAT with the given ARGS as a warning for input + I. */ +static void +data_warning (const struct data_in *i, const char *format, ...) { - long weekday; - - if (!parse_leader (i) - || !parse_weekday (i, &weekday) - || !parse_trailer (i)) - return false; + va_list args; - i->v->f = weekday; - return true; + va_start (args, format); + vdata_warning (i, format, args); + va_end (args); } -static bool -parse_MONTH (struct data_in *i) +/* Apply implied decimal places to output. */ +static void +apply_implied_decimals (struct data_in *i) { - long month; - - if (!parse_leader (i) - || !parse_month (i, &month) - || !parse_trailer (i)) - return false; - - i->v->f = month; - return true; + if (i->implied_decimals > 0) + i->output->f /= pow (10., i->implied_decimals); } - -/* Main dispatcher. */ +/* Sets the default result for I. + For a numeric format, this is the value set on SET BLANKS + (typically system-missing); for a string format, it is all + spaces. */ static void default_result (struct data_in *i) { - /* Default to SYSMIS or blanks. */ - if (fmt_is_string (i->format.type)) - memset (i->v->s, ' ', i->format.w); + if (fmt_is_string (i->format)) + memset (i->output->s, ' ', i->width); else - i->v->f = get_blanks(); + i->output->f = get_blanks (); } -bool -data_in (struct data_in *i) +/* Trims leading and trailing spaces from I. + If the result is empty, or a single period character, then + sets the default result and returns true; otherwise, returns + false. */ +static bool +trim_spaces_and_check_missing (struct data_in *i) { - bool success; - - assert (fmt_check_input (&i->format)); - - /* Check that we've got a string to work with. */ - if (i->e == i->s || i->format.w <= 0) + ss_trim (&i->input, ss_cstr (" ")); + if (ss_is_empty (i->input) || ss_equals (i->input, ss_cstr ("."))) { default_result (i); return true; } - - i->f2 = i->f1 + (i->e - i->s) - 1; - - /* Make sure that the string isn't too long. */ - if (i->format.w > fmt_max_input_width (i->format.type)) - { - dls_error (i, _("Field too long (%d characters). Truncated after " - "character %d."), - i->format.w, fmt_max_input_width (i->format.type)); - i->format.w = fmt_max_input_width (i->format.type); - } - - if (!(fmt_get_category (i->format.type) - & (FMT_CAT_STRING | FMT_CAT_BINARY | FMT_CAT_HEXADECIMAL))) - { - const char *cp; - - cp = i->s; - for (;;) - { - if (!isspace ((unsigned char) *cp)) - break; - - if (++cp == i->e) - { - i->v->f = get_blanks(); - return true; - } - } - } - - - switch (i->format.type) - { - case FMT_F: - case FMT_COMMA: - case FMT_DOT: - case FMT_DOLLAR: - case FMT_PCT: - case FMT_E: - success = parse_numeric (i); - break; - case FMT_CCA: - case FMT_CCB: - case FMT_CCC: - case FMT_CCD: - case FMT_CCE: - NOT_REACHED (); - case FMT_N: - success = parse_N (i); - break; - case FMT_Z: - success = parse_Z (i); - break; - case FMT_P: - success = parse_P (i); - break; - case FMT_PK: - success = parse_PK (i); - break; - case FMT_IB: - success = parse_IB (i); - break; - case FMT_PIB: - success = parse_PIB (i); - break; - case FMT_PIBHEX: - success = parse_PIBHEX (i); - break; - case FMT_RB: - success = parse_RB (i); - break; - case FMT_RBHEX: - success = parse_RBHEX (i); - break; - case FMT_DATE: - success = parse_DATE (i); - break; - case FMT_ADATE: - success = parse_ADATE (i); - break; - case FMT_EDATE: - success = parse_EDATE (i); - break; - case FMT_JDATE: - success = parse_JDATE (i); - break; - case FMT_SDATE: - success = parse_SDATE (i); - break; - case FMT_QYR: - success = parse_QYR (i); - break; - case FMT_MOYR: - success = parse_MOYR (i); - break; - case FMT_WKYR: - success = parse_WKYR (i); - break; - case FMT_DATETIME: - success = parse_DATETIME (i); - break; - case FMT_TIME: - success = parse_TIME (i); - break; - case FMT_DTIME: - success = parse_DTIME (i); - break; - case FMT_WKDAY: - success = parse_WKDAY (i); - break; - case FMT_MONTH: - success = parse_MONTH (i); - break; - case FMT_A: - success = parse_A (i); - break; - case FMT_AHEX: - success = parse_AHEX (i); - break; - default: - NOT_REACHED (); - } - if (!success) - default_result (i); - - return success; + return false; } - -/* Utility function. */ -/* Sets DI->{s,e} appropriately given that LINE has length LEN and the - field starts at one-based column FC and ends at one-based column - LC, inclusive. */ -void -data_in_finite_line (struct data_in *di, const char *line, size_t len, - int fc, int lc) +/* Returns the integer value of hex digit C. */ +static int +hexit_value (int c) { - di->s = line + ((size_t) fc <= len ? fc - 1 : len); - di->e = line + ((size_t) lc <= len ? lc : len); + const char s[] = "0123456789abcdef"; + const char *cp = strchr (s, c_tolower ((unsigned char) c)); + + assert (cp != NULL); + return cp - s; } diff --git a/src/data/data-in.h b/src/data/data-in.h index 287b2fbe..6799a60d 100644 --- a/src/data/data-in.h +++ b/src/data/data-in.h @@ -1,5 +1,5 @@ /* PSPP - computes sample statistics. - Copyright (C) 1997-9, 2000 Free Software Foundation, Inc. + Copyright (C) 1997-9, 2000, 2006 Free Software Foundation, Inc. Written by Ben Pfaff . This program is free software; you can redistribute it and/or @@ -22,31 +22,19 @@ #include #include +#include +#include +#include #include "format.h" -/* Flags. */ -enum - { - DI_IGNORE_ERROR = 01, /* Don't report errors to the user. */ - DI_IMPLIED_DECIMALS = 02 /* Insert decimals if no '.' in input. */ - }; +enum integer_format data_in_get_integer_format (void); +void data_in_set_integer_format (enum integer_format); -/* Information about parsing one data field. */ -struct data_in - { - const char *s; /* Source start. */ - const char *e; /* Source end. */ +enum float_format data_in_get_float_format (void); +void data_in_set_float_format (enum float_format); - union value *v; /* Destination. */ - - int flags; /* Zero or more of DI_*. */ - int f1, f2; /* Columns the field was taken from. */ - struct fmt_spec format; /* Format specification to use. */ - }; - -bool data_in (struct data_in *); - -void data_in_finite_line (struct data_in *di, const char *line, size_t len, - int fc, int lc); +bool data_in (struct substring input, + enum fmt_type, int implied_decimals, int first_column, + union value *output, int width); #endif /* data-in.h */ diff --git a/src/data/data-out.c b/src/data/data-out.c index 6622c7d4..8b94da03 100644 --- a/src/data/data-out.c +++ b/src/data/data-out.c @@ -314,7 +314,6 @@ output_date (const union value *input, const struct fmt_spec *format, char *output) { double number = input->f; - double magnitude = fabs (number); int year, month, day, yday; const char *template = fmt_date_template (format->type); @@ -334,6 +333,7 @@ output_date (const union value *input, const struct fmt_spec *format, goto missing; calendar_offset_to_gregorian (number / 60. / 60. / 24., &year, &month, &day, &yday); + number = fmod (number, 60. * 60. * 24.); } else year = month = day = yday = 0; @@ -392,33 +392,33 @@ output_date (const union value *input, const struct fmt_spec *format, p += sprintf (p, "%2d", (yday - 1) / 7 + 1); break; case 'D': - if (number < 0) + if (number < 0) *p++ = '-'; - p += sprintf (p, "%.0f", floor (magnitude / 60. / 60. / 24.)); + number = fabs (number); + p += sprintf (p, "%*.0f", count, floor (number / 60. / 60. / 24.)); + number = fmod (number, 60. * 60. * 24.); break; - case 'h': + case 'H': if (number < 0) *p++ = '-'; - p += sprintf (p, "%.0f", floor (magnitude / 60. / 60.)); - break; - case 'H': - p += sprintf (p, "%02d", - (int) fmod (floor (magnitude / 60. / 60.), 24.)); + number = fabs (number); + p += sprintf (p, "%0*.0f", count, floor (number / 60. / 60.)); + number = fmod (number, 60. * 60.); break; case 'M': - p += sprintf (p, "%02d", - (int) fmod (floor (magnitude / 60.), 60.)); + p += sprintf (p, "%02d", (int) floor (number / 60.)); + number = fmod (number, 60.); excess_width = format->w - (p - tmp); if (excess_width < 0) goto overflow; if (excess_width == 3 || excess_width == 4 || (excess_width >= 5 && format->d == 0)) - p += sprintf (p, ":%02d", (int) fmod (magnitude, 60.)); + p += sprintf (p, ":%02d", (int) number); else if (excess_width >= 5) { int d = MIN (format->d, excess_width - 4); int w = d + 3; - sprintf (p, ":%0*.*f", w, d, fmod (magnitude, 60.)); + sprintf (p, ":%0*.*f", w, d, number); if (fmt_decimal_char (FMT_F) != '.') { char *cp = strchr (p, '.'); @@ -428,6 +428,9 @@ output_date (const union value *input, const struct fmt_spec *format, p += strlen (p); } break; + case 'X': + *p++ = ' '; + break; default: assert (count == 1); *p++ = ch; @@ -658,9 +661,7 @@ output_scientific (double number, const struct fmt_spec *format, decimal point without any digits following; that's what the # flag does in the call to sprintf, below.) */ fraction_width = MIN (MIN (format->d + 1, format->w - width), 16); - if (format->type != FMT_E - && (fraction_width == 1 - || format->w - width + (style->grouping == 0 && number < 0) <= 2)) + if (format->type != FMT_E && fraction_width == 1) fraction_width = 0; width += fraction_width; diff --git a/src/data/format.c b/src/data/format.c index e236aa31..2a0092fc 100644 --- a/src/data/format.c +++ b/src/data/format.c @@ -565,13 +565,13 @@ fmt_date_template (enum fmt_type type) case FMT_QYR: return "q Q yy"; case FMT_MOYR: - return "mmm yy"; + return "mmmXyy"; case FMT_WKYR: return "ww WK yy"; case FMT_DATETIME: return "dd-mmm-yyyy HH:MM"; case FMT_TIME: - return "h:MM"; + return "H:MM"; case FMT_DTIME: return "D HH:MM"; default: diff --git a/src/language/data-io/ChangeLog b/src/language/data-io/ChangeLog index 099c4c5e..b5f8545f 100644 --- a/src/language/data-io/ChangeLog +++ b/src/language/data-io/ChangeLog @@ -1,3 +1,12 @@ +Sun Nov 19 09:17:45 2006 Ben Pfaff + + * data-list.c (parse_free): Follow documented (but odd) rule that + N format is treated as F format for free-field input. + + * data-reader.c (read_file_record): Drop new-line character from + input text lines. This is symmetrical with the recently changed + dfm_put_record semantics. + Thu Nov 2 20:56:03 2006 Ben Pfaff Implement SKIP keyword on DATA LIST. Fixes bug #17099. diff --git a/src/language/data-io/data-list.c b/src/language/data-io/data-list.c index 42837c15..1d55d8b2 100644 --- a/src/language/data-io/data-list.c +++ b/src/language/data-io/data-list.c @@ -469,6 +469,12 @@ parse_free (struct lexer *lexer, struct dictionary *dict, struct pool *tmp_pool, || !fmt_check_input (&input) || !lex_force_match (lexer, ')')) return NULL; + + /* As a special case, N format is treated as F format + for free-field input. */ + if (input.type == FMT_N) + input.type = FMT_F; + output = fmt_for_output_from_input (&input); } else @@ -672,19 +678,9 @@ read_from_data_list_fixed (const struct data_list_pgm *dls, struct ccase *c) line = dfm_get_record (dls->reader); ll_for_each_continue (spec, struct dls_var_spec, ll, &dls->specs) - { - struct data_in di; - - data_in_finite_line (&di, ss_data (line), ss_length (line), - spec->first_column, - spec->first_column + spec->input.w - 1); - di.v = case_data_rw (c, spec->fv); - di.flags = DI_IMPLIED_DECIMALS; - di.f1 = spec->first_column; - di.format = spec->input; - - data_in (&di); - } + data_in (ss_substr (line, spec->first_column - 1, spec->input.w), + spec->input.type, spec->input.d, spec->first_column, + case_data_rw (c, spec->fv), fmt_var_width (&spec->input)); dfm_forward_record (dls->reader); } @@ -703,7 +699,6 @@ read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c) ll_for_each (spec, struct dls_var_spec, ll, &dls->specs) { struct substring field; - struct data_in di; /* Cut out a field and read in a new record if necessary. */ while (!cut_field (dls, &field)) @@ -719,13 +714,9 @@ read_from_data_list_free (const struct data_list_pgm *dls, struct ccase *c) } } - di.s = ss_data (field); - di.e = ss_end (field); - di.v = case_data_rw (c, spec->fv); - di.flags = 0; - di.f1 = dfm_get_column (dls->reader, ss_data (field)); - di.format = spec->input; - data_in (&di); + data_in (field, spec->input.type, 0, + dfm_get_column (dls->reader, ss_data (field)), + case_data_rw (c, spec->fv), fmt_var_width (&spec->input)); } return true; } @@ -744,7 +735,6 @@ read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c) ll_for_each (spec, struct dls_var_spec, ll, &dls->specs) { struct substring field; - struct data_in di; if (!cut_field (dls, &field)) { @@ -764,13 +754,9 @@ read_from_data_list_list (const struct data_list_pgm *dls, struct ccase *c) break; } - di.s = ss_data (field); - di.e = ss_end (field); - di.v = case_data_rw (c, spec->fv); - di.flags = 0; - di.f1 = dfm_get_column (dls->reader, ss_data (field)); - di.format = spec->input; - data_in (&di); + data_in (field, spec->input.type, 0, + dfm_get_column (dls->reader, ss_data (field)), + case_data_rw (c, spec->fv), fmt_var_width (&spec->input)); } dfm_forward_record (dls->reader); diff --git a/src/language/data-io/data-reader.c b/src/language/data-io/data-reader.c index 0bff8696..cd065ddb 100644 --- a/src/language/data-io/data-reader.c +++ b/src/language/data-io/data-reader.c @@ -210,6 +210,7 @@ read_file_record (struct dfm_reader *r) fh_get_name (r->fh), strerror (errno)); return false; } + ds_chomp (&r->line, '\n'); } else if (fh_get_mode (r->fh) == FH_MODE_BINARY) { diff --git a/src/language/data-io/matrix-data.c b/src/language/data-io/matrix-data.c index a1e5d218..6283279c 100644 --- a/src/language/data-io/matrix-data.c +++ b/src/language/data-io/matrix-data.c @@ -865,17 +865,9 @@ static int s = ss_buffer (start, ss_data (p) - start); if (is_num) - { - struct data_in di; - - di.s = ss_data (s); - di.e = ss_end (s); - di.v = (union value *) &token->number; - di.f1 = dfm_get_column (reader, di.s); - di.format = fmt_for_output (FMT_F, token->length, 0); - - data_in (&di); - } + data_in (s, FMT_F, 0, + dfm_get_column (reader, ss_data (s)), + (union value *) &token->number, 0); else token->type = MSTR; } diff --git a/src/language/dictionary/missing-values.c b/src/language/dictionary/missing-values.c index f6836996..6735d0e5 100644 --- a/src/language/dictionary/missing-values.c +++ b/src/language/dictionary/missing-values.c @@ -84,7 +84,7 @@ cmd_missing_values (struct lexer *lexer, struct dataset *ds) double x, y; bool ok; - if (!parse_num_range (lexer, &x, &y, &v[0]->print)) + if (!parse_num_range (lexer, &x, &y, &v[0]->print.type)) goto done; ok = (x == y diff --git a/src/language/expressions/evaluate.c b/src/language/expressions/evaluate.c index b597fd4c..5395384a 100644 --- a/src/language/expressions/evaluate.c +++ b/src/language/expressions/evaluate.c @@ -90,8 +90,6 @@ expr_evaluate_num (struct expression *e, const struct ccase *c, int case_idx) return d; } - - void expr_evaluate_str (struct expression *e, const struct ccase *c, int case_idx, char *dst, size_t dst_size) diff --git a/src/language/expressions/operations.def b/src/language/expressions/operations.def index 6b4d5221..85e4d6c4 100644 --- a/src/language/expressions/operations.def +++ b/src/language/expressions/operations.def @@ -566,15 +566,8 @@ string function RTRIM (string s, string c) function NUMBER (string s, ni_format f) { - struct data_in di; union value out; - di.s = s.string; - di.v = &out; - di.flags = DI_IMPLIED_DECIMALS; - di.f1 = 1; - di.format = *f; - di.e = s.string + min (s.length, di.format.w); - data_in (&di); + data_in (ss_head (s, f->w), f->type, f->d, 0, &out, 0); return out.f; } diff --git a/src/language/lexer/ChangeLog b/src/language/lexer/ChangeLog index fd98ea9c..ebbf2544 100644 --- a/src/language/lexer/ChangeLog +++ b/src/language/lexer/ChangeLog @@ -1,3 +1,10 @@ +Sun Nov 19 09:20:42 2006 Ben Pfaff + + * range-parser.c (parse_num_range): Because data_in takes an enum + fmt_type now, not a struct fmt_spec, change the type of the + corresponding argument. Updated all callers. + (parse_number) Ditto. + Sun Nov 12 06:34:06 WST 2006 John Darrrington * format-parser.c format-parser.h lexer.c lexer.h q2c.c range-parser.c diff --git a/src/language/lexer/range-parser.c b/src/language/lexer/range-parser.c index ae25b631..d6d352b8 100644 --- a/src/language/lexer/range-parser.c +++ b/src/language/lexer/range-parser.c @@ -31,29 +31,30 @@ #define _(msgid) gettext (msgid) #define N_(msgid) msgid -static bool parse_number (struct lexer *, double *, const struct fmt_spec *); +static bool parse_number (struct lexer *, double *, const enum fmt_type *); /* Parses and stores a numeric value, or a range of the form "x THRU y". Open-ended ranges may be specified as "LO(WEST) THRU y" or "x THRU HI(GHEST)". Sets *X and *Y to the range or the value and returns success. - Numeric values are always accepted. If F is nonnull, then - string values are also accepted, and converted to numeric - values using the specified format. */ + Numeric values are always accepted. If FORMAT is nonnull, + then string values are also accepted, and converted to numeric + values using *FORMAT. */ bool -parse_num_range (struct lexer *lexer, double *x, double *y, const struct fmt_spec *f) +parse_num_range (struct lexer *lexer, + double *x, double *y, const enum fmt_type *format) { if (lex_match_id (lexer, "LO") || lex_match_id (lexer, "LOWEST")) *x = LOWEST; - else if (!parse_number (lexer, x, f)) + else if (!parse_number (lexer, x, format)) return false; if (lex_match_id (lexer, "THRU")) { if (lex_match_id (lexer, "HI") || lex_match_id (lexer, "HIGHEST")) *y = HIGHEST; - else if (!parse_number (lexer, y, f)) + else if (!parse_number (lexer, y, format)) return false; if (*y < *x) @@ -86,11 +87,11 @@ parse_num_range (struct lexer *lexer, double *x, double *y, const struct fmt_spe /* Parses a number and stores it in *X. Returns success. - Numeric values are always accepted. If F is nonnull, then - string values are also accepted, and converted to numeric - values using the specified format. */ + Numeric values are always accepted. If FORMAT is nonnull, + then string values are also accepted, and converted to numeric + values using *FORMAT. */ static bool -parse_number (struct lexer *lexer, double *x, const struct fmt_spec *f) +parse_number (struct lexer *lexer, double *x, const enum fmt_type *format) { if (lex_is_number (lexer)) { @@ -98,18 +99,10 @@ parse_number (struct lexer *lexer, double *x, const struct fmt_spec *f) lex_get (lexer); return true; } - else if (lex_token (lexer) == T_STRING && f != NULL) + else if (lex_token (lexer) == T_STRING && format != NULL) { - struct data_in di; union value v; - di.s = ds_data (lex_tokstr (lexer)); - di.e = ds_end (lex_tokstr (lexer)); - di.v = &v; - di.flags = 0; - di.f1 = 1; - di.f2 = ds_length (lex_tokstr (lexer)); - di.format = *f; - data_in (&di); + data_in (ds_ss (lex_tokstr (lexer)), *format, 0, 0, &v, 0); lex_get (lexer); *x = v.f; if (*x == SYSMIS) @@ -121,7 +114,7 @@ parse_number (struct lexer *lexer, double *x, const struct fmt_spec *f) } else { - if (f != NULL) + if (format != NULL) lex_error (lexer, _("expecting number or data string")); else lex_force_num (lexer); diff --git a/src/language/lexer/range-parser.h b/src/language/lexer/range-parser.h index 030a1927..986cd5f0 100644 --- a/src/language/lexer/range-parser.h +++ b/src/language/lexer/range-parser.h @@ -21,9 +21,10 @@ #define RANGE_PRS_H 1 #include +#include -struct fmt_spec; struct lexer; -bool parse_num_range (struct lexer *, double *, double *, const struct fmt_spec *fmt); +bool parse_num_range (struct lexer *, + double *x, double *y, const enum fmt_type *fmt); #endif /* range-prs.h */ diff --git a/src/language/utilities/ChangeLog b/src/language/utilities/ChangeLog index e05deb42..6e98f435 100644 --- a/src/language/utilities/ChangeLog +++ b/src/language/utilities/ChangeLog @@ -1,3 +1,12 @@ +Sun Nov 19 09:21:39 2006 Ben Pfaff + + * set.q: Add RIB, RRB settings to control binary formats used by + data_out. + (cmd_set) Implement SET RIB, RRB. + (show_rib) New function. + (show_rrb) New function. + (static var show_table[]) Add SHOW RIB, RRB. + Sat Nov 4 16:05:47 2006 Ben Pfaff * set.q: Add WIB, WRB settings to control binary formats used by diff --git a/src/language/utilities/set.q b/src/language/utilities/set.q index ed7bbbf7..8a79b194 100644 --- a/src/language/utilities/set.q +++ b/src/language/utilities/set.q @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -102,6 +103,8 @@ int tgetnum (const char *); printback=prtbck:on/off; prompt=string; results=res:on/off/terminal/listing/both/on/none/off; + rib=rib:msbfirst/lsbfirst/vax/native; + rrb=rrb:native/isl/isb/idl/idb/vf/vd/vg/zs/zl; safer=safe:on; scompression=scompress:on/off; scripttab=string "x==1" "one character long"; @@ -177,6 +180,10 @@ cmd_set (struct lexer *lexer, struct dataset *ds) set_mxwarns (cmd.n_mxwarns[0]); if (cmd.sbc_nulline) set_nulline (cmd.null == STC_ON); + if (cmd.sbc_rib) + data_in_set_integer_format (stc_to_integer_format (cmd.rib)); + if (cmd.sbc_rrb) + data_in_set_float_format (stc_to_float_format (cmd.rrb)); if (cmd.sbc_safer) set_safer_mode (); if (cmd.sbc_scompression) @@ -730,6 +737,18 @@ show_float_format (const char *setting, enum float_format float_format) float_format == FLOAT_NATIVE_DOUBLE ? "NATIVE" : "nonnative"); } +static void +show_rib (const struct dataset *ds UNUSED) +{ + show_integer_format ("RIB", data_in_get_integer_format ()); +} + +static void +show_rrb (const struct dataset *ds UNUSED) +{ + show_float_format ("RRB", data_in_get_float_format ()); +} + static void show_scompression (const struct dataset *ds UNUSED) { @@ -798,6 +817,8 @@ const struct show_sbc show_table[] = {"MXERRS", show_mxerrs}, {"MXLOOPS", show_mxloops}, {"MXWARNS", show_mxwarns}, + {"RIB", show_rib}, + {"RRB", show_rrb}, {"SCOMPRESSION", show_scompression}, {"UNDEFINED", show_undefined}, {"WEIGHT", show_weight}, diff --git a/src/language/xforms/recode.c b/src/language/xforms/recode.c index 7fc187a7..dfd8145e 100644 --- a/src/language/xforms/recode.c +++ b/src/language/xforms/recode.c @@ -598,17 +598,10 @@ find_src_string (struct recode_trns *trns, const char *value, int width) case MAP_CONVERT: { union value uv; - struct data_in di; - - di.s = value; - di.e = value + width; - di.v = &uv; - di.flags = DI_IGNORE_ERROR; - di.f1 = di.f2 = 0; - di.format.type = FMT_F; - di.format.w = width; - di.format.d = 0; - match = data_in (&di); + + msg_disable (); + match = data_in (ss_buffer (value, width), FMT_F, 0, 0, &uv, 0); + msg_enable (); out->value.f = uv.f; break; } diff --git a/src/libpspp/ChangeLog b/src/libpspp/ChangeLog index d3d1553c..e7801687 100644 --- a/src/libpspp/ChangeLog +++ b/src/libpspp/ChangeLog @@ -1,3 +1,10 @@ +Sun Nov 19 09:22:26 2006 Ben Pfaff + + * str.c (ss_get_long): New function. + (ss_compare_case) Ditto. + (ss_equals) Ditto. + (ss_equals_case) Ditto. + Tue Oct 31 19:28:19 2006 Ben Pfaff * str.h: [!HAVE_STRCHR] Drop compatibility code, because now we diff --git a/src/libpspp/str.c b/src/libpspp/str.c index 965e3b30..5a554b7b 100644 --- a/src/libpspp/str.c +++ b/src/libpspp/str.c @@ -529,6 +529,36 @@ ss_get_chars (struct substring *ss, size_t cnt, struct substring *out) return cnt; } +/* Parses and removes an optionally signed decimal integer from + the beginning of SS. Returns 0 if an error occurred, + otherwise the number of characters removed from SS. Stores + the integer's value into *VALUE. */ +size_t +ss_get_long (struct substring *ss, long *value) +{ + char tmp[64]; + size_t length; + + length = ss_span (*ss, ss_cstr ("+-")); + length += ss_span (ss_substr (*ss, length, SIZE_MAX), ss_cstr (CC_DIGITS)); + if (length > 0 && length < sizeof tmp) + { + char *tail; + + memcpy (tmp, ss_data (*ss), length); + tmp[length] = '\0'; + + *value = strtol (tmp, &tail, 10); + if (tail - tmp == length) + { + ss_advance (ss, length); + return length; + } + } + *value = 0; + return 0; +} + /* Returns true if SS is empty (contains no characters), false otherwise. */ bool @@ -627,6 +657,34 @@ ss_compare (struct substring a, struct substring b) return retval; } +/* Compares A and B case-insensitively and returns a + strcmp()-type comparison result. */ +int +ss_compare_case (struct substring a, struct substring b) +{ + int retval = memcasecmp (a.string, b.string, MIN (a.length, b.length)); + if (retval == 0) + retval = a.length < b.length ? -1 : a.length > b.length; + return retval; +} + +/* Compares A and B and returns true if their contents are + identical, false otherwise. */ +int +ss_equals (struct substring a, struct substring b) +{ + return a.length == b.length && !memcmp (a.string, b.string, a.length); +} + +/* Compares A and B and returns true if their contents are + identical except possibly for case differences, false + otherwise. */ +int +ss_equals_case (struct substring a, struct substring b) +{ + return a.length == b.length && !memcasecmp (a.string, b.string, a.length); +} + /* Returns the position in SS that the character at P occupies. P must point within SS or one past its end. */ size_t diff --git a/src/libpspp/str.h b/src/libpspp/str.h index eaf12c3b..81cda125 100644 --- a/src/libpspp/str.h +++ b/src/libpspp/str.h @@ -111,6 +111,7 @@ bool ss_match_char (struct substring *, char); int ss_get_char (struct substring *); size_t ss_get_chars (struct substring *, size_t cnt, struct substring *); bool ss_get_until (struct substring *, char delimiter, struct substring *); +size_t ss_get_long (struct substring *, long *); /* Inspectors. */ bool ss_is_empty (struct substring); @@ -124,6 +125,9 @@ size_t ss_span (struct substring, struct substring skip_set); size_t ss_cspan (struct substring, struct substring stop_set); size_t ss_find_char (struct substring, char); int ss_compare (struct substring, struct substring); +int ss_compare_case (struct substring, struct substring); +int ss_equals (struct substring, struct substring); +int ss_equals_case (struct substring, struct substring); size_t ss_pointer_to_position (struct substring, const char *); char *ss_xstrdup (struct substring); diff --git a/src/ui/gui/helper.c b/src/ui/gui/helper.c index dea14d54..2b3c637a 100644 --- a/src/ui/gui/helper.c +++ b/src/ui/gui/helper.c @@ -30,7 +30,7 @@ gboolean text_to_value(const gchar *text, union value *v, struct fmt_spec format) { - struct data_in di; + bool ok; if ( format.type != FMT_A) { @@ -49,15 +49,12 @@ text_to_value(const gchar *text, union value *v, } } - di.s = text; - di.e = text + strlen(text); - di.v = v; - di.flags = DI_IGNORE_ERROR; - di.f1 = di.f2 = 0; - di.format = format; - - return data_in(&di); + msg_disable (); + ok = data_in (ss_cstr (text), format.type, 0, 0, + v, fmt_var_width (&format)); + msg_enable (); + return ok; } diff --git a/src/ui/gui/psppire-case-file.c b/src/ui/gui/psppire-case-file.c index 6b0f2111..7e16087e 100644 --- a/src/ui/gui/psppire-case-file.c +++ b/src/ui/gui/psppire-case-file.c @@ -18,7 +18,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ - +#include #include #include @@ -296,7 +296,7 @@ psppire_case_file_set_value(PsppireCaseFile *cf, gint casenum, gint idx, /* Set the IDXth value of case C using D_IN */ gboolean psppire_case_file_data_in(PsppireCaseFile *cf, gint casenum, gint idx, - struct data_in *d_in) + struct substring input, const struct fmt_spec *fmt) { struct ccase cc ; @@ -309,9 +309,8 @@ psppire_case_file_data_in(PsppireCaseFile *cf, gint casenum, gint idx, return FALSE; /* Cast away const in flagrant abuse of the casefile */ - d_in->v = (union value *) case_data(&cc, idx); - - if ( ! data_in(d_in) ) + if (!data_in (input, fmt->type, 0, 0, + (union value *) case_data(&cc, idx), fmt_var_width (fmt))) g_warning("Cant set value\n"); g_signal_emit(cf, signal[CASE_CHANGED], 0, casenum); diff --git a/src/ui/gui/psppire-case-file.h b/src/ui/gui/psppire-case-file.h index 7961b3cb..644bb0be 100644 --- a/src/ui/gui/psppire-case-file.h +++ b/src/ui/gui/psppire-case-file.h @@ -26,6 +26,8 @@ #include #include +#include + G_BEGIN_DECLS @@ -79,10 +81,11 @@ gint psppire_case_file_get_case_count(const PsppireCaseFile *cf); const union value * psppire_case_file_get_value(const PsppireCaseFile *cf, gint c, gint idx); -struct data_in; +struct fmt_spec; gboolean psppire_case_file_data_in(PsppireCaseFile *cf, gint c, gint idx, - struct data_in *d_in); + struct substring input, + const struct fmt_spec *); gboolean psppire_case_file_set_value(PsppireCaseFile *cf, gint casenum, gint idx, union value *v, gint width); diff --git a/src/ui/gui/psppire-data-store.c b/src/ui/gui/psppire-data-store.c index 0cb8ef91..0466a97b 100644 --- a/src/ui/gui/psppire-data-store.c +++ b/src/ui/gui/psppire-data-store.c @@ -569,20 +569,10 @@ psppire_data_store_set_string(GSheetModel *model, } #endif - { - const gint index = psppire_variable_get_fv(pv); - - struct data_in d_in; - d_in.s = text; - d_in.e = text + strlen(text); - d_in.v = 0; - d_in.f1 = d_in.f2 = 0; - d_in.format = * psppire_variable_get_write_spec(pv); - d_in.flags = 0; - - psppire_case_file_data_in(store->case_file, row, index, &d_in) ; - } - + psppire_case_file_data_in (store->case_file, row, + psppire_variable_get_fv (pv), ss_cstr (text), + psppire_variable_get_write_spec (pv)); + return TRUE; } diff --git a/tests/ChangeLog b/tests/ChangeLog index 071cd012..41d15f5a 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,38 @@ +Sun Nov 19 09:23:34 2006 Ben Pfaff + + * automake.mk: Add the new tests listed below. + + * tests/formats/bcd-in.sh: New test. + + * tests/formats/bcd-in.expected.cmp.gz: New support file for + bcd-in.sh. + + * tests/formats/date-in.sh: New test. + + * tests/formats/ib-in.sh: New test. + + * tests/formats/ib-in.expected.cmp.gz: New test. + + * tests/formats/legacy-in.sh: New test. + + * tests/formats/legacy-in.expected.cmp.gz: New support file for + legacy-in.sh. + + * tests/formats/month-in.sh: New test. + + * tests/formats/num-in.sh: New test. + + * tests/formats/num-in.expected.gz: New support file for num-in.sh. + + * tests/formats/time-in.sh: New test. + + * tests/formats/wkday-in.sh: New test. + + * tests/commands/no_case_size.sh: Update output to conform with + update scientific notation code. + + * tests/formats/num-out.expected.cmp.gz: Ditto. + Thu Nov 2 20:58:12 2006 Ben Pfaff * command/data-list.sh: Test newly implement SKIP keyword on DATA diff --git a/tests/automake.mk b/tests/automake.mk index ce528a24..9225053d 100644 --- a/tests/automake.mk +++ b/tests/automake.mk @@ -57,12 +57,20 @@ TESTS = \ tests/command/use.sh \ tests/command/very-long-strings.sh \ tests/command/weight.sh \ + tests/formats/bcd-in.sh \ tests/formats/binhex-out.sh \ + tests/formats/date-in.sh \ tests/formats/date-out.sh \ tests/formats/float-format.sh \ + tests/formats/ib-in.sh \ + tests/formats/legacy-in.sh \ + tests/formats/month-in.sh \ tests/formats/month-out.sh \ + tests/formats/num-in.sh \ tests/formats/num-out.sh \ + tests/formats/time-in.sh \ tests/formats/time-out.sh \ + tests/formats/wkday-in.sh \ tests/formats/wkday-out.sh \ tests/bugs/agg_crash.sh \ tests/bugs/agg-crash-2.sh \ diff --git a/tests/command/no_case_size.sh b/tests/command/no_case_size.sh index 3d6f2867..d9016eb9 100755 --- a/tests/command/no_case_size.sh +++ b/tests/command/no_case_size.sh @@ -92,12 +92,12 @@ diff -b -w pspp.list - < bcd-in.data + +activity="write pspp syntax" +cat > bcd-in.pspp <<'EOF' +SET ERRORS=NONE. +SET MXWARNS=10000000. +SET MXERRS=10000000. +FILE HANDLE data/NAME='bcd-in.data'/MODE=IMAGE/LRECL=2. +DATA LIST FILE=data/p 1-2 (P) pk 1-2 (PK). +COMPUTE x=$CASENUM - 1. +PRINT OUTFILE='bcd-in.out'/x (PIBHEX4) ' ' P PK. +EXECUTE. +EOF +if [ $? -ne 0 ] ; then no_result ; fi + +activity="run program" +$SUPERVISOR $PSPP --testing-mode bcd-in.pspp +if [ $? -ne 0 ] ; then no_result ; fi + +activity="gunzip expected results" +gzip -cd < $top_srcdir/tests/formats/bcd-in.expected.cmp.gz > bcd-in.expected.cmp +if [ $? -ne 0 ] ; then no_result ; fi + +activity="decompress expected results" +$PERL -pe "printf ' %04X ', $.-1" < bcd-in.expected.cmp > bcd-in.expected +if [ $? -ne 0 ] ; then no_result ; fi + +activity="compare output" +diff -u bcd-in.expected bcd-in.out +if [ $? -ne 0 ] ; then fail ; fi + +pass diff --git a/tests/formats/date-in.sh b/tests/formats/date-in.sh new file mode 100755 index 00000000..e5314a39 --- /dev/null +++ b/tests/formats/date-in.sh @@ -0,0 +1,2272 @@ +#! /bin/sh + +TEMPDIR=/tmp/pspp-tst-$$ +mkdir -p $TEMPDIR +trap 'cd /; rm -rf $TEMPDIR' 0 + +# ensure that top_builddir are absolute +if [ -z "$top_builddir" ] ; then top_builddir=. ; fi +if [ -z "$top_srcdir" ] ; then top_srcdir=. ; fi +top_builddir=`cd $top_builddir; pwd` +PSPP=$top_builddir/src/ui/terminal/pspp + +# ensure that top_srcdir is absolute +top_srcdir=`cd $top_srcdir; pwd` + +STAT_CONFIG_PATH=$top_srcdir/config +export STAT_CONFIG_PATH + +fail() +{ + echo $activity + echo FAILED + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + exit 2; +} + +pass() +{ + exit 0; +} + +cd $TEMPDIR + +activity="write program to generate PSPP syntax and data" +cat > date-in.pl <<'EOF' +#! /usr/bin/perl + +use strict; +use warnings; + +our $next = 1; + +my @formats = (['date', 'd-m-y'], + ['adate', 'm-d-y'], + ['edate', 'd-m-y'], + ['jdate', 'j'], + ['sdate', 'y-m-d'], + ['qyr', 'qQy'], + ['moyr', 'm-y'], + ['wkyr', 'wWy'], + ['datetime', 'd-m-y +H:M', 'd-m-y +H:M:S']); + +my @dates = (#yyyy mm dd jjj HH MM SS + [1648, 6, 10, 162, 0, 0, 0], + [1680, 6, 30, 182, 4, 50, 38], + [1716, 7, 24, 206, 12, 31, 35], + [1768, 6, 19, 171, 12, 47, 53], + [1819, 8, 2, 214, 1, 26, 0], + [1839, 3, 27, 86, 20, 58, 11], + [1903, 4, 19, 109, 7, 36, 5], + [1929, 8, 25, 237, 15, 43, 49], + [1941, 9, 29, 272, 4, 25, 9], + [1943, 4, 19, 109, 6, 49, 27], + [1943, 10, 7, 280, 2, 57, 52], + [1992, 3, 17, 77, 16, 45, 44], + [1996, 2, 25, 56, 21, 30, 57], + [1941, 9, 29, 272, 4, 25, 9], + [1943, 4, 19, 109, 6, 49, 27], + [1943, 10, 7, 280, 2, 57, 52], + [1992, 3, 17, 77, 16, 45, 44], + [1996, 2, 25, 56, 21, 30, 57], + [2038, 11, 10, 314, 22, 30, 4], + [2094, 7, 18, 199, 1, 56, 51]); + +open (SYNTAX, '>', 'date-in.pspp') or die "date-in.pspp: create: $!\n"; +print SYNTAX "SET EPOCH 1930.\n"; +for my $format (@formats) { + my ($name) = @$format; + print SYNTAX "DATA LIST file='$name.data'/$name 1-40 ($name).\n"; + print SYNTAX "PRINT OUTFILE='$name.out'/$name (F16.2).\n"; + print SYNTAX "EXECUTE.\n"; +} +close (SYNTAX); + +for my $format (@formats) { + my ($fmt_name, @templates) = @$format; + my ($fn) = "$fmt_name.data"; + open (DATA, '>', $fn) or die "$fn: create: $!\n"; + select DATA; + for my $template (@templates) { + for my $date (@dates) { + print_date_with_template ($date, $template) for 1...10; + } + } + close (DATA); +} + +sub print_date_with_template { + my ($date, $template) = @_; + my ($year, $month, $day, $julian, $hour, $minute, $second) = @$date; + my ($quarter) = int (($month - 1) / 3) + 1; + my ($week) = int (($julian - 1) / 7) + 1; + my (@year_types) = ('full'); + push (@year_types, '2digit') if $year >= 1930 && $year < 2030; + for my $c (split ('', $template)) { + if ($c eq 'd') { + printf (+pick ('%d', '%02d'), $day); + } elsif ($c eq 'm') { + my ($type) = pick ('arabic', 'roman', 'abbrev', 'full'); + if ($type eq 'arabic') { + printf (+pick ('%d', '%02d'), $month); + } elsif ($type eq 'roman') { + my ($mmm) = ('i', 'ii', 'iii', + 'iv', 'v', 'vi', + 'vii', 'viii', 'ix', + 'x', 'xi', 'xii')[$month - 1]; + print_rand_case ($mmm); + } elsif ($type eq 'abbrev') { + my ($mmm) = qw (jan feb mar apr may jun + jul aug sep oct nov dec)[$month - 1]; + print_rand_case ($mmm); + } elsif ($type eq 'full') { + my ($mmm) = qw (january february march + april may june + july august september + october november december)[$month - 1]; + print_rand_case ($mmm); + } else { + die; + } + } elsif ($c eq 'y') { + my ($type) = pick (@year_types); + if ($type eq '2digit') { + printf (+pick ('%d', '%02d'), $year % 100); + } elsif ($type eq 'full') { + print $year; + } else { + die; + } + } elsif ($c eq 'j') { + my ($type) = pick (@year_types); + if ($type eq '2digit') { + printf ("%02d%03d", $year % 100, $julian); + } elsif ($type eq 'full') { + printf ("%04d%03d", $year, $julian); + } else { + die; + } + } elsif ($c eq 'q') { + print $quarter; + } elsif ($c eq 'w') { + print $week; + } elsif ($c eq 'H') { + printf (+pick ('%d', '%02d'), $hour); + } elsif ($c eq 'M') { + printf (+pick ('%d', '%02d'), $minute); + } elsif ($c eq 'S') { + printf (+pick ('%d', '%02d'), $second); + } elsif ($c eq '-') { + print +pick (' ', '-', '.', ',', '/'); + } elsif ($c eq ':') { + print +pick (' ', ':'); + } elsif ($c eq ' ') { + print ' '; + } elsif ($c eq 'Q') { + maybe_print_space (); + print_rand_case ('q'); + maybe_print_space (); + } elsif ($c eq 'W') { + maybe_print_space (); + print_rand_case ('wk'); + maybe_print_space (); + } elsif ($c eq '+') { + print +pick ('', '-', '+'); + } else { + die; + } + } + print "\n"; +} + +sub print_rand_case { + my ($s) = @_; + my ($case) = pick (qw (uc lc tc)); + if ($case eq 'uc') { + print uc ($s); + } elsif ($case eq 'lc') { + print lc ($s); + } elsif ($case eq 'tc') { + print ucfirst ($s); + } else { + die; + } +} + +sub maybe_print_space { + print +pick ('', ' '); +} + +sub pick { + return $_[int (my_rand ($#_ + 1))]; +} + +sub my_rand { + my ($modulo) = @_; + $next = ($next * 1103515245 + 12345) % (2**32); + return int ($next / 65536) % $modulo; +} +EOF +if [ $? -ne 0 ] ; then no_result ; fi + +activity="generate PSPP syntax and data" +$PERL date-in.pl +if [ $? -ne 0 ] ; then no_result ; fi + +activity="run program" +$SUPERVISOR $PSPP --testing-mode date-in.pspp +if [ $? -ne 0 ] ; then no_result ; fi + +activity="compare adate.out output" +diff -u adate.out - < ib-in.data + +activity="write pspp syntax" +cat > ib-in.pspp <<'EOF' +SET RIB=MSBFIRST. +SET ERRORS=NONE. +SET MXWARNS=10000000. +SET MXERRS=10000000. +FILE HANDLE data/NAME='ib-in.data'/MODE=IMAGE/LRECL=2. +DATA LIST FILE=data/ib 1-2 (IB) pib 1-2 (PIB) pibhex 1-2 (PIBHEX). +COMPUTE x=$CASENUM - 1. +PRINT OUTFILE='ib-in.out'/x (PIBHEX4) ' ' ib pib pibhex. +EXECUTE. +EOF +if [ $? -ne 0 ] ; then no_result ; fi + +activity="run program" +$SUPERVISOR $PSPP --testing-mode ib-in.pspp +if [ $? -ne 0 ] ; then no_result ; fi + +activity="gunzip expected results" +gzip -cd < $top_srcdir/tests/formats/ib-in.expected.cmp.gz > ib-in.expected.cmp +if [ $? -ne 0 ] ; then no_result ; fi + +activity="decompress expected results" +$PERL -pe "printf ' %04X ', $.-1" < ib-in.expected.cmp > ib-in.expected +if [ $? -ne 0 ] ; then no_result ; fi + +activity="compare output" +diff -u ib-in.expected ib-in.out +if [ $? -ne 0 ] ; then fail ; fi + +pass diff --git a/tests/formats/legacy-in.expected.cmp.gz b/tests/formats/legacy-in.expected.cmp.gz new file mode 100644 index 00000000..952f9b58 Binary files /dev/null and b/tests/formats/legacy-in.expected.cmp.gz differ diff --git a/tests/formats/legacy-in.sh b/tests/formats/legacy-in.sh new file mode 100755 index 00000000..f678356a --- /dev/null +++ b/tests/formats/legacy-in.sh @@ -0,0 +1,84 @@ +#! /bin/sh + +TEMPDIR=/tmp/pspp-tst-$$ + +# ensure that top_builddir are absolute +if [ -z "$top_builddir" ] ; then top_builddir=. ; fi +if [ -z "$top_srcdir" ] ; then top_srcdir=. ; fi +top_builddir=`cd $top_builddir; pwd` +PSPP=$top_builddir/src/ui/terminal/pspp + +# ensure that top_srcdir is absolute +top_srcdir=`cd $top_srcdir; pwd` + +STAT_CONFIG_PATH=$top_srcdir/config +export STAT_CONFIG_PATH + + +cleanup() +{ + cd / + rm -rf $TEMPDIR +} + + +fail() +{ + echo $activity + echo FAILED + cleanup; + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + cleanup; + exit 2; +} + +pass() +{ + cleanup; + exit 0; +} + +mkdir -p $TEMPDIR + +cd $TEMPDIR + +activity="generate data input file" +$PERL -e 'print pack "n", $_ foreach 0...65535' > legacy-in.data + +activity="write pspp syntax" +cat > legacy-in.pspp <<'EOF' +SET ERRORS=NONE. +SET MXWARNS=10000000. +SET MXERRS=10000000. +FILE HANDLE data/NAME='legacy-in.data'/MODE=IMAGE/LRECL=2. +DATA LIST FILE=data/n 1-2 (N) z 1-2 (z). +COMPUTE x=$CASENUM - 1. +PRINT OUTFILE='legacy-in.out'/x (PIBHEX4) ' ' N Z. +EXECUTE. +EOF +if [ $? -ne 0 ] ; then no_result ; fi + +activity="run program" +$SUPERVISOR $PSPP --testing-mode legacy-in.pspp +if [ $? -ne 0 ] ; then no_result ; fi + +activity="gunzip expected results" +gzip -cd < $top_srcdir/tests/formats/legacy-in.expected.cmp.gz > legacy-in.expected.cmp +if [ $? -ne 0 ] ; then no_result ; fi + +activity="decompress expected results" +$PERL -pe "printf ' %04X ', $.-1" < legacy-in.expected.cmp > legacy-in.expected +if [ $? -ne 0 ] ; then no_result ; fi + +activity="compare output" +diff -u legacy-in.expected legacy-in.out +if [ $? -ne 0 ] ; then fail ; fi + +pass diff --git a/tests/formats/month-in.sh b/tests/formats/month-in.sh new file mode 100755 index 00000000..2d32a594 --- /dev/null +++ b/tests/formats/month-in.sh @@ -0,0 +1,162 @@ +#! /bin/sh + +TEMPDIR=/tmp/pspp-tst-$$ +mkdir -p $TEMPDIR +trap 'cd /; rm -rf $TEMPDIR' 0 + +# ensure that top_builddir are absolute +if [ -z "$top_builddir" ] ; then top_builddir=. ; fi +if [ -z "$top_srcdir" ] ; then top_srcdir=. ; fi +top_builddir=`cd $top_builddir; pwd` +PSPP=$top_builddir/src/ui/terminal/pspp + +# ensure that top_srcdir is absolute +top_srcdir=`cd $top_srcdir; pwd` + +STAT_CONFIG_PATH=$top_srcdir/config +export STAT_CONFIG_PATH + +fail() +{ + echo $activity + echo FAILED + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + exit 2; +} + +pass() +{ + exit 0; +} + +cd $TEMPDIR + +activity="write pspp syntax" +cat > month-in.pspp < num-in.pl <<'EOF' +#! /usr/bin/perl + +use POSIX; +use strict; +use warnings; + +our $next = 0; + +for my $number (0, 1, .5, .015625, 123) { + my ($base_exp) = floor ($number ? log10 ($number) : 0); + for my $offset (-3...3) { + my ($exponent) = $base_exp + $offset; + my ($fraction) = $number / 10**$offset; + + permute_zeros ($fraction, $exponent); + } +} + +sub permute_zeros { + my ($fraction, $exponent) = @_; + + my ($frac_rep) = sprintf ("%f", $fraction); + my ($leading_zeros) = length (($frac_rep =~ /^(0*)/)[0]); + my ($trailing_zeros) = length (($frac_rep =~ /(\.?0*)$/)[0]); + for my $i (0...$leading_zeros) { + for my $j (0...$trailing_zeros) { + my ($trimmed) = substr ($frac_rep, $i, + length ($frac_rep) - $i - $j); + next if $trimmed eq '.' || $trimmed eq ''; + + permute_commas ($trimmed, $exponent); + } + } +} + +sub permute_commas { + my ($frac_rep, $exponent) = @_; + permute_dot_comma ($frac_rep, $exponent); + my ($pos) = int (my_rand (length ($frac_rep) + 1)); + $frac_rep = substr ($frac_rep, 0, $pos) . "," . substr ($frac_rep, $pos); + permute_dot_comma ($frac_rep, $exponent); +} + +sub permute_dot_comma { + my ($frac_rep, $exponent) = @_; + permute_exponent_syntax ($frac_rep, $exponent); + if ($frac_rep =~ /[,.]/) { + $frac_rep =~ tr/.,/,./; + permute_exponent_syntax ($frac_rep, $exponent); + } +} + +sub permute_exponent_syntax { + my ($frac_rep, $exponent) = @_; + my (@exp_reps); + if ($exponent == 0) { + @exp_reps = pick ('', 'e0', 'e-0', 'e+0', '-0', '+0'); + } elsif ($exponent > 0) { + @exp_reps = pick ("e$exponent", "e+$exponent", "+$exponent"); + } else { + my ($abs_exp) = -$exponent; + @exp_reps = pick ("e-$abs_exp", , "e-$abs_exp", "-$abs_exp"); + } + permute_sign_and_affix ($frac_rep, $_) foreach @exp_reps; +} + +sub permute_sign_and_affix { + my ($frac_rep, $exp_rep) = @_; + for my $prefix (pick ('', '$'), + pick ('-', '-$', '$-', '$-$'), + pick ('+', '+$', '$+', '$+$')) { + for my $suffix ('', '%') { + permute_spaces ("$prefix$frac_rep$exp_rep$suffix"); + } + } +} + +sub permute_spaces { + my ($s) = @_; + $s =~ s/([-+\$e%])/ $1 /g; + my (@fields) = split (' ', $s); + print join ('', @fields), "\n"; + + if ($#fields > 0) { + my ($pos) = int (my_rand ($#fields)) + 1; + print join ('', @fields[0...$pos - 1]); + print " "; + print join ('', @fields[$pos...$#fields]); + print "\n"; + } +} + +sub pick { + return $_[int (my_rand ($#_ + 1))]; +} + +sub my_rand { + my ($modulo) = @_; + $next = ($next * 1103515245 + 12345) % (2**32); + return int ($next / 65536) % $modulo; +} +EOF + +activity="generate data" +$PERL num-in.pl > num-in.data +if [ $? -ne 0 ] ; then no_result ; fi +echo -n . + +activity="generate pspp syntax" +cat > num-in.pspp < num-in.expected +if [ $? -ne 0 ] ; then no_result ; fi +echo -n . + +activity="compare output" +diff -u num-in.expected num-in.out +if [ $? -ne 0 ] ; then fail ; fi + +echo . + +pass diff --git a/tests/formats/num-out.expected.cmp.gz b/tests/formats/num-out.expected.cmp.gz index f1f55b15..21caf25a 100644 Binary files a/tests/formats/num-out.expected.cmp.gz and b/tests/formats/num-out.expected.cmp.gz differ diff --git a/tests/formats/time-in.sh b/tests/formats/time-in.sh new file mode 100755 index 00000000..da64ff85 --- /dev/null +++ b/tests/formats/time-in.sh @@ -0,0 +1,953 @@ +#! /bin/sh + +TEMPDIR=/tmp/pspp-tst-$$ +mkdir -p $TEMPDIR +trap 'cd /; rm -rf $TEMPDIR' 0 + +# ensure that top_builddir are absolute +if [ -z "$top_builddir" ] ; then top_builddir=. ; fi +if [ -z "$top_srcdir" ] ; then top_srcdir=. ; fi +top_builddir=`cd $top_builddir; pwd` +PSPP=$top_builddir/src/ui/terminal/pspp + +# ensure that top_srcdir is absolute +top_srcdir=`cd $top_srcdir; pwd` + +STAT_CONFIG_PATH=$top_srcdir/config +export STAT_CONFIG_PATH + +fail() +{ + echo $activity + echo FAILED + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + exit 2; +} + +pass() +{ + exit 0; +} + +cd $TEMPDIR + +activity="write program to generate PSPP syntax and data" +cat > time-in.pl <<'EOF' +#! /usr/bin/perl + +use strict; +use warnings; + +our $next = 1; + +my @formats = (["time", "+H:M", "+H:M:S"], + ["dtime", "+D H:M", "+D H:M:S"]); + +my @times = (# D HH MM SS + [ 0, 0, 0, 0.00], + [ 1, 4, 50, 38.68], + [ 5, 12, 31, 35.82], + [ 0, 12, 47, 53.41], + [ 3, 1, 26, 0.69], + [ 1, 20, 58, 11.19], + [ 12, 7, 36, 5.98], + [ 52, 15, 43, 49.27], + [ 7, 4, 25, 9.24], + [ 0, 6, 49, 27.89], + [ 20, 2, 57, 52.56], + [555, 16, 45, 44.12], + [120, 21, 30, 57.27], + [ 0, 4, 25, 9.98], + [ 3, 6, 49, 27.24], + [ 5, 2, 57, 52.13], + [ 0, 16, 45, 44.35], + [ 1, 21, 30, 57.32], + [ 10, 22, 30, 4.27], + [ 22, 1, 56, 51.18]); + +open (SYNTAX, '>', 'time-in.pspp') or die "time-in.pspp: create: $!\n"; +for my $format (@formats) { + my ($name) = @$format; + print SYNTAX "DATA LIST file='$name.data'/$name 1-40 ($name).\n"; + print SYNTAX "PRINT OUTFILE='$name.out'/$name (F16.2).\n"; + print SYNTAX "EXECUTE.\n"; +} +close (SYNTAX); + +for my $format (@formats) { + my ($fmt_name, @templates) = @$format; + my ($fn) = "$fmt_name.data"; + open (DATA, '>', $fn) or die "$fn: create: $!\n"; + select DATA; + for my $template (@templates) { + for my $time (@times) { + print_time_with_template ($time, $template) for 1...10; + } + } + close (DATA); +} + +sub print_time_with_template { + my ($time, $template) = @_; + my ($day, $hour, $minute, $second) = @$time; + for my $c (split ('', $template)) { + if ($c eq '+') { + print +pick ('', '-', '+'); + } elsif ($c eq 'D') { + printf (+pick ('%d', '%02d'), $day); + $day = 0; + } elsif ($c eq 'H') { + printf (+pick ('%d', '%02d'), $hour + 24 * $day); + } elsif ($c eq 'M') { + printf (+pick ('%d', '%02d'), $minute); + } elsif ($c eq 'S') { + printf (+pick ('%.0f', '%02.0f', '%.1f', '%.2f'), $second); + } elsif ($c eq ':') { + print +pick (' ', ':'); + } elsif ($c eq ' ') { + print ' '; + } else { + die; + } + } + print "\n"; +} + +sub pick { + return $_[int (my_rand ($#_ + 1))]; +} + +sub my_rand { + my ($modulo) = @_; + $next = ($next * 1103515245 + 12345) % (2**32); + return int ($next / 65536) % $modulo; +} +EOF +if [ $? -ne 0 ] ; then no_result ; fi + +activity="generate PSPP syntax and data" +$PERL time-in.pl +if [ $? -ne 0 ] ; then no_result ; fi + +activity="run program" +$SUPERVISOR $PSPP --testing-mode time-in.pspp +if [ $? -ne 0 ] ; then no_result ; fi + +activity="compare time.out output" +diff -u time.out - < wkday-in.pspp <