absorb_miss boolean function RANGE (x != SYSMIS, a[n*2])
{
- size_t i;
- int sysmis = 0;
-
- for (i = 0; i < n; i++)
+ bool found = false;
+ bool valid = false;
+ for (size_t i = 0; i < n; i++)
{
double w = a[2 * i];
double y = a[2 * i + 1];
if (w != SYSMIS && y != SYSMIS)
{
if (w <= x && x <= y)
- return 1.0;
+ found = true;
+ else if (w <= y)
+ valid = true;
+ else
+ return SYSMIS;
}
- else
- sysmis = 1;
}
- return sysmis ? SYSMIS : 0.;
+ return found ? true : valid ? false : SYSMIS;
}
boolean function RANGE (string x, string a[n*2])
{
- int i;
-
- for (i = 0; i < n; i++)
+ bool found = false;
+ for (size_t i = 0; i < n; i++)
{
struct substring *w = &a[2 * i];
struct substring *y = &a[2 * i + 1];
if (compare_string_3way (w, &x) <= 0 && compare_string_3way (&x, y) <= 0)
- return 1.;
+ found = true;
+ else if (compare_string_3way (w, y) > 0)
+ return SYSMIS;
}
- return 0.;
+ return found;
}
function SD.2 (a[n])
function INDEX (string haystack, string needle)
{
- if (needle.length == 0)
- return SYSMIS;
- else
+ if (haystack.length >= needle.length)
{
- int limit = haystack.length - needle.length + 1;
- int i;
- for (i = 1; i <= limit; i++)
+ size_t limit = haystack.length - needle.length + 1;
+ for (size_t i = 1; i <= limit; i++)
if (!memcmp (&haystack.string[i - 1], needle.string, needle.length))
return i;
- return 0;
}
+ return 0;
}
-function INDEX (string haystack, string needles, needle_len_d)
+function INDEX (string haystack, string needles, integer needle_len)
+ expression e;
+ expr_node n;
{
- if (needle_len_d <= INT_MIN || needle_len_d >= INT_MAX
- || (int) needle_len_d != needle_len_d
- || needles.length == 0)
- return SYSMIS;
- else
+ if (needle_len <= 0 || needles.length % needle_len != 0)
{
- int needle_len = needle_len_d;
- if (needle_len < 0 || needle_len > needles.length
- || needles.length % needle_len != 0)
- return SYSMIS;
- else
- {
- int limit = haystack.length - needle_len + 1;
- int i, j;
- for (i = 1; i <= limit; i++)
- for (j = 0; j < needles.length; j += needle_len)
- if (!memcmp (&haystack.string[i - 1], &needles.string[j],
- needle_len))
- return i;
- return 0;
- }
+ msg_at (SE, expr_location (e, n),
+ _("INDEX needle length argument must evenly divide the "
+ "length of the needles argument."));
+ msg_at (SN, expr_location (e, n->args[1]),
+ _("The needles argument has length %zu."), needles.length);
+ msg_at (SN, expr_location (e, n->args[2]),
+ _("The needle length argument has value %d."), needle_len);
+ return SYSMIS;
}
+
+ if (haystack.length >= needle_len)
+ {
+ size_t limit = haystack.length - needle_len + 1;
+ for (size_t i = 1; i <= limit; i++)
+ for (size_t j = 0; j < needles.length; j += needle_len)
+ if (!memcmp (&haystack.string[i - 1], &needles.string[j], needle_len))
+ return i;
+ }
+
+ return 0;
}
function RINDEX (string haystack, string needle)
{
- if (needle.length == 0)
- return SYSMIS;
- else
+ if (haystack.length >= needle.length)
{
- int limit = haystack.length - needle.length + 1;
- int i;
- for (i = limit; i >= 1; i--)
+ size_t limit = haystack.length - needle.length + 1;
+ for (size_t i = limit; i >= 1; i--)
if (!memcmp (&haystack.string[i - 1], needle.string, needle.length))
return i;
- return 0;
}
+
+ return 0;
}
-function RINDEX (string haystack, string needles, needle_len_d)
+function RINDEX (string haystack, string needles, integer needle_len)
+ expression e;
+ expr_node n;
{
- if (needle_len_d <= 0 || needle_len_d >= INT_MAX
- || (int) needle_len_d != needle_len_d
- || needles.length == 0)
- return SYSMIS;
- else
+ if (needle_len <= 0 || needles.length % needle_len != 0)
{
- int needle_len = needle_len_d;
- if (needle_len < 0 || needle_len > needles.length
- || needles.length % needle_len != 0)
- return SYSMIS;
- else
- {
- int limit = haystack.length - needle_len + 1;
- int i, j;
- for (i = limit; i >= 1; i--)
- for (j = 0; j < needles.length; j += needle_len)
- if (!memcmp (&haystack.string[i - 1],
- &needles.string[j], needle_len))
- return i;
- return 0;
- }
+ msg_at (SE, expr_location (e, n),
+ _("RINDEX needle length argument must evenly divide the "
+ "length of the needles argument."));
+ msg_at (SN, expr_location (e, n->args[1]),
+ _("The needles argument has length %zu."), needles.length);
+ msg_at (SN, expr_location (e, n->args[2]),
+ _("The needle length argument has value %d."), needle_len);
+ return SYSMIS;
+ }
+
+ if (haystack.length >= needle_len)
+ {
+ size_t limit = haystack.length - needle_len + 1;
+ for (size_t i = limit; i >= 1; i--)
+ for (size_t j = 0; j < needles.length; j += needle_len)
+ if (!memcmp (&haystack.string[i - 1], &needles.string[j], needle_len))
+ return i;
}
+
+ return 0;
}
function LENGTH (string s)
return s;
}
-absorb_miss string function LPAD (string s, n)
+absorb_miss string function LPAD (string s, integer n)
expression e;
+ expr_node node;
{
- if (n < 0 || n > MAX_STRING || (int) n != n)
- return empty_string;
+ if (n < 0 || n > MAX_STRING)
+ {
+ if (n != INT_MIN)
+ {
+ msg_at (SE, expr_location (e, node),
+ _("The length argument to LPAD must be between 0 and %d."),
+ MAX_STRING);
+ msg_at (SN, expr_location (e, node->args[1]),
+ _("The length argument is %d."), n);
+ }
+
+ return s;
+ }
else if (s.length >= n)
return s;
else
{
struct substring t = alloc_string (e, n);
- memset (t.string, ' ', n - s.length);
- memcpy (&t.string[(int) n - s.length], s.string, s.length);
+ size_t pad = n - s.length;
+ memset (t.string, ' ', pad);
+ memcpy (&t.string[pad], s.string, s.length);
return t;
}
}
-absorb_miss string function LPAD (string s, n, string c)
+absorb_miss string function LPAD (string s, integer n, string c)
expression e;
+ expr_node node;
{
- if (n < 0 || n > MAX_STRING || (int) n != n || c.length != 1)
- return empty_string;
+ if (n < 0 || n > MAX_STRING)
+ {
+ if (n != INT_MIN)
+ {
+ msg_at (SE, expr_location (e, node),
+ _("The length argument to LPAD must be between 0 and %d."),
+ MAX_STRING);
+ msg_at (SN, expr_location (e, node->args[1]),
+ _("The length argument is %d."), n);
+ }
+
+ return s;
+ }
else if (s.length >= n)
return s;
+ else if (c.length == 0)
+ {
+ msg_at (SE, expr_location (e, node),
+ _("The padding argument to LPAD must not be an empty string."));
+ return s;
+ }
else
{
+ size_t n_pad = (n - s.length) / c.length;
+ if (!n_pad)
+ return s;
+
struct substring t = alloc_string (e, n);
- memset (t.string, c.string[0], n - s.length);
- memcpy (&t.string[(int) n - s.length], s.string, s.length);
+ t.length = 0;
+ for (size_t i = 0; i < n_pad; i++)
+ {
+ memcpy (t.string + t.length, c.string, c.length);
+ t.length += c.length;
+ }
+ memcpy (t.string + t.length, s.string, s.length);
+ t.length += s.length;
return t;
}
}
string function REPLACE (string haystack, string needle, string replacement)
expression e;
- = replace_string (e, haystack, needle, replacement, DBL_MAX);
+ = replace_string (e, haystack, needle, replacement, INT_MAX);
absorb_miss string function REPLACE (string haystack, string needle,
- string replacement, n)
+ string replacement, integer n)
expression e;
= replace_string (e, haystack, needle, replacement, n);
-absorb_miss string function RPAD (string s, n)
+absorb_miss string function RPAD (string s, integer n)
expression e;
+ expr_node node;
{
- if (n < 0 || n > MAX_STRING || (int) n != n)
- return empty_string;
+ if (n < 0 || n > MAX_STRING)
+ {
+ if (n != INT_MIN)
+ {
+ msg_at (SE, expr_location (e, node),
+ _("The length argument to RPAD must be between 0 and %d."),
+ MAX_STRING);
+ msg_at (SN, expr_location (e, node->args[1]),
+ _("The length argument is %d."), n);
+ }
+
+ return s;
+ }
else if (s.length >= n)
return s;
else
{
struct substring t = alloc_string (e, n);
+ size_t pad = n - s.length;
memcpy (t.string, s.string, s.length);
- memset (&t.string[s.length], ' ', n - s.length);
+ memset (t.string + s.length, ' ', pad);
return t;
}
}
-absorb_miss string function RPAD (string s, n, string c)
+absorb_miss string function RPAD (string s, integer n, string c)
expression e;
+ expr_node node;
{
- if (n < 0 || n > MAX_STRING || (int) n != n || c.length != 1)
- return empty_string;
+ if (n < 0 || n > MAX_STRING)
+ {
+ if (n != INT_MIN)
+ {
+ msg_at (SE, expr_location (e, node),
+ _("The length argument to RPAD must be between 0 and %d."),
+ MAX_STRING);
+ msg_at (SN, expr_location (e, node->args[1]),
+ _("The length argument is %d."), n);
+ }
+
+ return s;
+ }
else if (s.length >= n)
return s;
+ else if (c.length == 0)
+ {
+ msg_at (SE, expr_location (e, node),
+ _("The padding argument to RPAD must not be an empty string."));
+ return s;
+ }
else
{
+ size_t n_pad = (n - s.length) / c.length;
+ if (!n_pad)
+ return s;
+
struct substring t = alloc_string (e, n);
memcpy (t.string, s.string, s.length);
- memset (&t.string[s.length], c.string[0], n - s.length);
+ t.length = s.length;
+ for (size_t i = 0; i < n_pad; i++)
+ {
+ memcpy (t.string + t.length, c.string, c.length);
+ t.length += c.length;
+ }
return t;
}
}
string function LTRIM (string s, string c)
{
- if (c.length == 1)
- {
- while (s.length > 0 && s.string[0] == c.string[0])
- {
- s.length--;
- s.string++;
- }
- return s;
- }
- else
- return empty_string;
+ if (c.length > 0)
+ while (s.length >= c.length && !memcmp (s.string, c.string, c.length))
+ {
+ s.length -= c.length;
+ s.string += c.length;
+ }
+ return s;
}
string function RTRIM (string s)
string function RTRIM (string s, string c)
{
- if (c.length == 1)
- {
- while (s.length > 0 && s.string[s.length - 1] == c.string[0])
- s.length--;
- return s;
- }
- else
- return empty_string;
+ if (c.length > 0)
+ while (s.length >= c.length
+ && !memcmp (&s.string[s.length - c.length], c.string, c.length))
+ s.length -= c.length;
+ return s;
}
function NUMBER (string s, ni_format f)
expression e;
expr_node n;
{
- union value out;
- char *error;
-
if (s.length > f->w)
s.length = f->w;
- error = data_in (s, C_ENCODING, f->type, settings_get_fmt_settings (),
- &out, 0, NULL);
+
+ union value out;
+ char *error = data_in (s, C_ENCODING, f->type, settings_get_fmt_settings (),
+ &out, 0, NULL);
if (error == NULL)
data_in_imply_decimals (s, C_ENCODING, f->type, f->d,
settings_get_fmt_settings (), &out);
else
{
msg_at (SE, expr_location (e, n->args[0]),
- _("Cannot parse `%.*s' as format %s: %s"),
+ _("Cannot parse \"%.*s\" as format %s: %s"),
(int) s.length, s.string, fmt_name (f->type), error);
free (error);
}
{
return case_num (c, v);
}
+no_opt function VALUE (num_vec_elem v)
+{
+ return v;
+}
-no_opt operator VEC_ELEM_NUM (idx)
+// A numeric vector element used in a "normal" context, in which a user-missing
+// value becomes system-missing.
+absorb_miss no_opt operator VEC_ELEM_NUM (idx)
vector v;
case c;
expression e;
expr_node n;
{
- if (idx >= 1 && idx <= vector_get_n_vars (v))
- {
- const struct variable *var = vector_get_var (v, (size_t) idx - 1);
- double value = case_num (c, var);
- return !var_is_num_missing (var, value, MV_USER) ? value : SYSMIS;
- }
- else
+ const struct variable *var = expr_index_vector (e, n, v, idx);
+ if (var)
{
- if (idx == SYSMIS)
- msg_at (SE, expr_location (e, n->args[0]),
- _("SYSMIS is not a valid index value for %zu-element vector "
- "%s. The result will be set to SYSMIS."),
- vector_get_n_vars (v), vector_get_name (v));
- else
- msg_at (SE, expr_location (e, n->args[0]),
- _("%g is not a valid index value for %zu-element vector %s. "
- "The result will be set to SYSMIS."),
- idx, vector_get_n_vars (v), vector_get_name (v));
- return SYSMIS;
+ double d = case_num (c, var);
+ if (!var_is_num_missing (var, d, MV_USER))
+ return d;
}
+ return SYSMIS;
+}
+
+// A numeric vector element used as the argument to the VALUE() function, in
+// which a user-missing value retains its value.
+//
+// All numeric vector elements are initially parsed this way. In most contexts
+// they then get coerced into numbers.
+absorb_miss no_opt num_vec_elem operator VEC_ELEM_NUM_RAW (idx)
+ vector v;
+ case c;
+ expression e;
+ expr_node n;
+{
+ const struct variable *var = expr_index_vector (e, n, v, idx);
+ return var ? case_num (c, var) : SYSMIS;
}
absorb_miss no_opt string operator VEC_ELEM_STR (idx)
case c;
expr_node n;
{
- if (idx >= 1 && idx <= vector_get_n_vars (v))
- {
- struct variable *var = vector_get_var (v, (size_t) idx - 1);
- return copy_string (e, CHAR_CAST_BUG (char *, case_str (c, var)),
- var_get_width (var));
- }
- else
- {
- if (idx == SYSMIS)
- msg_at (SE, expr_location (e, n->args[0]),
- _("SYSMIS is not a valid index value for %zu-element vector "
- "%s. The result will be set to the empty string."),
- vector_get_n_vars (v), vector_get_name (v));
- else
- msg_at (SE, expr_location (e, n->args[0]),
- _("%g is not a valid index value for %zu-element vector %s. "
- "The result will be set to the empty string."),
- idx, vector_get_n_vars (v), vector_get_name (v));
- return empty_string;
- }
+ const struct variable *var = expr_index_vector (e, n, v, idx);
+ return (var
+ ? copy_string (e, CHAR_CAST_BUG (char *, case_str (c, var)),
+ var_get_width (var))
+ : empty_string);
}
// Terminals.