From 32dc29d70ac55ad3629053097b83c74bb0068fc0 Mon Sep 17 00:00:00 2001
From: Ben Pfaff
Date: Sat, 20 Feb 2016 17:12:17 -0800
Subject: [PATCH] expressions: Implement the REPLACE string function.

Requested by Frans Houweling.
---
Notes (not part of the commit message):

Revised during review.  Relative to the patch as first posted,
replace_string() gains a header comment, treats any N below 1 as "no
replacements" (previously 0 < N < 1 replaced one instance, although 1.5
already rounded down to 1), and no longer passes a possibly null source
pointer to memcpy() when REPLACEMENT is empty.  A test for N = 0.5 is
added.  The diffstat and hunk line counts reflect these changes; the blob
IDs on the "index" lines are still those of the original posting.

 doc/expressions.texi                    | 11 ++++-
 src/language/expressions/helpers.c      | 60 ++++++++++++++++++++++++-
 src/language/expressions/helpers.h      |  6 +++
 src/language/expressions/operations.def | 12 ++++-
 tests/language/expressions/evaluate.at  | 16 +++++++
 5 files changed, 101 insertions(+), 4 deletions(-)

diff --git a/doc/expressions.texi b/doc/expressions.texi
index 7226116bd1..b7e23a5f96 100644
--- a/doc/expressions.texi
+++ b/doc/expressions.texi
@@ -264,7 +264,7 @@ The sections below describe each function in detail.
 * Set Membership::              ANY RANGE
 * Statistical Functions::       CFVAR MAX MEAN MIN SD SUM VARIANCE
 * String Functions::            CONCAT INDEX LENGTH LOWER LPAD LTRIM NUMBER
-                                RINDEX RPAD RTRIM STRING SUBSTR UPCASE
+                                REPLACE RINDEX RPAD RTRIM STRING SUBSTR UPCASE
 * Time and Date::               CTIME.xxx DATE.xxx TIME.xxx XDATE.xxx
                                 DATEDIFF DATESUM
 * Miscellaneous Functions::     LAG YRMODA VALUELABEL
@@ -620,6 +620,15 @@ right-padded with spaces.  If @var{string} is not in the correct
 format for @var{format}, system-missing is returned.
 @end deftypefn
 
+@cindex strings, replacing substrings
+@cindex replacing substrings
+@deftypefn {Function} {} REPLACE (@var{haystack}, @var{needle}, @var{replacement}[, @var{n}])
+Returns string @var{haystack} with instances of @var{needle} replaced
+by @var{replacement}.  If nonnegative integer @var{n} is specified, it
+limits the maximum number of replacements; otherwise, all instances of
+@var{needle} are replaced.
+@end deftypefn
+
 @cindex strings, searching backwards
 @deftypefn {Function} {} RINDEX (@var{haystack}, @var{needle})
 Returns a positive integer indicating the position of the last
diff --git a/src/language/expressions/helpers.c b/src/language/expressions/helpers.c
index 5aad13eb16..cfc46290b7 100644
--- a/src/language/expressions/helpers.c
+++ b/src/language/expressions/helpers.c
@@ -1,5 +1,5 @@
 /* PSPP - a program for statistical analysis.
-   Copyright (C) 2008, 2010, 2011, 2015 Free Software Foundation, Inc.
+   Copyright (C) 2008, 2010, 2011, 2015, 2016 Free Software Foundation, Inc.
 
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -25,6 +25,8 @@
 #include "libpspp/assertion.h"
 #include "libpspp/pool.h"
 
+#include "gl/minmax.h"
+
 const struct substring empty_string = {NULL, 0};
 
 double
@@ -675,3 +677,59 @@ round_nearest (double x, double mult, double fuzzbits)
   x = x >= 0. ? floor (x + adjustment) : -floor (-x + adjustment);
   return x * mult;
 }
+
+/* Returns HAYSTACK with instances of NEEDLE replaced by REPLACEMENT.  Matches
+   are found left to right and never overlap: scanning resumes just past each
+   replaced instance.
+
+   At most N replacements are made, with any fractional part of N ignored.
+   HAYSTACK itself is returned, unmodified, if N is less than 1 or SYSMIS, if
+   NEEDLE is empty, or if NEEDLE is longer than HAYSTACK.
+
+   Otherwise the result is built in a buffer obtained from alloc_string (E,
+   MAX_STRING), and output beyond MAX_STRING bytes is silently dropped. */
+struct substring
+replace_string (struct expression *e,
+                struct substring haystack,
+                struct substring needle,
+                struct substring replacement,
+                double n)
+{
+  if (!needle.length
+      || haystack.length < needle.length
+      || n < 1
+      || n == SYSMIS)
+    return haystack;
+
+  struct substring result = alloc_string (e, MAX_STRING);
+  result.length = 0;
+
+  size_t i = 0;
+  while (i <= haystack.length - needle.length)
+    if (!memcmp (&haystack.string[i], needle.string, needle.length))
+      {
+        size_t copy_len = MIN (replacement.length, MAX_STRING - result.length);
+
+        /* An empty REPLACEMENT may have a null data pointer (as empty_string
+           does), which memcpy must not be passed even for a zero length. */
+        if (copy_len > 0)
+          memcpy (&result.string[result.length], replacement.string, copy_len);
+        result.length += copy_len;
+        i += needle.length;
+
+        if (--n < 1)
+          break;
+      }
+    else
+      {
+        if (result.length < MAX_STRING)
+          result.string[result.length++] = haystack.string[i];
+        i++;
+      }
+
+  /* Copy whatever follows the last position examined. */
+  while (i < haystack.length && result.length < MAX_STRING)
+    result.string[result.length++] = haystack.string[i++];
+
+  return result;
+}
diff --git a/src/language/expressions/helpers.h b/src/language/expressions/helpers.h
index 0d349f7fd9..51e1db0329 100644
--- a/src/language/expressions/helpers.h
+++ b/src/language/expressions/helpers.h
@@ -83,4 +83,10 @@ double idf_fdist (double P, double a, double b);
 
 double round_nearest (double x, double mult, double fuzzbits);
 
+struct substring replace_string (struct expression *,
+                                 struct substring haystack,
+                                 struct substring needle,
+                                 struct substring replacement,
+                                 double n);
+
 #endif /* expressions/helpers.h */
diff --git a/src/language/expressions/operations.def b/src/language/expressions/operations.def
index 471fa1d30d..203ccbb42a 100644
--- a/src/language/expressions/operations.def
+++ b/src/language/expressions/operations.def
@@ -1,7 +1,7 @@
 // -*- c -*-
 //
 // PSPP - a program for statistical analysis.
-// Copyright (C) 2005, 2006, 2009, 2010, 2011, 2012, 2015 Free Software Foundation, Inc.
+// Copyright (C) 2005, 2006, 2009, 2010, 2011, 2012, 2015, 2016 Free Software Foundation, Inc.
 //
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
@@ -399,7 +399,6 @@ function INDEX (string haystack, string needles, needle_len_d)
     }
 }
 
-
 function RINDEX (string haystack, string needle)
 {
   if (needle.length == 0)
@@ -504,6 +503,15 @@ absorb_miss string function LPAD (string s, n, string c)
     }
 }
 
+string function REPLACE (string haystack, string needle, string replacement)
+    expression e;
+  = replace_string (e, haystack, needle, replacement, DBL_MAX);
+
+absorb_miss string function REPLACE (string haystack, string needle,
+                                     string replacement, n)
+    expression e;
+  = replace_string (e, haystack, needle, replacement, n);
+
 absorb_miss string function RPAD (string s, n)
      expression e;
 {
diff --git a/tests/language/expressions/evaluate.at b/tests/language/expressions/evaluate.at
index 43d6b0b877..41104dd70b 100644
--- a/tests/language/expressions/evaluate.at
+++ b/tests/language/expressions/evaluate.at
@@ -810,6 +810,22 @@ RINDEX(string, string, number).]],
   [[lower(1)], [error],
    [error: DEBUG EVALUATE: Type mismatch invoking LOWER(string) as lower(number).]])
 
+CHECK_EXPR_EVAL([replace],
+  [[replace('banana', 'an', 'AN')], ["bANANa"]],
+  [[replace('banana', 'an', 'a')], ["baaa"]],
+  [[replace('banana', 'an', '')], ["ba"]],
+  [[replace('banana', 'na', '')], ["ba"]],
+  [[replace('banana', 'ba', 'BA')], ["BAnana"]],
+  [[replace('banana', 'na', 'xyzzy')], ["baxyzzyxyzzy"]],
+  [[replace('banana', 'an', 'xyzzy', 1)], ["bxyzzyana"]],
+  [[replace('banana', 'an', 'xyzzy', 1.5)], ["bxyzzyana"]],
+  [[replace('banana', 'an', 'xyzzy', 0.5)], ["banana"]],
+  [[replace('banana', 'bananana', 'xyzzy')], ["banana"]],
+  [[replace('banana', '', 'xyzzy')], ["banana"]],
+  [[replace('banana', 'ba', '', 0)], ["banana"]],
+  [[replace('banana', 'ba', '', -1)], ["banana"]],
+  [[replace('banana', 'ba', '', $sysmis)], ["banana"]])
+
 CHECK_EXPR_EVAL([lpad number ltrim lpad rtrim rpad string substr upcase],
   [[lpad('abc', -1)], [""]],
   [[lpad('abc', 0)], ["abc"]],
-- 
2.30.2