From: Ben Pfaff Date: Sun, 13 Mar 2016 18:20:22 +0000 (-0700) Subject: expressions: Implement the MEDIAN function. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=adccf1932c84ccb435f14f60fdb4aaaf9fdc1e30;p=pspp expressions: Implement the MEDIAN function. --- diff --git a/NEWS b/NEWS index 4236ae1754..309cf9e2a5 100644 --- a/NEWS +++ b/NEWS @@ -38,6 +38,12 @@ Changes from 0.8.5 to 0.9.0: discourage its use PSPP and PSPPIRE do not directly read or write this format. + * The following functions for transformation expressions are new: + + - REPLACE, for search-and-replace of one string with another. + + - MEDIAN, to compute the median of its arguments. + * Bug fixes, including the following notable ones: - The correlation coefficient in the paired samples t-test diff --git a/doc/expressions.texi b/doc/expressions.texi index b7e23a5f96..e468b8750a 100644 --- a/doc/expressions.texi +++ b/doc/expressions.texi @@ -262,7 +262,7 @@ The sections below describe each function in detail. * Trigonometry:: ACOS ARCOS ARSIN ARTAN ASIN ATAN COS SIN TAN * Missing Value Functions:: MISSING NMISS NVALID SYSMIS VALUE * Set Membership:: ANY RANGE -* Statistical Functions:: CFVAR MAX MEAN MIN SD SUM VARIANCE +* Statistical Functions:: CFVAR MAX MEAN MEDIAN MIN SD SUM VARIANCE * String Functions:: CONCAT INDEX LENGTH LOWER LPAD LTRIM NUMBER REPLACE RINDEX RPAD RTRIM STRING SUBSTR UPCASE * Time and Date:: CTIME.xxx DATE.xxx TIME.xxx XDATE.xxx @@ -510,6 +510,13 @@ be numeric or string. Results in the mean of the values of @var{number}. @end deftypefn +@cindex median +@deftypefn {Function} {} MEDIAN (@var{number}, @var{number}[, @dots{}]) +Results in the median of the values of @var{number}. Given an even +number of nonmissing arguments, yields the mean of the two middle +values. +@end deftypefn + @cindex minimum @deftypefn {Function} {} MIN (@var{number}, @var{number}[, @dots{}]) Results in the value of the least @var{value}. 
The @var{value}s may diff --git a/src/language/expressions/helpers.c b/src/language/expressions/helpers.c index cfc46290b7..57e17713b9 100644 --- a/src/language/expressions/helpers.c +++ b/src/language/expressions/helpers.c @@ -717,3 +717,32 @@ replace_string (struct expression *e, return result; } + /* Comparison callback passed to qsort() by median(). A_ and B_ each point to a double. Returns 0 when the two values compare equal; otherwise SYSMIS compares greater than any other value, and the remaining values are ordered ascending. NOTE(review): a NaN operand is neither equal to nor greater than anything, so this returns -1 whichever side it is on; confirm callers never pass NaN. */ +static int +compare_doubles (const void *a_, const void *b_) +{ + const double *ap = a_; + const double *bp = b_; + double a = *ap; + double b = *bp; + + /* Sort SYSMIS to the end. */ + return (a == b ? 0 + : a == SYSMIS ? 1 + : b == SYSMIS ? -1 + : a > b ? 1 : -1); +} + /* Returns the median of the N values in A. A is sorted in place, so the caller's array is reordered. The count N is then replaced by the result of count_valid(); when that count is zero the result is SYSMIS, when it is odd the result is the middle element, and when it is even the result is the mean of the two middle elements. NOTE(review): count_valid() is defined outside this hunk; the indexing is only correct if every element it rejects has been sorted to the end, so confirm it rejects nothing other than SYSMIS. */ +double +median (double *a, size_t n) +{ + /* Sort the array in-place, sorting SYSMIS to the end. */ + qsort (a, n, sizeof *a, compare_doubles); + + /* Drop SYSMIS. */ + n = count_valid (a, n); + + return (!n ? SYSMIS + : n % 2 ? a[n / 2] + : (a[n / 2 - 1] + a[n / 2]) / 2.0); +} diff --git a/src/language/expressions/helpers.h b/src/language/expressions/helpers.h index 51e1db0329..c4e3323406 100644 --- a/src/language/expressions/helpers.h +++ b/src/language/expressions/helpers.h @@ -89,4 +89,6 @@ struct substring replace_string (struct expression *, struct substring replacement, double n); +double median (double *, size_t n); + #endif /* expressions/helpers.h */ diff --git a/src/language/expressions/operations.def b/src/language/expressions/operations.def index 203ccbb42a..18ab4447c8 100644 --- a/src/language/expressions/operations.def +++ b/src/language/expressions/operations.def @@ -168,6 +168,11 @@ function MEAN.1 (a[n]) return mean; } +function MEDIAN.1 (a[n]) +{ + return median (a, n); +} + function MIN.1 (a[n]) { double min; diff --git a/tests/language/expressions/evaluate.at b/tests/language/expressions/evaluate.at index 41104dd70b..703758b3b5 100644 --- a/tests/language/expressions/evaluate.at +++ b/tests/language/expressions/evaluate.at @@ -633,7 +633,7 @@ MIN(string[, string]...).]], [[min("1", "2")], ["1"]], [[min("1")], ["1"]]) -CHECK_EXPR_EVAL([cfvar mean sd sum variance], 
+CHECK_EXPR_EVAL([cfvar mean median sd sum variance], [[cfvar(1, 2, 3, 4, 5)], [0.53]], [[cfvar(1, $sysmis, 2, 3, $sysmis, 4, 5)], [0.53]], [[cfvar(1, 2)], [0.47]], @@ -661,6 +661,25 @@ CHECK_EXPR_EVAL([cfvar mean sd sum variance], [[mean.4(1, 2, 3)], [error], [error: DEBUG EVALUATE: With MEAN(number[, number]...), using minimum valid argument count of 4 does not make sense when passing only 3 arguments in list.]], + [[median(1, 2, 3, 4, 5)], [3.00]], + [[median(2, 3, 4, 5, 1)], [3.00]], + [[median(2, 3, 4, 1, 5)], [3.00]], + [[median(2, 1, 4, 5, 3)], [3.00]], + [[median(1, 2, 3, 4)], [2.50]], + [[median(2, 3, 1, 4)], [2.50]], + [[median(2, 3, 4, 1)], [2.50]], + [[median(2, 1, 4, 3)], [2.50]], + [[median(1, $sysmis, 3, 4, 5)], [3.50]], + [[median(2, 3, 4, 5, $sysmis, 1)], [3.00]], + [[median($sysmis, $sysmis, $sysmis, 2, 3, 4, 1, 5)], [3.00]], + [[median(1, 2, 3)], [2.00]], + [[median(1)], [1.00]], + [[median(1, 2)], [1.50]], + [[median(1, 2, $sysmis)], [1.50]], + [[median(1, $sysmis, $sysmis)], [1.00]], + [[median($sysmis, $sysmis, $sysmis)], [sysmis]], + [[median.3(1, 2, $sysmis)], [sysmis]], + [[median.2(1, $sysmis)], [sysmis]], [[sd(1, 2, 3, 4, 5)], [1.58]], [[sd(1, $sysmis, 2, 3, $sysmis, 4, 5)], [1.58]],