From 46cc6dd316280579f5b03dfd39d01bc7daf88df7 Mon Sep 17 00:00:00 2001 From: John Darrington Date: Fri, 31 Dec 2004 08:51:06 +0000 Subject: [PATCH] Added Boxplots to the EXAMINE subcommand repertoire Fixed a few bugs in percentiles calculations --- doc/statistics.texi | 9 + po/en_GB.po | 85 ++++---- po/pspp.pot | 85 ++++---- src/ChangeLog | 7 + src/box-whisker.c | 217 +++++++++++--------- src/chart.h | 12 +- src/examine.q | 294 +++++++++++++++++++++++---- src/factor_stats.c | 2 +- src/factor_stats.h | 4 +- src/percentiles.c | 50 +++-- src/percentiles.h | 2 +- src/piechart.c | 3 +- src/q2c.c | 2 +- tests/Makefile.am | 1 + tests/command/examine-percentiles.sh | 198 ++++++++++++++++++ 15 files changed, 725 insertions(+), 246 deletions(-) create mode 100755 tests/command/examine-percentiles.sh diff --git a/doc/statistics.texi b/doc/statistics.texi index 56c37949..98be7cc7 100644 --- a/doc/statistics.texi +++ b/doc/statistics.texi @@ -259,6 +259,15 @@ how many upper and lower extremes to show. The default number is 5. The PLOT subcommand specifies which plots are to be produced if any. +The COMPARE subcommand is only relevant if producing boxplots, and it is only +useful if there is more than one dependent variable and at least one factor. If +/COMPARE=GROUPS is specified, then one plot per dependent variable is produced, +containing boxplots for all the factors. +If /COMPARE=VARIABLES is specified, then one plot per factor is produced, +each containing one boxplot per dependent variable. +If the /COMPARE subcommand is omitted, then PSPP uses the default value of +/COMPARE=GROUPS. + The CINTERVAL subcommand specifies the confidence interval to use in calculation of the descriptives command. The default it 95%. 
diff --git a/po/en_GB.po b/po/en_GB.po index 6b382054..9dec07c7 100644 --- a/po/en_GB.po +++ b/po/en_GB.po @@ -7,7 +7,7 @@ msgid "" msgstr "" "Project-Id-Version: PSPP 0.3.1\n" "Report-Msgid-Bugs-To: pspp-dev@gnu.org\n" -"POT-Creation-Date: 2004-12-29 08:18+0800\n" +"POT-Creation-Date: 2004-12-31 08:53+0800\n" "PO-Revision-Date: 2004-01-23 13:04+0800\n" "Last-Translator: John Darrington \n" "Language-Team: John Darrington \n" @@ -957,7 +957,7 @@ msgstr "" msgid "Only USE ALL is currently implemented." msgstr "" -#: src/descript.c:99 src/examine.q:1400 src/frequencies.q:112 src/oneway.q:396 +#: src/descript.c:99 src/examine.q:1420 src/frequencies.q:112 src/oneway.q:396 #: src/t-test.q:690 src/t-test.q:713 src/t-test.q:836 src/t-test.q:1173 msgid "Mean" msgstr "" @@ -970,11 +970,11 @@ msgstr "" msgid "Std Dev" msgstr "" -#: src/descript.c:102 src/examine.q:1478 src/frequencies.q:117 +#: src/descript.c:102 src/examine.q:1498 src/frequencies.q:117 msgid "Variance" msgstr "" -#: src/descript.c:103 src/examine.q:1585 src/frequencies.q:118 +#: src/descript.c:103 src/examine.q:1605 src/frequencies.q:118 msgid "Kurtosis" msgstr "" @@ -982,7 +982,7 @@ msgstr "" msgid "S E Kurt" msgstr "" -#: src/descript.c:105 src/examine.q:1565 src/frequencies.q:120 +#: src/descript.c:105 src/examine.q:1585 src/frequencies.q:120 msgid "Skewness" msgstr "" @@ -990,16 +990,16 @@ msgstr "" msgid "S E Skew" msgstr "" -#: src/descript.c:107 src/examine.q:1526 src/frequencies.q:122 +#: src/descript.c:107 src/examine.q:1546 src/frequencies.q:122 msgid "Range" msgstr "" -#: src/descript.c:108 src/examine.q:1503 src/frequencies.q:123 +#: src/descript.c:108 src/examine.q:1523 src/frequencies.q:123 #: src/oneway.q:408 msgid "Minimum" msgstr "" -#: src/descript.c:109 src/examine.q:1514 src/frequencies.q:124 +#: src/descript.c:109 src/examine.q:1534 src/frequencies.q:124 #: src/oneway.q:409 msgid "Maximum" msgstr "" @@ -3723,7 +3723,7 @@ msgstr "" #: src/sysfile-info.c:531 src/vfm.c:875 
src/crosstabs.q:1099 #: src/crosstabs.q:1126 src/crosstabs.q:1146 src/crosstabs.q:1168 -#: src/examine.q:1054 src/frequencies.q:1136 src/frequencies.q:1257 +#: src/examine.q:1074 src/frequencies.q:1136 src/frequencies.q:1257 msgid "Value" msgstr "" @@ -3944,32 +3944,32 @@ msgstr "" msgid "Summary." msgstr "" -#: src/crosstabs.q:802 src/examine.q:838 +#: src/crosstabs.q:802 src/examine.q:858 msgid "Cases" msgstr "" -#: src/crosstabs.q:803 src/examine.q:772 src/frequencies.q:1134 +#: src/crosstabs.q:803 src/examine.q:792 src/frequencies.q:1134 #: src/frequencies.q:1507 msgid "Valid" msgstr "" -#: src/crosstabs.q:804 src/examine.q:773 src/frequencies.q:1202 +#: src/crosstabs.q:804 src/examine.q:793 src/frequencies.q:1202 #: src/frequencies.q:1508 msgid "Missing" msgstr "" #: src/crosstabs.q:805 src/crosstabs.q:1008 src/crosstabs.q:1722 -#: src/examine.q:774 src/frequencies.q:1211 src/oneway.q:307 src/oneway.q:486 +#: src/examine.q:794 src/frequencies.q:1211 src/oneway.q:307 src/oneway.q:486 msgid "Total" msgstr "" -#: src/crosstabs.q:815 src/examine.q:850 src/frequencies.q:1506 +#: src/crosstabs.q:815 src/examine.q:870 src/frequencies.q:1506 #: src/oneway.q:395 src/t-test.q:689 src/t-test.q:712 src/t-test.q:837 #: src/t-test.q:1372 msgid "N" msgstr "" -#: src/crosstabs.q:816 src/examine.q:853 src/frequencies.q:1138 +#: src/crosstabs.q:816 src/examine.q:873 src/frequencies.q:1138 #: src/frequencies.q:1139 src/frequencies.q:1140 msgid "Percent" msgstr "" @@ -4007,7 +4007,7 @@ msgid "adj. resid." 
msgstr "" #: src/crosstabs.q:1098 src/crosstabs.q:1125 src/crosstabs.q:1145 -#: src/crosstabs.q:1166 src/examine.q:1288 +#: src/crosstabs.q:1166 src/examine.q:1308 msgid "Statistic" msgstr "" @@ -4190,96 +4190,105 @@ msgstr "" msgid "%s Dependent" msgstr "" -#: src/examine.q:418 src/examine.q:430 +#: src/examine.q:438 src/examine.q:450 #, c-format msgid "%s and %s are mutually exclusive" msgstr "" -#: src/examine.q:832 +#: src/examine.q:852 msgid "Case Processing Summary" msgstr "" -#: src/examine.q:1038 +#: src/examine.q:1058 msgid "Extreme Values" msgstr "" -#: src/examine.q:1055 +#: src/examine.q:1075 msgid "Case Number" msgstr "" -#: src/examine.q:1143 +#: src/examine.q:1163 msgid "Highest" msgstr "" -#: src/examine.q:1148 +#: src/examine.q:1168 msgid "Lowest" msgstr "" -#: src/examine.q:1289 src/oneway.q:398 src/oneway.q:705 +#: src/examine.q:1309 src/oneway.q:398 src/oneway.q:705 msgid "Std. Error" msgstr "" -#: src/examine.q:1291 src/oneway.q:412 +#: src/examine.q:1311 src/oneway.q:412 msgid "Descriptives" msgstr "" -#: src/examine.q:1418 src/oneway.q:403 +#: src/examine.q:1438 src/oneway.q:403 #, c-format msgid "%g%% Confidence Interval for Mean" msgstr "" -#: src/examine.q:1424 src/oneway.q:405 +#: src/examine.q:1444 src/oneway.q:405 msgid "Lower Bound" msgstr "" -#: src/examine.q:1435 src/oneway.q:406 +#: src/examine.q:1455 src/oneway.q:406 msgid "Upper Bound" msgstr "" -#: src/examine.q:1447 +#: src/examine.q:1467 msgid "5% Trimmed Mean" msgstr "" -#: src/examine.q:1458 src/frequencies.q:114 +#: src/examine.q:1478 src/frequencies.q:114 msgid "Median" msgstr "" -#: src/examine.q:1490 src/oneway.q:397 src/t-test.q:691 src/t-test.q:714 +#: src/examine.q:1510 src/oneway.q:397 src/t-test.q:691 src/t-test.q:714 #: src/t-test.q:838 src/t-test.q:1174 msgid "Std. 
Deviation" msgstr "" -#: src/examine.q:1538 +#: src/examine.q:1558 msgid "Interquartile Range" msgstr "" -#: src/examine.q:1628 +#: src/examine.q:1708 +#, c-format +msgid "Boxplot of %s" +msgstr "" + +#: src/examine.q:1734 +msgid "Boxplot" +msgstr "" + +#: src/examine.q:1773 #, c-format msgid "Normal Q-Q Plot of %s" msgstr "" -#: src/examine.q:1629 src/examine.q:1635 +#: src/examine.q:1774 src/examine.q:1780 msgid "Observed Value" msgstr "" -#: src/examine.q:1630 +#: src/examine.q:1775 msgid "Expected Normal" msgstr "" -#: src/examine.q:1633 +#: src/examine.q:1778 #, c-format msgid "Detrended Normal Q-Q Plot of %s" msgstr "" -#: src/examine.q:1636 +#: src/examine.q:1781 msgid "Dev from Normal" msgstr "" -#: src/examine.q:1757 src/examine.q:1779 src/frequencies.q:1518 +#: src/examine.q:1902 src/examine.q:1924 src/frequencies.q:1518 msgid "Percentiles" msgstr "" -#: src/examine.q:1904 +#: src/examine.q:2049 msgid "Tukey's Hinges" msgstr "" diff --git a/po/pspp.pot b/po/pspp.pot index c3fbf686..087dba18 100644 --- a/po/pspp.pot +++ b/po/pspp.pot @@ -8,7 +8,7 @@ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\n" "Report-Msgid-Bugs-To: pspp-dev@gnu.org\n" -"POT-Creation-Date: 2004-12-29 08:18+0800\n" +"POT-Creation-Date: 2004-12-31 08:53+0800\n" "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" "Last-Translator: FULL NAME \n" "Language-Team: LANGUAGE \n" @@ -958,7 +958,7 @@ msgstr "" msgid "Only USE ALL is currently implemented." 
msgstr "" -#: src/descript.c:99 src/examine.q:1400 src/frequencies.q:112 src/oneway.q:396 +#: src/descript.c:99 src/examine.q:1420 src/frequencies.q:112 src/oneway.q:396 #: src/t-test.q:690 src/t-test.q:713 src/t-test.q:836 src/t-test.q:1173 msgid "Mean" msgstr "" @@ -971,11 +971,11 @@ msgstr "" msgid "Std Dev" msgstr "" -#: src/descript.c:102 src/examine.q:1478 src/frequencies.q:117 +#: src/descript.c:102 src/examine.q:1498 src/frequencies.q:117 msgid "Variance" msgstr "" -#: src/descript.c:103 src/examine.q:1585 src/frequencies.q:118 +#: src/descript.c:103 src/examine.q:1605 src/frequencies.q:118 msgid "Kurtosis" msgstr "" @@ -983,7 +983,7 @@ msgstr "" msgid "S E Kurt" msgstr "" -#: src/descript.c:105 src/examine.q:1565 src/frequencies.q:120 +#: src/descript.c:105 src/examine.q:1585 src/frequencies.q:120 msgid "Skewness" msgstr "" @@ -991,16 +991,16 @@ msgstr "" msgid "S E Skew" msgstr "" -#: src/descript.c:107 src/examine.q:1526 src/frequencies.q:122 +#: src/descript.c:107 src/examine.q:1546 src/frequencies.q:122 msgid "Range" msgstr "" -#: src/descript.c:108 src/examine.q:1503 src/frequencies.q:123 +#: src/descript.c:108 src/examine.q:1523 src/frequencies.q:123 #: src/oneway.q:408 msgid "Minimum" msgstr "" -#: src/descript.c:109 src/examine.q:1514 src/frequencies.q:124 +#: src/descript.c:109 src/examine.q:1534 src/frequencies.q:124 #: src/oneway.q:409 msgid "Maximum" msgstr "" @@ -3723,7 +3723,7 @@ msgstr "" #: src/sysfile-info.c:531 src/vfm.c:875 src/crosstabs.q:1099 #: src/crosstabs.q:1126 src/crosstabs.q:1146 src/crosstabs.q:1168 -#: src/examine.q:1054 src/frequencies.q:1136 src/frequencies.q:1257 +#: src/examine.q:1074 src/frequencies.q:1136 src/frequencies.q:1257 msgid "Value" msgstr "" @@ -3944,32 +3944,32 @@ msgstr "" msgid "Summary." 
msgstr "" -#: src/crosstabs.q:802 src/examine.q:838 +#: src/crosstabs.q:802 src/examine.q:858 msgid "Cases" msgstr "" -#: src/crosstabs.q:803 src/examine.q:772 src/frequencies.q:1134 +#: src/crosstabs.q:803 src/examine.q:792 src/frequencies.q:1134 #: src/frequencies.q:1507 msgid "Valid" msgstr "" -#: src/crosstabs.q:804 src/examine.q:773 src/frequencies.q:1202 +#: src/crosstabs.q:804 src/examine.q:793 src/frequencies.q:1202 #: src/frequencies.q:1508 msgid "Missing" msgstr "" #: src/crosstabs.q:805 src/crosstabs.q:1008 src/crosstabs.q:1722 -#: src/examine.q:774 src/frequencies.q:1211 src/oneway.q:307 src/oneway.q:486 +#: src/examine.q:794 src/frequencies.q:1211 src/oneway.q:307 src/oneway.q:486 msgid "Total" msgstr "" -#: src/crosstabs.q:815 src/examine.q:850 src/frequencies.q:1506 +#: src/crosstabs.q:815 src/examine.q:870 src/frequencies.q:1506 #: src/oneway.q:395 src/t-test.q:689 src/t-test.q:712 src/t-test.q:837 #: src/t-test.q:1372 msgid "N" msgstr "" -#: src/crosstabs.q:816 src/examine.q:853 src/frequencies.q:1138 +#: src/crosstabs.q:816 src/examine.q:873 src/frequencies.q:1138 #: src/frequencies.q:1139 src/frequencies.q:1140 msgid "Percent" msgstr "" @@ -4007,7 +4007,7 @@ msgid "adj. resid." 
msgstr "" #: src/crosstabs.q:1098 src/crosstabs.q:1125 src/crosstabs.q:1145 -#: src/crosstabs.q:1166 src/examine.q:1288 +#: src/crosstabs.q:1166 src/examine.q:1308 msgid "Statistic" msgstr "" @@ -4190,96 +4190,105 @@ msgstr "" msgid "%s Dependent" msgstr "" -#: src/examine.q:418 src/examine.q:430 +#: src/examine.q:438 src/examine.q:450 #, c-format msgid "%s and %s are mutually exclusive" msgstr "" -#: src/examine.q:832 +#: src/examine.q:852 msgid "Case Processing Summary" msgstr "" -#: src/examine.q:1038 +#: src/examine.q:1058 msgid "Extreme Values" msgstr "" -#: src/examine.q:1055 +#: src/examine.q:1075 msgid "Case Number" msgstr "" -#: src/examine.q:1143 +#: src/examine.q:1163 msgid "Highest" msgstr "" -#: src/examine.q:1148 +#: src/examine.q:1168 msgid "Lowest" msgstr "" -#: src/examine.q:1289 src/oneway.q:398 src/oneway.q:705 +#: src/examine.q:1309 src/oneway.q:398 src/oneway.q:705 msgid "Std. Error" msgstr "" -#: src/examine.q:1291 src/oneway.q:412 +#: src/examine.q:1311 src/oneway.q:412 msgid "Descriptives" msgstr "" -#: src/examine.q:1418 src/oneway.q:403 +#: src/examine.q:1438 src/oneway.q:403 #, c-format msgid "%g%% Confidence Interval for Mean" msgstr "" -#: src/examine.q:1424 src/oneway.q:405 +#: src/examine.q:1444 src/oneway.q:405 msgid "Lower Bound" msgstr "" -#: src/examine.q:1435 src/oneway.q:406 +#: src/examine.q:1455 src/oneway.q:406 msgid "Upper Bound" msgstr "" -#: src/examine.q:1447 +#: src/examine.q:1467 msgid "5% Trimmed Mean" msgstr "" -#: src/examine.q:1458 src/frequencies.q:114 +#: src/examine.q:1478 src/frequencies.q:114 msgid "Median" msgstr "" -#: src/examine.q:1490 src/oneway.q:397 src/t-test.q:691 src/t-test.q:714 +#: src/examine.q:1510 src/oneway.q:397 src/t-test.q:691 src/t-test.q:714 #: src/t-test.q:838 src/t-test.q:1174 msgid "Std. 
Deviation" msgstr "" -#: src/examine.q:1538 +#: src/examine.q:1558 msgid "Interquartile Range" msgstr "" -#: src/examine.q:1628 +#: src/examine.q:1708 +#, c-format +msgid "Boxplot of %s" +msgstr "" + +#: src/examine.q:1734 +msgid "Boxplot" +msgstr "" + +#: src/examine.q:1773 #, c-format msgid "Normal Q-Q Plot of %s" msgstr "" -#: src/examine.q:1629 src/examine.q:1635 +#: src/examine.q:1774 src/examine.q:1780 msgid "Observed Value" msgstr "" -#: src/examine.q:1630 +#: src/examine.q:1775 msgid "Expected Normal" msgstr "" -#: src/examine.q:1633 +#: src/examine.q:1778 #, c-format msgid "Detrended Normal Q-Q Plot of %s" msgstr "" -#: src/examine.q:1636 +#: src/examine.q:1781 msgid "Dev from Normal" msgstr "" -#: src/examine.q:1757 src/examine.q:1779 src/frequencies.q:1518 +#: src/examine.q:1902 src/examine.q:1924 src/frequencies.q:1518 msgid "Percentiles" msgstr "" -#: src/examine.q:1904 +#: src/examine.q:2049 msgid "Tukey's Hinges" msgstr "" diff --git a/src/ChangeLog b/src/ChangeLog index b2ec7335..544f91a4 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,10 @@ +Fri Dec 31 16:47:45 WST 2004 John Darrington + + * examine.q box-whisker.c chart.h Implemented boxplots in EXAMINE + + * percentiles.c Fixed some bugs when calculating percentiles when + there's a small number of cases. + Wed Dec 29 08:18:08 WST 2004 John Darrington * percentiles.[ch] Added. 
Calculates percentiles and Tukey hinges diff --git a/src/box-whisker.c b/src/box-whisker.c index fa52476e..2600e8aa 100644 --- a/src/box-whisker.c +++ b/src/box-whisker.c @@ -20,106 +20,77 @@ #include "chart.h" #include +#include "misc.h" -/* Draw a box-and-whiskers plot -*/ - -struct data_stats -{ - double ptile0 ; - double ptile25 ; - double median ; - double ptile75 ; - - double ptile100; - - double outlier ; -}; - - -const struct data_stats stats1 = { - 40, - 45, - 54, - 60, - 70, - - 33 -}; - -const struct data_stats stats2 = { - 30, - 40, - 45, - 54, - 60, - - - 72 -}; - - - - - -static const double y_min = 25; -static const double y_max = 75; -static const double y_tick = 10; +#include "factor_stats.h" +/* Draw a box-and-whiskers plot +*/ -#define min(A,B) ((A>B)?B:A) - - -void draw_box_and_whiskers(struct chart *ch, - double box_centre, const struct data_stats *s, - const char *name); - +/* Draw an outlier on the plot CH + * at CENTRELINE + * The outlier is in (*wvp)[idx] + * If EXTREME is non zero, then consider it to be an extreme + * value + */ +void +draw_outlier(struct chart *ch, double centreline, + struct weighted_value **wvp, + int idx, + short extreme); -static double ordinate_scale; -void -draw_box_whisker_chart(struct chart *ch, const char *title) +void +draw_outlier(struct chart *ch, double centreline, + struct weighted_value **wvp, + int idx, + short extreme + ) { - double d; + char label[10]; - ordinate_scale = fabs(ch->data_top - ch->data_bottom) / fabs(y_max - y_min) ; +#define MARKER_CIRCLE 4 +#define MARKER_STAR 3 + pl_fmarker_r(ch->lp, + centreline, + ch->data_bottom + + (wvp[idx]->v.f - ch->y_min ) * ch->ordinate_scale, + extreme?MARKER_STAR:MARKER_CIRCLE, + 20); - chart_write_title(ch, title); + pl_moverel_r(ch->lp, 10,0); + snprintf(label, 10, "%d", wvp[idx]->case_nos->num); - - /* Move to data bottom-left */ - pl_move_r(ch->lp, - ch->data_left, ch->data_bottom); - - for ( d = y_min; d <= y_max ; d += y_tick ) - { - draw_tick (ch, 
TICK_ORDINATE, (d - y_min ) * ordinate_scale, "%g", d); - } - - draw_box_and_whiskers(ch, - ch->data_left + 1.0/4.0 * (ch->data_right - ch->data_left) , - &stats1,"Stats1" - ); - - draw_box_and_whiskers(ch, - ch->data_left + 3.0/4.0 * (ch->data_right - ch->data_left), - &stats2,"Stats2" - ); - + pl_alabel_r(ch->lp, 'l', 'c', label); } void -draw_box_and_whiskers(struct chart *ch, - double box_centre, const struct data_stats *s, - const char *name) +boxplot_draw_boxplot(struct chart *ch, + double box_centre, + double box_width, + struct metrics *m, + /* + const double hinge[3], + struct weighted_value **wvp, + int n_data, + */ + const char *name) { + double whisker[2]; + int i; + + + const double *hinge = m->hinge; + struct weighted_value **wvp = m->wvp; + const int n_data = m->n_data; + + const double step = (hinge[2] - hinge[0]) * 1.5; - const double box_width = (ch->data_right - ch->data_left) / 4.0; const double box_left = box_centre - box_width / 2.0; @@ -127,22 +98,32 @@ draw_box_and_whiskers(struct chart *ch, const double box_bottom = - ch->data_bottom + ( s->ptile25 - y_min ) * ordinate_scale; + ch->data_bottom + ( hinge[0] - ch->y_min ) * ch->ordinate_scale; const double box_top = - ch->data_bottom + ( s->ptile75 - y_min ) * ordinate_scale; + ch->data_bottom + ( hinge[2] - ch->y_min ) * ch->ordinate_scale; + whisker[1] = hinge[2]; + whisker[0] = wvp[0]->v.f; - const double iq_range = s->ptile75 - s->ptile25; + for ( i = 0 ; i < n_data ; ++i ) + { + if ( hinge[2] + step > wvp[i]->v.f) + whisker[1] = wvp[i]->v.f; + if ( hinge[0] - step > wvp[i]->v.f) + whisker[0] = wvp[i]->v.f; + + } + + const double bottom_whisker = - ch->data_bottom + (min(s->ptile0,s->ptile25 + iq_range*1.5) - y_min ) * - ordinate_scale; + ch->data_bottom + ( whisker[0] - ch->y_min ) * ch->ordinate_scale; + + const double top_whisker = + ch->data_bottom + ( whisker[1] - ch->y_min ) * ch->ordinate_scale; - const double top_whisker = - ch->data_bottom + (min(s->ptile100,s->ptile75 + 
iq_range*1.5) - y_min ) * - ordinate_scale; pl_savestate_r(ch->lp); @@ -166,9 +147,9 @@ draw_box_and_whiskers(struct chart *ch, pl_linewidth_r(ch->lp,5); pl_fline_r(ch->lp, box_left, - ch->data_bottom + ( s->median - y_min ) * ordinate_scale, + ch->data_bottom + ( hinge[1] - ch->y_min ) * ch->ordinate_scale, box_right, - ch->data_bottom + ( s->median - y_min ) * ordinate_scale); + ch->data_bottom + ( hinge[1] - ch->y_min ) * ch->ordinate_scale); pl_restorestate_r(ch->lp); @@ -187,6 +168,7 @@ draw_box_and_whiskers(struct chart *ch, top_whisker); + /* Draw centre line. (bottom half) */ pl_fline_r(ch->lp, @@ -198,15 +180,19 @@ draw_box_and_whiskers(struct chart *ch, box_centre, top_whisker, box_centre, box_top); - - /* Draw an outlier */ - pl_fcircle_r(ch->lp, - box_centre, - ch->data_bottom + (s->outlier - y_min ) * ordinate_scale, - 5); - - pl_moverel_r(ch->lp, 10,0); - pl_alabel_r(ch->lp,'l','c',"123"); + /* Draw outliers */ + for ( i = 0 ; i < n_data ; ++i ) + { + if ( wvp[i]->v.f >= hinge[2] + step ) + draw_outlier(ch, box_centre, wvp, i, + ( wvp[i]->v.f > hinge[2] + 2 * step ) + ); + + if ( wvp[i]->v.f <= hinge[0] - step ) + draw_outlier(ch, box_centre, wvp, i, + ( wvp[i]->v.f < hinge[0] - 2 * step ) + ); + } /* Draw tick mark on x axis */ @@ -216,3 +202,34 @@ draw_box_and_whiskers(struct chart *ch, } + + +void +boxplot_draw_yscale(struct chart *ch , double y_max, double y_min) +{ + double y_tick; + double d; + + ch->y_max = y_max; + ch->y_min = y_min; + + y_tick = chart_rounded_tick(fabs(ch->y_max - ch->y_min) / 5.0); + + ch->y_min = (ceil( ch->y_min / y_tick ) - 1.0 ) * y_tick; + + ch->y_max = ( floor( ch->y_max / y_tick ) + 1.0 ) * y_tick; + + ch->ordinate_scale = fabs(ch->data_top - ch->data_bottom) + / fabs(ch->y_max - ch->y_min) ; + + + /* Move to data bottom-left */ + pl_move_r(ch->lp, + ch->data_left, ch->data_bottom); + + for ( d = ch->y_min; d <= ch->y_max ; d += y_tick ) + { + draw_tick (ch, TICK_ORDINATE, (d - ch->y_min ) * ch->ordinate_scale, "%g", 
d); + } + +} diff --git a/src/chart.h b/src/chart.h index f2c9e501..eec5bff5 100644 --- a/src/chart.h +++ b/src/chart.h @@ -61,7 +61,7 @@ struct chart { char fill_colour[10]; - /* Stuff Particular to Cartesians */ + /* Stuff Particular to Cartesians (and Boxplots ) */ double ordinate_scale; double abscissa_scale; double x_min; @@ -166,8 +166,18 @@ void chart_write_yscale(struct chart *ch, void chart_datum(struct chart *ch, int dataset, double x, double y); +struct metrics; +void boxplot_draw_boxplot(struct chart *ch, + double box_centre, + double box_width, + struct metrics *m, + const char *name); + + +void boxplot_draw_yscale(struct chart *ch , double y_max, double y_min); + enum CHART_DIM { diff --git a/src/examine.q b/src/examine.q index 97a63a90..5cb89af2 100644 --- a/src/examine.q +++ b/src/examine.q @@ -57,6 +57,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA incl:include/!exclude; +compare=cmp:variables/!groups; +percentiles=custom; + +id=var; +plot[plt_]=stemleaf,boxplot,npplot,:spreadlevel(*d:n),histogram,all,none; +cinterval=double; +statistics[st_]=descriptives,:extreme(*d:n),all,none. 
@@ -120,9 +121,22 @@ static void show_percentiles(struct variable **dependent_var, + void np_plot(const struct metrics *m, const char *factorname); +void box_plot_group(const struct factor *fctr, + const struct variable **vars, int n_vars, + const struct variable *id + ) ; + + +void box_plot_variables(const struct factor *fctr, + struct variable **vars, int n_vars, + const struct variable *id + ); + + /* Per Split function */ static void run_examine(const struct casefile *cf, void *cmd_); @@ -134,6 +148,22 @@ void factor_calc(struct ccase *c, int case_no, double weight, int case_missing); +/* Represent a factor as a string, so it can be + printed in a human readable fashion */ +const char * factor_to_string(const struct factor *fctr, + struct factor_statistics *fs, + const struct variable *var); + + +/* Represent a factor as a string, so it can be + printed in a human readable fashion, + but sacrificing some readablility for the sake of brevity */ +const char *factor_to_string_concise(const struct factor *fctr, + struct factor_statistics *fs); + + + + /* Function to use for testing for missing values */ static is_missing_func value_is_missing; @@ -169,7 +199,6 @@ cmd_examine(void) if ( ! 
cmd.sbc_cinterval) cmd.n_cinterval[0] = 95.0; - /* If descriptives have been requested, make sure the quartiles are calculated */ if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] ) @@ -223,6 +252,18 @@ output_examine(void) np_plot(&totals[v], var_to_string(dependent_vars[v])); } + if ( cmd.a_plot[XMN_PLT_BOXPLOT] ) + { + if ( cmd.cmp == XMN_GROUPS ) + { + box_plot_group(0, dependent_vars, n_dependent_vars, + cmd.v_id); + } + else + box_plot_variables(0, dependent_vars, n_dependent_vars, + cmd.v_id); + } + if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) { for ( v = 0 ; v < n_dependent_vars; ++v ) @@ -269,39 +310,26 @@ output_examine(void) struct factor_statistics **fs = fctr->fs ; + if ( cmd.a_plot[XMN_PLT_BOXPLOT] ) + { + if ( cmd.cmp == XMN_VARIABLES ) + box_plot_variables(fctr, dependent_vars, n_dependent_vars, + cmd.v_id); + else + box_plot_group(fctr, dependent_vars, n_dependent_vars, + cmd.v_id); + } + for ( v = 0 ; v < n_dependent_vars; ++v ) { for ( fs = fctr->fs ; *fs ; ++fs ) { - char buf1[100]; - char buf2[100]; - sprintf(buf1, "%s (", - var_to_string(dependent_vars[v])); - - snprintf(buf2, 100, "%s = %s", - var_to_string(fctr->indep_var[0]), - value_to_string(&(*fs)->id[0],fctr->indep_var[0])); - - strcat(buf1, buf2); - - if ( fctr->indep_var[1] ) - { - sprintf(buf2, "; %s = %s)", - var_to_string(fctr->indep_var[1]), - value_to_string(&(*fs)->id[1], - fctr->indep_var[1])); - strcat(buf1, buf2); - } - else - { - strcat(buf1, ")"); - } + const char *s = factor_to_string(fctr, *fs, dependent_vars[v]); if ( cmd.a_plot[XMN_PLT_NPPLOT] ) - np_plot(&(*fs)->m[v],buf1); + np_plot(&(*fs)->m[v], s); - if ( cmd.a_plot[XMN_PLT_HISTOGRAM] ) { struct normal_curve normal; @@ -311,7 +339,7 @@ output_examine(void) normal.stddev = (*fs)->m[v].stddev; histogram_plot((*fs)->m[v].histogram, - buf1, &normal, 0); + s, &normal, 0); } } /* for ( fs .... 
*/ @@ -579,8 +607,9 @@ factor_calc(struct ccase *c, int case_no, double weight, int case_missing) if ( value_is_missing(val,var) || case_missing ) val = 0; - - metrics_calc( &(*foo)->m[v], val, weight, case_no ); + + metrics_calc( &(*foo)->m[v], val, weight, case_no); + } fctr = fctr->next; @@ -652,7 +681,7 @@ run_examine(const struct casefile *cf, void *cmd_ ) if ( value_is_missing(val,var) || case_missing ) val = 0; - metrics_calc(&totals[v], val, weight, case_no ); + metrics_calc(&totals[v], val, weight, case_no); } @@ -1037,7 +1066,6 @@ show_extremes(struct variable **dependent_var, int n_dep_var, tab_title (tbl, 0, _("Extreme Values")); - tab_vline (tbl, TAL_2, n_cols - 2, 0, n_rows -1); tab_vline (tbl, TAL_1, n_cols - 1, 0, n_rows -1); @@ -1054,9 +1082,6 @@ show_extremes(struct variable **dependent_var, int n_dep_var, tab_text (tbl, n_cols - 1, 0, TAB_CENTER | TAT_TITLE, _("Value")); tab_text (tbl, n_cols - 2, 0, TAB_CENTER | TAT_TITLE, _("Case Number")); - - - for ( i = 0 ; i < n_dep_var ; ++i ) { @@ -1377,13 +1402,6 @@ show_descriptives(struct variable **dependent_var, - - - - - - - /* Fill in the descriptives data */ void populate_descriptives(struct tab_table *tbl, int col, int row, @@ -1602,6 +1620,133 @@ populate_descriptives(struct tab_table *tbl, int col, int row, } + +void +box_plot_variables(const struct factor *fctr, + struct variable **vars, int n_vars, + const struct variable *id) +{ + int i; + struct factor_statistics **fs ; + + if ( ! 
fctr ) + { + box_plot_group(fctr, vars, n_vars, id); + return; + } + + for ( fs = fctr->fs ; *fs ; ++fs ) + { + double y_min = DBL_MAX; + double y_max = -DBL_MAX; + struct chart ch; + + chart_initialise(&ch); + + const char *s = factor_to_string(fctr, *fs, 0 ); + + chart_write_title(&ch, s); + + for ( i = 0 ; i < n_vars ; ++i ) + { + y_max = max(y_max, (*fs)->m[i].max); + y_min = min(y_min, (*fs)->m[i].min); + } + + boxplot_draw_yscale(&ch, y_max, y_min); + + for ( i = 0 ; i < n_vars ; ++i ) + { + + const double box_width = (ch.data_right - ch.data_left) + / (n_vars * 2.0 ) ; + + const double box_centre = ( i * 2 + 1) * box_width + + ch.data_left; + + boxplot_draw_boxplot(&ch, + box_centre, box_width, + &(*fs)->m[i], + var_to_string(vars[i])); + + + } + + chart_finalise(&ch); + + } + +} + + + +/* Do a box plot, grouping all factors into one plot ; + each dependent variable has its own plot. +*/ +void +box_plot_group(const struct factor *fctr, + const struct variable **vars, + int n_vars, + const struct variable *id) +{ + int i; + + for ( i = 0 ; i < n_vars ; ++i ) + { + struct factor_statistics **fs ; + struct chart ch; + + chart_initialise(&ch); + + boxplot_draw_yscale(&ch, totals[i].max, totals[i].min); + + if ( fctr ) + { + int n_factors = 0; + int f=0; + for ( fs = fctr->fs ; *fs ; ++fs ) + ++n_factors; + + chart_write_title(&ch, _("Boxplot of %s vs. 
%s"), + var_to_string(vars[i]), var_to_string(fctr->indep_var[0]) ); + + for ( fs = fctr->fs ; *fs ; ++fs ) + { + + const char *s = factor_to_string_concise(fctr, *fs); + + const double box_width = (ch.data_right - ch.data_left) + / (n_factors * 2.0 ) ; + + const double box_centre = ( f++ * 2 + 1) * box_width + + ch.data_left; + + boxplot_draw_boxplot(&ch, + box_centre, box_width, + &(*fs)->m[i], + s); + } + } + else + { + const double box_width = (ch.data_right - ch.data_left) / 3.0; + const double box_centre = (ch.data_right + ch.data_left) / 2.0; + + chart_write_title(&ch, _("Boxplot")); + + boxplot_draw_boxplot(&ch, + box_centre, box_width, + &totals[i], + var_to_string(vars[i]) ); + + } + + chart_finalise(&ch); + } + +} + + /* Plot the normal and detrended normal plots for m Label the plots with factorname */ void @@ -1920,17 +2065,17 @@ populate_percentiles(struct tab_table *tbl, int col, int row, if ( (*p)->p == 25 ) tab_float(tbl, col + i + 1 , row + 1, TAB_CENTER, - m->hinges[0], 8, 2); + m->hinge[0], 8, 2); if ( (*p)->p == 50 ) tab_float(tbl, col + i + 1 , row + 1, TAB_CENTER, - m->hinges[1], 8, 2); + m->hinge[1], 8, 2); if ( (*p)->p == 75 ) tab_float(tbl, col + i + 1 , row + 1, TAB_CENTER, - m->hinges[2], 8, 2); + m->hinge[2], 8, 2); i++; @@ -1938,8 +2083,69 @@ populate_percentiles(struct tab_table *tbl, int col, int row, p++; } +} + +const char * +factor_to_string(const struct factor *fctr, + struct factor_statistics *fs, + const struct variable *var) +{ + + static char buf1[100]; + char buf2[100]; + strcpy(buf1,""); + + if (var) + sprintf(buf1, "%s (",var_to_string(var) ); + + + snprintf(buf2, 100, "%s = %s", + var_to_string(fctr->indep_var[0]), + value_to_string(&fs->id[0],fctr->indep_var[0])); + + strcat(buf1, buf2); + + if ( fctr->indep_var[1] ) + { + sprintf(buf2, "; %s = %s)", + var_to_string(fctr->indep_var[1]), + value_to_string(&fs->id[1], + fctr->indep_var[1])); + strcat(buf1, buf2); + } + else + { + if ( var ) + strcat(buf1, ")"); + } + + 
return buf1; } + + +const char * +factor_to_string_concise(const struct factor *fctr, + struct factor_statistics *fs) + +{ + + static char buf[100]; + + char buf2[100]; + + snprintf(buf, 100, "%s", + value_to_string(&fs->id[0], fctr->indep_var[0])); + + if ( fctr->indep_var[1] ) + { + sprintf(buf2, ",%s)", value_to_string(&fs->id[1], fctr->indep_var[1]) ); + strcat(buf, buf2); + } + + + return buf; +} diff --git a/src/factor_stats.c b/src/factor_stats.c index 7e5ac8b4..7c4cce00 100644 --- a/src/factor_stats.c +++ b/src/factor_stats.c @@ -187,7 +187,7 @@ metrics_postcalc(struct metrics *m) /* Calculate the percentiles */ ptiles(m->ptile_hash, m->wvp, m->n_data, m->n, m->ptile_alg); - tukey_hinges(m->wvp, m->n_data, m->n, m->hinges); + tukey_hinges(m->wvp, m->n_data, m->n, m->hinge); /* Special case here */ if ( k1 + 1 == k2 ) diff --git a/src/factor_stats.h b/src/factor_stats.h index f6394a8a..b05d7423 100644 --- a/src/factor_stats.h +++ b/src/factor_stats.h @@ -80,13 +80,11 @@ struct metrics enum pc_alg ptile_alg; /* Tukey's Hinges */ - double hinges[3]; + double hinge[3]; }; - - void metrics_precalc(struct metrics *m); void metrics_calc(struct metrics *m, const union value *f, double weight, diff --git a/src/percentiles.c b/src/percentiles.c index 9719676d..1c8eef2a 100644 --- a/src/percentiles.c +++ b/src/percentiles.c @@ -80,18 +80,22 @@ ptile_round(const struct weighted_value **wv, const struct ptile_params *par) { double x; + double a=0; + + if ( par->k1 >= 0 ) + a = wv[par->k1]->v.f; if ( wv[par->k1 + 1]->w >= 1 ) { if ( par->g1_star < 0.5 ) - x = wv[par->k1]->v.f; + x = a; else x = wv[par->k1 + 1]->v.f; } else { if ( par->g1 < 0.5 ) - x = wv[par->k1]->v.f; + x = a; else x = wv[par->k1 + 1]->v.f; @@ -105,6 +109,9 @@ double ptile_haverage(const struct weighted_value **wv, const struct ptile_params *par) { + + double a=0; + if ( par->g2_star >= 1.0 ) return wv[par->k2 + 1]->v.f ; @@ -117,15 +124,17 @@ ptile_haverage(const struct weighted_value **wv, return 
wv[par->k2]->v.f; } - assert(par->k2 >= 0); + /* Ditto for k2 < 0 */ + if ( par->k2 >= 0 ) + { + a = wv[par->k2]->v.f; + } if ( wv[par->k2 + 1]->w >= 1.0 ) - return ( (1 - par->g2_star) * wv[par->k2]->v.f - + + return ( (1 - par->g2_star) * a + par->g2_star * wv[par->k2 + 1]->v.f); else - return ( (1 - par->g2) * wv[par->k2]->v.f - + + return ( (1 - par->g2) * a + par->g2 * wv[par->k2 + 1]->v.f); } @@ -137,16 +146,21 @@ double ptile_waverage(const struct weighted_value **wv, const struct ptile_params *par) { + double a=0; + if ( par->g1_star >= 1.0 ) return wv[par->k1 + 1]->v.f ; + if ( par->k1 >= 0 ) + { + a = wv[par->k1]->v.f; + } + if ( wv[par->k1 + 1]->w >= 1.0 ) - return ( (1 - par->g1_star) * wv[par->k1]->v.f - + + return ( (1 - par->g1_star) * a + par->g1_star * wv[par->k1 + 1]->v.f); else - return ( (1 - par->g1) * wv[par->k1]->v.f - + + return ( (1 - par->g1) * a + par->g1 * wv[par->k1 + 1]->v.f); } @@ -305,7 +319,8 @@ void tukey_hinges(const struct weighted_value **wv, int n_data, double w, - double hinges[3]) + double hinge[3] + ) { int i; double c_star = DBL_MAX; @@ -351,27 +366,28 @@ tukey_hinges(const struct weighted_value **wv, if ( a_star >= 1.0 ) { - hinges[i] = wv[h[i] + 1]->v.f ; + hinge[i] = wv[h[i] + 1]->v.f ; continue; } if ( wv[h[i]+1]->w >= 1) { - hinges[i] = ( 1 - a_star)* wv[h[i]]->v.f + hinge[i] = ( 1 - a_star)* wv[h[i]]->v.f + a_star * wv[h[i]+1]->v.f; continue; } - hinges[i] = ( 1 - a)* wv[h[i]]->v.f + a * wv[h[i]+1]->v.f; + hinge[i] = ( 1 - a)* wv[h[i]]->v.f + a * wv[h[i]+1]->v.f; } - assert(hinges[0] <= hinges[1]); - assert(hinges[1] <= hinges[2]); + assert(hinge[0] <= hinge[1]); + assert(hinge[1] <= hinge[2]); } + int ptile_compare(const struct percentile *p1, const struct percentile *p2, diff --git a/src/percentiles.h b/src/percentiles.h index 8baba9fa..e330aabd 100644 --- a/src/percentiles.h +++ b/src/percentiles.h @@ -64,7 +64,7 @@ void ptiles(struct hsh_table *pc_hash, enum pc_alg algorithm); -/* Calculate Tukey's Hinges */ +/* 
Calculate Tukey's Hinges and the Whiskers for the box plot*/ void tukey_hinges(const struct weighted_value **wv, int n_data, double w, diff --git a/src/piechart.c b/src/piechart.c index 14115e4c..53c2b5fd 100644 --- a/src/piechart.c +++ b/src/piechart.c @@ -27,6 +27,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #include #include "str.h" #include "value-labels.h" +#include "misc.h" /* Pie charts of course need to know Pi :) */ @@ -35,8 +36,6 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA #endif -#define min(A,B) ((A>B)?B:A) - /* Draw a single slice of the pie */ static void diff --git a/src/q2c.c b/src/q2c.c index 5d188bbc..aaf3343c 100644 --- a/src/q2c.c +++ b/src/q2c.c @@ -1611,7 +1611,7 @@ dump_subcommand (const subcommand *sbc) { dump (0, "p->%sv_%s = parse_variable ();", st_lower (sbc->prefix), st_lower (sbc->name)); - dump (1, "if (p->%sv_%s)", + dump (1, "if (!p->%sv_%s)", st_lower (sbc->prefix), st_lower (sbc->name)); dump (0, "goto lossage;"); outdent (); diff --git a/tests/Makefile.am b/tests/Makefile.am index 3b4de758..a48b9092 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -11,6 +11,7 @@ TESTS = \ command/erase.sh \ command/examine.sh \ command/examine-extremes.sh \ + command/examine-percentiles.sh \ command/file-label.sh \ command/filter.sh \ command/flip.sh \ diff --git a/tests/command/examine-percentiles.sh b/tests/command/examine-percentiles.sh new file mode 100755 index 00000000..b7982d30 --- /dev/null +++ b/tests/command/examine-percentiles.sh @@ -0,0 +1,198 @@ +#!/bin/sh + +# This program tests the PERCENTILES subcommand of the EXAMINE command. 
+# In particular it tests that it behaves properly when there are only +# a few cases + +TEMPDIR=/tmp/pspp-tst-$$ + +here=`pwd`; + +# ensure that top_srcdir is absolute +cd $top_srcdir; top_srcdir=`pwd` + +export STAT_CONFIG_PATH=$top_srcdir/config + + +cleanup() +{ + rm -rf $TEMPDIR +} + + +fail() +{ + echo $activity + echo FAILED + cleanup; + exit 1; +} + + +no_result() +{ + echo $activity + echo NO RESULT; + cleanup; + exit 2; +} + +pass() +{ + cleanup; + exit 0; +} + +mkdir -p $TEMPDIR + +cd $TEMPDIR + +activity="create program" +cat > $TEMPDIR/out.stat <