Implemented calculation of percentiles and Tukey hinges

author John Darrington <john@darrington.wattle.id.au>

Wed, 29 Dec 2004 01:19:57 +0000 (01:19 +0000)

committer John Darrington <john@darrington.wattle.id.au>

Wed, 29 Dec 2004 01:19:57 +0000 (01:19 +0000)
author John Darrington <john@darrington.wattle.id.au>
Wed, 29 Dec 2004 01:19:57 +0000 (01:19 +0000)
committer John Darrington <john@darrington.wattle.id.au>
Wed, 29 Dec 2004 01:19:57 +0000 (01:19 +0000)
diff --git a/doc/statistics.texi b/doc/statistics.texi

index 19fe6d5abcab12f21db907760203f97999fda7b8..56c3794929cea18cc723138ec1160f6321af3b4a 100644 (file)
--- a/doc/statistics.texi
+++ b/doc/statistics.texi
@@ -230,6 +230,7 @@ EXAMINE
          /COMPARE=@{GROUPS,VARIABLES@}
          /ID=@{case_number, var_name@}
          /@{TOTAL,NOTOTAL@}
+        /PERCENTILES=[value_list]=@{HAVERAGE, WAVERAGE, ROUND, AEMPIRICAL, EMPIRICAL@}
          /MISSING=@{LISTWISE, PAIRWISE@} [@{EXCLUDE, INCLUDE@}] 
                 [@{NOREPORT,REPORT@}]
  
@@ -261,6 +262,11 @@ The PLOT subcommand specifies which plots are to be produced if any.
  The CINTERVAL subcommand specifies the confidence interval to use in
  calculation of the descriptives command.  The default it 95%.
  
+The PERCENTILES subcommand specifies which percentiles are to be calculated,
+and which algorithm to use for calculating them.  The default is to
+calculate the 5th, 10th, 25th, 50th, 75th, 90th and 95th percentiles using
+the HAVERAGE algorithm.
+
  The TOTAL and NOTOTAL subcommands are mutually exclusive.  If NOTOTAL
  is given and factors have been specified in the VARIABLES subcommand,
  then then statistics for the unfactored dependent variables are
diff --git a/po/en_GB.po b/po/en_GB.po

index 2dbb8508cb87de15431921935dad21e1f238eff7..6b3820545e210cce15d83b372f45404a5850fa12 100644 (file)
--- a/po/en_GB.po
+++ b/po/en_GB.po
@@ -7,7 +7,7 @@ msgid ""
  msgstr ""
  "Project-Id-Version: PSPP 0.3.1\n"
  "Report-Msgid-Bugs-To: pspp-dev@gnu.org\n"
-"POT-Creation-Date: 2004-12-02 13:38+0800\n"
+"POT-Creation-Date: 2004-12-29 08:18+0800\n"
  "PO-Revision-Date: 2004-01-23 13:04+0800\n"
  "Last-Translator: John Darrington <john@darrington.wattle.id.au>\n"
  "Language-Team: John Darrington <john@darrington.wattle.id.au>\n"
@@ -957,8 +957,8 @@ msgstr ""
  msgid "Only USE ALL is currently implemented."
  msgstr ""
  
-#: src/descript.c:99 src/examine.q:1268 src/frequencies.q:112 src/oneway.q:396
-#: src/t-test.q:683 src/t-test.q:706 src/t-test.q:829 src/t-test.q:1166
+#: src/descript.c:99 src/examine.q:1400 src/frequencies.q:112 src/oneway.q:396
+#: src/t-test.q:690 src/t-test.q:713 src/t-test.q:836 src/t-test.q:1173
  msgid "Mean"
  msgstr ""
  
@@ -970,11 +970,11 @@ msgstr ""
  msgid "Std Dev"
  msgstr ""
  
-#: src/descript.c:102 src/examine.q:1331 src/frequencies.q:117
+#: src/descript.c:102 src/examine.q:1478 src/frequencies.q:117
  msgid "Variance"
  msgstr ""
  
-#: src/descript.c:103 src/examine.q:1416 src/frequencies.q:118
+#: src/descript.c:103 src/examine.q:1585 src/frequencies.q:118
  msgid "Kurtosis"
  msgstr ""
  
@@ -982,7 +982,7 @@ msgstr ""
  msgid "S E Kurt"
  msgstr ""
  
-#: src/descript.c:105 src/examine.q:1396 src/frequencies.q:120
+#: src/descript.c:105 src/examine.q:1565 src/frequencies.q:120
  msgid "Skewness"
  msgstr ""
  
@@ -990,16 +990,16 @@ msgstr ""
  msgid "S E Skew"
  msgstr ""
  
-#: src/descript.c:107 src/examine.q:1379 src/frequencies.q:122
+#: src/descript.c:107 src/examine.q:1526 src/frequencies.q:122
  msgid "Range"
  msgstr ""
  
-#: src/descript.c:108 src/examine.q:1356 src/frequencies.q:123
+#: src/descript.c:108 src/examine.q:1503 src/frequencies.q:123
  #: src/oneway.q:408
  msgid "Minimum"
  msgstr ""
  
-#: src/descript.c:109 src/examine.q:1367 src/frequencies.q:124
+#: src/descript.c:109 src/examine.q:1514 src/frequencies.q:124
  #: src/oneway.q:409
  msgid "Maximum"
  msgstr ""
@@ -1985,16 +1985,16 @@ msgstr ""
  msgid "<<fallback>>"
  msgstr ""
  
-#: src/hash.c:517
+#: src/hash.c:519
  #, c-format
  msgid "hash table:"
  msgstr ""
  
-#: src/histogram.c:138
+#: src/histogram.c:115
  msgid "HISTOGRAM"
  msgstr ""
  
-#: src/histogram.c:140 src/frequencies.q:1135
+#: src/histogram.c:117 src/frequencies.q:1137
  msgid "Frequency"
  msgstr ""
  
@@ -2714,6 +2714,26 @@ msgstr ""
  msgid "Error opening page on %s device of %s class."
  msgstr ""
  
+#: src/percentiles.c:38
+msgid "HAverage"
+msgstr ""
+
+#: src/percentiles.c:39
+msgid "Weighted Average"
+msgstr ""
+
+#: src/percentiles.c:40
+msgid "Rounded"
+msgstr ""
+
+#: src/percentiles.c:41
+msgid "Empirical"
+msgstr ""
+
+#: src/percentiles.c:42
+msgid "Empirical with averaging"
+msgstr ""
+
  #: src/permissions.c:75
  #, c-format
  msgid "Expecting %s or %s."
@@ -3703,7 +3723,7 @@ msgstr ""
  
  #: src/sysfile-info.c:531 src/vfm.c:875 src/crosstabs.q:1099
  #: src/crosstabs.q:1126 src/crosstabs.q:1146 src/crosstabs.q:1168
-#: src/examine.q:927 src/frequencies.q:1134 src/frequencies.q:1255
+#: src/examine.q:1054 src/frequencies.q:1136 src/frequencies.q:1257
  msgid "Value"
  msgstr ""
  
@@ -3924,33 +3944,33 @@ msgstr ""
  msgid "Summary."
  msgstr ""
  
-#: src/crosstabs.q:802 src/examine.q:711
+#: src/crosstabs.q:802 src/examine.q:838
  msgid "Cases"
  msgstr ""
  
-#: src/crosstabs.q:803 src/examine.q:645 src/frequencies.q:1132
-#: src/frequencies.q:1505
+#: src/crosstabs.q:803 src/examine.q:772 src/frequencies.q:1134
+#: src/frequencies.q:1507
  msgid "Valid"
  msgstr ""
  
-#: src/crosstabs.q:804 src/examine.q:646 src/frequencies.q:1200
-#: src/frequencies.q:1506
+#: src/crosstabs.q:804 src/examine.q:773 src/frequencies.q:1202
+#: src/frequencies.q:1508
  msgid "Missing"
  msgstr ""
  
  #: src/crosstabs.q:805 src/crosstabs.q:1008 src/crosstabs.q:1722
-#: src/examine.q:647 src/frequencies.q:1209 src/oneway.q:307 src/oneway.q:486
+#: src/examine.q:774 src/frequencies.q:1211 src/oneway.q:307 src/oneway.q:486
  msgid "Total"
  msgstr ""
  
-#: src/crosstabs.q:815 src/examine.q:723 src/frequencies.q:1504
-#: src/oneway.q:395 src/t-test.q:682 src/t-test.q:705 src/t-test.q:830
-#: src/t-test.q:1365
+#: src/crosstabs.q:815 src/examine.q:850 src/frequencies.q:1506
+#: src/oneway.q:395 src/t-test.q:689 src/t-test.q:712 src/t-test.q:837
+#: src/t-test.q:1372
  msgid "N"
  msgstr ""
  
-#: src/crosstabs.q:816 src/examine.q:726 src/frequencies.q:1136
-#: src/frequencies.q:1137 src/frequencies.q:1138
+#: src/crosstabs.q:816 src/examine.q:853 src/frequencies.q:1138
+#: src/frequencies.q:1139 src/frequencies.q:1140
  msgid "Percent"
  msgstr ""
  
@@ -3987,12 +4007,12 @@ msgid "adj. resid."
  msgstr ""
  
  #: src/crosstabs.q:1098 src/crosstabs.q:1125 src/crosstabs.q:1145
-#: src/crosstabs.q:1166 src/examine.q:1161
+#: src/crosstabs.q:1166 src/examine.q:1288
  msgid "Statistic"
  msgstr ""
  
-#: src/crosstabs.q:1100 src/oneway.q:278 src/oneway.q:707 src/t-test.q:980
-#: src/t-test.q:1172 src/t-test.q:1264
+#: src/crosstabs.q:1100 src/oneway.q:278 src/oneway.q:707 src/t-test.q:987
+#: src/t-test.q:1179 src/t-test.q:1271
  msgid "df"
  msgstr ""
  
@@ -4029,11 +4049,11 @@ msgstr ""
  msgid " 95%% Confidence Interval"
  msgstr ""
  
-#: src/crosstabs.q:1147 src/t-test.q:984 src/t-test.q:1169 src/t-test.q:1267
+#: src/crosstabs.q:1147 src/t-test.q:991 src/t-test.q:1176 src/t-test.q:1274
  msgid "Lower"
  msgstr ""
  
-#: src/crosstabs.q:1148 src/t-test.q:985 src/t-test.q:1170 src/t-test.q:1268
+#: src/crosstabs.q:1148 src/t-test.q:992 src/t-test.q:1177 src/t-test.q:1275
  msgid "Upper"
  msgstr ""
  
@@ -4170,91 +4190,99 @@ msgstr ""
  msgid "%s Dependent"
  msgstr ""
  
-#: src/examine.q:300 src/examine.q:312
+#: src/examine.q:418 src/examine.q:430
  #, c-format
  msgid "%s and %s are mutually exclusive"
  msgstr ""
  
-#: src/examine.q:705
+#: src/examine.q:832
  msgid "Case Processing Summary"
  msgstr ""
  
-#: src/examine.q:911
+#: src/examine.q:1038
  msgid "Extreme Values"
  msgstr ""
  
-#: src/examine.q:928
+#: src/examine.q:1055
  msgid "Case Number"
  msgstr ""
  
-#: src/examine.q:1016
+#: src/examine.q:1143
  msgid "Highest"
  msgstr ""
  
-#: src/examine.q:1021
+#: src/examine.q:1148
  msgid "Lowest"
  msgstr ""
  
-#: src/examine.q:1162 src/oneway.q:398 src/oneway.q:705
+#: src/examine.q:1289 src/oneway.q:398 src/oneway.q:705
  msgid "Std. Error"
  msgstr ""
  
-#: src/examine.q:1164 src/oneway.q:412
+#: src/examine.q:1291 src/oneway.q:412
  msgid "Descriptives"
  msgstr ""
  
-#: src/examine.q:1286 src/oneway.q:403
+#: src/examine.q:1418 src/oneway.q:403
  #, c-format
  msgid "%g%% Confidence Interval for Mean"
  msgstr ""
  
-#: src/examine.q:1292 src/oneway.q:405
+#: src/examine.q:1424 src/oneway.q:405
  msgid "Lower Bound"
  msgstr ""
  
-#: src/examine.q:1303 src/oneway.q:406
+#: src/examine.q:1435 src/oneway.q:406
  msgid "Upper Bound"
  msgstr ""
  
-#: src/examine.q:1315
+#: src/examine.q:1447
  msgid "5% Trimmed Mean"
  msgstr ""
  
-#: src/examine.q:1326 src/frequencies.q:114
+#: src/examine.q:1458 src/frequencies.q:114
  msgid "Median"
  msgstr ""
  
-#: src/examine.q:1343 src/oneway.q:397 src/t-test.q:684 src/t-test.q:707
-#: src/t-test.q:831 src/t-test.q:1167
+#: src/examine.q:1490 src/oneway.q:397 src/t-test.q:691 src/t-test.q:714
+#: src/t-test.q:838 src/t-test.q:1174
  msgid "Std. Deviation"
  msgstr ""
  
-#: src/examine.q:1391
+#: src/examine.q:1538
  msgid "Interquartile Range"
  msgstr ""
  
-#: src/examine.q:1459
+#: src/examine.q:1628
  #, c-format
  msgid "Normal Q-Q Plot of %s"
  msgstr ""
  
-#: src/examine.q:1460 src/examine.q:1466
+#: src/examine.q:1629 src/examine.q:1635
  msgid "Observed Value"
  msgstr ""
  
-#: src/examine.q:1461
+#: src/examine.q:1630
  msgid "Expected Normal"
  msgstr ""
  
-#: src/examine.q:1464
+#: src/examine.q:1633
  #, c-format
  msgid "Detrended Normal Q-Q Plot of %s"
  msgstr ""
  
-#: src/examine.q:1467
+#: src/examine.q:1636
  msgid "Dev from Normal"
  msgstr ""
  
+#: src/examine.q:1757 src/examine.q:1779 src/frequencies.q:1518
+msgid "Percentiles"
+msgstr ""
+
+#: src/examine.q:1904
+msgid "Tukey's Hinges"
+msgstr ""
+
  #: src/file-handle.q:122
  #, c-format
  msgid ""
@@ -4314,75 +4342,71 @@ msgstr ""
  msgid "S.E. Skew"
  msgstr ""
  
-#: src/frequencies.q:394
+#: src/frequencies.q:396
  msgid ""
  "At most one of BARCHART, HISTOGRAM, or HBAR should be given.  HBAR will be "
  "assumed.  Argument values will be given precedence increasing along the "
  "order given."
  msgstr ""
  
-#: src/frequencies.q:477
+#: src/frequencies.q:479
  #, c-format
  msgid ""
  "MAX must be greater than or equal to MIN, if both are specified.  However, "
  "MIN was specified as %g and MAX as %g.  MIN and MAX will be ignored."
  msgstr ""
  
-#: src/frequencies.q:798
+#: src/frequencies.q:800
  msgid ""
  "Upper limit of integer mode value range must be greater than lower limit."
  msgstr ""
  
-#: src/frequencies.q:811
+#: src/frequencies.q:813
  #, c-format
  msgid "Variable %s specified multiple times on VARIABLES subcommand."
  msgstr ""
  
-#: src/frequencies.q:817
+#: src/frequencies.q:819
  #, c-format
  msgid "Integer mode specified, but %s is not a numeric variable."
  msgstr ""
  
-#: src/frequencies.q:883
+#: src/frequencies.q:885
  msgid "`)' expected after GROUPED interval list."
  msgstr ""
  
-#: src/frequencies.q:895
+#: src/frequencies.q:897
  #, c-format
  msgid "Variables %s specified on GROUPED but not on VARIABLES."
  msgstr ""
  
-#: src/frequencies.q:902
+#: src/frequencies.q:904
  #, c-format
  msgid "Variables %s specified multiple times on GROUPED subcommand."
  msgstr ""
  
-#: src/frequencies.q:1133 src/frequencies.q:1225 src/frequencies.q:1226
-#: src/frequencies.q:1258
+#: src/frequencies.q:1135 src/frequencies.q:1227 src/frequencies.q:1228
+#: src/frequencies.q:1260
  msgid "Cum"
  msgstr ""
  
-#: src/frequencies.q:1155
+#: src/frequencies.q:1157
  msgid "Value Label"
  msgstr ""
  
-#: src/frequencies.q:1256
+#: src/frequencies.q:1258
  msgid "Freq"
  msgstr ""
  
-#: src/frequencies.q:1257 src/frequencies.q:1259
+#: src/frequencies.q:1259 src/frequencies.q:1261
  msgid "Pct"
  msgstr ""
  
-#: src/frequencies.q:1478
+#: src/frequencies.q:1480
  #, c-format
  msgid "No valid data for variable %s; statistics not displayed."
  msgstr ""
  
-#: src/frequencies.q:1516
-msgid "Percentiles"
-msgstr ""
-
  #: src/list.q:150
  #, c-format
  msgid ""
@@ -4432,7 +4456,7 @@ msgstr ""
  msgid "Coefficients for contrast %d do not total zero"
  msgstr ""
  
-#: src/oneway.q:242 src/t-test.q:366 src/t-test.q:451
+#: src/oneway.q:242 src/t-test.q:366 src/t-test.q:458
  #, c-format
  msgid "`%s' is not a variable name"
  msgstr ""
@@ -4445,7 +4469,7 @@ msgstr ""
  msgid "Mean Square"
  msgstr ""
  
-#: src/oneway.q:280 src/t-test.q:977
+#: src/oneway.q:280 src/t-test.q:984
  msgid "F"
  msgstr ""
  
@@ -4497,11 +4521,11 @@ msgstr ""
  msgid "Value of Contrast"
  msgstr ""
  
-#: src/oneway.q:706 src/t-test.q:979 src/t-test.q:1171 src/t-test.q:1263
+#: src/oneway.q:706 src/t-test.q:986 src/t-test.q:1178 src/t-test.q:1270
  msgid "t"
  msgstr ""
  
-#: src/oneway.q:708 src/t-test.q:981 src/t-test.q:1173 src/t-test.q:1265
+#: src/oneway.q:708 src/t-test.q:988 src/t-test.q:1180 src/t-test.q:1272
  msgid "Sig. (2-tailed)"
  msgstr ""
  
@@ -4662,111 +4686,110 @@ msgstr ""
  msgid "Long string variable %s is not valid here."
  msgstr ""
  
-#: src/t-test.q:399
+#: src/t-test.q:399 src/t-test.q:414
  msgid ""
-"When applying GROUPS to a string variable, at least one value must be "
-"specified."
+"When applying GROUPS to a string variable, two values must be specified."
  msgstr ""
  
-#: src/t-test.q:486
+#: src/t-test.q:493
  #, c-format
  msgid ""
  "PAIRED was specified but the number of variables preceding WITH (%d) did not "
  "match the number following (%d)."
  msgstr ""
  
-#: src/t-test.q:503
+#: src/t-test.q:510
  msgid "At least two variables must be specified on PAIRS."
  msgstr ""
  
-#: src/t-test.q:680
+#: src/t-test.q:687
  msgid "One-Sample Statistics"
  msgstr ""
  
-#: src/t-test.q:685 src/t-test.q:708 src/t-test.q:832
+#: src/t-test.q:692 src/t-test.q:715 src/t-test.q:839
  msgid "SE. Mean"
  msgstr ""
  
-#: src/t-test.q:702
+#: src/t-test.q:709
  msgid "Group Statistics"
  msgstr ""
  
-#: src/t-test.q:826
+#: src/t-test.q:833
  msgid "Paired Sample Statistics"
  msgstr ""
  
-#: src/t-test.q:848 src/t-test.q:1192 src/t-test.q:1382
+#: src/t-test.q:855 src/t-test.q:1199 src/t-test.q:1389
  #, c-format
  msgid "Pair %d"
  msgstr ""
  
-#: src/t-test.q:965
+#: src/t-test.q:972
  msgid "Independent Samples Test"
  msgstr ""
  
-#: src/t-test.q:973
+#: src/t-test.q:980
  msgid "Levene's Test for Equality of Variances"
  msgstr ""
  
-#: src/t-test.q:975
+#: src/t-test.q:982
  msgid "t-test for Equality of Means"
  msgstr ""
  
-#: src/t-test.q:978 src/t-test.q:1367
+#: src/t-test.q:985 src/t-test.q:1374
  msgid "Sig."
  msgstr ""
  
-#: src/t-test.q:982 src/t-test.q:1266
+#: src/t-test.q:989 src/t-test.q:1273
  msgid "Mean Difference"
  msgstr ""
  
-#: src/t-test.q:983
+#: src/t-test.q:990
  msgid "Std. Error Difference"
  msgstr ""
  
-#: src/t-test.q:988 src/t-test.q:1163 src/t-test.q:1258
+#: src/t-test.q:995 src/t-test.q:1170 src/t-test.q:1265
  #, c-format
  msgid "%g%% Confidence Interval of the Difference"
  msgstr ""
  
-#: src/t-test.q:1043
+#: src/t-test.q:1050
  msgid "Equal variances assumed"
  msgstr ""
  
-#: src/t-test.q:1095
+#: src/t-test.q:1102
  msgid "Equal variances not assumed"
  msgstr ""
  
-#: src/t-test.q:1153
+#: src/t-test.q:1160
  msgid "Paired Samples Test"
  msgstr ""
  
-#: src/t-test.q:1156
+#: src/t-test.q:1163
  msgid "Paired Differences"
  msgstr ""
  
-#: src/t-test.q:1168
+#: src/t-test.q:1175
  msgid "Std. Error Mean"
  msgstr ""
  
-#: src/t-test.q:1247
+#: src/t-test.q:1254
  msgid "One-Sample Test"
  msgstr ""
  
-#: src/t-test.q:1252
+#: src/t-test.q:1259
  #, c-format
  msgid "Test Value = %f"
  msgstr ""
  
-#: src/t-test.q:1362
+#: src/t-test.q:1369
  msgid "Paired Samples Correlations"
  msgstr ""
  
-#: src/t-test.q:1366
+#: src/t-test.q:1373
  msgid "Correlation"
  msgstr ""
  
-#: src/t-test.q:1385
+#: src/t-test.q:1392
  #, c-format
  msgid "%s & %s"
  msgstr ""
diff --git a/po/pspp.pot b/po/pspp.pot

index a74ef27a9c83735b018937a073b1755bdbfb4cd7..c3fbf68608c0597be8892bcafb63d6faebb8e5a6 100644 (file)
--- a/po/pspp.pot
+++ b/po/pspp.pot
@@ -8,7 +8,7 @@ msgid ""
  msgstr ""
  "Project-Id-Version: PACKAGE VERSION\n"
  "Report-Msgid-Bugs-To: pspp-dev@gnu.org\n"
-"POT-Creation-Date: 2004-12-02 19:27+0800\n"
+"POT-Creation-Date: 2004-12-29 08:18+0800\n"
  "PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
  "Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
  "Language-Team: LANGUAGE <LL@li.org>\n"
@@ -958,8 +958,8 @@ msgstr ""
  msgid "Only USE ALL is currently implemented."
  msgstr ""
  
-#: src/descript.c:99 src/examine.q:1268 src/frequencies.q:112 src/oneway.q:396
-#: src/t-test.q:683 src/t-test.q:706 src/t-test.q:829 src/t-test.q:1166
+#: src/descript.c:99 src/examine.q:1400 src/frequencies.q:112 src/oneway.q:396
+#: src/t-test.q:690 src/t-test.q:713 src/t-test.q:836 src/t-test.q:1173
  msgid "Mean"
  msgstr ""
  
@@ -971,11 +971,11 @@ msgstr ""
  msgid "Std Dev"
  msgstr ""
  
-#: src/descript.c:102 src/examine.q:1331 src/frequencies.q:117
+#: src/descript.c:102 src/examine.q:1478 src/frequencies.q:117
  msgid "Variance"
  msgstr ""
  
-#: src/descript.c:103 src/examine.q:1416 src/frequencies.q:118
+#: src/descript.c:103 src/examine.q:1585 src/frequencies.q:118
  msgid "Kurtosis"
  msgstr ""
  
@@ -983,7 +983,7 @@ msgstr ""
  msgid "S E Kurt"
  msgstr ""
  
-#: src/descript.c:105 src/examine.q:1396 src/frequencies.q:120
+#: src/descript.c:105 src/examine.q:1565 src/frequencies.q:120
  msgid "Skewness"
  msgstr ""
  
@@ -991,16 +991,16 @@ msgstr ""
  msgid "S E Skew"
  msgstr ""
  
-#: src/descript.c:107 src/examine.q:1379 src/frequencies.q:122
+#: src/descript.c:107 src/examine.q:1526 src/frequencies.q:122
  msgid "Range"
  msgstr ""
  
-#: src/descript.c:108 src/examine.q:1356 src/frequencies.q:123
+#: src/descript.c:108 src/examine.q:1503 src/frequencies.q:123
  #: src/oneway.q:408
  msgid "Minimum"
  msgstr ""
  
-#: src/descript.c:109 src/examine.q:1367 src/frequencies.q:124
+#: src/descript.c:109 src/examine.q:1514 src/frequencies.q:124
  #: src/oneway.q:409
  msgid "Maximum"
  msgstr ""
@@ -1985,16 +1985,16 @@ msgstr ""
  msgid "<<fallback>>"
  msgstr ""
  
-#: src/hash.c:517
+#: src/hash.c:519
  #, c-format
  msgid "hash table:"
  msgstr ""
  
-#: src/histogram.c:138
+#: src/histogram.c:115
  msgid "HISTOGRAM"
  msgstr ""
  
-#: src/histogram.c:140 src/frequencies.q:1135
+#: src/histogram.c:117 src/frequencies.q:1137
  msgid "Frequency"
  msgstr ""
  
@@ -2714,6 +2714,26 @@ msgstr ""
  msgid "Error opening page on %s device of %s class."
  msgstr ""
  
+#: src/percentiles.c:38
+msgid "HAverage"
+msgstr ""
+
+#: src/percentiles.c:39
+msgid "Weighted Average"
+msgstr ""
+
+#: src/percentiles.c:40
+msgid "Rounded"
+msgstr ""
+
+#: src/percentiles.c:41
+msgid "Empirical"
+msgstr ""
+
+#: src/percentiles.c:42
+msgid "Empirical with averaging"
+msgstr ""
+
  #: src/permissions.c:75
  #, c-format
  msgid "Expecting %s or %s."
@@ -3703,7 +3723,7 @@ msgstr ""
  
  #: src/sysfile-info.c:531 src/vfm.c:875 src/crosstabs.q:1099
  #: src/crosstabs.q:1126 src/crosstabs.q:1146 src/crosstabs.q:1168
-#: src/examine.q:927 src/frequencies.q:1134 src/frequencies.q:1255
+#: src/examine.q:1054 src/frequencies.q:1136 src/frequencies.q:1257
  msgid "Value"
  msgstr ""
  
@@ -3924,33 +3944,33 @@ msgstr ""
  msgid "Summary."
  msgstr ""
  
-#: src/crosstabs.q:802 src/examine.q:711
+#: src/crosstabs.q:802 src/examine.q:838
  msgid "Cases"
  msgstr ""
  
-#: src/crosstabs.q:803 src/examine.q:645 src/frequencies.q:1132
-#: src/frequencies.q:1505
+#: src/crosstabs.q:803 src/examine.q:772 src/frequencies.q:1134
+#: src/frequencies.q:1507
  msgid "Valid"
  msgstr ""
  
-#: src/crosstabs.q:804 src/examine.q:646 src/frequencies.q:1200
-#: src/frequencies.q:1506
+#: src/crosstabs.q:804 src/examine.q:773 src/frequencies.q:1202
+#: src/frequencies.q:1508
  msgid "Missing"
  msgstr ""
  
  #: src/crosstabs.q:805 src/crosstabs.q:1008 src/crosstabs.q:1722
-#: src/examine.q:647 src/frequencies.q:1209 src/oneway.q:307 src/oneway.q:486
+#: src/examine.q:774 src/frequencies.q:1211 src/oneway.q:307 src/oneway.q:486
  msgid "Total"
  msgstr ""
  
-#: src/crosstabs.q:815 src/examine.q:723 src/frequencies.q:1504
-#: src/oneway.q:395 src/t-test.q:682 src/t-test.q:705 src/t-test.q:830
-#: src/t-test.q:1365
+#: src/crosstabs.q:815 src/examine.q:850 src/frequencies.q:1506
+#: src/oneway.q:395 src/t-test.q:689 src/t-test.q:712 src/t-test.q:837
+#: src/t-test.q:1372
  msgid "N"
  msgstr ""
  
-#: src/crosstabs.q:816 src/examine.q:726 src/frequencies.q:1136
-#: src/frequencies.q:1137 src/frequencies.q:1138
+#: src/crosstabs.q:816 src/examine.q:853 src/frequencies.q:1138
+#: src/frequencies.q:1139 src/frequencies.q:1140
  msgid "Percent"
  msgstr ""
  
@@ -3987,12 +4007,12 @@ msgid "adj. resid."
  msgstr ""
  
  #: src/crosstabs.q:1098 src/crosstabs.q:1125 src/crosstabs.q:1145
-#: src/crosstabs.q:1166 src/examine.q:1161
+#: src/crosstabs.q:1166 src/examine.q:1288
  msgid "Statistic"
  msgstr ""
  
-#: src/crosstabs.q:1100 src/oneway.q:278 src/oneway.q:707 src/t-test.q:980
-#: src/t-test.q:1172 src/t-test.q:1264
+#: src/crosstabs.q:1100 src/oneway.q:278 src/oneway.q:707 src/t-test.q:987
+#: src/t-test.q:1179 src/t-test.q:1271
  msgid "df"
  msgstr ""
  
@@ -4029,11 +4049,11 @@ msgstr ""
  msgid " 95%% Confidence Interval"
  msgstr ""
  
-#: src/crosstabs.q:1147 src/t-test.q:984 src/t-test.q:1169 src/t-test.q:1267
+#: src/crosstabs.q:1147 src/t-test.q:991 src/t-test.q:1176 src/t-test.q:1274
  msgid "Lower"
  msgstr ""
  
-#: src/crosstabs.q:1148 src/t-test.q:985 src/t-test.q:1170 src/t-test.q:1268
+#: src/crosstabs.q:1148 src/t-test.q:992 src/t-test.q:1177 src/t-test.q:1275
  msgid "Upper"
  msgstr ""
  
@@ -4170,91 +4190,99 @@ msgstr ""
  msgid "%s Dependent"
  msgstr ""
  
-#: src/examine.q:300 src/examine.q:312
+#: src/examine.q:418 src/examine.q:430
  #, c-format
  msgid "%s and %s are mutually exclusive"
  msgstr ""
  
-#: src/examine.q:705
+#: src/examine.q:832
  msgid "Case Processing Summary"
  msgstr ""
  
-#: src/examine.q:911
+#: src/examine.q:1038
  msgid "Extreme Values"
  msgstr ""
  
-#: src/examine.q:928
+#: src/examine.q:1055
  msgid "Case Number"
  msgstr ""
  
-#: src/examine.q:1016
+#: src/examine.q:1143
  msgid "Highest"
  msgstr ""
  
-#: src/examine.q:1021
+#: src/examine.q:1148
  msgid "Lowest"
  msgstr ""
  
-#: src/examine.q:1162 src/oneway.q:398 src/oneway.q:705
+#: src/examine.q:1289 src/oneway.q:398 src/oneway.q:705
  msgid "Std. Error"
  msgstr ""
  
-#: src/examine.q:1164 src/oneway.q:412
+#: src/examine.q:1291 src/oneway.q:412
  msgid "Descriptives"
  msgstr ""
  
-#: src/examine.q:1286 src/oneway.q:403
+#: src/examine.q:1418 src/oneway.q:403
  #, c-format
  msgid "%g%% Confidence Interval for Mean"
  msgstr ""
  
-#: src/examine.q:1292 src/oneway.q:405
+#: src/examine.q:1424 src/oneway.q:405
  msgid "Lower Bound"
  msgstr ""
  
-#: src/examine.q:1303 src/oneway.q:406
+#: src/examine.q:1435 src/oneway.q:406
  msgid "Upper Bound"
  msgstr ""
  
-#: src/examine.q:1315
+#: src/examine.q:1447
  msgid "5% Trimmed Mean"
  msgstr ""
  
-#: src/examine.q:1326 src/frequencies.q:114
+#: src/examine.q:1458 src/frequencies.q:114
  msgid "Median"
  msgstr ""
  
-#: src/examine.q:1343 src/oneway.q:397 src/t-test.q:684 src/t-test.q:707
-#: src/t-test.q:831 src/t-test.q:1167
+#: src/examine.q:1490 src/oneway.q:397 src/t-test.q:691 src/t-test.q:714
+#: src/t-test.q:838 src/t-test.q:1174
  msgid "Std. Deviation"
  msgstr ""
  
-#: src/examine.q:1391
+#: src/examine.q:1538
  msgid "Interquartile Range"
  msgstr ""
  
-#: src/examine.q:1459
+#: src/examine.q:1628
  #, c-format
  msgid "Normal Q-Q Plot of %s"
  msgstr ""
  
-#: src/examine.q:1460 src/examine.q:1466
+#: src/examine.q:1629 src/examine.q:1635
  msgid "Observed Value"
  msgstr ""
  
-#: src/examine.q:1461
+#: src/examine.q:1630
  msgid "Expected Normal"
  msgstr ""
  
-#: src/examine.q:1464
+#: src/examine.q:1633
  #, c-format
  msgid "Detrended Normal Q-Q Plot of %s"
  msgstr ""
  
-#: src/examine.q:1467
+#: src/examine.q:1636
  msgid "Dev from Normal"
  msgstr ""
  
+#: src/examine.q:1757 src/examine.q:1779 src/frequencies.q:1518
+msgid "Percentiles"
+msgstr ""
+
+#: src/examine.q:1904
+msgid "Tukey's Hinges"
+msgstr ""
+
  #: src/file-handle.q:122
  #, c-format
  msgid ""
@@ -4314,75 +4342,71 @@ msgstr ""
  msgid "S.E. Skew"
  msgstr ""
  
-#: src/frequencies.q:394
+#: src/frequencies.q:396
  msgid ""
  "At most one of BARCHART, HISTOGRAM, or HBAR should be given.  HBAR will be "
  "assumed.  Argument values will be given precedence increasing along the "
  "order given."
  msgstr ""
  
-#: src/frequencies.q:477
+#: src/frequencies.q:479
  #, c-format
  msgid ""
  "MAX must be greater than or equal to MIN, if both are specified.  However, "
  "MIN was specified as %g and MAX as %g.  MIN and MAX will be ignored."
  msgstr ""
  
-#: src/frequencies.q:798
+#: src/frequencies.q:800
  msgid ""
  "Upper limit of integer mode value range must be greater than lower limit."
  msgstr ""
  
-#: src/frequencies.q:811
+#: src/frequencies.q:813
  #, c-format
  msgid "Variable %s specified multiple times on VARIABLES subcommand."
  msgstr ""
  
-#: src/frequencies.q:817
+#: src/frequencies.q:819
  #, c-format
  msgid "Integer mode specified, but %s is not a numeric variable."
  msgstr ""
  
-#: src/frequencies.q:883
+#: src/frequencies.q:885
  msgid "`)' expected after GROUPED interval list."
  msgstr ""
  
-#: src/frequencies.q:895
+#: src/frequencies.q:897
  #, c-format
  msgid "Variables %s specified on GROUPED but not on VARIABLES."
  msgstr ""
  
-#: src/frequencies.q:902
+#: src/frequencies.q:904
  #, c-format
  msgid "Variables %s specified multiple times on GROUPED subcommand."
  msgstr ""
  
-#: src/frequencies.q:1133 src/frequencies.q:1225 src/frequencies.q:1226
-#: src/frequencies.q:1258
+#: src/frequencies.q:1135 src/frequencies.q:1227 src/frequencies.q:1228
+#: src/frequencies.q:1260
  msgid "Cum"
  msgstr ""
  
-#: src/frequencies.q:1155
+#: src/frequencies.q:1157
  msgid "Value Label"
  msgstr ""
  
-#: src/frequencies.q:1256
+#: src/frequencies.q:1258
  msgid "Freq"
  msgstr ""
  
-#: src/frequencies.q:1257 src/frequencies.q:1259
+#: src/frequencies.q:1259 src/frequencies.q:1261
  msgid "Pct"
  msgstr ""
  
-#: src/frequencies.q:1478
+#: src/frequencies.q:1480
  #, c-format
  msgid "No valid data for variable %s; statistics not displayed."
  msgstr ""
  
-#: src/frequencies.q:1516
-msgid "Percentiles"
-msgstr ""
-
  #: src/list.q:150
  #, c-format
  msgid ""
@@ -4432,7 +4456,7 @@ msgstr ""
  msgid "Coefficients for contrast %d do not total zero"
  msgstr ""
  
-#: src/oneway.q:242 src/t-test.q:366 src/t-test.q:451
+#: src/oneway.q:242 src/t-test.q:366 src/t-test.q:458
  #, c-format
  msgid "`%s' is not a variable name"
  msgstr ""
@@ -4445,7 +4469,7 @@ msgstr ""
  msgid "Mean Square"
  msgstr ""
  
-#: src/oneway.q:280 src/t-test.q:977
+#: src/oneway.q:280 src/t-test.q:984
  msgid "F"
  msgstr ""
  
@@ -4497,11 +4521,11 @@ msgstr ""
  msgid "Value of Contrast"
  msgstr ""
  
-#: src/oneway.q:706 src/t-test.q:979 src/t-test.q:1171 src/t-test.q:1263
+#: src/oneway.q:706 src/t-test.q:986 src/t-test.q:1178 src/t-test.q:1270
  msgid "t"
  msgstr ""
  
-#: src/oneway.q:708 src/t-test.q:981 src/t-test.q:1173 src/t-test.q:1265
+#: src/oneway.q:708 src/t-test.q:988 src/t-test.q:1180 src/t-test.q:1272
  msgid "Sig. (2-tailed)"
  msgstr ""
  
@@ -4662,111 +4686,110 @@ msgstr ""
  msgid "Long string variable %s is not valid here."
  msgstr ""
  
-#: src/t-test.q:399
+#: src/t-test.q:399 src/t-test.q:414
  msgid ""
-"When applying GROUPS to a string variable, at least one value must be "
-"specified."
+"When applying GROUPS to a string variable, two values must be specified."
  msgstr ""
  
-#: src/t-test.q:486
+#: src/t-test.q:493
  #, c-format
  msgid ""
  "PAIRED was specified but the number of variables preceding WITH (%d) did not "
  "match the number following (%d)."
  msgstr ""
  
-#: src/t-test.q:503
+#: src/t-test.q:510
  msgid "At least two variables must be specified on PAIRS."
  msgstr ""
  
-#: src/t-test.q:680
+#: src/t-test.q:687
  msgid "One-Sample Statistics"
  msgstr ""
  
-#: src/t-test.q:685 src/t-test.q:708 src/t-test.q:832
+#: src/t-test.q:692 src/t-test.q:715 src/t-test.q:839
  msgid "SE. Mean"
  msgstr ""
  
-#: src/t-test.q:702
+#: src/t-test.q:709
  msgid "Group Statistics"
  msgstr ""
  
-#: src/t-test.q:826
+#: src/t-test.q:833
  msgid "Paired Sample Statistics"
  msgstr ""
  
-#: src/t-test.q:848 src/t-test.q:1192 src/t-test.q:1382
+#: src/t-test.q:855 src/t-test.q:1199 src/t-test.q:1389
  #, c-format
  msgid "Pair %d"
  msgstr ""
  
-#: src/t-test.q:965
+#: src/t-test.q:972
  msgid "Independent Samples Test"
  msgstr ""
  
-#: src/t-test.q:973
+#: src/t-test.q:980
  msgid "Levene's Test for Equality of Variances"
  msgstr ""
  
-#: src/t-test.q:975
+#: src/t-test.q:982
  msgid "t-test for Equality of Means"
  msgstr ""
  
-#: src/t-test.q:978 src/t-test.q:1367
+#: src/t-test.q:985 src/t-test.q:1374
  msgid "Sig."
  msgstr ""
  
-#: src/t-test.q:982 src/t-test.q:1266
+#: src/t-test.q:989 src/t-test.q:1273
  msgid "Mean Difference"
  msgstr ""
  
-#: src/t-test.q:983
+#: src/t-test.q:990
  msgid "Std. Error Difference"
  msgstr ""
  
-#: src/t-test.q:988 src/t-test.q:1163 src/t-test.q:1258
+#: src/t-test.q:995 src/t-test.q:1170 src/t-test.q:1265
  #, c-format
  msgid "%g%% Confidence Interval of the Difference"
  msgstr ""
  
-#: src/t-test.q:1043
+#: src/t-test.q:1050
  msgid "Equal variances assumed"
  msgstr ""
  
-#: src/t-test.q:1095
+#: src/t-test.q:1102
  msgid "Equal variances not assumed"
  msgstr ""
  
-#: src/t-test.q:1153
+#: src/t-test.q:1160
  msgid "Paired Samples Test"
  msgstr ""
  
-#: src/t-test.q:1156
+#: src/t-test.q:1163
  msgid "Paired Differences"
  msgstr ""
  
-#: src/t-test.q:1168
+#: src/t-test.q:1175
  msgid "Std. Error Mean"
  msgstr ""
  
-#: src/t-test.q:1247
+#: src/t-test.q:1254
  msgid "One-Sample Test"
  msgstr ""
  
-#: src/t-test.q:1252
+#: src/t-test.q:1259
  #, c-format
  msgid "Test Value = %f"
  msgstr ""
  
-#: src/t-test.q:1362
+#: src/t-test.q:1369
  msgid "Paired Samples Correlations"
  msgstr ""
  
-#: src/t-test.q:1366
+#: src/t-test.q:1373
  msgid "Correlation"
  msgstr ""
  
-#: src/t-test.q:1385
+#: src/t-test.q:1392
  #, c-format
  msgid "%s & %s"
  msgstr ""
diff --git a/src/ChangeLog b/src/ChangeLog

index ad2e7b248e784b4a11a5b2afc090ef2111bf8100..b2ec733528199fb3acaf783f6de40838faeb1a3a 100644 (file)
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,9 @@
+Wed Dec 29 08:18:08 WST 2004 John Darrington <john@darrington.wattle.id.au>
+
+       * percentiles.[ch] Added. Calculates percentiles and Tukey hinges
+
+       * examine.q factor_stats.[ch]  Added calculation of percentiles
+
  Fri Dec 24 15:09:11 WST 2004 John Darrington <john@darrington.wattle.id.au>
  
         * t-test.q Fixed bug #11227 Made t-test work when the independent
diff --git a/src/Makefile.am b/src/Makefile.am

index af8608617c2ea66ba4c863000377c95f4d771104..1afb336efb407a1000d63fa50a08f9a8b0bf9b07 100644 (file)
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -64,7 +64,8 @@ groff-font.c group.c group.h group_proc.h \
  hash.c hash.h html.c htmlP.h include.c inpt-pgm.c lexer.c      \
  lexer.h levene.c levene.h log.h loop.c magic.c magic.h main.c main.h   \
  matrix-data.c mis-val.c misc.c misc.h modify-vars.c                    \
-moments.c moments.h numeric.c output.c output.h permissions.c \
+moments.c moments.h numeric.c output.c output.h \
+percentiles.c percentiles.h permissions.c \
  pfm-read.c pfm-read.h  \
  pfm-write.c pfm-write.h \
  pool.c pool.h postscript.c print.c recode.c    \
diff --git a/src/examine.q b/src/examine.q

index d426ecca1e549c5a83dd4d512bfd9b73bcbf0ef9..97a63a902642aa9d69386034dc9c6c6f395d4156 100644 (file)
--- a/src/examine.q
+++ b/src/examine.q
@@ -42,6 +42,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  #include "casefile.h"
  #include "factor_stats.h"
  #include "moments.h"
+#include "percentiles.h"
  
  /* (headers) */
  #include "chart.h"
@@ -55,6 +56,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
     rep:report/!noreport,
     incl:include/!exclude;
     +compare=cmp:variables/!groups;
+   +percentiles=custom;
     +plot[plt_]=stemleaf,boxplot,npplot,:spreadlevel(*d:n),histogram,all,none;
     +cinterval=double;
     +statistics[st_]=descriptives,:extreme(*d:n),all,none.
@@ -112,6 +114,11 @@ static void show_descriptives(struct variable **dependent_var,
                               int n_dep_var, 
                               struct factor *factor);
  
+static void show_percentiles(struct variable **dependent_var, 
+                             int n_dep_var, 
+                             struct factor *factor);
+
+
  
  void np_plot(const struct metrics *m, const char *factorname);
  
@@ -131,10 +138,22 @@ void factor_calc(struct ccase *c, int case_no,
  static is_missing_func value_is_missing;
  
  
+/* PERCENTILES */
+
+static subc_list_double percentile_list;
+
+static enum pc_alg percentile_algorithm;
+
+static short sbc_percentile;
+
+
  int
  cmd_examine(void)
  {
  
+  subc_list_double_create(&percentile_list);
+  percentile_algorithm = PC_HAVERAGE;
+
    if ( !parse_examine(&cmd) )
      return CMD_FAILURE;
  
@@ -150,11 +169,23 @@ cmd_examine(void)
    if ( ! cmd.sbc_cinterval) 
      cmd.n_cinterval[0] = 95.0;
  
+
+  /* If descriptives have been requested, make sure the 
+     quartiles are calculated */
+  if ( cmd.a_statistics[XMN_ST_DESCRIPTIVES] )
+    {
+      subc_list_double_push(&percentile_list, 25);
+      subc_list_double_push(&percentile_list, 50);
+      subc_list_double_push(&percentile_list, 75);
+    }
+
    multipass_procedure_with_splits (run_examine, &cmd);
  
    if ( totals ) 
      free(totals);
  
+  subc_list_double_destroy(&percentile_list);
+
    return CMD_SUCCESS;
  };
  
@@ -180,6 +211,8 @@ output_examine(void)
             show_descriptives(dependent_vars, n_dependent_vars, 0);
  
         }
+      if ( sbc_percentile ) 
+       show_percentiles(dependent_vars, n_dependent_vars, 0);
  
        if ( cmd.sbc_plot) 
         {
@@ -208,7 +241,6 @@ output_examine(void)
  
         }
  
-
      }
  
  
@@ -227,6 +259,10 @@ output_examine(void)
             show_descriptives(dependent_vars, n_dependent_vars, fctr);
         }
  
+      if ( sbc_percentile ) 
+       show_percentiles(dependent_vars, n_dependent_vars, fctr);
+
+
        if ( cmd.sbc_plot) 
         {
           int v;
@@ -290,6 +326,88 @@ output_examine(void)
  }
  
  
+/* Build a hash table of struct percentile from the break points in L.
+
+   One entry is created per distinct break point; its value member is
+   zeroed here and filled in later by ptiles().  The entries are owned
+   by the table (free is registered as its free function).
+
+   L may contain the same break point more than once, e.g. when the
+   user lists 25 and cmd_examine() also pushes the quartiles for
+   DESCRIPTIVES.  Returns whatever hsh_create() returns, which is a
+   null pointer for an empty list. */
+static struct hsh_table *
+list_to_ptile_hash(const subc_list_double *l)
+{
+  int i;
+  const int n_break_points = subc_list_double_count(l);
+  
+  struct hsh_table *h ; 
+
+  h = hsh_create(n_break_points, 
+                (hsh_compare_func *) ptile_compare,
+                (hsh_hash_func *) ptile_hash, 
+                (hsh_free_func *) free,
+                0);
+
+
+  for ( i = 0 ; i < n_break_points ; ++i )
+    {
+      struct percentile *p = xmalloc (sizeof *p);
+      
+      p->p = subc_list_double_at(l,i);
+      p->v = 0;
+
+      /* hsh_insert() hands back the entry already in the table when
+         the key is a duplicate; in that case the table did not take
+         ownership of P, so release it here instead of leaking it. */
+      if ( hsh_insert(h, p) != 0 )
+       free(p);
+
+    }
+
+  return h;
+
+}
+
+/* Parse the PERCENTILES subcommand.
+
+   Accepts an optional '=', an optional '(' ... ')' pair around a list
+   of numeric break points (commas between them are optional), another
+   optional '=' and an optional algorithm keyword.  The break points
+   are appended to percentile_list; if none were given the defaults
+   5, 10, 25, 50, 75, 90 and 95 are used.  The algorithm is stored in
+   percentile_algorithm, which is left unchanged when no keyword
+   matches.
+
+   Returns 1 on success, or 0 after reporting a syntax error when a
+   break point is not strictly between 0 and 100. */
+static int
+xmn_custom_percentiles(struct cmd_examine *p UNUSED)
+{
+  sbc_percentile = 1;
+
+  lex_match('=');
+
+  lex_match('(');
+
+  while ( lex_double_p() ) 
+    {
+      const double break_point = lex_double();
+
+      /* ptile() asserts p <= 1.0 and indexes wv[k + 1], so a break
+         point outside the open interval (0, 100) can abort the
+         program or read outside the data array.  Reject it here. */
+      if ( break_point <= 0 || break_point >= 100 )
+       {
+         lex_error(_("Percentiles must be greater than 0 and less than 100."));
+         return 0;
+       }
+
+      subc_list_double_push(&percentile_list, break_point);
+
+      lex_get();
+
+      lex_match(',') ;
+    }
+  lex_match(')');
+
+  lex_match('=');
+
+  if ( lex_match_id("HAVERAGE"))
+    percentile_algorithm = PC_HAVERAGE; 
+
+  else if ( lex_match_id("WAVERAGE"))
+    percentile_algorithm = PC_WAVERAGE; 
+
+  else if ( lex_match_id("ROUND"))
+    percentile_algorithm = PC_ROUND;
+
+  else if ( lex_match_id("EMPIRICAL"))
+    percentile_algorithm = PC_EMPIRICAL;
+
+  else if ( lex_match_id("AEMPIRICAL"))
+    percentile_algorithm = PC_AEMPIRICAL; 
+
+  else if ( lex_match_id("NONE"))
+    percentile_algorithm = PC_NONE; 
+
+
+  /* No explicit break points: fall back to the default set */
+  if ( 0 == subc_list_double_count(&percentile_list))
+    {
+      subc_list_double_push(&percentile_list, 5);
+      subc_list_double_push(&percentile_list, 10);
+      subc_list_double_push(&percentile_list, 25);
+      subc_list_double_push(&percentile_list, 50);
+      subc_list_double_push(&percentile_list, 75);
+      subc_list_double_push(&percentile_list, 90);
+      subc_list_double_push(&percentile_list, 95);
+    }
+
+  return 1;
+}
  
  /* TOTAL and NOTOTAL are simple, mutually exclusive flags */
  static int
@@ -399,6 +517,9 @@ examine_parse_independent_vars(struct cmd_examine *cmd)
  
  
  
+void populate_percentiles(struct tab_table *tbl, int col, int row, 
+                         const struct metrics *m);
+
  void populate_descriptives(struct tab_table *t, int col, int row, 
                            const struct metrics *fs);
  
@@ -552,11 +673,17 @@ run_examine(const struct casefile *cf, void *cmd_ )
                 fs != 0 ;
                 fs = hsh_next(fctr->fstats, &hi))
             {
+             
+             fs->m[v].ptile_hash = list_to_ptile_hash(&percentile_list);
+             fs->m[v].ptile_alg = percentile_algorithm;
               metrics_postcalc(&fs->m[v]);
             }
  
           fctr = fctr->next;
         }
+
+      totals[v].ptile_hash = list_to_ptile_hash(&percentile_list);
+      totals[v].ptile_alg = percentile_algorithm;
        metrics_postcalc(&totals[v]);
      }
  
@@ -1252,6 +1379,11 @@ show_descriptives(struct variable **dependent_var,
  
  
  
+
+
+
+
+
  /* Fill in the descriptives data */
  void
  populate_descriptives(struct tab_table *tbl, int col, int row, 
@@ -1325,6 +1457,21 @@ populate_descriptives(struct tab_table *tbl, int col, int row,
             TAB_LEFT | TAT_TITLE,
             _("Median"));
  
+  {
+    struct percentile *p;
+    double d = 50;
+    
+    p = hsh_find(m->ptile_hash, &d);
+    
+    assert(p);
+
+    tab_float (tbl, col + 2, 
+              row + 4,
+              TAB_CENTER,
+              p->v,
+              8, 2);
+  }
+
    tab_text (tbl, col, 
             row + 5,
             TAB_LEFT | TAT_TITLE,
@@ -1390,6 +1537,28 @@ populate_descriptives(struct tab_table *tbl, int col, int row,
             TAB_LEFT | TAT_TITLE,
             _("Interquartile Range"));
  
+  {
+    struct percentile *p1;
+    struct percentile *p2;
+
+    double d = 75;
+    p1 = hsh_find(m->ptile_hash, &d);
+
+    d = 25;
+    p2 = hsh_find(m->ptile_hash, &d);
+
+    assert(p1);
+    assert(p2);
+
+    tab_float (tbl, col + 2, 
+              row + 10,
+              TAB_CENTER,
+              p1->v - p2->v,
+              8, 2);
+  }
+
+
+
    tab_text (tbl, col, 
             row + 11,
             TAB_LEFT | TAT_TITLE,
@@ -1518,3 +1687,259 @@ np_plot(const struct metrics *m, const char *factorname)
    chart_finalise(&dnp_chart);
  
  }
+
+
+
+
+/* Show the percentiles.
+
+   Builds and submits a "Percentiles" table with one column per break
+   point found in the percentile hash and, for every dependent
+   variable (and every factor level when FCTR is non-null), two rows:
+   the percentile values and Tukey's hinges.
+
+   DEPENDENT_VAR is an array of N_DEP_VAR variables.  When FCTR is
+   null the figures come from the file-level totals[] array, otherwise
+   from the null-terminated fctr->fs array of factor statistics. */
+void
+show_percentiles(struct variable **dependent_var, 
+                 int n_dep_var, 
+                 struct factor *fctr)
+{
+  struct tab_table *tbl;
+  int i;
+  
+  int n_cols, n_rows;
+  int n_factors;
+
+  struct hsh_table *ptiles ;
+
+  int n_heading_columns;
+  const int n_heading_rows = 2;
+  const int n_stat_rows = 2;
+
+  int n_ptiles ;
+
+  if ( fctr )
+    {
+      struct factor_statistics **fs = fctr->fs ; 
+      n_heading_columns = 3;
+      n_factors = hsh_count(fctr->fstats);
+
+      ptiles = (*fs)->m[0].ptile_hash;
+
+      if ( fctr->indep_var[1] )
+         n_heading_columns = 4;
+    }
+  else
+    {
+      n_factors = 1;
+      n_heading_columns = 2;
+
+      ptiles = totals[0].ptile_hash;
+    }
+
+  n_ptiles = hsh_count(ptiles);
+
+  n_rows = n_heading_rows + n_dep_var * n_stat_rows * n_factors;
+
+  n_cols = n_heading_columns + n_ptiles ; 
+
+  tbl = tab_create (n_cols, n_rows, 0);
+
+  tab_headers (tbl, n_heading_columns + 1, 0, n_heading_rows, 0);
+
+  tab_dim (tbl, tab_natural_dimensions);
+
+  /* Outline the box and have no internal lines*/
+  tab_box (tbl, 
+          TAL_2, TAL_2,
+          -1, -1,
+          0, 0,
+          n_cols - 1, n_rows - 1);
+
+  tab_hline (tbl, TAL_2, 0, n_cols - 1, n_heading_rows );
+
+  tab_vline (tbl, TAL_2, n_heading_columns, 0, n_rows - 1);
+
+
+  tab_title (tbl, 0, _("Percentiles"));
+
+
+  tab_hline (tbl, TAL_1, n_heading_columns, n_cols - 1, 1 );
+
+
+  tab_box (tbl, 
+          -1, -1,
+          -1, TAL_1,
+          0, n_heading_rows,
+          n_heading_columns - 1, n_rows - 1);
+
+
+  tab_box (tbl, 
+          -1, -1,
+          -1, TAL_1,
+          n_heading_columns, n_heading_rows - 1,
+          n_cols - 1, n_rows - 1);
+
+  tab_joint_text(tbl, n_heading_columns + 1, 0,
+                n_cols - 1 , 0,
+                TAB_CENTER | TAT_TITLE ,
+                _("Percentiles"));
+
+
+  {
+    /* Put in the percentile break points as headings */
+
+    struct percentile **p = (struct percentile **) hsh_sort(ptiles);
+
+    i = 0;
+    while ( (*p)  ) 
+      {
+       tab_float(tbl, n_heading_columns + i++ , 1, 
+                 TAB_CENTER,
+                 (*p)->p, 8, 0);
+       
+       p++;
+      }
+
+  }
+
+  for ( i = 0 ; i < n_dep_var ; ++i ) 
+    {
+      /* First table row belonging to dependent variable i */
+      const int row = n_heading_rows + i * n_stat_rows * n_factors ;
+
+      if ( i > 0 )
+       tab_hline(tbl, TAL_1, 0, n_cols - 1, row );
+
+      tab_text (tbl, 0,
+               row,
+               TAB_LEFT | TAT_TITLE, 
+               var_to_string(dependent_var[i])
+               );
+
+      if ( fctr  )
+       {
+         struct factor_statistics **fs = fctr->fs;
+         int count = 0;
+
+         /* Level of the first factor seen on the previous row of this
+            dependent variable, or null on its first row.  This must
+            start afresh for every dependent variable (and every
+            call); otherwise a stale value can suppress the label of
+            the first factor level. */
+         const union value *prev = 0;
+
+         tab_text (tbl, 1, n_heading_rows - 1, 
+                   TAB_CENTER | TAT_TITLE, 
+                   var_to_string(fctr->indep_var[0]));
+
+
+         if ( fctr->indep_var[1])
+           tab_text (tbl, 2, n_heading_rows - 1, TAB_CENTER | TAT_TITLE, 
+                     var_to_string(fctr->indep_var[1]));
+
+         while( *fs ) 
+           {
+
+             const int row = n_heading_rows + n_stat_rows  * 
+               ( ( i  * n_factors  ) +  count );
+
+
+             /* Label the first factor only when its level changes */
+             if ( prev == 0 ||
+                  0 != compare_values(prev, &(*fs)->id[0], 
+                                      fctr->indep_var[0]->width))
+               {
+                 
+                 if ( count > 0 ) 
+                   tab_hline (tbl, TAL_1, 1, n_cols - 1, row);
+
+                 tab_text (tbl, 
+                           1, row,
+                           TAB_LEFT | TAT_TITLE, 
+                           value_to_string(&(*fs)->id[0], fctr->indep_var[0])
+                           );
+
+
+               }
+
+             prev = &(*fs)->id[0];
+
+             if (fctr->indep_var[1] && count > 0 ) 
+               tab_hline(tbl, TAL_1, 2, n_cols - 1, row);
+
+             if ( fctr->indep_var[1]) 
+               tab_text (tbl, 2, row,
+                         TAB_LEFT | TAT_TITLE, 
+                         value_to_string(&(*fs)->id[1], fctr->indep_var[1])
+                         );
+
+
+             populate_percentiles(tbl, n_heading_columns - 1, 
+                               row, &(*fs)->m[i]);
+
+
+             count++ ; 
+             fs++;
+           }
+
+
+       }
+      else 
+       {
+         populate_percentiles(tbl, n_heading_columns - 1, 
+                               row,
+                               &totals[i]);
+       }
+
+
+    }
+
+
+  tab_submit(tbl);
+
+
+}
+
+
+
+
+/* Fill in the two table rows for the metrics M: the percentile values
+   on ROW and Tukey's hinges on ROW + 1.  COL is the column holding
+   the row titles; values start in the column to its right, one per
+   break point in ascending order.  A hinge is written only under the
+   25, 50 and 75 break points. */
+void
+populate_percentiles(struct tab_table *tbl, int col, int row, 
+                    const struct metrics *m)
+{
+  /* Break points under which hinges[0], hinges[1], hinges[2] appear */
+  static const double hinge_break_point[3] = { 25, 50, 75 };
+
+  struct percentile **sorted = (struct percentile **) hsh_sort(m->ptile_hash);
+  int idx;
+
+  tab_text (tbl, col, row + 1,
+           TAB_LEFT | TAT_TITLE, 
+           _("Tukey\'s Hinges"));
+
+  tab_text (tbl, col, row, 
+           TAB_LEFT | TAT_TITLE, 
+           ptile_alg_desc[m->ptile_alg]);
+
+  /* The sorted array is terminated by a null pointer */
+  for ( idx = 0 ; sorted[idx] ; ++idx )
+    {
+      const struct percentile *pc = sorted[idx];
+      const int value_col = col + idx + 1;
+      int hinge;
+
+      tab_float(tbl, value_col, row, TAB_CENTER, pc->v, 8, 2);
+
+      for ( hinge = 0 ; hinge < 3 ; ++hinge )
+       {
+         if ( pc->p == hinge_break_point[hinge] )
+           tab_float(tbl, value_col, row + 1, 
+                     TAB_CENTER,
+                     m->hinges[hinge], 8, 2);
+       }
+    }
+}
+
diff --git a/src/factor_stats.c b/src/factor_stats.c

index 3a88ab44a4a234a8f64dec6d1d9674cc2ee79caf..7e5ac8b466d546accfec1ae9bf592d4cfa2b7f0c 100644 (file)
--- a/src/factor_stats.c
+++ b/src/factor_stats.c
@@ -25,6 +25,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  #include "algorithm.h"
  #include "alloc.h"
  #include "moments.h"
+#include "percentiles.h"
  
  #include <stdlib.h>
  #include <math.h>
@@ -51,9 +52,6 @@ metrics_precalc(struct metrics *m)
                                 (hsh_hash_func *) hash_value,
                                 (hsh_free_func *) weighted_value_free,
                                 (void *) 0);
-
-
-
  }
  
  
@@ -152,7 +150,6 @@ metrics_postcalc(struct metrics *m)
        gsl_histogram_accumulate(m->histogram, wv[i]->v.f, wv[i]->w);
      }
  
-
    /* Trimmed mean calculation */
    if ( m->n_data <= 1 ) 
      {
@@ -175,9 +172,10 @@ metrics_postcalc(struct metrics *m)
        
        if ( cc < tc ) 
         k1 = i;
-
      }
  
+  
+
    k2 = m->n_data;
    for ( i = m->n_data -1  ; i >= 0; --i ) 
      {
@@ -185,6 +183,12 @@ metrics_postcalc(struct metrics *m)
         k2 = i;
      }
  
+
+  /* Calculate the percentiles */
+  ptiles(m->ptile_hash, m->wvp, m->n_data, m->n, m->ptile_alg);
+
+  tukey_hinges(m->wvp, m->n_data, m->n, m->hinges);
+
    /* Special case here */
    if ( k1 + 1 == k2 ) 
      {
diff --git a/src/factor_stats.h b/src/factor_stats.h

index d29ca4d80a39ae530d6de7415ad70a38d18fb3c1..f6394a8a95aa1a8358b90e1812edb32d521bfc12 100644 (file)
--- a/src/factor_stats.h
+++ b/src/factor_stats.h
@@ -28,6 +28,8 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
  #include "hash.h"
  #include "val.h"
  #include <gsl/gsl_histogram.h>
+#include "subclist.h"
+#include "percentiles.h"
  
  struct moments1;
  
@@ -58,18 +60,28 @@ struct metrics
  
    double trimmed_mean;
  
-  /* A hash of data for this factor.
-   */
+  /* A hash of data for this factor. */
    struct hsh_table *ordered_data;
  
    /* A Pointer to this hash table AFTER it has been SORTED and crunched */
    struct weighted_value **wvp;
  
-
    /* The number of values in the above array
       (if all the weights are 1, then this will
       be the same as n) */
    int n_data;
+
+  /* Percentile stuff */
+
+  /* A hash of struct percentiles */
+  struct hsh_table *ptile_hash;
+
+  /* Algorithm to be used for calculating percentiles */
+  enum pc_alg ptile_alg;
+
+  /* Tukey's Hinges */
+  double hinges[3];
+
  };
  
  
diff --git a/src/hash.c b/src/hash.c

index a73256589ab69d55fce8252be7fa16074f0834a0..2544a75d0dcd025f82668bfc24a47dfaeb2ad70c 100644 (file)
--- a/src/hash.c
+++ b/src/hash.c
@@ -142,7 +142,9 @@ hsh_create (int size, hsh_compare_func *compare, hsh_hash_func *hash,
    struct hsh_table *h;
    int i;
  
-  assert (size > 0);
+  if ( size ==  0 ) 
+    return NULL;
+
    assert (compare != NULL);
    assert (hash != NULL);
    
diff --git a/src/percentiles.c b/src/percentiles.c

new file mode 100644 (file)

index 0000000..9719676
--- /dev/null
+++ b/src/percentiles.c
@@ -0,0 +1,399 @@
+/* PSPP - A program for statistical analysis . -*-c-*-
+
+Copyright (C) 2004 Free Software Foundation, Inc.
+Author: John Darrington 2004
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA. */
+
+#include "factor_stats.h"
+#include "percentiles.h"
+#include "misc.h"
+
+#include <assert.h>
+
+
+struct ptile_params
+{
+  double g1, g1_star;  /* g1_star = tc1 - cc[k1]; g1 = g1_star / w[k1 + 1], with tc1 = w * p (see ptile) */
+  double g2, g2_star;  /* Same quantities relative to tc2 = (w + 1) * p; both 0 when k2 + 1 >= n_data */
+  int k1, k2;          /* Last index whose cc is <= tc1 (resp. tc2), or -1 if there is none */
+};
+
+
+const char *ptile_alg_desc[] = {   /* Indexed by enum pc_alg; entry 0 corresponds to PC_NONE */
+  "",
+  N_("HAverage"),
+  N_("Weighted Average"),
+  N_("Rounded"),
+  N_("Empirical"),
+  N_("Empirical with averaging")
+};
+
+
+
+
+/* Individual Percentile algorithms */
+
+/* Closest observation to tc1 */
+double ptile_round(const struct weighted_value **wv, 
+                  const struct ptile_params *par);
+
+
+/* Weighted average at y_tc2 */
+double ptile_haverage(const struct weighted_value **wv, 
+                  const struct ptile_params *par);
+
+
+/* Weighted average at y_tc1 */
+double ptile_waverage(const struct weighted_value **wv, 
+                  const struct ptile_params *par);
+
+
+/* Empirical distribution function */
+double ptile_empirical(const struct weighted_value **wv, 
+                  const struct ptile_params *par);
+
+
+/* Empirical distribution function with averaging*/
+double ptile_aempirical(const struct weighted_value **wv, 
+                  const struct ptile_params *par);
+
+
+
+
+/* Closest observation to tc1.
+
+   Picks wv[k1] or wv[k1 + 1] depending on whether the fractional
+   position is below 0.5.  The fraction used is g1_star when the upper
+   value carries a weight of at least 1, and g1 otherwise. */
+double
+ptile_round(const struct weighted_value **wv, 
+           const struct ptile_params *par)
+{
+  const struct weighted_value *upper = wv[par->k1 + 1];
+  const double fraction = ( upper->w >= 1 ) ? par->g1_star : par->g1;
+
+  return ( fraction < 0.5 ) ? wv[par->k1]->v.f : upper->v.f;
+}
+
+/* Weighted average at y_tc2.
+
+   Interpolates between wv[k2] and wv[k2 + 1] using the parameters that
+   ptile() derived from tc2 = (w + 1) * p.  The interpolation weight is
+   g2_star when the upper value has a weight of at least 1, and g2
+   otherwise.  When g2_star is 1 or more the upper value is returned
+   as is. */
+double
+ptile_haverage(const struct weighted_value **wv, 
+              const struct ptile_params *par)
+{
+  if ( par->g2_star >= 1.0 ) 
+      return wv[par->k2 + 1]->v.f ;
+
+  /* ptile() sets g2 and g2_star to 0 when k2 + 1 >= n_data; returning
+     wv[k2] here avoids indexing past the end of the array below.
+     (actually it's not a special case, but just avoids indexing errors )
+   */
+  if ( par->g2_star == 0 ) 
+    {
+      assert(par->g2 == 0 );
+      return wv[par->k2]->v.f;
+    }
+
+  assert(par->k2 >= 0);  /* wv[k2] is read below, so k2 must be a real index */
+
+  if ( wv[par->k2 + 1]->w >= 1.0 ) 
+    return ( (1 - par->g2_star) *  wv[par->k2]->v.f
+            + 
+            par->g2_star * wv[par->k2 + 1]->v.f);
+  else
+    return ( (1 - par->g2) *  wv[par->k2]->v.f
+            + 
+            par->g2 * wv[par->k2 + 1]->v.f);
+
+}
+
+
+
+/* Weighted average at y_tc1.
+
+   Interpolates between wv[k1] and wv[k1 + 1].  The interpolation
+   weight is g1_star when the upper value has a weight of at least 1,
+   and g1 otherwise; when g1_star is 1 or more the upper value is
+   returned unchanged.
+
+   NOTE(review): ptile() can leave k1 at -1; if g1_star is then below
+   1 this reads wv[-1] -- confirm whether callers can reach that. */
+double 
+ptile_waverage(const struct weighted_value **wv, 
+              const struct ptile_params *par)
+{
+  const struct weighted_value *upper = wv[par->k1 + 1];
+  double g;
+
+  if ( par->g1_star >= 1.0 ) 
+    return upper->v.f ;
+
+  g = ( upper->w >= 1.0 ) ? par->g1_star : par->g1;
+
+  return ( (1 - g) *  wv[par->k1]->v.f
+          + 
+          g * upper->v.f);
+}
+
+
+/* Empirical distribution function: the value just above position k1
+   when tc1 lies strictly beyond cc[k1], else the value at k1 itself */
+double 
+ptile_empirical(const struct weighted_value **wv, 
+              const struct ptile_params *par)
+{
+  const int idx = ( par->g1_star > 0 ) ? par->k1 + 1 : par->k1;
+
+  return wv[idx]->v.f;
+}
+
+
+
+/* Empirical distribution function with averaging: when tc1 falls
+   exactly on cc[k1] (g1_star is not positive) the mean of the two
+   neighbouring values is returned, otherwise the upper one */
+double 
+ptile_aempirical(const struct weighted_value **wv, 
+              const struct ptile_params *par)
+{
+  if ( !( par->g1_star > 0 ) ) 
+    return (wv[par->k1]->v.f + wv[par->k1 + 1]->v.f ) / 2.0 ;
+
+  return wv[par->k1 + 1]->v.f;
+}
+
+
+
+/* Compute the percentile p.
+
+   P is a fraction (ptiles() passes the break point divided by 100)
+   and must not exceed 1.0.  WV holds N_DATA weighted values whose cc
+   members are compared against the targets tc1 = w * p and
+   tc2 = (w + 1) * p; W is the weight total supplied by the caller.
+   The derived k, g and g_star parameters are handed to the routine
+   selected by ALGORITHM.  Returns SYSMIS for an algorithm with no
+   case below (e.g. PC_NONE).
+
+   NOTE(review): g1 is computed from wv[k1 + 1] without the
+   k + 1 >= n_data guard that protects g2 -- confirm k1 can never be
+   the last index. */
+double ptile(double p, 
+            const struct weighted_value **wv,
+            int n_data,
+            double w,
+            enum pc_alg algorithm);
+
+
+
+double 
+ptile(double p, 
+      const struct weighted_value **wv,
+      int n_data,
+      double w,
+      enum pc_alg algorithm)
+{
+  int i;
+  double tc1, tc2;
+  double result;
+
+  struct ptile_params pp;
+
+  assert( p <= 1.0);
+
+  tc1 = w * p ;
+  tc2 = (w + 1) * p ;
+
+  pp.k1 = -1;  /* -1 means no value has cc <= tc1 */
+  pp.k2 = -1;  /* likewise for tc2 */
+
+  for ( i = 0 ; i < n_data ; ++i ) 
+    {
+      if ( wv[i]->cc <= tc1 ) 
+       pp.k1 = i;
+
+      if ( wv[i]->cc <= tc2 ) 
+       pp.k2 = i;
+      
+    }
+
+
+  if ( pp.k1 >= 0 ) 
+    {
+      pp.g1 = ( tc1 - wv[pp.k1]->cc ) / wv[pp.k1 + 1]->w;
+      pp.g1_star = tc1 -  wv[pp.k1]->cc ; 
+    }
+  else
+    {
+      pp.g1 = tc1 / wv[pp.k1 + 1]->w;  /* k1 is -1 here, so this is wv[0] */
+      pp.g1_star = tc1 ;
+    }
+
+
+  if ( pp.k2  + 1 >= n_data )  /* no value above k2: nothing to interpolate towards */
+    {
+      pp.g2 = 0 ;
+      pp.g2_star = 0;
+    }
+  else 
+    {
+      if ( pp.k2 >= 0 ) 
+       {
+         pp.g2 = ( tc2 - wv[pp.k2]->cc ) / wv[pp.k2 + 1]->w;
+         pp.g2_star = tc2 -  wv[pp.k2]->cc ; 
+       }
+      else
+       {
+         pp.g2 = tc2 / wv[pp.k2 + 1]->w;
+         pp.g2_star = tc2 ;
+       }
+    }
+
+  switch ( algorithm ) 
+    {
+    case PC_HAVERAGE:
+      result = ptile_haverage(wv, &pp);
+      break;
+    case PC_WAVERAGE:
+      result = ptile_waverage(wv, &pp);
+      break;
+    case PC_ROUND:
+      result = ptile_round(wv, &pp);
+      break;
+    case PC_EMPIRICAL:
+      result = ptile_empirical(wv, &pp);
+      break;
+    case PC_AEMPIRICAL:
+      result = ptile_aempirical(wv, &pp);
+      break;
+    default:
+      result = SYSMIS;
+    }
+
+  return result;
+}
+
+
+/* 
+   Fill in the value of every percentile stored in pc_hash, using the
+   given algorithm.  Each entry's break point is a percentage and is
+   scaled to a fraction before being passed to ptile().
+   wv is a sorted array of n_data weighted values of the data set.
+   A null pc_hash is accepted and nothing is done.
+*/
+void 
+ptiles(struct hsh_table *pc_hash,
+       const struct weighted_value **wv,
+       int n_data,
+       double w,
+       enum pc_alg algorithm)
+{
+  struct hsh_iterator iter;
+  struct percentile *pc;
+
+  if ( pc_hash == 0 ) 
+    return ;
+
+  pc = hsh_first(pc_hash, &iter);
+  while ( pc != 0 )
+    {
+      pc->v = ptile(pc->p/100.0 , wv, n_data, w, algorithm);
+      pc = hsh_next(pc_hash, &iter);
+    }
+}
+
+
+/* Calculate Tukey's Hinges.
+
+   WV holds N_DATA weighted values and W is the weight total supplied
+   by the caller.  Three depths l[0..2] are derived from W and from
+   c_star (the smallest weight, capped at 1); for each depth the last
+   index whose cc does not exceed it is located and the hinge is
+   interpolated between that value and the next one.  The results are
+   stored in HINGES[0..2] and asserted to be in non-decreasing order.
+
+   NOTE(review): a is a_star divided by the cumulative count cc of the
+   upper value, whereas ptile() divides by its weight w -- confirm
+   which is intended.
+   NOTE(review): when h[i] stays -1 and a_star < 1 the interpolation
+   reads wv[-1] -- confirm that cannot happen for valid data. */
+void
+tukey_hinges(const struct weighted_value **wv,
+            int n_data, 
+            double w,
+            double hinges[3])
+{
+  int i;
+  double c_star = DBL_MAX;
+  double d;
+  double l[3];
+  int h[3];
+  double a, a_star;
+  
+  for ( i = 0 ; i < n_data ; ++i ) 
+    {
+      c_star = min(c_star, wv[i]->w);
+    }
+
+  if ( c_star > 1 ) c_star = 1;
+
+  d = floor((w/c_star + 3 ) / 2.0)/ 2.0;
+
+  l[0] = d*c_star;
+  l[1] = w/2.0 + c_star/2.0;
+  l[2] = w + c_star - d*c_star;
+
+  h[0]=-1;  /* -1 means no value has cc <= the corresponding depth */
+  h[1]=-1;
+  h[2]=-1;
+
+  for ( i = 0 ; i < n_data ; ++i ) 
+    {
+      if ( l[0] >= wv[i]->cc ) h[0] = i ;
+      if ( l[1] >= wv[i]->cc ) h[1] = i ;
+      if ( l[2] >= wv[i]->cc ) h[2] = i ;
+    }
+
+  for ( i = 0 ; i < 3 ; i++ )
+    {
+      assert(h[i] + 1< n_data);  /* wv[h[i] + 1] is read below */
+
+      if ( h[i] >= 0 ) 
+       a_star = l[i] - wv[h[i]]->cc ;
+      else
+       a_star = l[i];
+
+      a = a_star / ( wv[h[i]+1]->cc ) ; 
+
+      if ( a_star >= 1.0 ) 
+       {
+         hinges[i] = wv[h[i] + 1]->v.f ;
+         continue;
+       }
+
+      if ( wv[h[i]+1]->w >= 1)
+       {
+         hinges[i] = ( 1 - a_star)* wv[h[i]]->v.f
+           + a_star * wv[h[i]+1]->v.f;
+
+         continue;
+       }
+
+      hinges[i] = ( 1 - a)* wv[h[i]]->v.f + a * wv[h[i]+1]->v.f;
+      
+    }
+
+  assert(hinges[0] <= hinges[1]);
+  assert(hinges[1] <= hinges[2]);
+
+}
+
+/* Hash table comparison function: orders two percentiles by break
+   point, returning -1, 0 or +1 */
+int
+ptile_compare(const struct percentile *p1, 
+                  const struct percentile *p2, 
+                  void *aux UNUSED)
+{
+  if ( p1->p < p2->p )
+    return -1;
+
+  if ( p1->p == p2->p )
+    return 0;
+
+  return +1;
+}
+
+unsigned
+ptile_hash(const struct percentile *p, void *aux UNUSED)
+{
+  return hsh_hash_double(p->p);  /* hash on the break point only, the same key ptile_compare() uses */
+}
+
+
diff --git a/src/percentiles.h b/src/percentiles.h

new file mode 100644 (file)

index 0000000..8baba9f
--- /dev/null
+++ b/src/percentiles.h
@@ -0,0 +1,83 @@
+/* PSPP - A program for statistical analysis . -*-c-*-
+
+Copyright (C) 2004 Free Software Foundation, Inc.
+Author: John Darrington 2004
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 2 of the
+License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA. */
+
+#ifndef PERCENTILES_H
+#define PERCENTILES_H
+
+
+#include "hash.h"
+
+struct weighted_value ;
+
+/* The algorithm used to calculate percentiles */
+enum pc_alg {
+  PC_NONE=0, 
+  PC_HAVERAGE, 
+  PC_WAVERAGE, 
+  PC_ROUND, 
+  PC_EMPIRICAL, 
+  PC_AEMPIRICAL
+} ;
+
+
+
+extern  const char *ptile_alg_desc[];
+
+
+
+
+struct percentile {
+
+  /* The break point of the percentile */
+  double p;
+
+  /* The value of the percentile */
+  double v;
+};
+
+
+/* Calculate the value of every struct percentile held in pc_hash,
+   storing each result in that entry's v member.  Break points (the p
+   members) are percentages.
+   wv is  a sorted array of n_data weighted values of the data set.
+*/
+void ptiles(struct hsh_table *pc_hash,
+           const struct weighted_value **wv,
+           int n_data,
+           double w,
+           enum pc_alg algorithm);
+
+
+/* Calculate Tukey's Hinges */
+void tukey_hinges(const struct weighted_value **wv,
+                 int n_data, 
+                 double w,
+                 double hinges[3]);
+
+
+
+/* Hash utility functions */
+int ptile_compare(const struct percentile *p1, 
+                  const struct percentile *p2, 
+                  void *aux);
+
+unsigned ptile_hash(const struct percentile *p, void *aux);
+
+
+#endif
diff --git a/src/subclist.c b/src/subclist.c

index d2add44c178952781e384986af54e36198f7e7fa..2a7f3679d955f7390557360fa3cab56f59fec5cd 100644 (file)
--- a/src/subclist.c
+++ b/src/subclist.c
@@ -53,7 +53,7 @@ subc_list_double_push(subc_list_double *l, double d)
  
  /* Return the number of items in the list */
  int 
-subc_list_double_count(subc_list_double *l)
+subc_list_double_count(const subc_list_double *l)
  {
    return l->n_data;
  }
@@ -61,7 +61,7 @@ subc_list_double_count(subc_list_double *l)
  
  /* Index into the list (array) */
  double
-subc_list_double_at(subc_list_double *l, int idx)
+subc_list_double_at(const subc_list_double *l, int idx)
  {
    return l->data[idx];
  }
diff --git a/src/subclist.h b/src/subclist.h

index b311bc67bcb36c73d68cf94ef0e6a9e79094d062..1a110e137e5e61eddb53d96d0d673cd2011806d5 100644 (file)
--- a/src/subclist.h
+++ b/src/subclist.h
@@ -57,12 +57,12 @@ void subc_list_double_push(subc_list_double *l, double d) ;
  void subc_list_int_push(subc_list_int *l, int i) ;
  
  /* Index into the list */
-double subc_list_double_at(subc_list_double *l, int idx);
-int subc_list_int_at(subc_list_int *l, int idx);
+double subc_list_double_at(const subc_list_double *l, int idx);
+int subc_list_int_at(const subc_list_int *l, int idx);
  
  /* Return the number of values in the list */
-int subc_list_double_count(subc_list_double *l);
-int subc_list_int_count(subc_list_int *l);
+int subc_list_double_count(const subc_list_double *l);
+int subc_list_int_count(const subc_list_int *l);
  
  /* Destroy the list */
  void subc_list_double_destroy(subc_list_double *l) ;
diff --git a/tests/command/examine.sh b/tests/command/examine.sh

index 11830edbfba4227fac4e643b3de157bda41dfee0..55c80a8a914436e76d59cd8f74e1d64d96fcbcc7 100755 (executable)
--- a/tests/command/examine.sh
+++ b/tests/command/examine.sh
@@ -152,13 +152,13 @@ Case#  QUALITY        W    BRAND
  #               95% Confidence Interval for MeanLower Bound#  3.562  |          #
  #                                               Upper Bound#  3.521  |          #
  #               5% Trimmed Mean                            #   3.50  |          #
-#               Median                                     #         |          #
+#               Median                                     #   4.00  |          #
  #               Variance                                   #  2.520  |          #
  #               Std. Deviation                             #  1.587  |          #
  #               Minimum                                    #  1.000  |          #
  #               Maximum                                    #  7.000  |          #
  #               Range                                      #  6.000  |          #
-#               Interquartile Range                        #         |          #
+#               Interquartile Range                        #   2.75  |          #
  #               Skewness                                   #   .059  |   .472   #
  #               Kurtosis                                   #  -.358  |   .918   #
  #==========================================================#=========#==========#
@@ -213,13 +213,13 @@ Case#  QUALITY        W    BRAND
  #                           95% Confidence Interval for MeanLower Bound#  2.279  |          #
  #                                                           Upper Bound#  2.221  |          #
  #                           5% Trimmed Mean                            #   2.22  |          #
-#                           Median                                     #         |          #
+#                           Median                                     #   2.00  |          #
  #                           Variance                                   #  1.643  |          #
  #                           Std. Deviation                             #  1.282  |          #
  #                           Minimum                                    #  1.000  |          #
  #                           Maximum                                    #  4.000  |          #
  #                           Range                                      #  3.000  |          #
-#                           Interquartile Range                        #         |          #
+#                           Interquartile Range                        #   2.75  |          #
  #                           Skewness                                   #   .475  |   .752   #
  #                           Kurtosis                                   #  -1.546 |   1.481  #
  #               -------------------------------------------------------#---------+----------#
@@ -227,13 +227,13 @@ Case#  QUALITY        W    BRAND
  #                           95% Confidence Interval for MeanLower Bound#  3.525  |          #
  #                                                           Upper Bound#  3.475  |          #
  #                           5% Trimmed Mean                            #   3.50  |          #
-#                           Median                                     #         |          #
+#                           Median                                     #   4.00  |          #
  #                           Variance                                   #  1.143  |          #
  #                           Std. Deviation                             #  1.069  |          #
  #                           Minimum                                    #  2.000  |          #
  #                           Maximum                                    #  5.000  |          #
  #                           Range                                      #  3.000  |          #
-#                           Interquartile Range                        #         |          #
+#                           Interquartile Range                        #   1.75  |          #
  #                           Skewness                                   #  -.468  |   .752   #
  #                           Kurtosis                                   #  -.831  |   1.481  #
  #               -------------------------------------------------------#---------+----------#
@@ -241,13 +241,13 @@ Case#  QUALITY        W    BRAND
  #                           95% Confidence Interval for MeanLower Bound#  4.904  |          #
  #                                                           Upper Bound#  4.846  |          #
  #                           5% Trimmed Mean                            #   4.86  |          #
-#                           Median                                     #         |          #
+#                           Median                                     #   5.00  |          #
  #                           Variance                                   #  1.554  |          #
  #                           Std. Deviation                             #  1.246  |          #
  #                           Minimum                                    #  3.000  |          #
  #                           Maximum                                    #  7.000  |          #
  #                           Range                                      #  4.000  |          #
-#                           Interquartile Range                        #         |          #
+#                           Interquartile Range                        #   1.75  |          #
  #                           Skewness                                   #   .304  |   .752   #
  #                           Kurtosis                                   #   .146  |   1.481  #
  #======================================================================#=========#==========#
diff --git a/tests/command/trimmed-mean.sh b/tests/command/trimmed-mean.sh

index 0690c0eabff3f6852a5e09c4ede70b1b46be4a92..8b84a31c47c24d9bb38f916d54518910c6cdc819 100755 (executable)
--- a/tests/command/trimmed-mean.sh
+++ b/tests/command/trimmed-mean.sh
@@ -100,13 +100,13 @@ diff $TEMPDIR/pspp.list - << EOF
  # 95% Confidence Interval for MeanLower Bound#  2.021  |          #
  #                                 Upper Bound#  2.017  |          #
  # 5% Trimmed Mean                            #   2.00  |          #
-# Median                                     #         |          #
+# Median                                     #   2.00  |          #
  # Variance                                   #   .058  |          #
  # Std. Deviation                             #   .242  |          #
  # Minimum                                    #  1.000  |          #
  # Maximum                                    #  3.000  |          #
  # Range                                      #  2.000  |          #
-# Interquartile Range                        #         |          #
+# Interquartile Range                        #   .00   |          #
  # Skewness                                   #  1.194  |   .330   #
  # Kurtosis                                   #  15.732 |   .650   #
  #============================================#=========#==========#
author	John Darrington <john@darrington.wattle.id.au>
	Wed, 29 Dec 2004 01:19:57 +0000 (01:19 +0000)
committer	John Darrington <john@darrington.wattle.id.au>
	Wed, 29 Dec 2004 01:19:57 +0000 (01:19 +0000)
doc/statistics.texi		patch \| blob \| history
po/en_GB.po		patch \| blob \| history
po/pspp.pot		patch \| blob \| history
src/ChangeLog		patch \| blob \| history
src/Makefile.am		patch \| blob \| history
src/examine.q		patch \| blob \| history
src/factor_stats.c		patch \| blob \| history
src/factor_stats.h		patch \| blob \| history
src/hash.c		patch \| blob \| history
src/percentiles.c	[new file with mode: 0644]	patch \| blob
src/percentiles.h	[new file with mode: 0644]	patch \| blob
src/subclist.c		patch \| blob \| history
src/subclist.h		patch \| blob \| history
tests/command/examine.sh		patch \| blob \| history
tests/command/trimmed-mean.sh		patch \| blob \| history