X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=tests%2Flanguage%2Fstats%2Fquick-cluster.at;h=f3d0d24283e39d355c72fd5e79db4a54faf61439;hb=0891406f7c1039af8c769b8069691246566fd894;hp=9e3e87c899d3635b3e21e46852e48a5e11157691;hpb=3ec4ce83403f756f2c983ca2b5544cb1dcf9dfe0;p=pspp diff --git a/tests/language/stats/quick-cluster.at b/tests/language/stats/quick-cluster.at index 9e3e87c899..f3d0d24283 100644 --- a/tests/language/stats/quick-cluster.at +++ b/tests/language/stats/quick-cluster.at @@ -1,3 +1,19 @@ +dnl PSPP - a program for statistical analysis. +dnl Copyright (C) 2017 Free Software Foundation, Inc. +dnl +dnl This program is free software: you can redistribute it and/or modify +dnl it under the terms of the GNU General Public License as published by +dnl the Free Software Foundation, either version 3 of the License, or +dnl (at your option) any later version. +dnl +dnl This program is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +dnl GNU General Public License for more details. +dnl +dnl You should have received a copy of the GNU General Public License +dnl along with this program. If not, see . +dnl AT_BANNER([QUICK CLUSTER]) AT_SETUP([QUICK CLUSTER with small data set]) @@ -29,14 +45,13 @@ z,F8.0 Table: Final Cluster Centers ,Cluster, -,, ,1,2 -,, x,6.50,19.20 y,7.50,3250.00 z,6.50,5058.00 Table: Number of Cases in each Cluster +,,Count Cluster,1,6 ,2,5 Valid,,11 @@ -44,31 +59,31 @@ Valid,,11 AT_CLEANUP AT_SETUP([QUICK CLUSTER with large data set]) +AT_KEYWORDS([slow]) AT_DATA([quick-cluster.sps], [dnl input program. -loop #i = 1 to 500000. +loop #i = 1 to 50000. compute x = 3. end case. end loop. end file. end input program. -QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (100). +QUICK CLUSTER x /CRITERIA = CLUSTER(4) NOINITIAL. ]) AT_CHECK([pspp -o pspp.csv quick-cluster.sps]) AT_CHECK([cat pspp.csv], [0], [dnl Table: Final Cluster Centers ,Cluster,,, -,,,, ,1,2,3,4 -,,,, -x,.00,.00,.00,3.00 +x,NaN,NaN,NaN,3.00 Table: Number of Cases in each Cluster +,,Count Cluster,1,0 ,2,0 ,3,0 -,4,500000 -Valid,,500000 +,4,50000 +Valid,,50000 ]) AT_CLEANUP @@ -91,7 +106,7 @@ end input program. weight by w. -QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (100). +QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (10). ]) AT_CHECK([pspp -o pspp-w.csv qc-weighted.sps]) @@ -106,7 +121,7 @@ end loop. end file. end input program. -QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (100). +QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (10). ]) AT_CHECK([pspp -o pspp-unw.csv qc-unweighted.sps]) @@ -128,7 +143,7 @@ begin data. 2 end data. -QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (100). +QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (10). ]) AT_CHECK([pspp -o pspp-m.csv quick-miss.sps]) @@ -144,7 +159,7 @@ begin data. 2 end data. -QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (100). +QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (10). ]) AT_CHECK([pspp -o pspp-nm.csv quick-nmiss.sps]) @@ -155,6 +170,12 @@ AT_CLEANUP AT_SETUP([QUICK CLUSTER with pairwise missing]) + +dnl This test runs two programs, which are identical except that one +dnl has an extra case with one missing value. Because the syntax uses +dnl NOINITIAL and NOUPDATE, the results should be identical except for +dnl the final classification. + AT_DATA([quick-s.sps], [dnl data list notable list /x * y *. begin data. @@ -169,16 +190,15 @@ begin data. 3.4 3 3.5 2.5 3.1 2.0 -3.9 2.5 -3.8 2.0 end data. -QUICK CLUSTER x y - /CRITERIA = CLUSTER(3) MXITER (100) +QUICK CLUSTER x y + /PRINT = INITIAL + /CRITERIA = CLUSTER(3) NOINITIAL NOUPDATE . ]) -AT_CHECK([pspp -O format=csv quick-s.sps | tail -5 > pspp-s.csv]) +AT_CHECK([pspp -O format=csv quick-s.sps > pspp-s.csv]) AT_DATA([quick-pw.sps], [dnl data list notable list /x * y *. @@ -194,19 +214,374 @@ begin data. 3.4 3 3.5 2.5 3.1 2.0 -3.9 . -3.8 . +. 2.3 end data. -QUICK CLUSTER x y - /CRITERIA = CLUSTER(3) MXITER (100) +QUICK CLUSTER x y + /CRITERIA = CLUSTER(3) NOINITIAL NOUPDATE + /PRINT = INITIAL /MISSING = PAIRWISE . ]) -AT_CHECK([pspp -O format=csv quick-pw.sps | tail -5 > pspp-pw.csv]) +AT_CHECK([pspp -O format=csv quick-pw.sps > pspp-pw.csv]) + +AT_CHECK([head -n 13 pspp-s.csv > top-s.csv]) +AT_CHECK([head -n 13 pspp-pw.csv > top-pw.csv]) +AT_CHECK([diff top-s.csv top-pw.csv]) + + +AT_CHECK([grep Valid pspp-s.csv], [0], [Valid,,11 +]) + +AT_CHECK([grep Valid pspp-pw.csv], [0], [Valid,,12 +]) + + +AT_CLEANUP + + + +AT_SETUP([QUICK CLUSTER crash on bad cluster quantity]) +AT_DATA([badn.sps], [dnl +data list notable list /x * y *. +begin data. +1 2 +1 2.2 +end data. + +QUICK CLUSTER x y + /CRITERIA = CLUSTER(0) + . +]) + +AT_CHECK([pspp -O format=csv badn.sps], [1], [ignore]) + +AT_CLEANUP + +AT_SETUP([QUICK CLUSTER infinite loop on bad command name]) +AT_DATA([quick-cluster.sps], [dnl +data list notable list /x y. +begin data. +1 2 +1 2.2 +end data. + +QUICK CLUSTER x y /UNSUPPORTED. +]) +AT_CHECK([pspp -O format=csv quick-cluster.sps], [1], [dnl +quick-cluster.sps:7.20-7.30: error: QUICK CLUSTER: Syntax error at `UNSUPPORTED'. +]) +AT_CLEANUP + + + +AT_SETUP([QUICK CLUSTER /PRINT subcommand]) +AT_DATA([quick-cluster.sps], [dnl +data list notable list /cluster (A8) x y (F8.0). +begin data. +A 10.45 9.38 +A 10.67 9.17 +A 10.86 9.63 +A 8.77 8.45 +A 8.04 11.77 +A 10.34 9.83 +A 10.37 10.54 +A 11.49 8.18 +A 10.17 11.10 +A 11.37 9.16 +A 10.25 8.83 +A 8.69 9.92 +A 10.36 10.39 +A 10.89 10.51 +A 9.9 11.39 +A 11.1 10.91 +A 11.77 8.47 +A 9.5 10.46 +B -11.01 -9.21 +B -10.8 -11.76 +B -10.03 -10.29 +B -9.54 -9.17 +B -10.16 -9.82 +B -10.01 -8.63 +B -9.6 -10.22 +B -11.36 -10.93 +B -10.63 -10.97 +B -9.53 -10.78 +B -9.40 -10.26 +B -10.76 -9.76 +B -9.9 -10.11 +B -10.16 -9.75 +B -8.65 -11.31 +B -10.10 -10.90 +B -11.67 -9.89 +B -11.11 -9.23 +B -8.7 -8.43 +B -11.35 -8.68 +C -10.20 9.00 +C -10.12 9.92 +C -10.41 10.16 +C -9.86 10.12 +C -10.31 10.12 +C -9.57 10.16 +C -9.69 9.93 +C -9.14 10.84 +C -9.8 10.19 +C -9.97 10.22 +C -11.65 10.81 +C -9.80 11.39 +C -10.31 10.74 +C -10.26 10.38 +C -11.57 10.02 +C -10.50 9.75 +C -9.06 9.63 +C -10.17 10.82 +C -10.22 9.99 +end data. + +QUICK CLUSTER x y + /CRITERIA=CLUSTERS(3) + /PRINT=INITIAL CLUSTER. +]) + +AT_CHECK([pspp -O format=csv quick-cluster.sps], [0], [dnl +Table: Initial Cluster Centers +,Cluster,, +,1,2,3 +x,-11,-12,11 +y,-12,11,11 + +Table: Final Cluster Centers +,Cluster,, +,1,2,3 +x,-10,-10,10 +y,-10,10,10 + +Table: Number of Cases in each Cluster +,,Count +Cluster,1,20 +,2,19 +,3,18 +Valid,,57 + +Table: Cluster Membership +Case Number,Cluster +1,3 +2,3 +3,3 +4,3 +5,3 +6,3 +7,3 +8,3 +9,3 +10,3 +11,3 +12,3 +13,3 +14,3 +15,3 +16,3 +17,3 +18,3 +19,1 +20,1 +21,1 +22,1 +23,1 +24,1 +25,1 +26,1 +27,1 +28,1 +29,1 +30,1 +31,1 +32,1 +33,1 +34,1 +35,1 +36,1 +37,1 +38,1 +39,2 +40,2 +41,2 +42,2 +43,2 +44,2 +45,2 +46,2 +47,2 +48,2 +49,2 +50,2 +51,2 +52,2 +53,2 +54,2 +55,2 +56,2 +57,2 +]) + +AT_CLEANUP + + +dnl Test for a crash which happened on bad input syntax +AT_SETUP([QUICK CLUSTER -- Empty Parentheses]) + +AT_DATA([empty-parens.sps], [dnl +data list notable list /x * y *. +begin data. +1 2 +1 2.2 +end data. + +QUICK CLUSTER x y + /CRITERIA = CONVERGE() + . +]) + +AT_CHECK([pspp -o pspp.csv empty-parens.sps], [1], [ignore]) + +AT_CLEANUP + -AT_CHECK([diff pspp-s.csv pspp-pw.csv], [0]) +AT_SETUP([QUICK CLUSTER with save]) +AT_DATA([quick-cluster.sps], [dnl +DATA LIST notable LIST /x y z. +BEGIN DATA. +22,2930,4099 +17,3350,4749 +22,2640,3799 +20, 3250,4816 +15,4080,7827 +4,5,4 +5,6,5 +6,7,6 +7,8,7 +8,9,8 +9,10,9 +END DATA. +QUICK CLUSTER x y z + /CRITERIA=CLUSTER(2) MXITER(20) + /SAVE = CLUSTER (cluster) DISTANCE (distance). + +list. +]) + +AT_CHECK([pspp -O format=csv quick-cluster.sps], [0], [dnl +Table: Final Cluster Centers +,Cluster, +,1,2 +x,6.50,19.20 +y,7.50,3250.00 +z,6.50,5058.00 + +Table: Number of Cases in each Cluster +,,Count +Cluster,1,6 +,2,5 +Valid,,11 + +Table: Data List +x,y,z,cluster,distance +22.00,2930.00,4099.00,2.00,1010.98 +17.00,3350.00,4749.00,2.00,324.79 +22.00,2640.00,3799.00,2.00,1399.00 +20.00,3250.00,4816.00,2.00,242.00 +15.00,4080.00,7827.00,2.00,2890.72 +4.00,5.00,4.00,1.00,4.33 +5.00,6.00,5.00,1.00,2.60 +6.00,7.00,6.00,1.00,.87 +7.00,8.00,7.00,1.00,.87 +8.00,9.00,8.00,1.00,2.60 +9.00,10.00,9.00,1.00,4.33 +]) +AT_CLEANUP + + +AT_SETUP([QUICK CLUSTER with single save]) +AT_DATA([quick-cluster.sps], [dnl +DATA LIST notable LIST /x y z. +BEGIN DATA. +22,2930,4099 +17,3350,4749 +22,2640,3799 +20, 3250,4816 +15,4080,7827 +4,5,4 +5,6,5 +6,7,6 +7,8,7 +8,9,8 +9,10,9 +END DATA. +QUICK CLUSTER x y z + /CRITERIA=CLUSTER(2) MXITER(20) + /SAVE = DISTANCE. + +list. +]) + +AT_CHECK([pspp -O format=csv quick-cluster.sps], [0], [dnl +Table: Final Cluster Centers +,Cluster, +,1,2 +x,6.50,19.20 +y,7.50,3250.00 +z,6.50,5058.00 + +Table: Number of Cases in each Cluster +,,Count +Cluster,1,6 +,2,5 +Valid,,11 + +Table: Data List +x,y,z,QCL_0 +22.00,2930.00,4099.00,1010.98 +17.00,3350.00,4749.00,324.79 +22.00,2640.00,3799.00,1399.00 +20.00,3250.00,4816.00,242.00 +15.00,4080.00,7827.00,2890.72 +4.00,5.00,4.00,4.33 +5.00,6.00,5.00,2.60 +6.00,7.00,6.00,.87 +7.00,8.00,7.00,.87 +8.00,9.00,8.00,2.60 +9.00,10.00,9.00,4.33 +]) +AT_CLEANUP + + +dnl This one was noticed to crash at one point. +AT_SETUP([QUICK CLUSTER crash on bizarre input]) +AT_DATA([badn.sps], [dnl +data list notable list /x. +begin da\a* +22 +17 +22 +22 +15 +4, +5, +6, +7,j8, +9, +end data. + +quick cluster x +" /criteria=cluster(2) mxiter(20) + /save = distance + . + +list. +]) + +AT_CHECK([pspp -O format=csv badn.sps], [1], [ignore]) AT_CLEANUP