X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=blobdiff_plain;ds=sidebyside;f=tests%2Flanguage%2Fstats%2Fquick-cluster.at;h=34294468c62679269e7e3cdea9fc7440774938ce;hb=a70857ce808bba36e67a66375290e340dcf75adc;hp=75dd52cb19f89a0a584ab3281bc9c24334ae7953;hpb=56d6f17c81105cffb326be040430fefe53f95eea;p=pspp diff --git a/tests/language/stats/quick-cluster.at b/tests/language/stats/quick-cluster.at index 75dd52cb19..34294468c6 100644 --- a/tests/language/stats/quick-cluster.at +++ b/tests/language/stats/quick-cluster.at @@ -1,3 +1,19 @@ +dnl PSPP - a program for statistical analysis. +dnl Copyright (C) 2017 Free Software Foundation, Inc. +dnl +dnl This program is free software: you can redistribute it and/or modify +dnl it under the terms of the GNU General Public License as published by +dnl the Free Software Foundation, either version 3 of the License, or +dnl (at your option) any later version. +dnl +dnl This program is distributed in the hope that it will be useful, +dnl but WITHOUT ANY WARRANTY; without even the implied warranty of +dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +dnl GNU General Public License for more details. +dnl +dnl You should have received a copy of the GNU General Public License +dnl along with this program. If not, see <http://www.gnu.org/licenses/>. +dnl AT_BANNER([QUICK CLUSTER]) AT_SETUP([QUICK CLUSTER with small data set]) @@ -29,14 +45,13 @@ z,F8.0 Table: Final Cluster Centers ,Cluster, -,, ,1,2 -,, x,6.50,19.20 y,7.50,3250.00 z,6.50,5058.00 Table: Number of Cases in each Cluster +,,Count Cluster,1,6 ,2,5 Valid,,11 @@ -44,31 +59,31 @@ Valid,,11 AT_CLEANUP AT_SETUP([QUICK CLUSTER with large data set]) +AT_KEYWORDS([slow]) AT_DATA([quick-cluster.sps], [dnl input program. -loop #i = 1 to 500000. +loop #i = 1 to 50000. compute x = 3. end case. end loop. end file. end input program. -QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (100). +QUICK CLUSTER x /CRITERIA = CLUSTER(4) NOINITIAL. 
]) AT_CHECK([pspp -o pspp.csv quick-cluster.sps]) AT_CHECK([cat pspp.csv], [0], [dnl Table: Final Cluster Centers ,Cluster,,, -,,,, ,1,2,3,4 -,,,, -x,.00,.00,.00,3.00 +x,NaN,NaN,NaN,3.00 Table: Number of Cases in each Cluster +,,Count Cluster,1,0 ,2,0 ,3,0 -,4,500000 -Valid,,500000 +,4,50000 +Valid,,50000 ]) AT_CLEANUP @@ -91,7 +106,7 @@ end input program. weight by w. -QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (100). +QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (10). ]) AT_CHECK([pspp -o pspp-w.csv qc-weighted.sps]) @@ -106,7 +121,7 @@ end loop. end file. end input program. -QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (100). +QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (10). ]) AT_CHECK([pspp -o pspp-unw.csv qc-unweighted.sps]) @@ -128,7 +143,7 @@ begin data. 2 end data. -QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (100). +QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (10). ]) AT_CHECK([pspp -o pspp-m.csv quick-miss.sps]) @@ -144,7 +159,7 @@ begin data. 2 end data. -QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (100). +QUICK CLUSTER x /CRITERIA = CLUSTER(4) MXITER (10). ]) AT_CHECK([pspp -o pspp-nm.csv quick-nmiss.sps]) @@ -155,6 +170,12 @@ AT_CLEANUP AT_SETUP([QUICK CLUSTER with pairwise missing]) + +dnl This test runs two programs, which are identical except that one +dnl has an extra case with one missing value. Because the syntax uses +dnl NOINITIAL and NOUPDATE, the results should be identical except for +dnl the final classification. + AT_DATA([quick-s.sps], [dnl data list notable list /x * y *. begin data. @@ -169,16 +190,15 @@ begin data. 3.4 3 3.5 2.5 3.1 2.0 -3.9 2.5 -3.8 2.0 end data. QUICK CLUSTER x y - /CRITERIA = CLUSTER(3) MXITER (100) + /PRINT = INITIAL + /CRITERIA = CLUSTER(3) NOINITIAL NOUPDATE . ]) -AT_CHECK([pspp -O format=csv quick-s.sps | tail -5 > pspp-s.csv]) +AT_CHECK([pspp -O format=csv quick-s.sps > pspp-s.csv]) AT_DATA([quick-pw.sps], [dnl data list notable list /x * y *. @@ -194,19 +214,28 @@ begin data. 
3.4 3 3.5 2.5 3.1 2.0 -3.9 . -3.8 . +. 2.3 end data. QUICK CLUSTER x y - /CRITERIA = CLUSTER(3) MXITER (100) + /CRITERIA = CLUSTER(3) NOINITIAL NOUPDATE + /PRINT = INITIAL /MISSING = PAIRWISE . ]) -AT_CHECK([pspp -O format=csv quick-pw.sps | tail -5 > pspp-pw.csv]) +AT_CHECK([pspp -O format=csv quick-pw.sps > pspp-pw.csv]) + +AT_CHECK([head -n 13 pspp-s.csv > top-s.csv]) +AT_CHECK([head -n 13 pspp-pw.csv > top-pw.csv]) +AT_CHECK([diff top-s.csv top-pw.csv]) + -AT_CHECK([diff pspp-s.csv pspp-pw.csv], [0]) +AT_CHECK([grep Valid pspp-s.csv], [0], [Valid,,11 +]) + +AT_CHECK([grep Valid pspp-pw.csv], [0], [Valid,,12 +]) AT_CLEANUP @@ -249,7 +278,7 @@ AT_CLEANUP AT_SETUP([QUICK CLUSTER /PRINT subcommand]) AT_DATA([quick-cluster.sps], [dnl -data list notable list /cluster (A8) x (F) y (F). +data list notable list /cluster (A8) x y (F8.0). begin data. A 10.45 9.38 A 10.67 9.17 @@ -318,21 +347,18 @@ QUICK CLUSTER x y AT_CHECK([pspp -O format=csv quick-cluster.sps], [0], [dnl Table: Initial Cluster Centers ,Cluster,, -,,, ,1,2,3 -,,, -x,0,0,1 -y,0,1,0 +x,-11,-12,11 +y,-12,11,11 Table: Final Cluster Centers ,Cluster,, -,,, ,1,2,3 -,,, x,-10,-10,10 y,-10,10,10 Table: Number of Cases in each Cluster +,,Count Cluster,1,20 ,2,19 ,3,18 @@ -400,3 +426,162 @@ Case Number,Cluster ]) AT_CLEANUP + + +dnl Test for a crash which happened on bad input syntax +AT_SETUP([QUICK CLUSTER -- Empty Parentheses]) + +AT_DATA([empty-parens.sps], [dnl +data list notable list /x * y *. +begin data. +1 2 +1 2.2 +end data. + +QUICK CLUSTER x y + /CRITERIA = CONVERGE() + . +]) + +AT_CHECK([pspp -o pspp.csv empty-parens.sps], [1], [ignore]) + +AT_CLEANUP + + + +AT_SETUP([QUICK CLUSTER with save]) +AT_DATA([quick-cluster.sps], [dnl +DATA LIST notable LIST /x y z. +BEGIN DATA. +22,2930,4099 +17,3350,4749 +22,2640,3799 +20, 3250,4816 +15,4080,7827 +4,5,4 +5,6,5 +6,7,6 +7,8,7 +8,9,8 +9,10,9 +END DATA. +QUICK CLUSTER x y z + /CRITERIA=CLUSTER(2) MXITER(20) + /SAVE = CLUSTER (cluster) DISTANCE (distance). 
+ +list. +]) + +AT_CHECK([pspp -O format=csv quick-cluster.sps], [0], [dnl +Table: Final Cluster Centers +,Cluster, +,1,2 +x,6.50,19.20 +y,7.50,3250.00 +z,6.50,5058.00 + +Table: Number of Cases in each Cluster +,,Count +Cluster,1,6 +,2,5 +Valid,,11 + +Table: Data List +x,y,z,cluster,distance +22.00,2930.00,4099.00,2.00,1010.98 +17.00,3350.00,4749.00,2.00,324.79 +22.00,2640.00,3799.00,2.00,1399.00 +20.00,3250.00,4816.00,2.00,242.00 +15.00,4080.00,7827.00,2.00,2890.72 +4.00,5.00,4.00,1.00,4.33 +5.00,6.00,5.00,1.00,2.60 +6.00,7.00,6.00,1.00,.87 +7.00,8.00,7.00,1.00,.87 +8.00,9.00,8.00,1.00,2.60 +9.00,10.00,9.00,1.00,4.33 +]) +AT_CLEANUP + + +AT_SETUP([QUICK CLUSTER with single save]) +AT_DATA([quick-cluster.sps], [dnl +DATA LIST notable LIST /x y z. +BEGIN DATA. +22,2930,4099 +17,3350,4749 +22,2640,3799 +20, 3250,4816 +15,4080,7827 +4,5,4 +5,6,5 +6,7,6 +7,8,7 +8,9,8 +9,10,9 +END DATA. +QUICK CLUSTER x y z + /CRITERIA=CLUSTER(2) MXITER(20) + /SAVE = DISTANCE. + +list. +]) + +AT_CHECK([pspp -O format=csv quick-cluster.sps], [0], [dnl +Table: Final Cluster Centers +,Cluster, +,1,2 +x,6.50,19.20 +y,7.50,3250.00 +z,6.50,5058.00 + +Table: Number of Cases in each Cluster +,,Count +Cluster,1,6 +,2,5 +Valid,,11 + +Table: Data List +x,y,z,QCL_0 +22.00,2930.00,4099.00,1010.98 +17.00,3350.00,4749.00,324.79 +22.00,2640.00,3799.00,1399.00 +20.00,3250.00,4816.00,242.00 +15.00,4080.00,7827.00,2890.72 +4.00,5.00,4.00,4.33 +5.00,6.00,5.00,2.60 +6.00,7.00,6.00,.87 +7.00,8.00,7.00,.87 +8.00,9.00,8.00,2.60 +9.00,10.00,9.00,4.33 +]) +AT_CLEANUP + + +dnl This one was noticed to crash at one point. +AT_SETUP([QUICK CLUSTER crash on bizarre input]) +AT_DATA([badn.sps], [dnl +data list notable list /x. +begin da\a* +22 +17 +22 +22 +15 +4, +5, +6, +7,j8, +9, +end data. + +quick cluster x +" /criteria=cluster(2) mxiter(20) + /save = distance + . + +list. +]) + +AT_CHECK([pspp -O format=csv badn.sps], [1], [ignore]) + +AT_CLEANUP