+dnl PSPP - a program for statistical analysis.
+dnl Copyright (C) 2017 Free Software Foundation, Inc.
+dnl
+dnl This program is free software: you can redistribute it and/or modify
+dnl it under the terms of the GNU General Public License as published by
+dnl the Free Software Foundation, either version 3 of the License, or
+dnl (at your option) any later version.
+dnl
+dnl This program is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+dnl GNU General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU General Public License
+dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
+dnl
AT_BANNER([QUICK CLUSTER])
AT_SETUP([QUICK CLUSTER with small data set])
Table: Final Cluster Centers
,Cluster,
-,,
,1,2
-,,
x,6.50,19.20
y,7.50,3250.00
z,6.50,5058.00
Table: Number of Cases in each Cluster
+,,Count
Cluster,1,6
,2,5
Valid,,11
AT_CLEANUP
AT_SETUP([QUICK CLUSTER with large data set])
+AT_KEYWORDS([slow])
AT_DATA([quick-cluster.sps], [dnl
input program.
loop #i = 1 to 50000.
AT_CHECK([cat pspp.csv], [0], [dnl
Table: Final Cluster Centers
,Cluster,,,
-,,,,
,1,2,3,4
-,,,,
x,NaN,NaN,NaN,3.00
Table: Number of Cases in each Cluster
+,,Count
Cluster,1,0
,2,0
,3,0
AT_SETUP([QUICK CLUSTER with pairwise missing])
dnl This test runs two programs, which are identical except that one
-dnl has an extra case with one missing value. Becuase the syntax uses
+dnl has an extra case with one missing value. Because the syntax uses
dnl NOINITIAL and NOUPDATE, the results should be identical except for
dnl the final classification.
3.1 2.0
end data.
-QUICK CLUSTER x y
+QUICK CLUSTER x y
/PRINT = INITIAL
/CRITERIA = CLUSTER(3) NOINITIAL NOUPDATE
.
. 2.3
end data.
-QUICK CLUSTER x y
+QUICK CLUSTER x y
/CRITERIA = CLUSTER(3) NOINITIAL NOUPDATE
/PRINT = INITIAL
/MISSING = PAIRWISE
AT_CHECK([pspp -O format=csv quick-pw.sps > pspp-pw.csv])
-AT_CHECK([head -n 18 pspp-s.csv > top-s.csv])
-AT_CHECK([head -n 18 pspp-pw.csv > top-pw.csv])
+AT_CHECK([head -n 13 pspp-s.csv > top-s.csv])
+AT_CHECK([head -n 13 pspp-pw.csv > top-pw.csv])
AT_CHECK([diff top-s.csv top-pw.csv])
1 2.2
end data.
-QUICK CLUSTER x y
+QUICK CLUSTER x y
/CRITERIA = CLUSTER(0)
.
])
AT_SETUP([QUICK CLUSTER /PRINT subcommand])
AT_DATA([quick-cluster.sps], [dnl
-data list notable list /cluster (A8) x (F) y (F).
+data list notable list /cluster (A8) x y (F8.0).
begin data.
A 10.45 9.38
A 10.67 9.17
AT_CHECK([pspp -O format=csv quick-cluster.sps], [0], [dnl
Table: Initial Cluster Centers
,Cluster,,
-,,,
,1,2,3
-,,,
x,-11,-12,11
y,-12,11,11
Table: Final Cluster Centers
,Cluster,,
-,,,
,1,2,3
-,,,
x,-10,-10,10
y,-10,10,10
Table: Number of Cases in each Cluster
+,,Count
Cluster,1,20
,2,19
,3,18
AT_CLEANUP
+
+
+dnl Run QUICK CLUSTER on three variables with two clusters and a /SAVE
+dnl subcommand that names both saved variables explicitly: CLUSTER (cluster)
+dnl and DISTANCE (distance).  The trailing LIST makes the saved columns
+dnl show up in the checked output next to x, y and z.
+AT_SETUP([QUICK CLUSTER with save])
+AT_DATA([quick-cluster.sps], [dnl
+DATA LIST notable LIST /x y z.
+BEGIN DATA.
+22,2930,4099
+17,3350,4749
+22,2640,3799
+20, 3250,4816
+15,4080,7827
+4,5,4
+5,6,5
+6,7,6
+7,8,7
+8,9,8
+9,10,9
+END DATA.
+QUICK CLUSTER x y z
+ /CRITERIA=CLUSTER(2) MXITER(20)
+ /SAVE = CLUSTER (cluster) DISTANCE (distance).
+
+list.
+])
+
+dnl Expected CSV output: the final cluster centers, the case counts per
+dnl cluster, then the data listing with one cluster and one distance value
+dnl for each of the 11 cases.
+AT_CHECK([pspp -O format=csv quick-cluster.sps], [0], [dnl
+Table: Final Cluster Centers
+,Cluster,
+,1,2
+x,6.50,19.20
+y,7.50,3250.00
+z,6.50,5058.00
+
+Table: Number of Cases in each Cluster
+,,Count
+Cluster,1,6
+,2,5
+Valid,,11
+
+Table: Data List
+x,y,z,cluster,distance
+22.00,2930.00,4099.00,2.00,1010.98
+17.00,3350.00,4749.00,2.00,324.79
+22.00,2640.00,3799.00,2.00,1399.00
+20.00,3250.00,4816.00,2.00,242.00
+15.00,4080.00,7827.00,2.00,2890.72
+4.00,5.00,4.00,1.00,4.33
+5.00,6.00,5.00,1.00,2.60
+6.00,7.00,6.00,1.00,.87
+7.00,8.00,7.00,1.00,.87
+8.00,9.00,8.00,1.00,2.60
+9.00,10.00,9.00,1.00,4.33
+])
+AT_CLEANUP
+
+
+dnl Run QUICK CLUSTER with /SAVE = DISTANCE only and without giving a
+dnl variable name.  The expected listing has a single extra column, QCL_0,
+dnl and no cluster membership column.
+dnl NOTE(review): QCL_0 is presumably the name PSPP generates by default;
+dnl confirm against the QUICK CLUSTER implementation.
+AT_SETUP([QUICK CLUSTER with single save])
+AT_DATA([quick-cluster.sps], [dnl
+DATA LIST notable LIST /x y z.
+BEGIN DATA.
+22,2930,4099
+17,3350,4749
+22,2640,3799
+20, 3250,4816
+15,4080,7827
+4,5,4
+5,6,5
+6,7,6
+7,8,7
+8,9,8
+9,10,9
+END DATA.
+QUICK CLUSTER x y z
+ /CRITERIA=CLUSTER(2) MXITER(20)
+ /SAVE = DISTANCE.
+
+list.
+])
+
+dnl Expected CSV output: the final cluster centers, the case counts per
+dnl cluster, then the data listing with x, y, z and the saved QCL_0 column.
+AT_CHECK([pspp -O format=csv quick-cluster.sps], [0], [dnl
+Table: Final Cluster Centers
+,Cluster,
+,1,2
+x,6.50,19.20
+y,7.50,3250.00
+z,6.50,5058.00
+
+Table: Number of Cases in each Cluster
+,,Count
+Cluster,1,6
+,2,5
+Valid,,11
+
+Table: Data List
+x,y,z,QCL_0
+22.00,2930.00,4099.00,1010.98
+17.00,3350.00,4749.00,324.79
+22.00,2640.00,3799.00,1399.00
+20.00,3250.00,4816.00,242.00
+15.00,4080.00,7827.00,2890.72
+4.00,5.00,4.00,4.33
+5.00,6.00,5.00,2.60
+6.00,7.00,6.00,.87
+7.00,8.00,7.00,.87
+8.00,9.00,8.00,2.60
+9.00,10.00,9.00,4.33
+])
+AT_CLEANUP
+
+
+dnl Regression test: this deliberately malformed syntax file was noticed to
+dnl crash at one point.  The input has a garbled BEGIN DATA line, stray
+dnl characters among the data values and a stray double quote in front of
+dnl the /criteria subcommand; it must be kept exactly as it is.
+AT_SETUP([QUICK CLUSTER crash on bizarre input])
+AT_DATA([badn.sps], [dnl
+data list notable list /x.
+begin da\a*
+22
+17
+22
+22
+15
+4,
+5,
+6,
+7,j8,
+9,
+end data.
+
+quick cluster x
+" /criteria=cluster(2) mxiter(20)
+ /save = distance
+ .
+
+list.
+])
+
+dnl Only the exit status is checked: pspp must exit with status 1, and
+dnl whatever it prints on stdout is ignored.
+AT_CHECK([pspp -O format=csv badn.sps], [1], [ignore])
+
+AT_CLEANUP