dnl PSPP - a program for statistical analysis.
dnl Copyright (C) 2017 Free Software Foundation, Inc.
-dnl
+dnl
dnl This program is free software: you can redistribute it and/or modify
dnl it under the terms of the GNU General Public License as published by
dnl the Free Software Foundation, either version 3 of the License, or
dnl (at your option) any later version.
-dnl
+dnl
dnl This program is distributed in the hope that it will be useful,
dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
dnl GNU General Public License for more details.
-dnl
+dnl
dnl You should have received a copy of the GNU General Public License
dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
-dnl AT_BANNER([QUICK CLUSTER])
+dnl
+AT_BANNER([QUICK CLUSTER])
AT_SETUP([QUICK CLUSTER with small data set])
AT_DATA([quick-cluster.sps], [dnl
Table: Final Cluster Centers
,Cluster,
-,,
,1,2
-,,
x,6.50,19.20
y,7.50,3250.00
z,6.50,5058.00
Table: Number of Cases in each Cluster
+,,Count
Cluster,1,6
,2,5
Valid,,11
AT_CLEANUP
AT_SETUP([QUICK CLUSTER with large data set])
+AT_KEYWORDS([slow])
AT_DATA([quick-cluster.sps], [dnl
input program.
loop #i = 1 to 50000.
AT_CHECK([cat pspp.csv], [0], [dnl
Table: Final Cluster Centers
,Cluster,,,
-,,,,
,1,2,3,4
-,,,,
x,NaN,NaN,NaN,3.00
Table: Number of Cases in each Cluster
+,,Count
Cluster,1,0
,2,0
,3,0
AT_SETUP([QUICK CLUSTER with pairwise missing])
dnl This test runs two programs, which are identical except that one
-dnl has an extra case with one missing value. Becuase the syntax uses
+dnl has an extra case with one missing value. Because the syntax uses
dnl NOINITIAL and NOUPDATE, the results should be identical except for
dnl the final classification.
3.1 2.0
end data.
-QUICK CLUSTER x y
+QUICK CLUSTER x y
/PRINT = INITIAL
/CRITERIA = CLUSTER(3) NOINITIAL NOUPDATE
.
. 2.3
end data.
-QUICK CLUSTER x y
+QUICK CLUSTER x y
/CRITERIA = CLUSTER(3) NOINITIAL NOUPDATE
/PRINT = INITIAL
/MISSING = PAIRWISE
AT_CHECK([pspp -O format=csv quick-pw.sps > pspp-pw.csv])
-AT_CHECK([head -n 18 pspp-s.csv > top-s.csv])
-AT_CHECK([head -n 18 pspp-pw.csv > top-pw.csv])
+AT_CHECK([head -n 13 pspp-s.csv > top-s.csv])
+AT_CHECK([head -n 13 pspp-pw.csv > top-pw.csv])
AT_CHECK([diff top-s.csv top-pw.csv])
1 2.2
end data.
-QUICK CLUSTER x y
+QUICK CLUSTER x y
/CRITERIA = CLUSTER(0)
.
])
AT_SETUP([QUICK CLUSTER /PRINT subcommand])
AT_DATA([quick-cluster.sps], [dnl
-data list notable list /cluster (A8) x (F) y (F).
+data list notable list /cluster (A8) x y (F8.0).
begin data.
A 10.45 9.38
A 10.67 9.17
AT_CHECK([pspp -O format=csv quick-cluster.sps], [0], [dnl
Table: Initial Cluster Centers
,Cluster,,
-,,,
,1,2,3
-,,,
x,-11,-12,11
y,-12,11,11
Table: Final Cluster Centers
,Cluster,,
-,,,
,1,2,3
-,,,
x,-10,-10,10
y,-10,10,10
Table: Number of Cases in each Cluster
+,,Count
Cluster,1,20
,2,19
,3,18
AT_CLEANUP
+
+
+dnl Exercises the SAVE subcommand with both CLUSTER and DISTANCE given
+dnl explicit variable names.  The syntax ends with LIST so that the new
+dnl columns, named cluster and distance, appear in the output and their
+dnl per-case values can be compared against the expected table.
+AT_SETUP([QUICK CLUSTER with save])
+AT_DATA([quick-cluster.sps], [dnl
+DATA LIST notable LIST /x y z.
+BEGIN DATA.
+22,2930,4099
+17,3350,4749
+22,2640,3799
+20, 3250,4816
+15,4080,7827
+4,5,4
+5,6,5
+6,7,6
+7,8,7
+8,9,8
+9,10,9
+END DATA.
+QUICK CLUSTER x y z
+ /CRITERIA=CLUSTER(2) MXITER(20)
+ /SAVE = CLUSTER (cluster) DISTANCE (distance).
+
+list.
+])
+
+dnl Expected CSV output: the two cluster summary tables followed by the
+dnl LIST table, whose last two columns are the saved variables.
+AT_CHECK([pspp -O format=csv quick-cluster.sps], [0], [dnl
+Table: Final Cluster Centers
+,Cluster,
+,1,2
+x,6.50,19.20
+y,7.50,3250.00
+z,6.50,5058.00
+
+Table: Number of Cases in each Cluster
+,,Count
+Cluster,1,6
+,2,5
+Valid,,11
+
+Table: Data List
+x,y,z,cluster,distance
+22.00,2930.00,4099.00,2.00,1010.98
+17.00,3350.00,4749.00,2.00,324.79
+22.00,2640.00,3799.00,2.00,1399.00
+20.00,3250.00,4816.00,2.00,242.00
+15.00,4080.00,7827.00,2.00,2890.72
+4.00,5.00,4.00,1.00,4.33
+5.00,6.00,5.00,1.00,2.60
+6.00,7.00,6.00,1.00,.87
+7.00,8.00,7.00,1.00,.87
+8.00,9.00,8.00,1.00,2.60
+9.00,10.00,9.00,1.00,4.33
+])
+AT_CLEANUP
+
+
+dnl Exercises the SAVE subcommand with DISTANCE only and no variable name
+dnl supplied.  The expected LIST output shows the saved column under the
+dnl name QCL_0 - presumably an automatically generated default name;
+dnl confirm against the QUICK CLUSTER implementation.
+AT_SETUP([QUICK CLUSTER with single save])
+AT_DATA([quick-cluster.sps], [dnl
+DATA LIST notable LIST /x y z.
+BEGIN DATA.
+22,2930,4099
+17,3350,4749
+22,2640,3799
+20, 3250,4816
+15,4080,7827
+4,5,4
+5,6,5
+6,7,6
+7,8,7
+8,9,8
+9,10,9
+END DATA.
+QUICK CLUSTER x y z
+ /CRITERIA=CLUSTER(2) MXITER(20)
+ /SAVE = DISTANCE.
+
+list.
+])
+
+dnl Expected CSV output: the two cluster summary tables followed by the
+dnl LIST table, which gains exactly one extra column for the saved distance.
+AT_CHECK([pspp -O format=csv quick-cluster.sps], [0], [dnl
+Table: Final Cluster Centers
+,Cluster,
+,1,2
+x,6.50,19.20
+y,7.50,3250.00
+z,6.50,5058.00
+
+Table: Number of Cases in each Cluster
+,,Count
+Cluster,1,6
+,2,5
+Valid,,11
+
+Table: Data List
+x,y,z,QCL_0
+22.00,2930.00,4099.00,1010.98
+17.00,3350.00,4749.00,324.79
+22.00,2640.00,3799.00,1399.00
+20.00,3250.00,4816.00,242.00
+15.00,4080.00,7827.00,2890.72
+4.00,5.00,4.00,4.33
+5.00,6.00,5.00,2.60
+6.00,7.00,6.00,.87
+7.00,8.00,7.00,.87
+8.00,9.00,8.00,2.60
+9.00,10.00,9.00,4.33
+])
+AT_CLEANUP
+
+
+dnl This one was noticed to crash at one point.
+dnl The syntax file below is malformed on purpose: it is the regression
+dnl input for that crash and must be kept exactly as it is.  Do not tidy up
+dnl the garbled lines.
+AT_SETUP([QUICK CLUSTER crash on bizarre input])
+AT_DATA([badn.sps], [dnl
+data list notable list /x.
+begin da\a*
+22
+17
+22
+22
+15
+4,
+5,
+6,
+7,j8,
+9,
+end data.
+
+quick cluster x
+" /criteria=cluster(2) mxiter(20)
+ /save = distance
+ .
+
+list.
+])
+
+dnl Only the exit status is pinned here: pspp is expected to exit with
+dnl status 1, and whatever it prints on stdout is ignored.
+AT_CHECK([pspp -O format=csv badn.sps], [1], [ignore])
+
+AT_CLEANUP