DESCRIPTIVES: Fix treatment of FILTER in presence of Z scores.
[pspp] / tests / language / stats / descriptives.at
dnl Autotest test groups for the PSPP DESCRIPTIVES procedure.  The groups
dnl visible here each write a syntax file, run pspp with CSV output, and
dnl compare what pspp prints against the expected text.
dnl The banner names the category under which these groups are reported.
1 AT_BANNER([DESCRIPTIVES procedure])
2
dnl Reads 17 one-column numeric variables (V0 to V16) from 10 inline records,
dnl runs DESCRIPTIVES on all of them with every statistic (stat=all) in serial
dnl format, and compares the CSV output -- title, variable layout table, and
dnl statistics table -- with the expected text.  No case has a missing value.
3 AT_SETUP([DESCRIPTIVES basics])
4 AT_DATA([descriptives.sps],
5   [title 'Test DESCRIPTIVES procedure'.
6
7 data list / V0 to V16 1-17.
8 begin data.
9 12128989012389023
10 34128080123890128
11 56127781237893217
12 78127378123793112
13 90913781237892318
14 37978547878935789
15 52878237892378279
16 12377912789378932
17 26787654347894348
18 29137178947891888
19 end data.
20
21 descript all/stat=all/format=serial.
22 ])
23 AT_CHECK([pspp -O format=csv descriptives.sps], [0],
24   [Title: Test DESCRIPTIVES procedure
25
26 Table: Reading 1 record from INLINE.
27 Variable,Record,Columns,Format
28 V0,1,1-  1,F1.0
29 V1,1,2-  2,F1.0
30 V2,1,3-  3,F1.0
31 V3,1,4-  4,F1.0
32 V4,1,5-  5,F1.0
33 V5,1,6-  6,F1.0
34 V6,1,7-  7,F1.0
35 V7,1,8-  8,F1.0
36 V8,1,9-  9,F1.0
37 V9,1,10- 10,F1.0
38 V10,1,11- 11,F1.0
39 V11,1,12- 12,F1.0
40 V12,1,13- 13,F1.0
41 V13,1,14- 14,F1.0
42 V14,1,15- 15,F1.0
43 V15,1,16- 16,F1.0
44 V16,1,17- 17,F1.0
45
46 Table: Valid cases = 10; cases with missing value(s) = 0.
47 Variable,Valid N,Missing N,Mean,S.E. Mean,Std Dev,Variance,Kurtosis,S.E. Kurt,Skewness,S.E. Skew,Range,Minimum,Maximum,Sum
48 V0,10,0,3.80,.84,2.66,7.07,-.03,1.33,.89,.69,8.00,1.00,9.00,38.00
49 V1,10,0,4.60,.96,3.03,9.16,-1.39,1.33,-.03,.69,9.00,.00,9.00,46.00
50 V2,10,0,4.10,1.16,3.67,13.43,-2.02,1.33,.48,.69,8.00,1.00,9.00,41.00
51 V3,10,0,4.10,.87,2.77,7.66,-2.05,1.33,.42,.69,7.00,1.00,8.00,41.00
52 V4,10,0,7.00,.47,1.49,2.22,7.15,1.33,-2.52,.69,5.00,3.00,8.00,70.00
53 V5,10,0,4.90,1.03,3.25,10.54,-1.40,1.33,-.20,.69,9.00,.00,9.00,49.00
54 V6,10,0,5.90,.80,2.51,6.32,-.29,1.33,-.96,.69,7.00,1.00,8.00,59.00
55 V7,10,0,4.70,1.10,3.47,12.01,-1.99,1.33,-.16,.69,9.00,.00,9.00,47.00
56 V8,10,0,4.10,1.10,3.48,12.10,-1.93,1.33,.37,.69,9.00,.00,9.00,41.00
57 V9,10,0,4.30,.87,2.75,7.57,-.87,1.33,.73,.69,8.00,1.00,9.00,43.00
58 V10,10,0,5.50,.85,2.68,7.17,-1.84,1.33,-.33,.69,7.00,2.00,9.00,55.00
59 V11,10,0,6.50,.78,2.46,6.06,-1.28,1.33,-.89,.69,6.00,3.00,9.00,65.00
60 V12,10,0,7.90,.60,1.91,3.66,5.24,1.33,-2.21,.69,6.00,3.00,9.00,79.00
61 V13,10,0,4.30,.99,3.13,9.79,-1.25,1.33,.33,.69,9.00,.00,9.00,43.00
62 V14,10,0,3.60,1.01,3.20,10.27,-.96,1.33,.81,.69,9.00,.00,9.00,36.00
63 V15,10,0,3.70,.92,2.91,8.46,-1.35,1.33,.71,.69,7.00,1.00,8.00,37.00
64 V16,10,0,6.40,.91,2.88,8.27,-1.14,1.33,-.92,.69,7.00,2.00,9.00,64.00
65 ])
66 AT_CLEANUP
67
dnl DESCRIPTIVES_MISSING_DATA expands to PSPP syntax that defines V1 to V3
dnl from columns 1-3, declares 1 as a user-missing value for all three, and
dnl supplies seven inline cases, some of which contain blank fields.
dnl The trailing spaces on the blank data lines are significant.
dnl It is reused as the data set of the four missing-data test groups.
68 m4_define([DESCRIPTIVES_MISSING_DATA],
69   [data list notable / V1 TO V3 1-3.
70 mis val v1 to v3 (1).
71 begin data.
72 111
73    
74  1 
75 1 1
76 112
77 123
78 234
79 end data.
80 ])
81
dnl No MISSING subcommand is given.  The expected table counts both blank
dnl fields and the user-missing value 1 as missing, variable by variable
dnl (for example V1 has 1 valid and 6 missing cases).
82 AT_SETUP([DESCRIPTIVES -- excluding missing data])
83 AT_DATA([descriptives.sps],
84   [DESCRIPTIVES_MISSING_DATA
85 descript all/stat=all/format=serial.
86 ])
87 AT_CHECK([pspp -O format=csv descriptives.sps], [0],
88   [Table: Valid cases = 7; cases with missing value(s) = 6.
89 Variable,Valid N,Missing N,Mean,S.E. Mean,Std Dev,Variance,Kurtosis,S.E. Kurt,Skewness,S.E. Skew,Range,Minimum,Maximum,Sum
90 V1,1,6,2.00,.  ,.  ,.  ,.  ,.  ,.  ,.  ,.00,2.00,2.00,2.00
91 V2,2,5,2.50,.50,.71,.50,.  ,.  ,.  ,.  ,1.00,2.00,3.00,5.00
92 V3,3,4,3.00,.58,1.00,1.00,.  ,.  ,.00,1.22,2.00,2.00,4.00,9.00
93 ])
94 AT_CLEANUP
95
dnl Uses missing=include.  The expected table treats the user-missing value 1
dnl as valid, so only blank fields remain missing (each variable has 5 valid
dnl and 2 missing cases).
96 AT_SETUP([DESCRIPTIVES -- including missing data])
97 AT_DATA([descriptives.sps],
98   [DESCRIPTIVES_MISSING_DATA
99 descript all/stat=all/format=serial/missing=include.
100 ])
101 AT_CHECK([pspp -O format=csv descriptives.sps], [0],
102   [Table: Valid cases = 7; cases with missing value(s) = 3.
103 Variable,Valid N,Missing N,Mean,S.E. Mean,Std Dev,Variance,Kurtosis,S.E. Kurt,Skewness,S.E. Skew,Range,Minimum,Maximum,Sum
104 V1,5,2,1.20,.20,.45,.20,5.00,2.00,2.24,.91,1.00,1.00,2.00,6.00
105 V2,5,2,1.60,.40,.89,.80,.31,2.00,1.26,.91,2.00,1.00,3.00,8.00
106 V3,5,2,2.20,.58,1.30,1.70,-1.49,2.00,.54,.91,3.00,1.00,4.00,11.00
107 ])
108 AT_CLEANUP
109
dnl Uses missing=listwise.  The expected table is computed from the single
dnl case that has no blank field and no user-missing 1 in any variable
dnl (values 2, 3, 4), so every statistic that needs more than one case is
dnl shown as missing.
110 AT_SETUP([DESCRIPTIVES -- excluding missing data listwise])
111 AT_DATA([descriptives.sps],
112   [DESCRIPTIVES_MISSING_DATA
113 descript all/stat=all/format=serial/missing=listwise.
114 ])
115 AT_CHECK([pspp -O format=csv descriptives.sps], [0],
116   [Table: Valid cases = 1; cases with missing value(s) = 6.
117 Variable,Valid N,Missing N,Mean,S.E. Mean,Std Dev,Variance,Kurtosis,S.E. Kurt,Skewness,S.E. Skew,Range,Minimum,Maximum,Sum
118 V1,1,0,2.00,.  ,.  ,.  ,.  ,.  ,.  ,.  ,.00,2.00,2.00,2.00
119 V2,1,0,3.00,.  ,.  ,.  ,.  ,.  ,.  ,.  ,.00,3.00,3.00,3.00
120 V3,1,0,4.00,.  ,.  ,.  ,.  ,.  ,.  ,.  ,.00,4.00,4.00,4.00
121 ])
122 AT_CLEANUP
123
dnl Combines missing=listwise with include.  The user-missing value 1 counts
dnl as valid, and the expected table is computed from the four cases that
dnl contain no blank field (111, 112, 123, 234).
124 AT_SETUP([DESCRIPTIVES -- including missing data listwise])
125 AT_DATA([descriptives.sps],
126   [DESCRIPTIVES_MISSING_DATA
127 descript all/stat=all/format=serial/missing=listwise include.
128 ])
129 AT_CHECK([pspp -O format=csv descriptives.sps], [0],
130   [Table: Valid cases = 4; cases with missing value(s) = 3.
131 Variable,Valid N,Missing N,Mean,S.E. Mean,Std Dev,Variance,Kurtosis,S.E. Kurt,Skewness,S.E. Skew,Range,Minimum,Maximum,Sum
132 V1,4,0,1.25,.25,.50,.25,4.00,2.62,2.00,1.01,1.00,1.00,2.00,5.00
133 V2,4,0,1.75,.48,.96,.92,-1.29,2.62,.85,1.01,2.00,1.00,3.00,7.00
134 V3,4,0,2.50,.65,1.29,1.67,-1.20,2.62,.00,1.01,3.00,1.00,4.00,10.00
135 ])
136 AT_CLEANUP
137
dnl Regression test: requests only the mean (stat=mean) of X = 0, 1, ..., 5
dnl with the default output format set to F8.3, and expects N = 6 and a mean
dnl of 2.500.
138 AT_SETUP([DESCRIPTIVES bug calculating mean only])
139 AT_DATA([descriptives.sps],
140   [SET FORMAT F8.3.
141
142 data list notable / X 1.
143 begin data.
144 0
145 1
146 2
147 3
148 4
149 5
150 end data.
151
152 descript all/stat=mean.
153 ])
154 AT_CHECK([pspp -O format=csv descriptives.sps], [0],
155   [Table: Valid cases = 6; cases with missing value(s) = 0.
156 Variable,N,Mean
157 X,6,2.500
158 ])
159 AT_CLEANUP
160
161 dnl Git history shows that this was probably a bug in the PSPP
162 dnl core regarding multipass procedures, not anything specific
163 dnl to DESCRIPTIVES.
dnl The test itself: a TEMPORARY SELECT IF id < 7 precedes DESCRIPTIVES, and
dnl the expected table covers only the six selected cases (N = 6, mean 3.00,
dnl maximum 4.00), so the seventh case (abc = 5.0) must not be counted.
164 AT_SETUP([DESCRIPTIVES bug with TEMPORARY])
165 AT_DATA([descriptives.sps], [dnl
166 DATA LIST LIST NOTABLE /id * abc *.
167 BEGIN DATA.
168 1 3.5
169 2 2.0
170 3 2.0
171 4 3.5
172 5 3.0
173 6 4.0
174 7 5.0
175 END DATA.
176
177 TEMPORARY.
178 SELECT IF id < 7 .
179
180 DESCRIPTIVES /VAR=abc.
181 ])
182 AT_CHECK([pspp -O format=csv descriptives.sps], [0], [dnl
183 Table: Valid cases = 6; cases with missing value(s) = 0.
184 Variable,N,Mean,Std Dev,Minimum,Maximum
185 abc,6,3.00,.84,2.00,4.00
186 ])
187 AT_CLEANUP
188
dnl Runs DESCRIPTIVES with the SAVE subcommand on two three-case variables.
dnl The expected output contains the source-to-target mapping (a to Za,
dnl b to Zb), the statistics table, and a LIST of the data in which both
dnl Z-score variables take the values -1, 0 and 1.
189 AT_SETUP([DESCRIPTIVES -- Z scores])
190 AT_DATA([descriptives.sps], [dnl
191 DATA LIST LIST NOTABLE /a b.
192 BEGIN DATA.
193 1 50
194 2 60
195 3 70
196 END DATA.
197
198 DESCRIPTIVES /VAR=a b /SAVE.
199 LIST.
200 ])
201 AT_CHECK([pspp -O format=csv descriptives.sps], [0], [dnl
202 Table: Mapping of variables to corresponding Z-scores.
203 Source,Target
204 a,Za
205 b,Zb
206
207 Table: Valid cases = 3; cases with missing value(s) = 0.
208 Variable,N,Mean,Std Dev,Minimum,Maximum
209 a,3,2.00,1.00,1.00,3.00
210 b,3,60.00,10.00,50.00,70.00
211
212 Table: Data List
213 a,b,Za,Zb
214 1.00,50.00,-1.00,-1.00
215 2.00,60.00,.00,.00
216 3.00,70.00,1.00,1.00
217 ])
218 AT_CLEANUP
219
dnl Same as the plain Z-score case but with SPLIT FILE BY group in effect.
dnl The expected output has one statistics table and one Data List table per
dnl group, and the Z scores in each group are standardized with that group's
dnl own mean and standard deviation (for example (100 - 300) / 182.57 = -1.10
dnl in group 2).
220 AT_SETUP([DESCRIPTIVES -- Z scores with SPLIT FILE])
221 AT_DATA([descriptives.sps], [dnl
222 DATA LIST LIST NOTABLE /group a b.
223 BEGIN DATA.
224 1 1 50
225 1 2 60
226 1 3 70
227 2 100 6000
228 2 200 7000
229 2 400 9000
230 2 500 10000
231 END DATA.
232
233 SPLIT FILE BY group.
234 DESCRIPTIVES /VAR=a b /SAVE.
235 LIST.
236 ])
237 AT_CHECK([pspp -O format=csv descriptives.sps], [0], [dnl
238 Table: Mapping of variables to corresponding Z-scores.
239 Source,Target
240 a,Za
241 b,Zb
242
243 Variable,Value,Label
244 group,1.00,
245
246 Table: Valid cases = 3; cases with missing value(s) = 0.
247 Variable,N,Mean,Std Dev,Minimum,Maximum
248 a,3,2.00,1.00,1.00,3.00
249 b,3,60.00,10.00,50.00,70.00
250
251 Variable,Value,Label
252 group,2.00,
253
254 Table: Valid cases = 4; cases with missing value(s) = 0.
255 Variable,N,Mean,Std Dev,Minimum,Maximum
256 a,4,300.00,182.57,100.00,500.00
257 b,4,8000.00,1825.74,6000.00,10000.00
258
259 Variable,Value,Label
260 group,1.00,
261
262 Table: Data List
263 group,a,b,Za,Zb
264 1.00,1.00,50.00,-1.00,-1.00
265 1.00,2.00,60.00,.00,.00
266 1.00,3.00,70.00,1.00,1.00
267
268 Variable,Value,Label
269 group,2.00,
270
271 Table: Data List
272 group,a,b,Za,Zb
273 2.00,100.00,6000.00,-1.10,-1.10
274 2.00,200.00,7000.00,-.55,-.55
275 2.00,400.00,9000.00,.55,.55
276 2.00,500.00,10000.00,1.10,1.10
277 ])
278 AT_CLEANUP
279
280 dnl Ideally DESCRIPTIVES would not make temporary transformations permanent
281 dnl as it does now (bug #38786), so these results are imperfect.  However,
282 dnl this test does verify that DESCRIPTIVES does not crash in this situation
283 dnl (as it once did).
dnl What is checked: pspp exits successfully, prints the warning that
dnl DESCRIPTIVES with Z scores ignores TEMPORARY, and the following LIST
dnl shows only ids 1 to 6, i.e. the SELECT IF has become permanent.
284 AT_SETUP([DESCRIPTIVES -- Z scores bug with TEMPORARY])
285 AT_DATA([descriptives.sps], [dnl
286 DATA LIST LIST NOTABLE /id abc.
287 BEGIN DATA.
288 1 3.5
289 2 2.0
290 3 2.0
291 4 3.5
292 5 3.0
293 6 4.0
294 7 5.0
295 END DATA.
296
297 TEMPORARY.
298 SELECT IF id < 7 .
299
300 DESCRIPTIVES /VAR=abc/SAVE.
301 LIST.
302 ])
303 AT_CHECK([pspp -O format=csv descriptives.sps], [0], [dnl
304 descriptives.sps:15: warning: DESCRIPTIVES: DESCRIPTIVES with Z scores ignores TEMPORARY.  Temporary transformations will be made permanent.
305
306 Table: Mapping of variables to corresponding Z-scores.
307 Source,Target
308 abc,Zabc
309
310 Table: Valid cases = 6; cases with missing value(s) = 0.
311 Variable,N,Mean,Std Dev,Minimum,Maximum
312 abc,6,3.00,.84,2.00,4.00
313
314 Table: Data List
315 id,abc,Zabc
316 1.00,3.50,.60
317 2.00,2.00,-1.20
318 3.00,2.00,-1.20
319 4.00,3.50,.60
320 5.00,3.00,.00
321 6.00,4.00,1.20
322 ])
323 AT_CLEANUP
324
325 dnl This test was supplied by Mindaugas as part of the report for bug #42012.
dnl Runs DESCRIPTIVES with SAVE five times over the same variable under
dnl different FILTER and SPLIT FILE settings, producing the Z-score variables
dnl Zx and ZSC001 to ZSC004.  The first check only runs pspp, writing CSV
dnl output to pspp.csv; the second uses sed to extract everything from the
dnl final "Table: Data List" line onward and compares it with the expected
dnl listing.  In that listing, cases excluded by the active filter have a
dnl missing Z score (printed as a period followed by two spaces).
dnl The quadrigraph at the end of the first expected data row keeps that
dnl row's trailing spaces from being lost.
326 AT_SETUP([DESCRIPTIVES -- Z scores with FILTER])
327 AT_DATA([descriptives.sps], [dnl
328 DATA LIST LIST/filter1 filter2 x.
329 BEGIN DATA.
330 0,0,300
331 0,1,200
332 0,1,100
333 1,0,5
334 1,0,4
335 1,1,3
336 1,1,2
337 1,1,1
338 END DATA.
339
340 FILTER OFF.
341 SPLIT FILE OFF.
342 DESCRIPTIVES /VARIABLES=X /SAVE.
343
344 FILTER BY filter1.
345 SPLIT FILE OFF.
346 DESCRIPTIVES /VARIABLES=X /SAVE.
347
348 FILTER OFF.
349 SORT CASES BY filter1.
350 SPLIT FILE BY filter1.
351 DESCRIPTIVES /VARIABLES=X /SAVE.
352
353 FILTER BY filter2.
354 SPLIT FILE BY filter1.
355 DESCRIPTIVES /VARIABLES=X /SAVE.
356
357 FILTER OFF.
358 SORT CASES BY filter1 filter2.
359 SPLIT FILE BY filter1 filter2.
360 DESCRIPTIVES /VARIABLES=X /SAVE.
361 EXECUTE.
362
363 SPLIT FILE OFF.
364 LIST.
365 ])
366 AT_CHECK([pspp -o pspp.csv descriptives.sps])
367 AT_CHECK([sed -n '/Table: Data List/,$p' < pspp.csv], [0], [dnl
368 Table: Data List
369 filter1,filter2,x,Zx,ZSC001,ZSC002,ZSC003,ZSC004
370 .00,.00,300.00,1.94,.  ,1.00,.  ,.  @&t@
371 .00,1.00,200.00,1.07,.  ,.00,.71,.71
372 .00,1.00,100.00,.20,.  ,-1.00,-.71,-.71
373 1.00,.00,5.00,-.62,1.26,1.26,.  ,.71
374 1.00,.00,4.00,-.63,.63,.63,.  ,-.71
375 1.00,1.00,3.00,-.64,.00,.00,1.00,1.00
376 1.00,1.00,2.00,-.65,-.63,-.63,.00,.00
377 1.00,1.00,1.00,-.66,-1.26,-1.26,-1.00,-1.00
378 ])
379 AT_CLEANUP