1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017, 2019 Free Software Foundation, Inc.
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 dnl GNU General Public License for more details.
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
17 AT_BANNER([MEANS procedure])
19 AT_SETUP([MEANS simple])
20 AT_KEYWORDS([categorical categoricals])
22 AT_DATA([means-simple.sps], [dnl
23 data list notable list /hand * score * w *.
34 means tables = score by hand
38 AT_CHECK([pspp -O format=csv means-simple.sps], [0], [dnl
39 Table: Case Processing Summary
41 ,Included,,Excluded,,Total,
42 ,N,Percent,N,Percent,N,Percent
43 score * hand,19,100.0%,0,.0%,19,100.0%
54 AT_SETUP([MEANS very simple])
55 AT_KEYWORDS([categorical categoricals])
57 AT_DATA([very-simple.sps], [dnl
58 data list notable list /score *.
85 AT_CHECK([pspp -O format=csv very-simple.sps], [0], [dnl
86 Table: Case Processing Summary
88 ,Included,,Excluded,,Total,
89 ,N,Percent,N,Percent,N,Percent
90 score,19,100.0%,0,.0%,19,100.0%
100 AT_SETUP([MEANS empty factor spec])
101 AT_KEYWORDS([categorical categoricals])
103 AT_DATA([means-bad.sps], [dnl
104 data list list /outcome *.
111 MEANS TABLES = outcome
115 AT_CHECK([pspp -O format=csv means-bad.sps], [1], [ignore])
121 AT_SETUP([MEANS parser bug])
122 AT_KEYWORDS([categorical categoricals])
124 dnl This bug caused an infinite loop.  Note a10a is not declared by DATA LIST; exit status 1 is expected.
125 AT_DATA([means-bad.sps], [dnl
126 DATA LIST notable LIST /a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 fylo *.
128 1 2 3 4 5 6 7 8 9 0 11
131 MEANS TABLES = a1 a2 a3 a4 a5 a6 a7 a8 a9 a10a BY fylo.
134 AT_CHECK([pspp -O format=csv means-bad.sps], [1], [ignore])
139 dnl This example is based upon info from https://libguides.library.kent.edu/SPSS/CompareMeans
140 AT_SETUP([MEANS default missing behaviour])
141 AT_KEYWORDS([categorical categoricals])
143 AT_DATA([means-missing.sps], [dnl
144 data list notable list /w * score * a * b *.
163 MEANS tables=score by a
166 MEANS tables=score by a by b
170 AT_CHECK([pspp -O format=csv means-missing.sps], [0], [dnl
171 Table: Case Processing Summary
173 ,Included,,Excluded,,Total,
174 ,N,Percent,N,Percent,N,Percent
175 score,392,90.1%,43,9.9%,435,100.0%
181 Table: Case Processing Summary
183 ,Included,,Excluded,,Total,
184 ,N,Percent,N,Percent,N,Percent
185 score * a,392,90.1%,43,9.9%,435,100.0%
193 Table: Case Processing Summary
195 ,Included,,Excluded,,Total,
196 ,N,Percent,N,Percent,N,Percent
197 score * a * b,383,88.0%,52,12.0%,435,100.0%
215 dnl This example is from https://www.spss-tutorials.com/spss-means-command/
216 AT_SETUP([MEANS two way])
217 AT_KEYWORDS([categorical categoricals])
219 AT_DATA([means-freelancer.sps], [dnl
220 data list notable list /income_2010 * gender sector_2010.
264 means income_2010 by gender by sector_2010
265 /cells count min mean stddev.
268 AT_CHECK([pspp -O format=csv means-freelancer.sps], [0], [dnl
269 Table: Case Processing Summary
271 ,Included,,Excluded,,Total,
272 ,N,Percent,N,Percent,N,Percent
273 income_2010 * gender * sector_2010,37,92.5%,3,7.5%,40,100.0%
276 gender,sector_2010,N,Minimum,Mean,Std. Deviation
277 .00,1.00,3,26586.48,35652.47,8078.46
278 ,2.00,4,14912.82,28319.78,11482.43
279 ,3.00,2,64857.02,67921.56,4333.91
280 ,4.00,7,45907.58,66849.04,11787.11
281 ,5.00,2,6072.40,32495.63,37368.09
282 ,Total,18,6072.40,49389.68,22371.48
283 1.00,1.00,2,23544.95,28367.79,6820.53
284 ,2.00,3,36205.85,46189.08,11949.93
285 ,3.00,4,29076.24,50083.97,16084.44
286 ,4.00,6,12706.65,45812.78,24995.16
287 ,5.00,4,16338.36,36235.92,14311.04
288 ,Total,19,12706.65,42918.90,17851.64
289 Total,1.00,5,23544.95,32738.60,7757.62
290 ,2.00,7,14912.82,35978.05,14309.27
291 ,3.00,6,29076.24,56029.83,15615.06
292 ,4.00,13,12706.65,57139.99,21187.85
293 ,5.00,6,6072.40,34989.15,20146.69
294 ,Total,37,6072.40,46066.84,20160.12
300 dnl Check that rows are suppressed and that things generally work ok
301 dnl when a 2 way instance contains an unbalanced set of
302 dnl categorical values.
303 AT_SETUP([MEANS unbalanced])
304 AT_KEYWORDS([categorical categoricals])
306 AT_DATA([means-unbalanced.sps], [dnl
307 data list notable list /b c x *.
316 * The data above lack a 5 1 case.
324 AT_CHECK([pspp -O format=csv means-unbalanced.sps], [0], [dnl
325 Table: Case Processing Summary
327 ,Included,,Excluded,,Total,
328 ,N,Percent,N,Percent,N,Percent
329 x * b * c,5,100.0%,0,.0%,5,100.0%
348 dnl This example was kindly provided by Dana Williams
349 AT_SETUP([MEANS three way])
350 AT_KEYWORDS([categorical categoricals])
352 AT_DATA([means-threeway.sps], [dnl
353 data list notable list /score a b c.
366 means score by a by b by c.
369 AT_CHECK([pspp -O format=csv means-threeway.sps], [0], [dnl
370 Table: Case Processing Summary
372 ,Included,,Excluded,,Total,
373 ,N,Percent,N,Percent,N,Percent
374 score * a * b * c,9,100.0%,0,.0%,9,100.0%
377 a,b,c,Mean,N,Std. Deviation
378 .00,.00,.00,3.00,1,NaN
381 ,,Total,16.00,3,21.66
385 ,Total,.00,4.00,2,1.41
388 ,,Total,11.80,5,16.36
389 1.00,.00,.00,7.00,1,NaN
395 ,Total,.00,8.00,2,1.41
398 Total,.00,.00,5.00,2,2.83
401 ,,Total,12.60,5,16.01
402 ,1.00,.00,7.00,2,2.83
405 ,Total,.00,6.00,4,2.58
408 ,,Total,10.33,9,11.73
413 dnl The above example again, but with string variables for
414 dnl the control vars.
415 AT_SETUP([MEANS three way string])
416 AT_KEYWORDS([categorical categoricals])
418 AT_DATA([means-threeway-string.sps], [dnl
419 data list notable list /score (f22.2) a (a24) b (a16) c (a8).
421 3 fooberrycrumblexzaQ fosilationwereqd zero
422 4 fooberrycrumblexzaQ fosilationwereqd one
423 41 fooberrycrumblexzaQ fosilationwereqd two
424 5 fooberrycrumblexzaQ onlyonekonboys zero
425 6 fooberrycrumblexzaQ onlyonekonboys one
426 7 wontledingbatsXASDF fosilationwereqd zero
427 8 wontledingbatsXASDF fosilationwereqd one
428 9 wontledingbatsXASDF onlyonekonboys zero
429 10 wontledingbatsXASDF onlyonekonboys one
432 means score by a by b by c.
435 AT_CHECK([pspp -O format=csv means-threeway-string.sps], [0], [dnl
436 Table: Case Processing Summary
438 ,Included,,Excluded,,Total,
439 ,N,Percent,N,Percent,N,Percent
440 score * a * b * c,9,100.0%,0,.0%,9,100.0%
443 a,b,c,Mean,N,Std. Deviation
444 fooberrycrumblexzaQ ,fosilationwereqd,one ,4.00,1,NaN
447 ,,Total,16.00,3,21.66
448 ,onlyonekonboys ,one ,6.00,1,NaN
451 ,Total,one ,5.00,2,1.41
454 ,,Total,11.80,5,16.36
455 wontledingbatsXASDF ,fosilationwereqd,one ,8.00,1,NaN
458 ,onlyonekonboys ,one ,10.00,1,NaN
461 ,Total,one ,9.00,2,1.41
464 Total,fosilationwereqd,one ,6.00,2,2.83
467 ,,Total,12.60,5,16.01
468 ,onlyonekonboys ,one ,8.00,2,2.83
471 ,Total,one ,7.00,4,2.58
474 ,,Total,10.33,9,11.73
481 dnl An example with multiple tables
482 AT_SETUP([MEANS multiple tables])
483 AT_KEYWORDS([categorical categoricals])
485 AT_DATA([means-multi-table.sps], [dnl
486 data list notable list /a * b * c * x * y *.
515 means table = x by b by c
521 AT_CHECK([pspp -O format=csv means-multi-table.sps], [0], [dnl
522 Table: Case Processing Summary
524 ,Included,,Excluded,,Total,
525 ,N,Percent,N,Percent,N,Percent
526 x * b * c,24,100.0%,0,.0%,24,100.0%
543 Table: Case Processing Summary
545 ,Included,,Excluded,,Total,
546 ,N,Percent,N,Percent,N,Percent
547 x * b,24,100.0%,0,.0%,24,100.0%
556 Table: Case Processing Summary
558 ,Included,,Excluded,,Total,
559 ,N,Percent,N,Percent,N,Percent
560 y * a * b,24,100.0%,0,.0%,24,100.0%
590 dnl An example with more than one dependent variable.
591 dnl This case uses a somewhat different table layout.
592 AT_SETUP([MEANS multi variable])
593 AT_KEYWORDS([categorical categoricals])
595 AT_DATA([means-multi-variable.sps], [dnl
596 data list notable list /b c x y.
625 table = x y by b by c
629 AT_CHECK([pspp -O format=csv means-multi-variable.sps], [0], [dnl
630 Table: Case Processing Summary
632 ,Included,,Excluded,,Total,
633 ,N,Percent,N,Percent,N,Percent
634 x * b * c,24,100.0%,0,.0%,24,100.0%
635 y * b * c,23,95.8%,1,4.2%,24,100.0%
639 3.00,.00,Mean,654.00,11.00
641 ,,Std. Deviation,.00,.00
642 ,1.00,Mean,789.00,55.00
644 ,,Std. Deviation,.00,.00
645 ,Total,Mean,721.50,33.00
647 ,,Std. Deviation,72.16,23.52
648 4.00,.00,Mean,987.00,99.00
650 ,,Std. Deviation,.00,.00
651 ,1.00,Mean,456.00,44.00
653 ,,Std. Deviation,.00,.00
654 ,Total,Mean,721.50,71.50
656 ,,Std. Deviation,283.83,29.40
657 5.00,.00,Mean,246.00,99.00
659 ,,Std. Deviation,.00,.00
660 ,1.00,Mean,123.00,55.00
662 ,,Std. Deviation,.00,.00
663 ,Total,Mean,184.50,73.86
665 ,,Std. Deviation,65.75,23.52
666 Total,.00,Mean,629.00,67.00
668 ,,Std. Deviation,316.50,44.40
669 ,1.00,Mean,456.00,51.33
671 ,,Std. Deviation,283.98,5.42
672 ,Total,Mean,542.50,58.83
674 ,,Std. Deviation,307.06,31.22
681 dnl This example is based upon one kindly provided by Dana Williams
682 dnl It exercises the most complex case where there are multiple
683 dnl dependent variables AND multiple control variables in each layer.
684 AT_SETUP([MEANS multi combination])
685 AT_KEYWORDS([categorical categoricals])
687 AT_DATA([means-multi-combination.sps], [dnl
688 data list notable list /one two three four five six.
756 recode six (2 = 62) (1 = 61).
757 recode five (2 = 52) (1 = 51).
758 recode four (2 = 42) (1 = 41).
759 recode three (2 = 32) (1 = 31).
763 means tables = one two BY three four BY five six.
766 AT_CHECK([pspp -O format=csv means-multi-combination.sps], [0], [dnl
767 Table: Case Processing Summary
769 ,Included,,Excluded,,Total,
770 ,N,Percent,N,Percent,N,Percent
771 one * three * five,64,100.0%,0,.0%,64,100.0%
772 two * three * five,64,100.0%,0,.0%,64,100.0%
773 one * three * six,64,100.0%,0,.0%,64,100.0%
774 two * three * six,64,100.0%,0,.0%,64,100.0%
775 one * four * five,64,100.0%,0,.0%,64,100.0%
776 two * four * five,64,100.0%,0,.0%,64,100.0%
777 one * four * six,64,100.0%,0,.0%,64,100.0%
778 two * four * six,64,100.0%,0,.0%,64,100.0%
780 Table: one * two * three * five
782 31.00,51.00,Mean,1.50000,1.50000
784 ,,Std. Deviation,.51640,.51640
785 ,52.00,Mean,1.50000,1.50000
787 ,,Std. Deviation,.51640,.51640
788 ,Total,Mean,1.50000,1.50000
790 ,,Std. Deviation,.50800,.50800
791 32.00,51.00,Mean,1.50000,1.50000
793 ,,Std. Deviation,.51640,.51640
794 ,52.00,Mean,1.50000,1.50000
796 ,,Std. Deviation,.51640,.51640
797 ,Total,Mean,1.50000,1.50000
799 ,,Std. Deviation,.50800,.50800
800 Total,51.00,Mean,1.50000,1.50000
802 ,,Std. Deviation,.50800,.50800
803 ,52.00,Mean,1.50000,1.50000
805 ,,Std. Deviation,.50800,.50800
806 ,Total,Mean,1.50000,1.50000
808 ,,Std. Deviation,.50395,.50395
810 Table: one * two * three * six
812 31.00,61.00,Mean,1.50000,1.50000
814 ,,Std. Deviation,.51640,.51640
815 ,62.00,Mean,1.50000,1.50000
817 ,,Std. Deviation,.51640,.51640
818 ,Total,Mean,1.50000,1.50000
820 ,,Std. Deviation,.50800,.50800
821 32.00,61.00,Mean,1.50000,1.50000
823 ,,Std. Deviation,.51640,.51640
824 ,62.00,Mean,1.50000,1.50000
826 ,,Std. Deviation,.51640,.51640
827 ,Total,Mean,1.50000,1.50000
829 ,,Std. Deviation,.50800,.50800
830 Total,61.00,Mean,1.50000,1.50000
832 ,,Std. Deviation,.50800,.50800
833 ,62.00,Mean,1.50000,1.50000
835 ,,Std. Deviation,.50800,.50800
836 ,Total,Mean,1.50000,1.50000
838 ,,Std. Deviation,.50395,.50395
840 Table: one * two * four * five
842 41.00,51.00,Mean,1.50000,1.50000
844 ,,Std. Deviation,.51640,.51640
845 ,52.00,Mean,1.50000,1.50000
847 ,,Std. Deviation,.51640,.51640
848 ,Total,Mean,1.50000,1.50000
850 ,,Std. Deviation,.50800,.50800
851 42.00,51.00,Mean,1.50000,1.50000
853 ,,Std. Deviation,.51640,.51640
854 ,52.00,Mean,1.50000,1.50000
856 ,,Std. Deviation,.51640,.51640
857 ,Total,Mean,1.50000,1.50000
859 ,,Std. Deviation,.50800,.50800
860 Total,51.00,Mean,1.50000,1.50000
862 ,,Std. Deviation,.50800,.50800
863 ,52.00,Mean,1.50000,1.50000
865 ,,Std. Deviation,.50800,.50800
866 ,Total,Mean,1.50000,1.50000
868 ,,Std. Deviation,.50395,.50395
870 Table: one * two * four * six
872 41.00,61.00,Mean,1.50000,1.50000
874 ,,Std. Deviation,.51640,.51640
875 ,62.00,Mean,1.50000,1.50000
877 ,,Std. Deviation,.51640,.51640
878 ,Total,Mean,1.50000,1.50000
880 ,,Std. Deviation,.50800,.50800
881 42.00,61.00,Mean,1.50000,1.50000
883 ,,Std. Deviation,.51640,.51640
884 ,62.00,Mean,1.50000,1.50000
886 ,,Std. Deviation,.51640,.51640
887 ,Total,Mean,1.50000,1.50000
889 ,,Std. Deviation,.50800,.50800
890 Total,61.00,Mean,1.50000,1.50000
892 ,,Std. Deviation,.50800,.50800
893 ,62.00,Mean,1.50000,1.50000
895 ,,Std. Deviation,.50800,.50800
896 ,Total,Mean,1.50000,1.50000
898 ,,Std. Deviation,.50395,.50395
904 dnl This example was observed to cause a crash in the destructor. Found by zzuf.
905 dnl The MEANS spec names thsee, which DATA LIST does not declare; exit status 1 is expected.
906 AT_SETUP([MEANS clean up])
907 AT_KEYWORDS([categorical categoricals])
909 AT_DATA([means-bad.sps], [dnl
910 data list notable list /one two three four five six.
918 means tables = one two BY thsee four BY five six.
921 AT_CHECK([pspp -O format=csv means-bad.sps], [1], [ignore])
926 dnl Another example which caused a crash.  No case is included (0 of 3); pspp should warn and exit with status 0.
928 AT_SETUP([MEANS control all missing])
929 AT_KEYWORDS([categorical categoricals])
931 AT_DATA([means-bad.sps], [dnl
932 data list notable list /a * b * y * uu *.
939 means table = b by a by y by uu
943 AT_CHECK([pspp -O format=csv means-bad.sps], [0], [dnl
944 Table: Case Processing Summary
946 ,Included,,Excluded,,Total,
947 ,N,Percent,N,Percent,N,Percent
948 b * a * y * uu,0,.0%,3,100.0%,3,100.0%
950 "warning: The table ""a * y * uu"" has no non-empty control variables. No result for this table will be displayed."
956 dnl Do some tests on the MISSING keyword.
957 AT_SETUP([MEANS missing classes])
958 AT_KEYWORDS([categorical categoricals])
960 AT_DATA([means-missing-classes.sps], [dnl
961 data list notable list /hand * score *.
985 missing values score (99).
986 missing values hand (9).
988 means tables=score by hand
993 means tables=score by hand
998 means tables=score by hand
1004 AT_CHECK([pspp -O format=csv means-missing-classes.sps], [0], [dnl
1005 Table: Case Processing Summary
1007 ,Included,,Excluded,,Total,
1008 ,N,Percent,N,Percent,N,Percent
1009 score * hand,18,90.0%,2,10.0%,20,100.0%
1018 Table: Case Processing Summary
1020 ,Included,,Excluded,,Total,
1021 ,N,Percent,N,Percent,N,Percent
1022 score * hand,19,95.0%,1,5.0%,20,100.0%
1031 Table: Case Processing Summary
1033 ,Included,,Excluded,,Total,
1034 ,N,Percent,N,Percent,N,Percent
1035 score * hand,17,85.0%,3,15.0%,20,100.0%
1047 dnl Make sure that behaviour with SPLIT is correct.
1048 AT_SETUP([MEANS split])
1049 AT_KEYWORDS([categorical categoricals])
1051 AT_DATA([means-split.sps], [dnl
1052 data list notable list /b g *.
1064 means b /cells = count mean.
1067 AT_CHECK([pspp -O format=csv means-split.sps], [0], [dnl
1068 Table: Case Processing Summary
1070 ,Included,,Excluded,,Total,
1071 ,N,Percent,N,Percent,N,Percent
1072 b,4,100.0%,0,.0%,4,100.0%
1078 Table: Case Processing Summary
1080 ,Included,,Excluded,,Total,
1081 ,N,Percent,N,Percent,N,Percent
1082 b,2,100.0%,0,.0%,2,100.0%