1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017, 2019 Free Software Foundation, Inc.
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 dnl GNU General Public License for more details.
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
17 AT_BANNER([MEANS procedure])
19 AT_SETUP([MEANS simple])
20 AT_KEYWORDS([categorical categoricals])
22 AT_DATA([means-simple.sps], [dnl
23 data list notable list /hand * score * w *.
34 means tables = score by hand
38 AT_CHECK([pspp -O format=csv means-simple.sps], [0], [dnl
39 Table: Case Processing Summary
41 ,Included,,Excluded,,Total,
42 ,N,Percent,N,Percent,N,Percent
43 score * hand,19,100.0%,0,.0%,19,100.0%
54 AT_SETUP([MEANS very simple])
55 AT_KEYWORDS([categorical categoricals])
57 AT_DATA([very-simple.sps], [dnl
58 data list notable list /score *.
85 AT_CHECK([pspp -O format=csv very-simple.sps], [0], [dnl
86 Table: Case Processing Summary
88 ,Included,,Excluded,,Total,
89 ,N,Percent,N,Percent,N,Percent
90 score,19,100.0%,0,.0%,19,100.0%
100 AT_SETUP([MEANS empty factor spec])
101 AT_KEYWORDS([categorical categoricals])
103 AT_DATA([means-bad.sps], [dnl
104 data list list /outcome *.
111 MEANS TABLES = outcome
115 AT_CHECK([pspp -O format=csv means-bad.sps], [1], [ignore])
121 AT_SETUP([MEANS parser bug])
122 AT_KEYWORDS([categorical categoricals])
124 dnl This bug caused an infinite loop
125 AT_DATA([means-bad.sps], [dnl
126 DATA LIST notable LIST /a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 fylo *.
128 1 2 3 4 5 6 7 8 9 0 11
131 MEANS TABLES = a1 a2 a3 a4 a5 a6 a7 a8 a9 a10a BY fylo.
134 AT_CHECK([pspp -O format=csv means-bad.sps], [1], [ignore])
139 dnl This example is based upon info from https://libguides.library.kent.edu/SPSS/CompareMeans
140 AT_SETUP([MEANS default missing behaviour])
141 AT_KEYWORDS([categorical categoricals])
143 AT_DATA([means-missing.sps], [dnl
144 data list notable list /w * score * a * b *.
163 MEANS tables=score by a
166 MEANS tables=score by a by b
170 AT_CHECK([pspp -O format=csv means-missing.sps], [0], [dnl
171 Table: Case Processing Summary
173 ,Included,,Excluded,,Total,
174 ,N,Percent,N,Percent,N,Percent
175 score,392,90.1%,43,9.9%,435,100.0%
181 Table: Case Processing Summary
183 ,Included,,Excluded,,Total,
184 ,N,Percent,N,Percent,N,Percent
185 score * a,392,90.1%,43,9.9%,435,100.0%
193 Table: Case Processing Summary
195 ,Included,,Excluded,,Total,
196 ,N,Percent,N,Percent,N,Percent
197 score * a * b,383,88.0%,52,12.0%,435,100.0%
215 dnl This example is from https://www.spss-tutorials.com/spss-means-command/
216 AT_SETUP([MEANS two way])
217 AT_KEYWORDS([categorical categoricals])
219 AT_DATA([means-freelancer.sps], [dnl
220 data list notable list /income_2010 * gender sector_2010.
264 means income_2010 by gender by sector_2010
265 /cells count min mean stddev.
268 AT_CHECK([pspp -O format=csv means-freelancer.sps], [0], [dnl
269 Table: Case Processing Summary
271 ,Included,,Excluded,,Total,
272 ,N,Percent,N,Percent,N,Percent
273 income_2010 * gender * sector_2010,37,92.5%,3,7.5%,40,100.0%
276 gender,sector_2010,N,Minimum,Mean,Std. Deviation
277 .00,1.00,3,26586.48,35652.47,8078.46
278 ,2.00,4,14912.82,28319.78,11482.43
279 ,3.00,2,64857.02,67921.56,4333.91
280 ,4.00,7,45907.58,66849.04,11787.11
281 ,5.00,2,6072.40,32495.63,37368.09
282 ,Total,18,6072.40,49389.68,22371.48
283 1.00,1.00,2,23544.95,28367.79,6820.53
284 ,2.00,3,36205.85,46189.08,11949.93
285 ,3.00,4,29076.24,50083.97,16084.44
286 ,4.00,6,12706.65,45812.78,24995.16
287 ,5.00,4,16338.36,36235.92,14311.04
288 ,Total,19,12706.65,42918.90,17851.64
289 Total,1.00,5,23544.95,32738.60,7757.62
290 ,2.00,7,14912.82,35978.05,14309.27
291 ,3.00,6,29076.24,56029.83,15615.06
292 ,4.00,13,12706.65,57139.99,21187.85
293 ,5.00,6,6072.40,34989.15,20146.69
294 ,Total,37,6072.40,46066.84,20160.12
300 dnl Check that rows are suppressed and that things generally work ok
301 dnl when a 2 way instance contains an unbalanced set of
302 dnl categorical values.
303 AT_SETUP([MEANS unbalanced])
304 AT_KEYWORDS([categorical categoricals])
306 AT_DATA([means-unbalanced.sps], [dnl
307 data list notable list /b c x *.
316 * The data above lack a 5 1 case.
324 AT_CHECK([pspp -O format=csv means-unbalanced.sps], [0], [dnl
325 Table: Case Processing Summary
327 ,Included,,Excluded,,Total,
328 ,N,Percent,N,Percent,N,Percent
329 x * b * c,5,100.0%,0,.0%,5,100.0%
348 dnl This example kindly provided by Dana Williams
349 AT_SETUP([MEANS three way])
350 AT_KEYWORDS([categorical categoricals])
352 AT_DATA([means-threeway.sps], [dnl
353 data list notable list /score a b c.
366 means score by a by b by c.
369 AT_CHECK([pspp -O format=csv means-threeway.sps], [0], [dnl
370 Table: Case Processing Summary
372 ,Included,,Excluded,,Total,
373 ,N,Percent,N,Percent,N,Percent
374 score * a * b * c,9,100.0%,0,.0%,9,100.0%
377 a,b,c,Mean,N,Std. Deviation
378 .00,.00,.00,3.00,1,NaN
381 ,,Total,16.00,3,21.66
385 ,Total,.00,4.00,2,1.41
388 ,,Total,11.80,5,16.36
389 1.00,.00,.00,7.00,1,NaN
395 ,Total,.00,8.00,2,1.41
398 Total,.00,.00,5.00,2,2.83
401 ,,Total,12.60,5,16.01
402 ,1.00,.00,7.00,2,2.83
405 ,Total,.00,6.00,4,2.58
408 ,,Total,10.33,9,11.73
413 dnl The above example again, but with string variables for
414 dnl the control vars.
415 AT_SETUP([MEANS three way string])
416 AT_KEYWORDS([categorical categoricals])
418 AT_DATA([means-threeway-string.sps], [dnl
419 data list notable list /score (f22.2) a (a24) b (a16) c (a8).
421 3 fooberrycrumblexzaQ fosilationwereqd zero
422 4 fooberrycrumblexzaQ fosilationwereqd one
423 41 fooberrycrumblexzaQ fosilationwereqd two
424 5 fooberrycrumblexzaQ onlyonekonboys zero
425 6 fooberrycrumblexzaQ onlyonekonboys one
426 7 wontledingbatsXASDF fosilationwereqd zero
427 8 wontledingbatsXASDF fosilationwereqd one
428 9 wontledingbatsXASDF onlyonekonboys zero
429 10 wontledingbatsXASDF onlyonekonboys one
432 means score by a by b by c.
435 AT_CHECK([pspp -O format=csv means-threeway-string.sps], [0], [dnl
436 Table: Case Processing Summary
438 ,Included,,Excluded,,Total,
439 ,N,Percent,N,Percent,N,Percent
440 score * a * b * c,9,100.0%,0,.0%,9,100.0%
443 a,b,c,Mean,N,Std. Deviation
444 fooberrycrumblexzaQ,fosilationwereqd,one,4.00,1,NaN
447 ,,Total,16.00,3,21.66
448 ,onlyonekonboys,one,6.00,1,NaN
451 ,Total,one,5.00,2,1.41
454 ,,Total,11.80,5,16.36
455 wontledingbatsXASDF,fosilationwereqd,one,8.00,1,NaN
458 ,onlyonekonboys,one,10.00,1,NaN
461 ,Total,one,9.00,2,1.41
464 Total,fosilationwereqd,one,6.00,2,2.83
467 ,,Total,12.60,5,16.01
468 ,onlyonekonboys,one,8.00,2,2.83
471 ,Total,one,7.00,4,2.58
474 ,,Total,10.33,9,11.73
481 dnl An example with multiple tables
482 AT_SETUP([MEANS multiple tables])
483 AT_KEYWORDS([categorical categoricals])
485 AT_DATA([means-multi-table.sps], [dnl
486 data list notable list /a * b * c * x * y *.
515 means table = x by b by c
521 AT_CHECK([pspp -O format=csv means-multi-table.sps], [0], [dnl
522 Table: Case Processing Summary
524 ,Included,,Excluded,,Total,
525 ,N,Percent,N,Percent,N,Percent
526 x * b * c,24,100.0%,0,.0%,24,100.0%
543 Table: Case Processing Summary
545 ,Included,,Excluded,,Total,
546 ,N,Percent,N,Percent,N,Percent
547 x * b,24,100.0%,0,.0%,24,100.0%
556 Table: Case Processing Summary
558 ,Included,,Excluded,,Total,
559 ,N,Percent,N,Percent,N,Percent
560 y * a * b,24,100.0%,0,.0%,24,100.0%
590 dnl An example with more than one dependent variable.
591 dnl This case uses a somewhat different table layout.
592 AT_SETUP([MEANS multi variable])
593 AT_KEYWORDS([categorical categoricals])
595 AT_DATA([means-multi-variable.sps], [dnl
596 data list notable list /b c x y.
625 table = x y by b by c
629 AT_CHECK([pspp -O format=csv means-multi-variable.sps], [0], [dnl
630 Table: Case Processing Summary
632 ,Included,,Excluded,,Total,
633 ,N,Percent,N,Percent,N,Percent
634 x * b * c,24,100.0%,0,.0%,24,100.0%
635 y * b * c,23,95.8%,1,4.2%,24,100.0%
639 3.00,.00,Mean,654.00,11.00
641 ,,Std. Deviation,.00,.00
642 ,1.00,Mean,789.00,55.00
644 ,,Std. Deviation,.00,.00
645 ,Total,Mean,721.50,33.00
647 ,,Std. Deviation,72.16,23.52
648 4.00,.00,Mean,987.00,99.00
650 ,,Std. Deviation,.00,.00
651 ,1.00,Mean,456.00,44.00
653 ,,Std. Deviation,.00,.00
654 ,Total,Mean,721.50,71.50
656 ,,Std. Deviation,283.83,29.40
657 5.00,.00,Mean,246.00,99.00
659 ,,Std. Deviation,.00,.00
660 ,1.00,Mean,123.00,55.00
662 ,,Std. Deviation,.00,.00
663 ,Total,Mean,184.50,73.86
665 ,,Std. Deviation,65.75,23.52
666 Total,.00,Mean,629.00,67.00
668 ,,Std. Deviation,316.50,44.40
669 ,1.00,Mean,456.00,51.33
671 ,,Std. Deviation,283.98,5.42
672 ,Total,Mean,542.50,58.83
674 ,,Std. Deviation,307.06,31.22
681 dnl This example is based upon one kindly provided by Dana Williams
682 dnl It exercises the most complex case where there are multiple
683 dnl dependent variables AND multiple control variables in each layer.
684 AT_SETUP([MEANS multi combination])
685 AT_KEYWORDS([categorical categoricals])
687 AT_DATA([means-multi-combination.sps], [dnl
688 data list notable list /one (F22.5) two (F22.5) three four five six.
756 recode six (2 = 62) (1 = 61).
757 recode five (2 = 52) (1 = 51).
758 recode four (2 = 42) (1 = 41).
759 recode three (2 = 32) (1 = 31).
761 means tables = one two BY three four BY five six.
764 AT_CHECK([pspp -O format=csv means-multi-combination.sps], [0], [dnl
765 Table: Case Processing Summary
767 ,Included,,Excluded,,Total,
768 ,N,Percent,N,Percent,N,Percent
769 one * three * five,64,100.0%,0,.0%,64,100.0%
770 two * three * five,64,100.0%,0,.0%,64,100.0%
771 one * three * six,64,100.0%,0,.0%,64,100.0%
772 two * three * six,64,100.0%,0,.0%,64,100.0%
773 one * four * five,64,100.0%,0,.0%,64,100.0%
774 two * four * five,64,100.0%,0,.0%,64,100.0%
775 one * four * six,64,100.0%,0,.0%,64,100.0%
776 two * four * six,64,100.0%,0,.0%,64,100.0%
778 Table: one * two * three * five
780 31.00,51.00,Mean,1.50000,1.50000
782 ,,Std. Deviation,.51640,.51640
783 ,52.00,Mean,1.50000,1.50000
785 ,,Std. Deviation,.51640,.51640
786 ,Total,Mean,1.50000,1.50000
788 ,,Std. Deviation,.50800,.50800
789 32.00,51.00,Mean,1.50000,1.50000
791 ,,Std. Deviation,.51640,.51640
792 ,52.00,Mean,1.50000,1.50000
794 ,,Std. Deviation,.51640,.51640
795 ,Total,Mean,1.50000,1.50000
797 ,,Std. Deviation,.50800,.50800
798 Total,51.00,Mean,1.50000,1.50000
800 ,,Std. Deviation,.50800,.50800
801 ,52.00,Mean,1.50000,1.50000
803 ,,Std. Deviation,.50800,.50800
804 ,Total,Mean,1.50000,1.50000
806 ,,Std. Deviation,.50395,.50395
808 Table: one * two * three * six
810 31.00,61.00,Mean,1.50000,1.50000
812 ,,Std. Deviation,.51640,.51640
813 ,62.00,Mean,1.50000,1.50000
815 ,,Std. Deviation,.51640,.51640
816 ,Total,Mean,1.50000,1.50000
818 ,,Std. Deviation,.50800,.50800
819 32.00,61.00,Mean,1.50000,1.50000
821 ,,Std. Deviation,.51640,.51640
822 ,62.00,Mean,1.50000,1.50000
824 ,,Std. Deviation,.51640,.51640
825 ,Total,Mean,1.50000,1.50000
827 ,,Std. Deviation,.50800,.50800
828 Total,61.00,Mean,1.50000,1.50000
830 ,,Std. Deviation,.50800,.50800
831 ,62.00,Mean,1.50000,1.50000
833 ,,Std. Deviation,.50800,.50800
834 ,Total,Mean,1.50000,1.50000
836 ,,Std. Deviation,.50395,.50395
838 Table: one * two * four * five
840 41.00,51.00,Mean,1.50000,1.50000
842 ,,Std. Deviation,.51640,.51640
843 ,52.00,Mean,1.50000,1.50000
845 ,,Std. Deviation,.51640,.51640
846 ,Total,Mean,1.50000,1.50000
848 ,,Std. Deviation,.50800,.50800
849 42.00,51.00,Mean,1.50000,1.50000
851 ,,Std. Deviation,.51640,.51640
852 ,52.00,Mean,1.50000,1.50000
854 ,,Std. Deviation,.51640,.51640
855 ,Total,Mean,1.50000,1.50000
857 ,,Std. Deviation,.50800,.50800
858 Total,51.00,Mean,1.50000,1.50000
860 ,,Std. Deviation,.50800,.50800
861 ,52.00,Mean,1.50000,1.50000
863 ,,Std. Deviation,.50800,.50800
864 ,Total,Mean,1.50000,1.50000
866 ,,Std. Deviation,.50395,.50395
868 Table: one * two * four * six
870 41.00,61.00,Mean,1.50000,1.50000
872 ,,Std. Deviation,.51640,.51640
873 ,62.00,Mean,1.50000,1.50000
875 ,,Std. Deviation,.51640,.51640
876 ,Total,Mean,1.50000,1.50000
878 ,,Std. Deviation,.50800,.50800
879 42.00,61.00,Mean,1.50000,1.50000
881 ,,Std. Deviation,.51640,.51640
882 ,62.00,Mean,1.50000,1.50000
884 ,,Std. Deviation,.51640,.51640
885 ,Total,Mean,1.50000,1.50000
887 ,,Std. Deviation,.50800,.50800
888 Total,61.00,Mean,1.50000,1.50000
890 ,,Std. Deviation,.50800,.50800
891 ,62.00,Mean,1.50000,1.50000
893 ,,Std. Deviation,.50800,.50800
894 ,Total,Mean,1.50000,1.50000
896 ,,Std. Deviation,.50395,.50395
902 dnl This example was observed to cause a crash in the
903 dnl destructor. Found by zzuf.
904 AT_SETUP([MEANS clean up])
905 AT_KEYWORDS([categorical categoricals])
907 AT_DATA([means-bad.sps], [dnl
908 data list notable list /one two three four five six.
916 means tables = one two BY thsee four BY five six.
919 AT_CHECK([pspp -O format=csv means-bad.sps], [1], [ignore])
924 dnl Another example which caused a crash.
926 AT_SETUP([MEANS control all missing])
927 AT_KEYWORDS([categorical categoricals])
929 AT_DATA([means-bad.sps], [dnl
930 data list notable list /a * b * y * uu *.
937 means table = b by a by y by uu
941 AT_CHECK([pspp -O format=csv means-bad.sps], [0], [dnl
942 Table: Case Processing Summary
944 ,Included,,Excluded,,Total,
945 ,N,Percent,N,Percent,N,Percent
946 b * a * y * uu,0,.0%,3,100.0%,3,100.0%
948 "warning: The table ""a * y * uu"" has no non-empty control variables. No result for this table will be displayed."
954 dnl Do some tests on the MISSING keyword.
955 AT_SETUP([MEANS missing classes])
956 AT_KEYWORDS([categorical categoricals])
958 AT_DATA([means-missing-classes.sps], [dnl
959 data list notable list /hand * score *.
983 missing values score (99).
984 missing values hand (9).
986 means tables=score by hand
991 means tables=score by hand
996 means tables=score by hand
1002 AT_CHECK([pspp -O format=csv means-missing-classes.sps], [0], [dnl
1003 Table: Case Processing Summary
1005 ,Included,,Excluded,,Total,
1006 ,N,Percent,N,Percent,N,Percent
1007 score * hand,18,90.0%,2,10.0%,20,100.0%
1016 Table: Case Processing Summary
1018 ,Included,,Excluded,,Total,
1019 ,N,Percent,N,Percent,N,Percent
1020 score * hand,19,95.0%,1,5.0%,20,100.0%
1029 Table: Case Processing Summary
1031 ,Included,,Excluded,,Total,
1032 ,N,Percent,N,Percent,N,Percent
1033 score * hand,17,85.0%,3,15.0%,20,100.0%
1045 dnl Make sure that behaviour with SPLIT is correct.
1046 AT_SETUP([MEANS split])
1047 AT_KEYWORDS([categorical categoricals])
1049 AT_DATA([means-split.sps], [dnl
1050 data list notable list /b g *.
1062 means b /cells = count mean.
1065 AT_CHECK([pspp -O format=csv means-split.sps], [0], [dnl
1066 Table: Case Processing Summary
1068 ,Included,,Excluded,,Total,
1069 ,N,Percent,N,Percent,N,Percent
1070 b,4,100.0%,0,.0%,4,100.0%
1076 Table: Case Processing Summary
1078 ,Included,,Excluded,,Total,
1079 ,N,Percent,N,Percent,N,Percent
1080 b,2,100.0%,0,.0%,2,100.0%
1090 dnl Test the output with unusual dependent variable formats
1091 AT_SETUP([MEANS formats])
1092 AT_KEYWORDS([categorical categoricals])
1094 AT_DATA([means-formats.sps], [dnl
1095 data list notable list /hours (TIME11.0) rate (DOLLAR8.2).
1102 /cells = mean count max range.
1105 AT_CHECK([pspp -O format=csv means-formats.sps], [0], [dnl
1106 Table: Case Processing Summary
1108 ,Included,,Excluded,,Total,
1109 ,N,Percent,N,Percent,N,Percent
1110 hours,2,100.0%,0,.0%,2,100.0%
1111 rate,2,100.0%,0,.0%,2,100.0%
1117 Maximum,14:01:00,$5.23
1118 Range,02:01:00,$1.14