Automatically infer variables' measurement level from format and data.
[pspp] / tests / language / data-io / get-data-spreadsheet.at
1 dnl PSPP - a program for statistical analysis.
2 dnl Copyright (C) 2017 Free Software Foundation, Inc.
3 dnl
4 dnl This program is free software: you can redistribute it and/or modify
5 dnl it under the terms of the GNU General Public License as published by
6 dnl the Free Software Foundation, either version 3 of the License, or
7 dnl (at your option) any later version.
8 dnl
9 dnl This program is distributed in the hope that it will be useful,
10 dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
11 dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 dnl GNU General Public License for more details.
13 dnl
14 dnl You should have received a copy of the GNU General Public License
15 dnl along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 dnl
dnl SPREADSHEET_TEST_PREP(TYPE)
dnl ---------------------------
dnl Common setup for one spreadsheet test group.  Must be expanded between
dnl AT_SETUP and AT_CLEANUP.  It tags the group with the "spreadsheet"
dnl keyword, stages the fixture for TYPE in the test's working directory,
dnl and (re)defines the m4 macro testsheet to the staged file's name:
dnl
dnl   GNM  gzip Book1.gnm.unzipped from the source tree into Book1.gnumeric
dnl   ODS  copy test.ods from the source tree
dnl
dnl Any other TYPE aborts test suite generation instead of silently leaving
dnl testsheet undefined or holding the value from an earlier expansion.
m4_define([SPREADSHEET_TEST_PREP],
[AT_KEYWORDS([spreadsheet])
m4_if([$1], [GNM],
  [AT_CHECK([gzip -c $top_srcdir/tests/language/data-io/Book1.gnm.unzipped > Book1.gnumeric])dnl
m4_define([testsheet], [Book1.gnumeric])],
      [$1], [ODS],
  [AT_CHECK([cp $top_srcdir/tests/language/data-io/test.ods test.ods])dnl
m4_define([testsheet], [test.ods])],
  [m4_fatal([SPREADSHEET_TEST_PREP: unsupported spreadsheet type: $1])])dnl
])
28
dnl CHECK_SPREADSHEET_READER(TYPE)
dnl ------------------------------
dnl Emits the GET DATA test groups shared by every spreadsheet reader.
dnl TYPE is substituted into each group's title and into the /TYPE=
dnl subcommand, and is handed to SPREADSHEET_TEST_PREP, which stages the
dnl fixture and defines the testsheet macro used in the scripts and in the
dnl expected warnings below.
m4_define([CHECK_SPREADSHEET_READER],
[dnl
dnl Sheet chosen by name, explicit cell range, no header row.
AT_SETUP([GET DATA /TYPE=$1 with CELLRANGE])
SPREADSHEET_TEST_PREP([$1])
AT_DATA([get-data.sps], [dnl
GET DATA /TYPE=$1 /FILE='testsheet'  /READNAMES=off /SHEET=name 'This' /CELLRANGE=range 'g9:i13' .
DISPLAY VARIABLES.
LIST.
])
AT_CHECK([pspp -o pspp.csv get-data.sps])
AT_CHECK([cat pspp.csv], [0], [dnl
Table: Variables
Name,Position,Print Format,Write Format
VAR001,1,F8.2,F8.2
VAR002,2,A8,A8
VAR003,3,F8.2,F8.2

Table: Data List
VAR001,VAR002,VAR003
.00,fred,20.00
1.00,11,21.00
2.00,twelve,22.00
3.00,13,23.00
4.00,14,24.00
])
AT_CLEANUP

dnl Same sheet with the range extended one row upward and that row read
dnl as variable names.
AT_SETUP([GET DATA /TYPE=$1 with CELLRANGE and READNAMES])
SPREADSHEET_TEST_PREP([$1])
AT_DATA([get-data.sps], [dnl
GET DATA /TYPE=$1 /FILE='testsheet'  /READNAMES=on /SHEET=name 'This' /CELLRANGE=range 'g8:i13' .
DISPLAY VARIABLES.
LIST.
])
AT_CHECK([pspp -o pspp.csv get-data.sps])
AT_CHECK([cat pspp.csv], [0], [dnl
Table: Variables
Name,Position,Print Format,Write Format
V1,1,F8.2,F8.2
V2,2,A8,A8
VAR001,3,F8.2,F8.2

Table: Data List
V1,V2,VAR001
.00,fred,20.00
1.00,11,21.00
2.00,twelve,22.00
3.00,13,23.00
4.00,14,24.00
])
AT_CLEANUP

dnl Sheet chosen by index with no cell range.  The expected output includes
dnl a conversion warning for cell C4; the last field of the "charlie" row
dnl ends in blanks, which are followed by an empty quadrigraph.
AT_SETUP([GET DATA /TYPE=$1 without CELLRANGE])
SPREADSHEET_TEST_PREP([$1])
AT_DATA([get-data.sps], [dnl
GET DATA /TYPE=$1 /FILE='testsheet' /SHEET=index 3.
DISPLAY VARIABLES.
LIST.
])
AT_CHECK([pspp -O format=csv get-data.sps], [0], [dnl
Table: Variables
Name,Position,Print Format,Write Format
name,1,A8,A8
id,2,F8.2,F8.2
height,3,F8.2,F8.2

warning: Cannot convert the value in the spreadsheet cell C4 to format (F8.2): Field contents are not numeric.

Table: Data List
name,id,height
fred,.00,23.40
bert,1.00,.56
charlie,2.00,.  @&t@
dick,3.00,-34.09
])
AT_CLEANUP

dnl A sheet in which one named column holds no data at all.
AT_SETUP([GET DATA /TYPE=$1 with missing data])
SPREADSHEET_TEST_PREP([$1])
AT_DATA([get-data.sps], [dnl
* This sheet has no data in one of its variables
GET DATA /TYPE=$1 /FILE='testsheet' /READNAMES=on /SHEET=index 5.
DISPLAY VARIABLES.
LIST.
])
AT_CHECK([pspp -o pspp.csv get-data.sps])
AT_CHECK([cat pspp.csv], [0], [dnl
Table: Variables
Name,Position,Print Format,Write Format
vone,1,F8.2,F8.2
vtwo,2,F8.2,F8.2
vthree,3,A8,A8
v4,4,F8.2,F8.2

Table: Data List
vone,vtwo,vthree,v4
1.00,3.00,,5.00
2.00,4.00,,6.00
])
AT_CLEANUP

dnl This syntax doesn't do anything particularly useful.
dnl It has been seen to cause a few crashes, so we check here that it
dnl doesn't do anything bad.  Only the exit status is checked; stdout is
dnl ignored.
AT_SETUP([GET DATA /TYPE=$1 with no options])
SPREADSHEET_TEST_PREP([$1])
AT_DATA([get-data.sps], [dnl
* This sheet is empty
GET DATA /TYPE=$1 /FILE='testsheet'.
DISPLAY DICTIONARY.
LIST.
])
AT_CHECK([pspp -o pspp.csv get-data.sps], [0], [ignore])
AT_CLEANUP

dnl A sheet that exists but has no cells: a warning, not a failure.
AT_SETUP([GET DATA /TYPE=$1 with empty sheet])
SPREADSHEET_TEST_PREP([$1])
AT_DATA([get-data.sps], [dnl
* This sheet is empty
GET DATA /TYPE=$1 /FILE='testsheet' /SHEET=name 'Empty'.
])
AT_CHECK([pspp -o pspp.csv get-data.sps], [0], [dnl
warning: Selected sheet or range of spreadsheet `testsheet' is empty.
])
AT_CLEANUP

dnl A sheet name that is not in the workbook is expected to produce the
dnl same warning as an empty sheet.
AT_SETUP([GET DATA /TYPE=$1 with nonexistent sheet])
SPREADSHEET_TEST_PREP([$1])
AT_DATA([get-data.sps], [dnl
* This sheet doesnt exist.
GET DATA /TYPE=$1 /FILE='testsheet' /SHEET=name 'foobarxx'.
])
AT_CHECK([pspp -o pspp.csv get-data.sps], [0], [dnl
warning: Selected sheet or range of spreadsheet `testsheet' is empty.
])
AT_CLEANUP
])
168
169
dnl Gnumeric reader: banner, then the shared reader test groups for GNM.
AT_BANNER([GET DATA Spreadsheet /TYPE=GNM])
CHECK_SPREADSHEET_READER([GNM])
173
dnl Regression test: gnumeric workbooks used to be interpreted incorrectly
dnl when a sheet was selected by index.  The workbook below holds three
dnl sheets; the script asks for the third one without a header row.
AT_SETUP([GET DATA /TYPE=GNM sheet index bug])
AT_KEYWORDS([spreadsheet])

dnl The script under test.
AT_DATA([gnum.sps], [dnl
GET DATA
        /TYPE=GNM
        /FILE='minimal3.gnumeric'
        /SHEET=index 3
        /READNAMES=off
        .

LIST.
])

dnl Uncompressed gnumeric XML with sheets Sheet1, Sheet2 and Sheet3.
AT_DATA([minimal3.gnumeric], [dnl
<?xml version="1.0" encoding="UTF-8"?>
<gnm:Workbook xmlns:gnm="http://www.gnumeric.org/v10.dtd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.gnumeric.org/v9.xsd">
  <gnm:Version Epoch="1" Major="10" Minor="8" Full="1.10.8"/>
  <gnm:SheetNameIndex>
    <gnm:SheetName gnm:Cols="256" gnm:Rows="65536">Sheet1</gnm:SheetName>
    <gnm:SheetName gnm:Cols="256" gnm:Rows="65536">Sheet2</gnm:SheetName>
    <gnm:SheetName gnm:Cols="256" gnm:Rows="65536">Sheet3</gnm:SheetName>
  </gnm:SheetNameIndex>
  <gnm:Sheets>
    <gnm:Sheet DisplayFormulas="0" HideZero="0" HideGrid="0" HideColHeader="0" HideRowHeader="0" DisplayOutlines="1" OutlineSymbolsBelow="1" OutlineSymbolsRight="1" Visibility="GNM_SHEET_VISIBILITY_VISIBLE" GridColor="0:0:0">
      <gnm:Name>Sheet1</gnm:Name>
      <gnm:MaxCol>2</gnm:MaxCol>
      <gnm:MaxRow>3</gnm:MaxRow>
      <gnm:Names>
        <gnm:Name>
          <gnm:name>Print_Area</gnm:name>
          <gnm:value>#REF!</gnm:value>
          <gnm:position>A1</gnm:position>
        </gnm:Name>
        <gnm:Name>
          <gnm:name>Sheet_Title</gnm:name>
          <gnm:value>&quot;Sheet1&quot;</gnm:value>
          <gnm:position>A1</gnm:position>
        </gnm:Name>
      </gnm:Names>
      <gnm:Cols DefaultSizePts="48">
        <gnm:ColInfo No="0" Unit="94.5" HardSize="1"/>
        <gnm:ColInfo No="1" Unit="48" Count="2"/>
      </gnm:Cols>
      <gnm:Rows DefaultSizePts="12.75">
        <gnm:RowInfo No="0" Unit="13.5" Count="4"/>
      </gnm:Rows>
      <gnm:Cells>
        <gnm:Cell Row="0" Col="0" ValueType="60">Name</gnm:Cell>
        <gnm:Cell Row="0" Col="1" ValueType="60">x</gnm:Cell>
        <gnm:Cell Row="0" Col="2" ValueType="60">y</gnm:Cell>
        <gnm:Cell Row="1" Col="0" ValueType="60">Sheet One</gnm:Cell>
        <gnm:Cell Row="1" Col="1" ValueType="40">1</gnm:Cell>
        <gnm:Cell Row="1" Col="2" ValueType="40">2</gnm:Cell>
        <gnm:Cell Row="2" Col="0" ValueType="60">foo</gnm:Cell>
        <gnm:Cell Row="2" Col="1" ValueType="40">3</gnm:Cell>
        <gnm:Cell Row="2" Col="2" ValueType="40">4</gnm:Cell>
        <gnm:Cell Row="3" Col="0" ValueType="60">bar</gnm:Cell>
        <gnm:Cell Row="3" Col="1" ValueType="40">5</gnm:Cell>
        <gnm:Cell Row="3" Col="2" ValueType="40">6</gnm:Cell>
      </gnm:Cells>
    </gnm:Sheet>
    <gnm:Sheet DisplayFormulas="0" HideZero="0" HideGrid="0" HideColHeader="0" HideRowHeader="0" DisplayOutlines="1" OutlineSymbolsBelow="1" OutlineSymbolsRight="1" Visibility="GNM_SHEET_VISIBILITY_VISIBLE" GridColor="0:0:0">
      <gnm:Name>Sheet2</gnm:Name>
      <gnm:MaxCol>2</gnm:MaxCol>
      <gnm:MaxRow>2</gnm:MaxRow>
      <gnm:Names>
        <gnm:Name>
          <gnm:name>Print_Area</gnm:name>
          <gnm:value>#REF!</gnm:value>
          <gnm:position>A1</gnm:position>
        </gnm:Name>
        <gnm:Name>
          <gnm:name>Sheet_Title</gnm:name>
          <gnm:value>&quot;Sheet2&quot;</gnm:value>
          <gnm:position>A1</gnm:position>
        </gnm:Name>
      </gnm:Names>
      <gnm:Cols DefaultSizePts="48">
        <gnm:ColInfo No="0" Unit="48"/>
        <gnm:ColInfo No="1" Unit="57.75"/>
        <gnm:ColInfo No="2" Unit="54.75"/>
      </gnm:Cols>
      <gnm:Rows DefaultSizePts="12.75">
        <gnm:RowInfo No="0" Unit="13.5" Count="3"/>
      </gnm:Rows>
      <gnm:Cells>
        <gnm:Cell Row="0" Col="0" ValueType="60">Comment</gnm:Cell>
        <gnm:Cell Row="0" Col="1" ValueType="60">DOB</gnm:Cell>
        <gnm:Cell Row="0" Col="2" ValueType="60">wealth</gnm:Cell>
        <gnm:Cell Row="1" Col="0" ValueType="60">Sheet Two</gnm:Cell>
        <gnm:Cell Row="1" Col="1" ValueType="60">24/5/1966</gnm:Cell>
        <gnm:Cell Row="1" Col="2" ValueType="40" ValueFormat="_($* 0.00_);_($* (0.00);_($* &quot;-&quot;??_);_(@_)">0.02</gnm:Cell>
        <gnm:Cell Row="2" Col="0" ValueType="60">wee</gnm:Cell>
        <gnm:Cell Row="2" Col="1" ValueType="40" ValueFormat="dd/mm/yyyy">37145</gnm:Cell>
        <gnm:Cell Row="2" Col="2" ValueType="40" ValueFormat="_($* 0.00_);_($* (0.00);_($* &quot;-&quot;??_);_(@_)">3000</gnm:Cell>
      </gnm:Cells>
    </gnm:Sheet>
    <gnm:Sheet DisplayFormulas="0" HideZero="0" HideGrid="0" HideColHeader="0" HideRowHeader="0" DisplayOutlines="1" OutlineSymbolsBelow="1" OutlineSymbolsRight="1" Visibility="GNM_SHEET_VISIBILITY_VISIBLE" GridColor="0:0:0">
      <gnm:Name>Sheet3</gnm:Name>
      <gnm:MaxCol>2</gnm:MaxCol>
      <gnm:MaxRow>2</gnm:MaxRow>
      <gnm:Names>
        <gnm:Name>
          <gnm:name>Print_Area</gnm:name>
          <gnm:value>#REF!</gnm:value>
          <gnm:position>A1</gnm:position>
        </gnm:Name>
        <gnm:Name>
          <gnm:name>Sheet_Title</gnm:name>
          <gnm:value>&quot;Sheet3&quot;</gnm:value>
          <gnm:position>A1</gnm:position>
        </gnm:Name>
      </gnm:Names>
      <gnm:Cols DefaultSizePts="48">
        <gnm:ColInfo No="0" Unit="48" Count="3"/>
      </gnm:Cols>
      <gnm:Rows DefaultSizePts="12.75">
        <gnm:RowInfo No="0" Unit="13.5"/>
        <gnm:RowInfo No="1" Unit="12.75" Count="2"/>
      </gnm:Rows>
      <gnm:Cells>
        <gnm:Cell Row="0" Col="0" ValueType="40">3</gnm:Cell>
        <gnm:Cell Row="0" Col="1" ValueType="40">4</gnm:Cell>
        <gnm:Cell Row="0" Col="2" ValueType="40">5</gnm:Cell>
        <gnm:Cell Row="1" Col="0" ValueType="40">6</gnm:Cell>
        <gnm:Cell Row="1" Col="1" ValueType="40">7</gnm:Cell>
        <gnm:Cell Row="1" Col="2" ValueType="40">8</gnm:Cell>
        <gnm:Cell Row="2" Col="0" ValueType="40">9</gnm:Cell>
        <gnm:Cell Row="2" Col="1" ValueType="40">10</gnm:Cell>
        <gnm:Cell Row="2" Col="2" ValueType="40">11</gnm:Cell>
      </gnm:Cells>
    </gnm:Sheet>
  </gnm:Sheets>
</gnm:Workbook>
])

dnl The listing must contain exactly the nine cells of Sheet3.
dnl NOTE(review): VAR001 is listed without decimals while VAR002 and VAR003
dnl are listed with two; kept as recorded, confirm this is intended.
AT_CHECK([pspp -O format=csv gnum.sps], [0], [dnl
Table: Data List
VAR001,VAR002,VAR003
3,4.00,5.00
6,7.00,8.00
9,10.00,11.00
])

AT_CLEANUP
321
322
dnl Regression test: certain gnumeric files used to trip an assertion in
dnl the reader.  Only the exit status matters here; the output is ignored.
AT_SETUP([GET DATA /TYPE=GNM assert-fail])
AT_KEYWORDS([spreadsheet])

dnl One-sheet workbook whose cells start at row 1, column 1 and which
dnl carries a foreign office:document-meta element.
AT_DATA([crash.gnumeric], [dnl
<?xml version="1.0" encoding="UTF-8"?>
<gnm:Workbook xmlns:gnm="http://www.gnumeric.org/v10.dtd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.gnumeric.org/v9.xsd">
  <office:document-meta xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:ooo="http://openoffice.org/2004/office" office:version="1.1">
  </office:document-meta>
  <gnm:SheetNameIndex>
    <gnm:SheetName gnm:Cols="256" gnm:Rows="65536">Sheet1</gnm:SheetName>
  </gnm:SheetNameIndex>
  <gnm:Sheets>
    <gnm:Sheet DisplayFormulas="0" HideZero="0" HideGrid="0" HideColHeader="0" HideRowHeader="0" DisplayOutlines="1" OutlineSymbolsBelow="1" OutlineSymbolsRight="1" Visibility="GNM_SHEET_VISIBILITY_VISIBLE" GridColor="0:0:0">
      <gnm:Name>Sheet1</gnm:Name>
      <gnm:MaxCol>2</gnm:MaxCol>
      <gnm:MaxRow>4</gnm:MaxRow>
      <gnm:Styles>
        <gnm:StyleRegion startCol="0" startRow="0" endCol="255" endRow="65535">
          <gnm:Style HAlign="1" VAlign="2" WrapText="0" ShrinkToFit="0" Rotation="0" Shade="0" Indent="0" Locked="1" Hidden="0" Fore="0:0:0" Back="FFFF:FFFF:FFFF" PatternColor="0:0:0" Format="General">
          </gnm:Style>
        </gnm:StyleRegion>
      </gnm:Styles>
      <gnm:Cells>
        <gnm:Cell Row="1" Col="1" ValueType="60">one</gnm:Cell>
        <gnm:Cell Row="1" Col="2" ValueType="60">two</gnm:Cell>
        <gnm:Cell Row="2" Col="1" ValueType="40">1</gnm:Cell>
        <gnm:Cell Row="2" Col="2" ValueType="40">2</gnm:Cell>
        <gnm:Cell Row="3" Col="1" ValueType="40">1</gnm:Cell>
        <gnm:Cell Row="3" Col="2" ValueType="40">2</gnm:Cell>
        <gnm:Cell Row="4" Col="1" ValueType="40">1</gnm:Cell>
        <gnm:Cell Row="4" Col="2" ValueType="40">2</gnm:Cell>
      </gnm:Cells>
    </gnm:Sheet>
  </gnm:Sheets>
</gnm:Workbook>
])

dnl The script under test: read the workbook with default options and list it.
AT_DATA([read.sps], [dnl
GET DATA
        /TYPE=GNM
        /FILE='crash.gnumeric'
        .
list.
])

AT_CHECK([pspp -O format=csv read.sps], [0], [ignore])

AT_CLEANUP
373
374
375
dnl OpenDocument reader: banner, then the shared reader test groups for ODS.
AT_BANNER([GET DATA Spreadsheet /TYPE=ODS])
CHECK_SPREADSHEET_READER([ODS])
379
380
dnl Reads range A1:C8 of newone.ods with READNAMES=ON and lists it.
dnl Only the exit status is checked; the output is ignored.
AT_SETUP([GET DATA /TYPE=ODS crash])
AT_KEYWORDS([spreadsheet])

AT_DATA([crash.sps], [dnl
GET DATA /TYPE=ODS /FILE='this.ods' /CELLRANGE=RANGE 'A1:C8'  /READNAMES=ON
LIST.
])

dnl Stage the fixture under the name the script expects.
AT_CHECK([cp $top_srcdir/tests/language/data-io/newone.ods this.ods])

AT_CHECK([pspp -O format=csv crash.sps], [0], [ignore])

AT_CLEANUP
395
396
dnl Regression test: with /READNAMES, the ODS reader used to deal
dnl incorrectly with column names that occur more than once.  The expected
dnl dictionary shows the names fred, fred_A, fred_B, ... fred_E.
AT_SETUP([GET DATA /TYPE=ODS readnames])
AT_KEYWORDS([spreadsheet])

AT_DATA([readnames.sps], [dnl
GET DATA /TYPE=ODS /FILE='this.ods' /CELLRANGE=RANGE 'A1:H8' /READNAMES=ON
EXECUTE.
DISPLAY DICTIONARY.
LIST.
])

dnl Stage the fixture under the name the script expects.
AT_CHECK([cp $top_srcdir/tests/language/data-io/readnames.ods this.ods])

AT_CHECK([pspp -O format=csv readnames.sps], [0], [dnl
Table: Variables
Name,Position,Measurement Level,Role,Width,Alignment,Print Format,Write Format
freda,1,Nominal,Input,8,Right,F8.2,F8.2
fred,2,Nominal,Input,8,Right,F8.2,F8.2
fred_A,3,Nominal,Input,8,Right,F8.2,F8.2
fred_B,4,Nominal,Input,8,Right,F8.2,F8.2
fred_C,5,Nominal,Input,8,Right,F8.2,F8.2
fred_D,6,Nominal,Input,8,Right,F8.2,F8.2
fred_E,7,Nominal,Input,8,Right,F8.2,F8.2

Table: Data List
freda,fred,fred_A,fred_B,fred_C,fred_D,fred_E
1.00,2.00,3.00,4.00,5.00,6.00,7.00
8.00,9.00,10.00,11.00,12.00,13.00,14.00
])

AT_CLEANUP
430