From: Ben Pfaff Date: Mon, 25 Apr 2011 04:42:54 +0000 (-0700) Subject: sys-file-reader: Add tests for non-ASCII characters and encodings. X-Git-Tag: v0.7.8~33 X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?p=pspp-builds.git;a=commitdiff_plain;h=de8fc417b1268addaa8b65f7be78c80c61bc0b7d sys-file-reader: Add tests for non-ASCII characters and encodings. --- diff --git a/tests/data/sys-file-reader.at b/tests/data/sys-file-reader.at index 2564e7d1..ca45de37 100644 --- a/tests/data/sys-file-reader.at +++ b/tests/data/sys-file-reader.at @@ -8,10 +8,11 @@ dnl File header. 2; dnl Layout code 22; dnl Nominal case size 0; dnl Not compressed -0; dnl Not weighted +0; dnl Not weighted 1; dnl 1 case. 100.0; dnl Bias. -"01 Jan 11"; "20:53:52"; s64 "PSPP synthetic test file"; +"01 Jan 11"; "20:53:52"; +"PSPP synthetic test file: "; i8 244; i8 245; i8 246; i8 248; s34 ""; i8 0 *3; dnl Numeric variable, no label or missing values. @@ -19,7 +20,7 @@ dnl Numeric variable, no label or missing values. dnl Numeric variable, variable label. 2; 0; 1; 0; 0x050800 *2; s8 "NUM2"; -26; "Numeric variable 2's label"; i8 0 *2; +32; "Numeric variable 2's label ("; i8 249; i8 250; i8 251; ")"; dnl Numeric variable, one missing value. 2; 0; 0; 1; 0x050800 *2; s8 "NUM3"; @@ -42,7 +43,8 @@ dnl Numeric variable, range of missing values. dnl Numeric variables, range of missing values plus discrete value. 2; 0; 0; -3; 0x050800 *2; s8 "NUM8"; 1.0; 3.0; 5.0; 2; 0; 0; -3; 0x050800 *2; s8 "NUM9"; 1.0; HIGHEST; -5.0; -2; 0; 0; -3; 0x050800 *2; s8 "NUM10"; LOWEST; 1.0; 5.0; +2; 0; 0; -3; 0x050800 *2; "NUM"; i8 192; i8 200; i8 204; i8 209; i8 210; +LOWEST; 1.0; 5.0; dnl String variable, no label or missing values. 2; 4; 0; 0; 0x010400 *2; s8 "STR1"; @@ -94,22 +96,25 @@ s8 "abcd"; s8 "efgh"; s8 "ijkl"; s8 "mnop"; s8 "qrst"; s8 "uvwx"; s16 "yzABCDEFGHI"; s32 "JKLMNOPQRSTUVWXYZ01234567"; ]) for variant in \ - "be e07ee28eaf7bceca017e83e9fd46be3c" \ - "le c357aa20227c856b8a80d1b840722da1" + "be 94338da4d8d44244d43f31e2ea4d0a6a" \ + "le e3e7eefb984b81be5531b579293cb127" do set $variant AT_CHECK_UNQUOTED([sack --$[1] sys-file.sack > sys-file.sav], [0], [], [$[2] ]) AT_DATA([sys-file.sps], [dnl GET FILE='sys-file.sav'. +DISPLAY FILE LABEL. DISPLAY DICTIONARY. LIST. ]) AT_CHECK([pspp -o pspp.csv sys-file.sps]) AT_CHECK([grep -v Measure pspp.csv | grep -v Display], [0], [dnl +File label: PSPP synthetic test file: ôõöø + Variable,Description,,Position num1,Format: F8.0,,1 -num2,Numeric variable 2's label,,2 +num2,Numeric variable 2's label (ùúû),,2 ,Format: F8.0,, num3,Format: F8.0,,3 ,Missing Values: 1,, @@ -126,7 +131,7 @@ num8,Format: F8.0,,8 ,Missing Values: 1 THRU 3; 5,, num9,Format: F8.0,,9 ,Missing Values: 1 THRU HIGHEST; -5,, -num10,Format: F8.0,,10 +numÀÈÌÑÒ,Format: F8.0,,10 ,Missing Values: LOWEST THRU 1; 5,, str1,Format: A4,,11 str2,String variable 2's label,,12 @@ -146,7 +151,7 @@ str8,25-byte string,,18 ,Format: A25,, Table: Data List -num1,num2,num3,num4,num5,num6,num7,num8,num9,num10,str1,str2,str3,str4,str5,str6,str7,str8 +num1,num2,num3,num4,num5,num6,num7,num8,num9,numÀÈÌÑÒ,str1,str2,str3,str4,str5,str6,str7,str8 1,2,3,4,5,6,7,8,9,10,abcd,efgh,ijkl,mnop,qrst,uvwx,yzABCDEFGHI,JKLMNOPQRSTUVWXYZ01234567 ]) done @@ -291,7 +296,7 @@ dnl String variables. 2; 6; 0; 0; 0x010600 *2; s8 "STR6"; dnl index 11 2; 7; 0; 0; 0x010700 *2; s8 "STR7"; dnl index 12 2; 8; 0; 0; 0x010800 *2; s8 "STR8"; dnl index 13 -2; 9; 0; 0; 0x010900 *2; s8 "STR9"; dnl index 14 +2; 9; 0; 0; 0x010900 *2; "STR9"; i8 230; s3 ""; dnl index 14 2; -1; 0; 0; 0; 0; s8 ""; 2; 12; 0; 0; 0x010c00 *2; s8 "STR12"; dnl index 16 2; -1; 0; 0; 0; 0; s8 ""; @@ -301,7 +306,7 @@ dnl String variables. ( 2; -1; 0; 0; 0; 0; s8 ""; ) * 2; dnl One value label for NUM1. -3; 1; 1.0; i8 3; s7 "one"; 4; 1; 1; +3; 1; 1.0; i8 17; i8 238; i8 228; i8 232; i8 237; s19 " (in Russian)"; 4; 1; 1; dnl Two value labels for NUM2, as a single pair of type 3 and type 4 records. 3; 2; 1.0; i8 3; s7 "one"; 2.0; i8 3; s7 "two"; 4; 1; 2; @@ -345,9 +350,15 @@ dnl One value label for STR6, STR7, STR8. 3; 1; s8 "JKLMNOP"; i8 25; s31 "value label for `JKLMNOP'"; 4; 1; 12; 3; 1; s8 "JKLMNOPQ"; i8 26; s31 "value label for `JKLMNOPQ'"; 4; 1; 13; +dnl Machine integer info record. +7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 1251; + +dnl Character encoding record. +7; 20; 1; 12; "windows-1251"; + 7; 21; 1; COUNT ( -dnl One value label for STR9, -COUNT("STR9"); 9; 1; COUNT("RSTUVWXYZ"); COUNT("value label for `RSTUVWXYZ'"); +dnl One value label for STR9ж, +COUNT("STR9"; i8 230); 9; 1; COUNT("RSTUVWXYZ"); COUNT("value label for `RSTUVWXYZ'"); dnl Two value labels for STR12. COUNT("STR12"); 12; 2; @@ -358,7 +369,7 @@ dnl Three value labels for STR16. COUNT("STR16"); 16; 3; COUNT("opqrstuvwxyzABCD"); COUNT("value label for `opqrstuvwxyzABCD'"); COUNT("EFGHIJKLMNOPQRST"); COUNT("value label for `EFGHIJKLMNOPQRST'"); -COUNT("UVWXYZ0123456789"); COUNT("value label for `UVWXYZ0123456789'"); +COUNT("UVWXYZ0123456789"); COUNT("value label for `UVWXYZ0123456789' with Cyrillic letters: `"; i8 244; i8 245; i8 246; "'"); dnl One value label for STR17. COUNT("STR17"); 17; 1; @@ -369,8 +380,8 @@ dnl Dictionary termination record. 999; 0; ]) for variant in \ - "be 1de55cc9fb523c8f9b014cdc5387c12b" \ - "le 76a6974012df7351b591c5964c41e582" + "be b27d766d8a5ad9e901c8b244591a5942" \ + "le eb2e93f3cc29acd605b80e6c3af25ba6" do set $variant AT_CHECK_UNQUOTED([sack --$[1] sys-file.sack > sys-file.sav], [0], [], [$[2] @@ -383,7 +394,7 @@ DISPLAY DICTIONARY. AT_CHECK([grep -v Measure pspp.csv | grep -v Display], [0], [dnl Variable,Description,,Position num1,Format: F8.0,,1 -,1,one, +,1,один (in Russian), num2,Format: F8.0,,2 ,1,one, ,2,two, @@ -424,14 +435,14 @@ str7,Format: A7,,12 ,JKLMNOP,value label for `JKLMNOP', str8,Format: A8,,13 ,JKLMNOPQ,value label for `JKLMNOPQ', -str9,Format: A9,,14 +str9ж,Format: A9,,14 ,RSTUVWXYZ,value label for `RSTUVWXYZ', str12,Format: A12,,15 ,0123456789ab,value label for `0123456789ab', ,cdefghijklmn,value label for `cdefghijklmn', str16,Format: A16,,16 ,EFGHIJKLMNOPQRST,value label for `EFGHIJKLMNOPQRST', -,UVWXYZ0123456789,value label for `UVWXYZ0123456789', +,UVWXYZ0123456789,value label for `UVWXYZ0123456789' with Cyrillic letters: `фхц', ,opqrstuvwxyzABCD,value label for `opqrstuvwxyzABCD', str17,Format: A17,,17 ,abcdefghijklmnopq,value label for `abcdefghijklmnopq', @@ -456,13 +467,21 @@ i8 0 *3; dnl Numeric variable, no label or missing values. 2; 0; 0; 0; 0x050800 *2; s8 "NUM1"; +dnl Machine integer info record. +7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 1252; + dnl Document record. -6; 4; +6; 5; s80 "First line of documents"; s80 "Second line of documents"; +"abb"; i8 233; " appliqu"; i8 233; " attach"; i8 233; " blas"; i8 233; " caf"; i8 233; " canap"; i8 233; " clich"; i8 233; " consomm"; i8 233; +s25 ""; s80 ""; s80 "Last line of documents"; +dnl Character encoding record. +7; 20; 1; 12; "windows-1252"; + dnl Dictionary termination record. 999; 0; @@ -470,8 +489,8 @@ dnl Data. 1.0; ]) for variant in \ - "be 8738124d7932cc8ff803142fbf38710b" \ - "le f3ca2123ec9e8bda91c6b865ba39f506" + "be 3555f74f3e714a3a703de7df56ce6d24" \ + "le ede5a0f805a1aab096ea86abf677ff34" do set $variant AT_CHECK_UNQUOTED([sack --$[1] sys-file.sack > sys-file.sav], [0], [], [$[2] @@ -489,6 +508,8 @@ First line of documents Second line of documents +abbé appliqué attaché blasé café canapé cliché consommé + Last line of documents @@ -515,7 +536,7 @@ dnl File header. i8 0 *3; dnl $a -2; 0; 0; 0; 0x050800 *2; s8 "A"; +2; 0; 0; 0; 0x050800 *2; i8 0x82; i8 0xa0; s6 ""; 2; 0; 0; 0; 0x050800 *2; s8 "B"; 2; 0; 0; 0; 0x050800 *2; s8 "C"; @@ -526,9 +547,9 @@ dnl $b 2; 0; 0; 0; 0x050800 *2; s8 "G"; dnl $c -2; 3; 0; 0; 0x010300 *2; s8 "H"; -2; 3; 0; 0; 0x010300 *2; s8 "I"; -2; 3; 0; 0; 0x010300 *2; s8 "J"; +2; 4; 0; 0; 0x010400 *2; s8 "H"; +2; 4; 0; 0; 0x010400 *2; s8 "I"; +2; 4; 0; 0; 0x010400 *2; s8 "J"; dnl $d 2; 0; 0; 0; 0x050800 *2; s8 "K"; @@ -540,23 +561,29 @@ dnl $e 2; 6; 0; 0; 0x010600 *2; s8 "O"; 2; 6; 0; 0; 0x010600 *2; s8 "P"; +dnl Machine integer info record. +7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 932; + 7; 7; 1; COUNT( - "$a=C 10 my mcgroup a b c"; i8 10; + "$a=C 10 my mcgroup "; i8 0x82; i8 0xa0; " b c"; i8 10; "$b=D2 55 0 g e f d"; i8 10; - "$c=D3 Yes 10 mdgroup #2 h i j"; i8 10); + "$c=D4 "; i8 0x82; i8 0xcd; i8 0x82; i8 0xa2; " 10 mdgroup #2 h i j"; i8 10); 7; 19; 1; COUNT( "$d=E 1 2 34 13 third mdgroup k l m"; i8 10; "$e=E 11 6 choice 0 n o p"; i8 10); +dnl Character encoding record. +7; 20; 1; 9; "shift_jis"; + dnl Dictionary termination record. 999; 0; ]) for variant in \ - "be 0caa3446d7a3f6985e79fd1fcc999b10" \ - "le 9dbeba699e4149ed836f55bad7346d67" + "be fdf260a05220e08c748967dcb90d8b15" \ + "le 4c9b0c0636bc0aa0cc16684c8188d1c7" do set $variant AT_CHECK_UNQUOTED([sack --$[1] sys-file.sack > sys-file.sav], [0], [], [$[2] @@ -569,7 +596,7 @@ MRSETS /DISPLAY NAME=ALL. AT_CHECK([cat pspp.csv], [0], [dnl Table: Multiple Response Sets Name,Variables,Details -$a,"a +$a,"あ b c ","Multiple category set @@ -589,7 +616,7 @@ j ","Multiple dichotomy set Label: mdgroup #2 Label source: Provided by user -Counted value: `Yes' +Counted value: `はい' Category label source: Variable labels " $d,"k @@ -859,7 +886,7 @@ AT_DATA([sys-file.sack], [dnl dnl File header. "$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; 2; dnl Layout code -4; dnl Nominal case size +7; dnl Nominal case size 0; dnl Not compressed 0; dnl Not weighted 0; dnl No cases. @@ -872,21 +899,36 @@ dnl Numeric variables. 2; 0; 0; 0; 0x050800 *2; s8 "LONGVA_A"; 2; 0; 0; 0; 0x050800 *2; s8 "LONGVA_B"; 2; 0; 0; 0; 0x050800 *2; s8 "LONGVA_C"; +2; 0; 0; 0; 0x050800 *2; "CO"; i8 214; "RDINA"; +2; 0; 0; 0; 0x050800 *2; "CO"; i8 214; "RDI_A"; +2; 0; 0; 0; 0x050800 *2; "CO"; i8 214; "RDI_B"; + +dnl Machine integer info record. +7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 1252; + +dnl Machine floating-point info record. +7; 4; 8; 3; SYSMIS; HIGHEST; LOWEST; dnl Long variable names. 7; 13; 1; COUNT ( "LONGVARI=LongVariableName1"; i8 9; "LONGVA_A=LongVariableName2"; i8 9; "LONGVA_B=LongVariableName3"; i8 9; -"LONGVA_C=LongVariableName4"; +"LONGVA_C=LongVariableName4"; i8 9; +"CO"; i8 214; "RDINA=Co"; i8 246; "rdinate_X"; i8 9; +"CO"; i8 214; "RDI_A=Co"; i8 246; "rdinate_Y"; i8 9; +"CO"; i8 214; "RDI_B=Co"; i8 246; "rdinate_Z"; ); +dnl Character encoding record. +7; 20; 1; 12; "windows-1252"; + dnl Dictionary termination record. 999; 0; ]) for variant in \ - "be eb7a8b4055a5d880a185a566048876b3" \ - "le dd4ecd7541320b9b51746717ef20973f" + "be 8ea5a72f3ae6e732371e92a7719c3951" \ + "le 02bcf02cf08b1e8fc80a858101ae22fc" do set $variant AT_CHECK_UNQUOTED([sack --$[1] sys-file.sack > sys-file.sav], [0], [], [$[2] @@ -902,6 +944,9 @@ LongVariableName1,Format: F8.0,,1 LongVariableName2,Format: F8.0,,2 LongVariableName3,Format: F8.0,,3 LongVariableName4,Format: F8.0,,4 +Coördinate_X,Format: F8.0,,5 +Coördinate_Y,Format: F8.0,,6 +Coördinate_Z,Format: F8.0,,7 ]) done AT_CLEANUP @@ -921,9 +966,9 @@ dnl File header. i8 0 *3; dnl 256-byte string. -2; 255; 0; 0; 0x01FF00 *2; s8 "STR256"; +2; 255; 0; 0; 0x01FF00 *2; "S"; i8 201; s6 "Q256"; (2; -1; 0; 0; 0; 0; s8 "") * 31; -2; 4; 0; 0; 0x010400 *2; s8 "STR256_1"; +2; 4; 0; 0; 0x010400 *2; "S"; i8 201; "Q256_1"; dnl 600-byte string. 2; 255; 0; 0; 0x01FF00 *2; s8 "STR600"; @@ -933,12 +978,18 @@ dnl 600-byte string. 2; 96; 0; 0; 0x016000 *2; s8 "STR600_2"; (2; -1; 0; 0; 0; 0; s8 "") * 11; +dnl Machine integer info record. +7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 1252; + dnl Very long string record. 7; 14; 1; COUNT ( -"STR256=00256"; i8 0; i8 9; +"S"; i8 201; "Q256=00256"; i8 0; i8 9; "STR600=00600"; i8 0; i8 9; ); +dnl Character encoding record. +7; 20; 1; 12; "windows-1252"; + dnl Dictionary termination record. 999; 0; @@ -949,8 +1000,8 @@ dnl Data. "abcdefghijklmnopqrstuvwxyzABCDEF"; ]) for variant in \ - "be 40a4327805d8b59891084317248f5d4a" \ - "le ced2584a43037b893b7feb068e2cb9d6" + "be 844a4704f669dfe292482e587d690133" \ + "le b76025f602bdff6a42c1e0795a8b62ff" do set $variant AT_CHECK_UNQUOTED([sack --$[1] sys-file.sack > sys-file.sav], [0], [], [$[2] @@ -963,11 +1014,11 @@ LIST. AT_CHECK([pspp -o pspp.csv sys-file.sps]) AT_CHECK([grep -v Measure pspp.csv | grep -v Display], [0], [dnl Variable,Description,,Position -str256,Format: A256,,1 +sÉq256,Format: A256,,1 str600,Format: A600,,2 Table: Data List -str256,str600 +sÉq256,str600 abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@a,abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@#abcdefghijklmnopqrstuvwxyz ]) done @@ -991,33 +1042,40 @@ dnl Variables. 2; 0; 0; 0; 0x050800 *2; s8 "FIRSTVAR"; 2; 0; 0; 0; 0x050800 *2; s8 "SECONDVA"; +dnl Machine integer info record. +7; 3; 4; 8; 1; 2; 3; -1; 1; 1; ENDIAN; 1252; + dnl Long variable names. 7; 13; 1; COUNT ( "FIRSTVAR=FirstVariable"; i8 9; -"SECONDVA=SecondVariable"; i8 9; +"SECONDVA=S"; i8 233; "condVariable"; i8 9; ); dnl Data file attributes record. 7; 17; 1; COUNT ( -"Attr1('Value1'"; i8 10; "''QuotedValue''"; i8 10; ")"; -"SecondAttr('123'"; i8 10; "'456'"; i8 10; ")"; +"Attr1('Value1'"; i8 10; "''d"; i8 233; "claration''"; i8 10; ")"; +"S"; i8 233; "condAttr('123'"; i8 10; "'456'"; i8 10; ")"; ); dnl Variable attributes record. 7; 18; 1; COUNT ( "FirstVariable:"; - "fred('23'"; i8 10; "'34'"; i8 10; ")"; + "ad"; i8 232; "le('23'"; i8 10; "'34'"; i8 10; ")"; "bert('123'"; i8 10; ")"; -"/SecondVariable:"; +"/S"; i8 233; "condVariable:"; "xyzzy('quux'"; i8 10; ")"; ); + +dnl Character encoding record. +7; 20; 1; 12; "windows-1252"; + dnl Dictionary termination record. 999; 0; ]) for variant in \ - "be 955802de462daf810c0ecc81ee2320a1" \ - "le 7fc6439aedfa00615bb1fe94d6701305" + "be c7cae57af35662acec3b945abcf7927c" \ + "le eb6b4ab9c27bfa0daa49bf2770bccb70" do set $variant AT_CHECK_UNQUOTED([sack --$[1] sys-file.sack > sys-file.sav], [0], [], [$[2] @@ -1031,17 +1089,17 @@ DISPLAY ATTRIBUTES. [[Variable,Description, FirstVariable,Custom attributes:, ,bert,123 -,fred[1],23 -,fred[2],34 -SecondVariable,Custom attributes:, +,adèle[1],23 +,adèle[2],34 +SécondVariable,Custom attributes:, ,xyzzy,quux Table: Custom data file attributes. Attribute,Value -SecondAttr[1],123 -SecondAttr[2],456 +SécondAttr[1],123 +SécondAttr[2],456 Attr1[1],Value1 -Attr1[2],'QuotedValue' +Attr1[2],'déclaration' ]]) done AT_CLEANUP