From: Ben Pfaff Date: Thu, 10 Jul 2025 00:48:13 +0000 (-0700) Subject: rust: Add support for decrypting encrypted SPSS files. X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=73b0ae791c31cdee17a3ec1e7a73e2d5eb17e816;p=pspp rust: Add support for decrypting encrypted SPSS files. Also, fix a bug in the C implementation of password decoding. --- diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 318d4e09c4..1e7795ae4c 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -385,6 +385,17 @@ version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" +[[package]] +name = "cmac" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8543454e3c3f5126effff9cd44d562af4e31fb8ce1cc0d3dcd8f084515dbc1aa" +dependencies = [ + "cipher", + "dbl", + "digest", +] + [[package]] name = "color" version = "0.2.4" @@ -474,6 +485,15 @@ dependencies = [ "parking_lot_core", ] +[[package]] +name = "dbl" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd2735a791158376708f9347fe8faba9667589d82427ef3aed6794a8981de3d9" +dependencies = [ + "generic-array", +] + [[package]] name = "deflate64" version = "0.1.9" @@ -1581,6 +1601,7 @@ dependencies = [ name = "pspp" version = "0.1.0" dependencies = [ + "aes", "anyhow", "binrw", "bitflags 2.9.1", @@ -1588,6 +1609,7 @@ dependencies = [ "chardetng", "chrono", "clap", + "cmac", "color", "csv", "derive_more", @@ -1612,6 +1634,7 @@ dependencies = [ "pspp-derive", "quick-xml", "rand", + "readpass", "serde", "smallstr", "smallvec", @@ -1621,6 +1644,7 @@ dependencies = [ "unicode-width", "windows-sys 0.48.0", "xmlwriter", + "zeroize", "zip", ] @@ -1704,6 +1728,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" +[[package]] +name = 
"readpass" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85614414429758be439b3cfc7ec2d883df5f3fa7027cc38f3b967ce72bfee60e" +dependencies = [ + "libc", + "windows-sys 0.60.2", + "zeroize", +] + [[package]] name = "redox_syscall" version = "0.5.12" @@ -2464,6 +2499,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.2", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -2488,13 +2532,29 @@ dependencies = [ "windows_aarch64_gnullvm 0.52.6", "windows_aarch64_msvc 0.52.6", "windows_i686_gnu 0.52.6", - "windows_i686_gnullvm", + "windows_i686_gnullvm 0.52.6", "windows_i686_msvc 0.52.6", "windows_x86_64_gnu 0.52.6", "windows_x86_64_gnullvm 0.52.6", "windows_x86_64_msvc 0.52.6", ] +[[package]] +name = "windows-targets" +version = "0.53.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef" +dependencies = [ + "windows_aarch64_gnullvm 0.53.0", + "windows_aarch64_msvc 0.53.0", + "windows_i686_gnu 0.53.0", + "windows_i686_gnullvm 0.53.0", + "windows_i686_msvc 0.53.0", + "windows_x86_64_gnu 0.53.0", + "windows_x86_64_gnullvm 0.53.0", + "windows_x86_64_msvc 0.53.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -2507,6 +2567,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" + [[package]] name = "windows_aarch64_msvc" version = "0.48.5" 
@@ -2519,6 +2585,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" + [[package]] name = "windows_i686_gnu" version = "0.48.5" @@ -2531,12 +2603,24 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" +[[package]] +name = "windows_i686_gnu" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" + [[package]] name = "windows_i686_gnullvm" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" + [[package]] name = "windows_i686_msvc" version = "0.48.5" @@ -2549,6 +2633,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" +[[package]] +name = "windows_i686_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" + [[package]] name = "windows_x86_64_gnu" version = "0.48.5" @@ -2561,6 +2651,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" +[[package]] +name = "windows_x86_64_gnu" 
+version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" + [[package]] name = "windows_x86_64_gnullvm" version = "0.48.5" @@ -2573,6 +2669,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" + [[package]] name = "windows_x86_64_msvc" version = "0.48.5" @@ -2585,6 +2687,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" + [[package]] name = "winnow" version = "0.7.10" diff --git a/rust/pspp/Cargo.toml b/rust/pspp/Cargo.toml index b13efd3b55..043fbae2a4 100644 --- a/rust/pspp/Cargo.toml +++ b/rust/pspp/Cargo.toml @@ -45,6 +45,10 @@ pangocairo = "0.20.7" zip = "4.0.0" xmlwriter = "0.1.0" csv = "1.3.1" +cmac = "0.7.2" +aes = "0.8.4" +readpass = "1.0.3" +zeroize = "1.8.1" [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.48.0", features = ["Win32_Globalization"] } diff --git a/rust/pspp/src/crypto/mod.rs b/rust/pspp/src/crypto/mod.rs new file mode 100644 index 0000000000..40ccfe0ca2 --- /dev/null +++ b/rust/pspp/src/crypto/mod.rs @@ -0,0 +1,666 @@ +//! # Decryption for SPSS encrypted files +//! +//! SPSS supports encryption using a password for data, viewer, and syntax +//! files. The encryption mechanism is poorly designed, so this module provides +//! 
support for decrypting, but not encrypting, the SPSS format. +//! Use [EncryptedFile] as the starting point for reading an encrypted file. +//! +//! SPSS also supports what it calls "encrypted passwords". Use [EncodedPassword] +//! to encode and decode these passwords. + +// Warn about missing docs, but not for items declared with `#[cfg(test)]`. +#![cfg_attr(not(test), warn(missing_docs))] + +use aes::{ + cipher::{generic_array::GenericArray, BlockDecrypt, KeyInit}, + Aes256, Aes256Dec, +}; +use cmac::{Cmac, Mac}; +use smallvec::SmallVec; +use std::{ + fmt::Debug, + io::{BufRead, Error as IoError, ErrorKind, Read, Seek, SeekFrom}, +}; +use thiserror::Error as ThisError; + +use binrw::{io::NoSeek, BinRead}; + +/// Error reading an encrypted file. +#[derive(Clone, Debug, ThisError)] +pub enum Error { + /// I/O error. + #[error("I/O error reading encrypted file wrapper ({0})")] + IoError(ErrorKind), + + /// Invalid padding in final encrypted data block. + #[error("Invalid padding in final encrypted data block")] + InvalidPadding, + + /// Not an encrypted file. + #[error("Not an encrypted file")] + NotEncrypted, + + /// Encrypted file has invalid length. + #[error("Encrypted file has invalid length {0} (expected 4 more than a multiple of 16).")] + InvalidLength(u64), + + /// Unknown file type. + #[error("Unknown file type {0:?}.")] + UnknownFileType(String), +} + +impl From for Error { + fn from(value: std::io::Error) -> Self { + Self::IoError(value.kind()) + } +} + +#[derive(BinRead)] +struct EncryptedHeader { + /// Fixed as `1c 00 00 00 00 00 00 00` in practice. + _ignore: [u8; 8], + + /// File type. + #[br(magic = b"ENCRYPTED")] + file_type: [u8; 3], + + /// Fixed as `15 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00` in practice. + _ignore2: [u8; 16], +} + +/// An encrypted file. +pub struct EncryptedFile { + reader: R, + file_type: FileType, + + /// Length of the whole encrypted file in bytes (including the 36-byte header).
+ length: u64, + + /// First block of ciphertext, for verifying that any password the user + /// tries is correct. + first_block: [u8; 16], + + /// Last block of ciphertext, for checking padding and determining the + /// plaintext length. + last_block: [u8; 16], +} + +/// Type of encrypted file. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum FileType { + /// A `.sps` syntax file. + Syntax, + + /// A `.spv` viewer file. + Viewer, + + /// A `.sav` data file. + Data, +} + +impl EncryptedFile +where + R: Read + Seek, +{ + /// Opens `reader` as an encrypted file. + /// + /// This reads enough of the file to verify that it is in the expected + /// format and returns an error if it cannot be read or is not the expected + /// format. + pub fn new(mut reader: R) -> Result { + let header = + EncryptedHeader::read_le(&mut NoSeek::new(&mut reader)).map_err( + |error| match error { + binrw::Error::BadMagic { .. } => Error::NotEncrypted, + binrw::Error::Io(error) => Error::IoError(error.kind()), + _ => unreachable!(), + }, + )?; + let file_type = match &header.file_type { + b"SAV" => FileType::Data, + b"SPV" => FileType::Viewer, + b"SPS" => FileType::Syntax, + _ => { + return Err(Error::UnknownFileType( + header.file_type.iter().map(|b| *b as char).collect(), + )) + } + }; + let mut first_block = [0; 16]; + reader.read_exact(&mut first_block)?; + let length = reader.seek(SeekFrom::End(-16))? + 16; + if length < 36 + 16 || (length - 36) % 16 != 0 { + return Err(Error::InvalidLength(length + 36)); + } + let mut last_block = [0; 16]; + reader.read_exact(&mut last_block)?; + reader.seek(SeekFrom::Start(36))?; + Ok(Self { + reader, + file_type, + length, + first_block, + last_block, + }) + } + + /// Tries to unlock the encrypted file using both `password` and with + /// `password` decoded with [EncodedPassword::decode]. If successful, + /// returns an [EncryptedReader] for the file; on failure, returns the + /// [EncryptedFile] again for another try. 
+ pub fn unlock(self, password: &[u8]) -> Result, Self> { + self.unlock_literal(password).or_else(|this| { + match EncodedPassword::from_encoded(password) { + Some(encoded) => this.unlock_literal(&encoded.decode()), + None => Err(this), + } + }) + } + + /// Tries to unlock the encrypted file using just `password`. If + /// successful, returns an [EncryptedReader] for the file; on failure, + /// returns the [EncryptedFile] again for another try. + /// + /// If the password itself might be encoded ("encrypted"), instead use + /// [Self::unlock] to try it both ways. + pub fn unlock_literal(self, password: &[u8]) -> Result, Self> { + // NIST SP 800-108 fixed data. + #[rustfmt::skip] + static FIXED: &[u8] = &[ + // i + 0x00, 0x00, 0x00, 0x01, + + // label + 0x35, 0x27, 0x13, 0xcc, 0x53, 0xa7, 0x78, 0x89, + 0x87, 0x53, 0x22, 0x11, 0xd6, 0x5b, 0x31, 0x58, + 0xdc, 0xfe, 0x2e, 0x7e, 0x94, 0xda, 0x2f, 0x00, + 0xcc, 0x15, 0x71, 0x80, 0x0a, 0x6c, 0x63, 0x53, + + // delimiter + 0x00, + + // context + 0x38, 0xc3, 0x38, 0xac, 0x22, 0xf3, 0x63, 0x62, + 0x0e, 0xce, 0x85, 0x3f, 0xb8, 0x07, 0x4c, 0x4e, + 0x2b, 0x77, 0xc7, 0x21, 0xf5, 0x1a, 0x80, 0x1d, + 0x67, 0xfb, 0xe1, 0xe1, 0x83, 0x07, 0xd8, 0x0d, + + // L + 0x00, 0x00, 0x01, 0x00, + ]; + + // Truncate password to at most 10 bytes. + let password = password.get(..10).unwrap_or(password); + let n = password.len(); + + // padded_password = password padded with zeros to 32 bytes. + let mut padded_password = [0; 32]; + padded_password[..n].copy_from_slice(password); + + // cmac = CMAC(padded_password, fixed). + let mut cmac = as Mac>::new_from_slice(&padded_password).unwrap(); + cmac.update(FIXED); + let cmac = cmac.finalize().into_bytes(); + + // The key is the cmac repeated twice. + let mut key = [0; 32]; + key[..16].copy_from_slice(cmac.as_slice()); + key[16..].copy_from_slice(cmac.as_slice()); + + // Use key to initialize AES. + let aes = ::new_from_slice(&key).unwrap(); + + // Decrypt first block to verify password. 
+ let mut out = [0; 16]; + aes.decrypt_block_b2b( + &GenericArray::from_slice(&self.first_block), + GenericArray::from_mut_slice(&mut out), + ); + static MAGIC: &[&[u8]] = &[ + b"$FL2@(#)", + b"$FL3@(#)", + b"* Encoding", + b"PK\x03\x04\x14\0\x08", + ]; + if !MAGIC.iter().any(|magic| out.starts_with(*magic)) { + return Err(self); + } + + // Decrypt last block to check padding and get final length. + aes.decrypt_block_b2b( + &GenericArray::from_slice(&self.last_block), + GenericArray::from_mut_slice(&mut out), + ); + let Some(padding_length) = parse_padding(&out) else { + return Err(self); + }; + + Ok(EncryptedReader::new( + self.reader, + aes, + self.file_type, + self.length - 36 - padding_length as u64, + )) + } + + /// Returns the type of encrypted file. + pub fn file_type(&self) -> FileType { + self.file_type + } +} + +fn parse_padding(block: &[u8; 16]) -> Option { + let pad = block[15] as usize; + if (1..=16).contains(&pad) && block[16 - pad..].iter().all(|b| *b == pad as u8) { + Some(pad) + } else { + None + } +} + +impl Debug for EncryptedFile +where + R: Read, +{ + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "EncryptedFile({:?})", &self.file_type) + } +} + +/// Encrypted file reader. +/// +/// This implements [Read] and [Seek] for SPSS encrypted files. To construct an +/// [EncryptedReader], call [EncryptedFile::new], then [EncryptedFile::unlock]. +pub struct EncryptedReader { + /// Underlying reader. + reader: R, + + /// AES-256 decryption key. + aes: Aes256Dec, + + /// Type of file. + file_type: FileType, + + /// Plaintext file length (not including the file header or padding). + length: u64, + + /// Plaintext data buffer. + buffer: Box<[u8; 4096]>, + + /// Plaintext offset of the byte in `buffer[0]`. A multiple of 16 less than + /// or equal to `length`. + start: u64, + + /// Number of bytes in buffer (`0 <= head <= 4096`). + head: usize, + + /// Offset in buffer of the next byte to read (`tail <= head`).
+ tail: usize, +} + +impl EncryptedReader { + fn new(reader: R, aes: Aes256Dec, file_type: FileType, length: u64) -> Self { + Self { + reader, + aes, + file_type, + length, + buffer: Box::new([0; 4096]), + start: 0, + head: 0, + tail: 0, + } + } + + fn read_buffer(&mut self, buf: &mut [u8]) -> Result { + let n = buf.len().min(self.head - self.tail); + buf[..n].copy_from_slice(&self.buffer[self.tail..n + self.tail]); + self.tail += n; + Ok(n) + } + + /// Returns the type of encrypted file. + pub fn file_type(&self) -> FileType { + self.file_type + } +} + +impl EncryptedReader +where + R: Read, +{ + fn fill_buffer(&mut self, offset: u64) -> Result<(), IoError> { + self.start = offset / 16 * 16; + self.head = 0; + self.tail = (offset % 16) as usize; + let n = self.buffer.len().min((self.length - self.start) as usize); + self.reader + .read_exact(&mut self.buffer[..n.next_multiple_of(16)])?; + for offset in (0..n).step_by(16) { + self.aes.decrypt_block(GenericArray::from_mut_slice( + &mut self.buffer[offset..offset + 16], + )); + } + self.head = n; + Ok(()) + } +} + +impl Read for EncryptedReader +where + R: Read, +{ + fn read(&mut self, buf: &mut [u8]) -> Result { + if self.tail < self.head { + self.read_buffer(buf) + } else { + let offset = self.start + self.head as u64; + if offset < self.length { + self.fill_buffer(offset)?; + self.read_buffer(buf) + } else { + Ok(0) + } + } + } +} + +impl Seek for EncryptedReader +where + R: Read + Seek, +{ + fn seek(&mut self, pos: SeekFrom) -> Result { + let offset = match pos { + SeekFrom::Start(offset) => Some(offset), + SeekFrom::End(relative) => self.length.checked_add_signed(relative), + SeekFrom::Current(relative) => { + (self.start + self.tail as u64).checked_add_signed(relative) + } + } + .filter(|offset| *offset < u64::MAX - 36) + .ok_or(IoError::from(ErrorKind::InvalidInput))?; + if offset != self.start + self.tail as u64 { + self.reader.seek(SeekFrom::Start(offset / 16 * 16 + 36))?; + self.fill_buffer(offset)?; + } + 
Ok(offset) + } +} + +impl BufRead for EncryptedReader +where + R: Read + Seek, +{ + fn fill_buf(&mut self) -> std::io::Result<&[u8]> { + if self.tail >= self.head { + let offset = self.start + self.head as u64; + if offset < self.length { + self.fill_buffer(offset)?; + } + } + Ok(&self.buffer[self.tail..self.head]) + } + + fn consume(&mut self, amount: usize) { + self.tail += amount; + debug_assert!(self.tail <= self.head); + } +} + +const fn b(x: i32) -> u16 { + 1 << x +} + +static AH: [[u16; 2]; 4] = [ + [b(2), b(2) | b(3) | b(6) | b(7)], + [b(3), b(0) | b(1) | b(4) | b(5)], + [b(4) | b(7), b(8) | b(9) | b(12) | b(13)], + [b(5) | b(6), b(10) | b(11) | b(14) | b(15)], +]; + +static AL: [[u16; 2]; 4] = [ + [b(0) | b(3) | b(12) | b(15), b(0) | b(1) | b(4) | b(5)], + [b(1) | b(2) | b(13) | b(14), b(2) | b(3) | b(6) | b(7)], + [b(4) | b(7) | b(8) | b(11), b(8) | b(9) | b(12) | b(13)], + [b(5) | b(6) | b(9) | b(10), b(10) | b(11) | b(14) | b(15)], +]; + +static BH: [[u16; 2]; 4] = [ + [b(2), b(1) | b(3) | b(9) | b(11)], + [b(3), b(0) | b(2) | b(8) | b(10)], + [b(4) | b(7), b(4) | b(6) | b(12) | b(14)], + [b(5) | b(6), b(5) | b(7) | b(13) | b(15)], +]; + +static BL: [[u16; 2]; 4] = [ + [b(0) | b(3) | b(12) | b(15), b(0) | b(2) | b(8) | b(10)], + [b(1) | b(2) | b(13) | b(14), b(1) | b(3) | b(9) | b(11)], + [b(4) | b(7) | b(8) | b(11), b(4) | b(6) | b(12) | b(14)], + [b(5) | b(6) | b(9) | b(10), b(5) | b(7) | b(13) | b(15)], +]; + +fn decode_nibble(table: &[[u16; 2]; 4], nibble: u8) -> u16 { + for section in table.iter() { + if section[0] & (1 << nibble) != 0 { + return section[1]; + } + } + 0 +} + +fn find_1bit(x: u16) -> Option { + x.is_power_of_two().then(|| x.trailing_zeros() as u8) +} + +fn decode_pair(a: u8, b: u8) -> Option { + let x = find_1bit(decode_nibble(&AH, a >> 4) & decode_nibble(&BH, b >> 4))?; + let y = find_1bit(decode_nibble(&AL, a & 15) & decode_nibble(&BL, b & 15))?; + Some((x << 4) | y) +} + +fn encode_nibble(table: &[[u16; 2]; 4], nibble: u8) -> Vec 
{ + for section in table.iter() { + if section[1] & (1 << nibble) != 0 { + let mut outputs = Vec::with_capacity(4); + let mut bits = section[0]; + while bits != 0 { + outputs.push(bits.trailing_zeros() as u8); + bits &= bits - 1; + } + return outputs; + } + } + unreachable!() +} + +fn encode_byte(hi_table: &[[u16; 2]; 4], lo_table: &[[u16; 2]; 4], byte: u8) -> Vec { + let hi_variants = encode_nibble(hi_table, byte >> 4); + let lo_variants = encode_nibble(lo_table, byte & 15); + let mut variants = Vec::with_capacity(hi_variants.len() * lo_variants.len()); + for hi in hi_variants.iter().copied() { + for lo in lo_variants.iter().copied() { + let byte = (hi << 4) | lo; + if byte != 127 { + variants.push(byte as char); + } + } + } + variants +} + +/// An encoded password. +/// +/// SPSS calls these "encrypted passwords", but they are not encrypted. They +/// are encoded with a simple scheme, analogous to base64 encoding but +/// one-to-many: any plaintext password maps to many possible encoded passwords. +/// +/// The encoding scheme maps each plaintext password byte to 2 ASCII characters, +/// using at most the first 10 bytes of the plaintext password. Thus, an +/// encoded password is always a multiple of 2 characters long, and never longer +/// than 20 characters. The characters in an encoded password are always in the +/// printable ASCII range 32 (space) through 126. Each successive pair of characters in +/// the password encodes a single byte in the plaintext password. +/// +/// This struct supports both encoding and decoding passwords. +#[derive(Clone, Debug)] +pub struct EncodedPassword(Vec>); + +impl EncodedPassword { + /// Creates an [EncodedPassword] from an already-encoded password `encoded`. + /// Returns `None` if `encoded` is not a valid encoded password.
+ pub fn from_encoded(encoded: &[u8]) -> Option { + if encoded.len() > 20 + || encoded.len() % 2 != 0 + || !encoded.iter().all(|byte| (32..=127).contains(byte)) + { + return None; + } + + Some(EncodedPassword( + encoded.iter().map(|byte| vec![*byte as char]).collect(), + )) + } + + /// Returns an [EncodedPassword] as an encoded version of the given + /// `plaintext` password. Only the first 10 bytes, at most, of the + /// plaintext password is used. + pub fn from_plaintext(plaintext: &[u8]) -> EncodedPassword { + let input = plaintext.get(..10).unwrap_or(plaintext); + EncodedPassword( + input + .iter() + .copied() + .map(|byte| [encode_byte(&AH, &AL, byte), encode_byte(&BH, &BL, byte)]) + .flatten() + .collect(), + ) + } + + /// Returns the number of variations of this encoded password. + /// + /// An [EncodedPassword] created by [EncodedPassword::from_plaintext] has + /// many variations: between `16**n` and `32**n` for an `n`-byte plaintext + /// password, so up to `32**10` (about 1e15) for the 10-byte longest + /// plaintext passwords. + /// + /// An [EncodedPassword] created by [EncodedPassword::from_encoded] has only + /// a single variation, the one passed in by that function. + pub fn n_variants(&self) -> u64 { + self.0 + .iter() + .map(|variants| variants.len() as u64) + .product() + } + + /// Returns one variation of this encoded password, numbered `index`. All + /// variations decode the same way. + pub fn variant(&self, mut index: u64) -> String { + let mut output = String::with_capacity(20); + for variants in &self.0 { + let n = variants.len() as u64; + output.push(variants[(index % n) as usize]); + index /= n; + } + output + } + + /// Returns the decoded version of this encoded password. 
+ pub fn decode(&self) -> SmallVec<[u8; 10]> { + let mut output = SmallVec::new(); + for [a, b] in self.0.as_chunks::<2>().0 { + output.push(decode_pair(a[0] as u8, b[0] as u8).unwrap()); + } + output + } +} + +#[cfg(test)] +mod test { + use std::{io::Cursor, path::Path}; + + use crate::crypto::{EncodedPassword, EncryptedFile, FileType}; + + fn test_decrypt(input_name: &Path, expected_name: &Path, password: &str, file_type: FileType) { + let input_filename = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("src/crypto/testdata") + .join(input_name); + let input = std::fs::read(&input_filename).unwrap(); + let mut cursor = Cursor::new(&input); + let file = EncryptedFile::new(&mut cursor).unwrap(); + assert_eq!(file.file_type(), file_type); + let mut reader = file.unlock_literal(password.as_bytes()).unwrap(); + assert_eq!(reader.file_type(), file_type); + let mut actual = Vec::new(); + std::io::copy(&mut reader, &mut actual).unwrap(); + + let expected_filename = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("src/crypto/testdata") + .join(expected_name); + let expected = std::fs::read(&expected_filename).unwrap(); + if actual != expected { + panic!(); + } + } + + #[test] + fn sys_file() { + test_decrypt( + Path::new("test-encrypted.sav"), + Path::new("test.sav"), + "pspp", + FileType::Data, + ); + } + + #[test] + fn syntax_file() { + test_decrypt( + Path::new("test-encrypted.sps"), + Path::new("test.sps"), + "password", + FileType::Syntax, + ); + } + + #[test] + fn spv_file() { + test_decrypt( + Path::new("test-encrypted.spv"), + Path::new("test.spv"), + "Password1", + FileType::Viewer, + ); + } + + #[test] + fn password_encoding() { + // Decode a few specific passwords. 
+ assert_eq!( + EncodedPassword::from_encoded(b"-|") + .unwrap() + .decode() + .as_slice(), + b"b" + ); + assert_eq!( + EncodedPassword::from_encoded(b" A") + .unwrap() + .decode() + .as_slice(), + b"a" + ); + + // Check that the encoding and decoding algorithms are inverses + // for individual characters at least. + for plaintext in 0..=255 { + let encoded = EncodedPassword::from_plaintext(&[plaintext]); + for variant in 0..encoded.n_variants() { + let encoded_variant = encoded.variant(variant); + let decoded = EncodedPassword::from_encoded(encoded_variant.as_bytes()) + .unwrap() + .decode(); + assert_eq!(&[plaintext], decoded.as_slice()); + } + } + } +} diff --git a/rust/pspp/src/crypto/testdata/test-encrypted.sav b/rust/pspp/src/crypto/testdata/test-encrypted.sav new file mode 100644 index 0000000000..2d9f531102 Binary files /dev/null and b/rust/pspp/src/crypto/testdata/test-encrypted.sav differ diff --git a/rust/pspp/src/crypto/testdata/test-encrypted.sps b/rust/pspp/src/crypto/testdata/test-encrypted.sps new file mode 100644 index 0000000000..58ed181f3a Binary files /dev/null and b/rust/pspp/src/crypto/testdata/test-encrypted.sps differ diff --git a/rust/pspp/src/crypto/testdata/test-encrypted.spv b/rust/pspp/src/crypto/testdata/test-encrypted.spv new file mode 100644 index 0000000000..da8be2c80f Binary files /dev/null and b/rust/pspp/src/crypto/testdata/test-encrypted.spv differ diff --git a/rust/pspp/src/crypto/testdata/test.sav b/rust/pspp/src/crypto/testdata/test.sav new file mode 100644 index 0000000000..a84e8f15a8 Binary files /dev/null and b/rust/pspp/src/crypto/testdata/test.sav differ diff --git a/rust/pspp/src/crypto/testdata/test.sps b/rust/pspp/src/crypto/testdata/test.sps new file mode 100644 index 0000000000..0f48aa196d --- /dev/null +++ b/rust/pspp/src/crypto/testdata/test.sps @@ -0,0 +1,13 @@ +* Encoding: windows-1252. +DATA LIST LIST /name (a25) quantity (f8). +BEGIN DATA. +widgets 10345 +oojars 2345 +dubreys 98 +thingumies 518 +END DATA. 
+ +LIST. + +DESCRIPTIVES /quantity + /statistics ALL. \ No newline at end of file diff --git a/rust/pspp/src/crypto/testdata/test.spv b/rust/pspp/src/crypto/testdata/test.spv new file mode 100644 index 0000000000..891263dccd Binary files /dev/null and b/rust/pspp/src/crypto/testdata/test.spv differ diff --git a/rust/pspp/src/lib.rs b/rust/pspp/src/lib.rs index 7fde26fc5e..8b5d81f400 100644 --- a/rust/pspp/src/lib.rs +++ b/rust/pspp/src/lib.rs @@ -16,6 +16,7 @@ pub mod calendar; pub mod command; +pub mod crypto; pub mod dictionary; pub mod endian; pub mod engine; diff --git a/rust/pspp/src/main.rs b/rust/pspp/src/main.rs index c874f5dcd7..8432f4267d 100644 --- a/rust/pspp/src/main.rs +++ b/rust/pspp/src/main.rs @@ -14,9 +14,10 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -use anyhow::Result; +use anyhow::{anyhow, Result}; use clap::{Args, Parser, Subcommand, ValueEnum}; use encoding_rs::Encoding; +use pspp::crypto::EncryptedFile; use pspp::sys::cooked::{Error, Headers}; use pspp::sys::raw::{encoding_from_headers, Decoder, Magic, Reader, Record, Warning}; use std::fs::File; @@ -24,6 +25,7 @@ use std::io::{stdout, BufReader, Write}; use std::path::{Path, PathBuf}; use std::str; use thiserror::Error as ThisError; +use zeroize::Zeroizing; /// PSPP, a program for statistical analysis of sampled data. #[derive(Parser, Debug)] @@ -117,6 +119,42 @@ impl Convert { } } +/// Decrypts an encrypted SPSS data, output, or syntax file. +#[derive(Args, Clone, Debug)] +struct Decrypt { + /// Input file name. + input: PathBuf, + + /// Output file name. + output: PathBuf, + + /// Password for decryption, with or without what SPSS calls "password encryption". + /// + /// If omitted, PSPP will prompt interactively for the password. 
+ #[clap(short, long)] + password: Option, +} + +impl Decrypt { + fn run(self) -> Result<()> { + let input = EncryptedFile::new(File::open(&self.input)?)?; + let password = match self.password { + Some(password) => Zeroizing::new(password), + None => { + eprintln!("Please enter the password for {}:", self.input.display()); + readpass::from_tty().unwrap() + } + }; + let mut reader = match input.unlock(password.as_bytes()) { + Ok(reader) => reader, + Err(_) => return Err(anyhow!("Incorrect password.")), + }; + let mut writer = File::create(self.output)?; + std::io::copy(&mut reader, &mut writer)?; + Ok(()) + } +} + /// Dissects SPSS system files. #[derive(Args, Clone, Debug)] struct Dissect { @@ -149,6 +187,7 @@ impl Dissect { #[derive(Subcommand, Clone, Debug)] enum Command { Convert(Convert), + Decrypt(Decrypt), Dissect(Dissect), } @@ -156,6 +195,7 @@ impl Command { fn run(self) -> Result<()> { match self { Command::Convert(convert) => convert.run(), + Command::Decrypt(decrypt) => decrypt.run(), Command::Dissect(dissect) => dissect.run(), } } diff --git a/rust/pspp/src/sys/test.rs b/rust/pspp/src/sys/test.rs index fc39ec2730..16ec17551e 100644 --- a/rust/pspp/src/sys/test.rs +++ b/rust/pspp/src/sys/test.rs @@ -14,9 +14,15 @@ // You should have received a copy of the GNU General Public License along with // this program. If not, see . 
-use std::{io::Cursor, path::Path, sync::Arc}; +use std::{ + fs::File, + io::{Cursor, Read, Seek}, + path::Path, + sync::Arc, +}; use crate::{ + crypto::EncryptedFile, endian::Endian, output::{ pivot::{test::assert_lines_eq, Axis3, Dimension, Group, PivotTable, Value}, @@ -542,12 +548,31 @@ fn duplicate_variable_name() { test_sack_sysfile("duplicate_variable_name"); } +#[test] +fn encrypted_file() { + test_encrypted_sysfile("test-encrypted.sav", "pspp"); +} + fn test_raw_sysfile(name: &str) { let input_filename = Path::new(env!("CARGO_MANIFEST_DIR")) .join("src/sys/testdata") .join(name) .with_extension("sav"); - let sysfile = std::fs::read(&input_filename).unwrap(); + let sysfile = File::open(&input_filename).unwrap(); + let expected_filename = input_filename.with_extension("expected"); + let expected = String::from_utf8(std::fs::read(&expected_filename).unwrap()).unwrap(); + test_sysfile(sysfile, &expected, &expected_filename); +} + +fn test_encrypted_sysfile(name: &str, password: &str) { + let input_filename = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("src/sys/testdata") + .join(name) + .with_extension("sav"); + let sysfile = EncryptedFile::new(File::open(&input_filename).unwrap()) + .unwrap() + .unlock(password.as_bytes()) + .unwrap(); let expected_filename = input_filename.with_extension("expected"); let expected = String::from_utf8(std::fs::read(&expected_filename).unwrap()).unwrap(); test_sysfile(sysfile, &expected, &expected_filename); @@ -570,14 +595,16 @@ fn test_sack_sysfile(name: &str) { }, ); let sysfile = sack(&input, Some(&input_filename), endian).unwrap(); - test_sysfile(sysfile, &expected, &expected_filename); + test_sysfile(Cursor::new(sysfile), &expected, &expected_filename); } } -fn test_sysfile(sysfile: Vec, expected: &str, expected_filename: &Path) { - let cursor = Cursor::new(sysfile); +fn test_sysfile(sysfile: R, expected: &str, expected_filename: &Path) +where + R: Read + Seek + 'static, +{ let mut warnings = Vec::new(); - let mut 
reader = Reader::new(cursor, |warning| warnings.push(warning)).unwrap(); + let mut reader = Reader::new(sysfile, |warning| warnings.push(warning)).unwrap(); let output = match reader.headers().collect() { Ok(headers) => { let cases = reader.cases(); diff --git a/rust/pspp/src/sys/testdata/test-encrypted.expected b/rust/pspp/src/sys/testdata/test-encrypted.expected new file mode 100644 index 0000000000..e98ba62f71 --- /dev/null +++ b/rust/pspp/src/sys/testdata/test-encrypted.expected @@ -0,0 +1,93 @@ +╭──────────────────────┬────────────────────────────────────────────╮ +│ Created │ 04-OCT-2013 19:13:09│ +├──────────────────────┼────────────────────────────────────────────┤ +│Writer Product │@(#) IBM SPSS STATISTICS MS Windows 22.0.0.0│ +│ Version │22.0.0 │ +├──────────────────────┼────────────────────────────────────────────┤ +│ Compression │SAV │ +│ Number of Cases│ 17│ +╰──────────────────────┴────────────────────────────────────────────╯ + +╭─────────┬─╮ +│Variables│5│ +╰─────────┴─╯ + +╭────────────────────────────────────────────────────┬────────┬────────────────────────────────────────────────────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│ Label │Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├────────────────────────────────────────────────────┼────────┼────────────────────────────────────────────────────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│I am satisfied with the level of service │ 1│I am satisfied with the level of service │Ordinal │Input│ 8│Right │F8.0 │F8.0 │ │ +│The value for money was good │ 2│The value for money was good │Ordinal │Input│ 8│Right │F8.0 │F8.0 │ │ +│The staff were slow in responding │ 3│The staff were slow in responding │Ordinal │Input│ 8│Right │F8.0 │F8.0 │ │ +│My concerns were dealt with in an efficient manner │ 4│My concerns were dealt with in an efficient manner │Ordinal │Input│ 8│Right │F8.0 │F8.0 
│ │ +│There was too much noise in the rooms │ 5│There was too much noise in the rooms │Ordinal │Input│ 8│Right │F8.0 │F8.0 │ │ +╰────────────────────────────────────────────────────┴────────┴────────────────────────────────────────────────────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ + +╭──────────────────────────────────────────────────────┬─────────────────╮ +│Variable Value │ │ +├──────────────────────────────────────────────────────┼─────────────────┤ +│I am satisfied with the level of service 1│Strongly Disagree│ +│ 2│Disagree │ +│ 3│No Opinion │ +│ 4│Agree │ +│ 5│Strongly Agree │ +├──────────────────────────────────────────────────────┼─────────────────┤ +│The value for money was good 1│Strongly Disagree│ +│ 2│Disagree │ +│ 3│No Opinion │ +│ 4│Agree │ +│ 5│Strongly Agree │ +├──────────────────────────────────────────────────────┼─────────────────┤ +│The staff were slow in responding 1│Strongly Disagree│ +│ 2│Disagree │ +│ 3│No Opinion │ +│ 4│Agree │ +│ 5│Strongly Agree │ +├──────────────────────────────────────────────────────┼─────────────────┤ +│My concerns were dealt with in an efficient manner 1│Strongly Disagree│ +│ 2│Disagree │ +│ 3│No Opinion │ +│ 4│Agree │ +│ 5│Strongly Agree │ +├──────────────────────────────────────────────────────┼─────────────────┤ +│There was too much noise in the rooms 1│Strongly Disagree│ +│ 2│Disagree │ +│ 3│No Opinion │ +│ 4│Agree │ +│ 5│Strongly Agree │ +╰──────────────────────────────────────────────────────┴─────────────────╯ + +╭───────────────────────────────────────────────────────────┬─────╮ +│Variable and Name │Value│ +├───────────────────────────────────────────────────────────┼─────┤ +│I am satisfied with the level of service $@Role│0 │ +├───────────────────────────────────────────────────────────┼─────┤ +│The value for money was good $@Role│0 │ +├───────────────────────────────────────────────────────────┼─────┤ +│The staff were slow in responding $@Role│0 │ 
+├───────────────────────────────────────────────────────────┼─────┤ +│My concerns were dealt with in an efficient manner $@Role│0 │ +├───────────────────────────────────────────────────────────┼─────┤ +│There was too much noise in the rooms $@Role│0 │ +╰───────────────────────────────────────────────────────────┴─────╯ + +╭────┬────────────────────────────────────────┬────────────────────────────┬────────────────────────────────────┬────────────────────────────────────────────────────┬────────────────────────────────────────╮ +│Case│I am satisfied with the level of service│The value for money was good│The staff were slow in responding │My concerns were dealt with in an efficient manner │There was too much noise in the rooms │ +├────┼────────────────────────────────────────┼────────────────────────────┼────────────────────────────────────┼────────────────────────────────────────────────────┼────────────────────────────────────────┤ +│1 │ 4.00│ 2.00│ 3.00│ 4.00│ 1.00│ +│2 │ 1.00│ 1.00│ 3.00│ 1.00│ 1.00│ +│3 │ 5.00│ 2.00│ 2.00│ 3.00│ 4.00│ +│4 │ 3.00│ 1.00│ 3.00│ 1.00│ 2.00│ +│5 │ 5.00│ 3.00│ 1.00│ 5.00│ 3.00│ +│6 │ 1.00│ 2.00│ 5.00│ 4.00│ 2.00│ +│7 │ 3.00│ 2.00│ 4.00│ 3.00│ 1.00│ +│8 │ 1.00│ 4.00│ 5.00│ 2.00│ 1.00│ +│9 │ 3.00│ 2.00│ 3.00│ 1.00│ 2.00│ +│10 │ 2.00│ 5.00│ 4.00│ 2.00│ 1.00│ +│11 │ 4.00│ 2.00│ 2.00│ 3.00│ 5.00│ +│12 │ 2.00│ 1.00│ 4.00│ 1.00│ 1.00│ +│13 │ 1.00│ 2.00│ 5.00│ 5.00│ 2.00│ +│14 │ 2.00│ 3.00│ 3.00│ 3.00│ 1.00│ +│15 │ 4.00│ 1.00│ 1.00│ 1.00│ 3.00│ +│16 │ 1.00│ 1.00│ 5.00│ 1.00│ 2.00│ +│17 │ 2.00│ 5.00│ 5.00│ 2.00│ 2.00│ +╰────┴────────────────────────────────────────┴────────────────────────────┴────────────────────────────────────┴────────────────────────────────────────────────────┴────────────────────────────────────────╯ diff --git a/rust/pspp/src/sys/testdata/test-encrypted.sav b/rust/pspp/src/sys/testdata/test-encrypted.sav new file mode 100644 index 0000000000..2d9f531102 Binary files /dev/null and 
b/rust/pspp/src/sys/testdata/test-encrypted.sav differ diff --git a/src/data/encrypted-file.c b/src/data/encrypted-file.c index c0124cbee9..2c82f8f177 100644 --- a/src/data/encrypted-file.c +++ b/src/data/encrypted-file.c @@ -176,7 +176,7 @@ encrypted_file_close (struct encrypted_file *f) static const uint16_t m0[4][2] = { { b(2), b(2) | b(3) | b(6) | b(7) }, { b(3), b(0) | b(1) | b(4) | b(5) }, - { b(4) | b(7), b(8) | b(9) | b(12) | b(14) }, + { b(4) | b(7), b(8) | b(9) | b(12) | b(13) }, { b(5) | b(6), b(10) | b(11) | b(14) | b(15) }, };