From 44d6106d18d8592e2996b1225a469f0acac04922 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Tue, 17 Jun 2025 15:13:41 -0700 Subject: [PATCH] tests --- rust/Cargo.lock | 7 ---- rust/pspp/Cargo.toml | 1 - rust/pspp/src/hexfloat.rs | 8 +++-- rust/pspp/src/lib.rs | 1 + rust/pspp/src/locale_charset.rs | 8 ++--- rust/pspp/src/sys/cooked.rs | 35 ++++++++++++++++++- rust/pspp/src/sys/sack.rs | 3 +- rust/pspp/src/sys/test.rs | 5 +++ .../testdata/wrong_special_floats.expected | 24 +++++++++++++ .../sys/testdata/wrong_special_floats.sack | 15 ++++++++ 10 files changed, 88 insertions(+), 19 deletions(-) create mode 100644 rust/pspp/src/sys/testdata/wrong_special_floats.expected create mode 100644 rust/pspp/src/sys/testdata/wrong_special_floats.sack diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 62cc5b18cf..44608da21e 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -645,12 +645,6 @@ dependencies = [ "miniz_oxide", ] -[[package]] -name = "float_next_after" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8" - [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1589,7 +1583,6 @@ dependencies = [ "finl_unicode", "flagset", "flate2", - "float_next_after", "hexplay", "indexmap", "itertools", diff --git a/rust/pspp/Cargo.toml b/rust/pspp/Cargo.toml index 34cbc57c04..038d467bc2 100644 --- a/rust/pspp/Cargo.toml +++ b/rust/pspp/Cargo.toml @@ -9,7 +9,6 @@ anyhow = "1.0.69" clap = { version = "4.1.7", features = ["derive", "wrap_help"] } encoding_rs = "0.8.32" flate2 = "1.0.26" -float_next_after = "1.0.0" hexplay = "0.2.1" lazy_static = "1.4.0" num = "0.4.0" diff --git a/rust/pspp/src/hexfloat.rs b/rust/pspp/src/hexfloat.rs index b885fb2266..ffa0ae66cd 100644 --- a/rust/pspp/src/hexfloat.rs +++ b/rust/pspp/src/hexfloat.rs @@ -1,5 +1,8 @@ use num::Float; -use std::{num::FpCategory, fmt::{Display, Formatter, Result}}; +use std::{ + fmt::{Display, Formatter, Result}, + num::FpCategory, +}; pub struct HexFloat(pub T); @@ -34,7 +37,7 @@ impl Display for HexFloat { #[cfg(test)] mod hex_float_tests { - use crate::HexFloat; + use crate::hexfloat::HexFloat; use num::Float; #[test] @@ -49,4 +52,3 @@ mod hex_float_tests { assert_eq!(format!("{}", HexFloat(f64::neg_zero())), "-0.0"); } } - diff --git a/rust/pspp/src/lib.rs b/rust/pspp/src/lib.rs index b78b711bb4..d28e5e827a 100644 --- a/rust/pspp/src/lib.rs +++ b/rust/pspp/src/lib.rs @@ -4,6 +4,7 @@ pub mod dictionary; pub mod endian; pub mod engine; pub mod format; +pub mod hexfloat; pub mod identifier; pub mod integer; pub mod lex; diff --git a/rust/pspp/src/locale_charset.rs b/rust/pspp/src/locale_charset.rs index 596fd62406..f3ab0c1841 100644 --- a/rust/pspp/src/locale_charset.rs +++ b/rust/pspp/src/locale_charset.rs @@ -18,7 +18,7 @@ // Written by Bruno Haible . Translated to Rust by Ben Pfaff // . -use lazy_static::lazy_static; +use std::sync::LazyLock; fn map_aliases(s: &str) -> &'static str { #[cfg(target_os = "freebsd")] @@ -298,9 +298,7 @@ mod inner { /// Returns the character set used by the locale configured in the operating /// system. pub fn locale_charset() -> &'static str { - lazy_static! { - static ref LOCALE_CHARSET: &'static str = - map_aliases(&inner::locale_charset().unwrap_or(String::from("UTF-8"))); - } + static LOCALE_CHARSET: LazyLock<&'static str> = + LazyLock::new(|| map_aliases(&inner::locale_charset().unwrap_or(String::from("UTF-8")))); &LOCALE_CHARSET } diff --git a/rust/pspp/src/sys/cooked.rs b/rust/pspp/src/sys/cooked.rs index 86c4ca9a17..fc0a28988e 100644 --- a/rust/pspp/src/sys/cooked.rs +++ b/rust/pspp/src/sys/cooked.rs @@ -8,6 +8,7 @@ use crate::{ }, endian::Endian, format::{Error as FormatError, Format, UncheckedFormat}, + hexfloat::HexFloat, identifier::{ByIdentifier, Error as IdError, Identifier}, output::pivot::{Group, Value}, sys::{ @@ -30,7 +31,7 @@ use thiserror::Error as ThisError; pub use crate::sys::raw::{CategoryLabels, Compression}; -#[derive(ThisError, Clone, Debug, PartialEq, Eq)] +#[derive(ThisError, Clone, Debug)] pub enum Error { #[error("Missing header record")] MissingHeaderRecord, @@ -224,6 +225,17 @@ pub enum Error { )] UnexpectedEndianess { actual: i32, expected: i32 }, + #[error( + "System file specifies value {actual:?} ({}) as {name} but {expected:?} ({}) was expected.", + HexFloat(*actual), + HexFloat(*expected) + )] + UnexpectedFloatValue { + actual: f64, + expected: f64, + name: &'static str, + }, + #[error("Details TBD (cooked)")] TBD, } @@ -562,6 +574,27 @@ pub fn decode( } }; + if let Some(float_info) = &headers.float_info { + for (expected, expected2, actual, name) in [ + (f64::MIN, None, float_info.sysmis, "SYSMIS"), + (f64::MAX, None, float_info.highest, "HIGHEST"), + ( + f64::MIN, + Some(f64::MIN.next_up()), + float_info.lowest, + "LOWEST", + ), + ] { + if actual != expected && expected2.is_none_or(|expected2| expected2 != actual) { + warn(Error::UnexpectedFloatValue { + expected, + actual, + name, + }); + } + } + } + if let Some(nominal_case_size) = headers.header.nominal_case_size { let n_vars = headers.variable.len(); if n_vars != nominal_case_size as usize diff --git a/rust/pspp/src/sys/sack.rs b/rust/pspp/src/sys/sack.rs index 4620c4c9a8..160fccf50b 100644 --- a/rust/pspp/src/sys/sack.rs +++ b/rust/pspp/src/sys/sack.rs @@ -1,4 +1,3 @@ -use float_next_after::NextAfter; use num::{Bounded, Zero}; use ordered_float::OrderedFloat; use std::{ @@ -496,7 +495,7 @@ impl<'a> Lexer<'a> { "i64" => Token::I64, "SYSMIS" => Token::Float(OrderedFloat(-f64::MAX)), "PCSYSMIS" => Token::PcSysmis, - "LOWEST" => Token::Float((-f64::MAX).next_after(0.0).into()), + "LOWEST" => Token::Float((-f64::MAX).next_up().into()), "HIGHEST" => Token::Float(f64::MAX.into()), "ENDIAN" => Token::Integer(if self.endian == Endian::Big { 1 } else { 2 }), "COUNT" => Token::Count, diff --git a/rust/pspp/src/sys/test.rs b/rust/pspp/src/sys/test.rs index 479f1d623c..bebe3b67ad 100644 --- a/rust/pspp/src/sys/test.rs +++ b/rust/pspp/src/sys/test.rs @@ -222,6 +222,11 @@ fn bad_machine_float_info_size() { test_sysfile("bad_machine_float_info_size"); } +#[test] +fn wrong_special_floats() { + test_sysfile("wrong_special_floats"); +} + /// Duplicate variable name handling negative test. /// /// SPSS-generated system file can contain duplicate variable names (see bug diff --git a/rust/pspp/src/sys/testdata/wrong_special_floats.expected b/rust/pspp/src/sys/testdata/wrong_special_floats.expected new file mode 100644 index 0000000000..6e55891088 --- /dev/null +++ b/rust/pspp/src/sys/testdata/wrong_special_floats.expected @@ -0,0 +1,24 @@ +System file specifies value 0.0 (0.0) as SYSMIS but -1.7976931348623157e308 (-0x1.fffffffffffffp1023) was expected. + +System file specifies value 1.0 (0x1.0p0) as HIGHEST but 1.7976931348623157e308 (0x1.fffffffffffffp1023) was expected. + +System file specifies value 2.0 (0x1.0p1) as LOWEST but -1.7976931348623157e308 (-0x1.fffffffffffffp1023) was expected. + +╭──────────────────────┬────────────────────────╮ +│ Created │ 01-JAN-2011 20:53:52│ +├──────────────────────┼────────────────────────┤ +│Writer Product │PSPP synthetic test file│ +├──────────────────────┼────────────────────────┤ +│ Compression │SAV │ +│ Number of Cases│Unknown │ +╰──────────────────────┴────────────────────────╯ + +╭─────────┬─╮ +│Variables│1│ +╰─────────┴─╯ + +╭────┬────────┬─────┬─────────────────┬─────┬─────┬─────────┬────────────┬────────────┬──────────────╮ +│ │Position│Label│Measurement Level│ Role│Width│Alignment│Print Format│Write Format│Missing Values│ +├────┼────────┼─────┼─────────────────┼─────┼─────┼─────────┼────────────┼────────────┼──────────────┤ +│num1│ 1│ │ │Input│ 8│Right │F8.0 │F8.0 │ │ +╰────┴────────┴─────┴─────────────────┴─────┴─────┴─────────┴────────────┴────────────┴──────────────╯ diff --git a/rust/pspp/src/sys/testdata/wrong_special_floats.sack b/rust/pspp/src/sys/testdata/wrong_special_floats.sack new file mode 100644 index 0000000000..12f270ae7a --- /dev/null +++ b/rust/pspp/src/sys/testdata/wrong_special_floats.sack @@ -0,0 +1,15 @@ +# File header. +"$FL2"; s60 "$(#) SPSS DATA FILE PSPP synthetic test file"; +2; 1; 1; 0; -1; 100.0; "01 Jan 11"; "20:53:52"; s64 ""; i8 0 *3; + +# Numeric variable, no label or missing values. +2; 0; 0; 0; 0x050800 *2; s8 "NUM1"; + +# Machine floating-point info record. +7; 4; 8; 3; >>0.0<<; >>1.0<<; >>2.0<<; + +# Character encoding record. +7; 20; 1; 12; "windows-1252"; + +# End of dictionary. +999; 0; -- 2.30.2