use core::str;
use std::{
borrow::Cow,
- cmp::Ordering,
collections::{btree_set, BTreeMap, BTreeSet, HashMap, HashSet},
fmt::{Debug, Display, Formatter, Result as FmtResult},
hash::{DefaultHasher, Hash, Hasher},
}
/// [VarType], plus a width for [VarType::String].
-#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize)]
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)] // Derived total order: `Numeric` sorts before every `String(_)`; strings compare by width.
pub enum VarWidth {
Numeric,
String(u16),
}
-impl PartialOrd for VarWidth {
- fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
- match (self, other) {
- (VarWidth::Numeric, VarWidth::Numeric) => Some(Ordering::Equal),
- (VarWidth::String(a), VarWidth::String(b)) => Some(a.cmp(b)),
- _ => None,
- }
- }
-}
-
impl VarWidth {
pub const MAX_STRING: u16 = 32767;
Err(MissingValuesError::MixedTypes)
} else if value == Datum::Number(None) {
Err(MissingValuesError::SystemMissing)
- } else if value.resize(self.width).is_err() {
+ } else if value.resize(self.width.min(VarWidth::String(8))).is_err() {
Err(MissingValuesError::TooWide)
} else {
value.trim_end();
),
}
+impl Record {
+    /// Returns the wrapped record if `self` is
+    /// [Record::LongStringMissingValues], otherwise `None`.
+    pub fn as_long_string_missing_values(
+        &self,
+    ) -> Option<&LongStringMissingValueRecord<ByteString>> {
+        if let Record::LongStringMissingValues(record) = self {
+            Some(record)
+        } else {
+            None
+        }
+    }
+}
+
/// A [Record] that has been decoded to a more usable form.
///
/// Some records can be understood raw, but others need to have strings decoded
}
}
-/// An 8-byte raw string whose type and encoding are unknown.
-#[derive(Copy, Clone)]
-pub struct RawStrArray<const N: usize>(
- /// Content.
- pub [u8; N],
-);
-
-impl<const N: usize> From<[u8; N]> for RawStrArray<N> {
- fn from(source: [u8; N]) -> Self {
- Self(source)
- }
-}
-
-impl<const N: usize> Debug for RawStrArray<N> {
- fn fmt(&self, f: &mut Formatter) -> FmtResult {
- write!(f, "{:?}", ByteStr(&self.0))
- }
-}
-
-impl<const N: usize> Serialize for RawStrArray<N> {
- fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
- where
- S: serde::Serializer,
- {
- ByteStr(&self.0).serialize(serializer)
- }
-}
-
fn skip_bytes<R: Read>(r: &mut R, mut n: usize) -> Result<(), IoError> {
thread_local! {
static BUF: RefCell<[u8; 256]> = RefCell::new([0u8; 256]);
};
use crate::{
- data::{ByteString, Datum},
+ data::{ByteStrArray, ByteString, Datum},
dictionary::{
Alignment, Attributes, CategoryLabels, Measure, MissingValueRange, MissingValues,
MissingValuesError, VarType, VarWidth,
sys::{
raw::{
read_bytes, read_string, read_vec, Decoder, Error, ErrorDetails, Magic, RawDatum,
- RawStrArray, RawWidth, Record, UntypedDatum, VarTypes, Warning, WarningDetails,
+ RawWidth, Record, UntypedDatum, VarTypes, Warning, WarningDetails,
},
serialize_endian, ProductVersion,
},
}
/// One line in a document.
-pub type RawDocumentLine = RawStrArray<DOC_LINE_LEN>;
+pub type RawDocumentLine = ByteStrArray<DOC_LINE_LEN>;
/// Length of a line in a document. Document lines are fixed-length and
/// padded on the right with spaces.
let offsets = start_offset..start_offset.saturating_add((n * DOC_LINE_LEN) as u64);
let mut lines = Vec::with_capacity(n);
for _ in 0..n {
- lines.push(RawStrArray(
+ lines.push(ByteStrArray(
read_bytes(r).map_err(|e| Error::new(Some(offsets.clone()), e.into()))?,
));
}
pub var_name: N,
/// Missing values.
- pub missing_values: Vec<RawStrArray<8>>,
+ pub missing_values: Vec<ByteStrArray<8>>,
}
impl LongStringMissingValues<ByteString> {
}
let value: [u8; 8] = read_bytes(&mut input)?;
- missing_values.push(RawStrArray(value));
+ missing_values.push(ByteStrArray(value));
}
missing_value_set.push(LongStringMissingValues {
var_name,
}
}
- pub(super) fn read<R: Read + Seek>(
+ pub fn read<R: Read + Seek>(
r: &mut R,
endian: Endian,
var_types: &VarTypes,
use binrw::{BinRead, Endian};
use encoding_rs::UTF_8;
use itertools::Itertools;
+ use unicase::UniCase;
use crate::{
- data::{ByteString, Datum},
+ data::{ByteString, Datum, RawString},
dictionary::{
- CategoryLabels, DictIndexMultipleResponseSet, DictIndexVariableSet, Dictionary,
- MissingValueRange, MultipleResponseType, VarWidth, Variable,
+ Alignment, CategoryLabels, DictIndexMultipleResponseSet, DictIndexVariableSet,
+ Dictionary, Measure, MissingValueRange, MultipleResponseType, VarWidth, Variable,
},
identifier::{ByIdentifier, Identifier},
sys::{
raw::{
- records::{DocumentRecord, RawHeader, RawVariableRecord, VariableRecord},
- Decoder,
+ records::{
+ DocumentRecord, Extension, RawHeader, RawVariableRecord, VariableRecord,
+ },
+ Decoder, VarTypes,
},
write::DictionaryWriter,
ReadOptions, WriteOptions,
],
None,
),
+ (
+ VarWidth::String(10),
+ vec![
+ Datum::String(ByteString::from("abcdeasd")),
+ Datum::String(ByteString::from("qwioejdf")),
+ ],
+ None,
+ ),
+ (
+ VarWidth::String(11),
+ vec![Datum::String(ByteString::from("abcdeasd"))],
+ None,
+ ),
];
for (width, values, range) in test_cases {
}
dictionary.add_var(variable).unwrap();
- let mut raw = Vec::new();
+ // Write and check variable records.
+ let mut raw_variables = Vec::new();
DictionaryWriter::new(
&WriteOptions::reproducible(None),
- &mut Cursor::new(&mut raw),
+ &mut Cursor::new(&mut raw_variables),
&dictionary,
)
.write_variables()
.unwrap();
- let mut cursor = Cursor::new(&raw[4..]);
+ let mut cursor = Cursor::new(&raw_variables[4..]);
let record =
VariableRecord::read(&mut cursor, Endian::Little, &mut |_| panic!()).unwrap();
if !width.is_long_string() {
assert_eq!(&record.missing_values.values, &vec![]);
}
assert_eq!(&record.missing_values.range, &range);
+
+ // Write and check long string missing value record.
+ let mut raw_long_missing = Vec::new();
+ DictionaryWriter::new(
+ &WriteOptions::reproducible(None),
+ &mut Cursor::new(&mut raw_long_missing),
+ &dictionary,
+ )
+ .write_long_string_missing_values()
+ .unwrap();
+
+ if width.is_long_string() {
+ let mut cursor = Cursor::new(&raw_long_missing[4..]);
+ let record = Extension::read(
+ &mut cursor,
+ Endian::Little,
+ &VarTypes::new(),
+ &mut |_| panic!(),
+ )
+ .unwrap()
+ .unwrap()
+ .as_long_string_missing_values()
+ .unwrap()
+ .clone()
+ .decode(&mut Decoder::new(UTF_8, |_| panic!()));
+
+ assert_eq!(record.values.len(), 1);
+ assert_eq!(&record.values[0].var_name.0, &UniCase::new("var"));
+ let actual = record.values[0]
+ .missing_values
+ .iter()
+ .map(|v| v.raw_string_bytes());
+ let expected = values
+ .iter()
+ .map(|v| v.as_string().unwrap().raw_string_bytes());
+ for (actual, expected) in actual.zip_eq(expected) {
+ assert_eq!(actual, expected);
+ }
+ } else {
+ assert_eq!(raw_long_missing.len(), 0);
+ }
}
}
"
);
}
+
+    /// Writes a dictionary whose variables carry distinct measure, alignment,
+    /// and display-width settings, reads it back, and checks that every
+    /// variable's settings are unchanged.
+    #[test]
+    fn variable_display_parameters() {
+        /// Yields `(measure, alignment, display_width)` for each variable in
+        /// `dictionary`, in dictionary order.
+        fn display_parameters(
+            dictionary: &Dictionary,
+        ) -> impl Iterator<Item = (Option<Measure>, Alignment, u32)> {
+            dictionary
+                .variables
+                .iter()
+                .map(|var| (var.measure, var.alignment, var.display_width))
+        }
+
+        let settings = [
+            (None, Alignment::Left, 10),
+            (Some(Measure::Nominal), Alignment::Right, 12),
+            (Some(Measure::Ordinal), Alignment::Center, 14),
+            (Some(Measure::Scale), Alignment::Right, 16),
+        ];
+
+        // Build the dictionary to write: one numeric variable per settings
+        // tuple, named `v0`, `v1`, ...
+        let mut expected = Dictionary::new(UTF_8);
+        for (i, (measure, alignment, display_width)) in settings.into_iter().enumerate() {
+            let name = Identifier::new(format!("v{i}")).unwrap();
+            let mut var = Variable::new(name, VarWidth::Numeric, UTF_8);
+            var.measure = measure;
+            var.alignment = alignment;
+            var.display_width = display_width;
+            expected.add_var(var).unwrap();
+        }
+
+        // Round trip: serialize into an in-memory buffer, then parse it again.
+        let bytes = WriteOptions::new()
+            .write_writer(&expected, Cursor::new(Vec::new()))
+            .unwrap()
+            .finish()
+            .unwrap()
+            .unwrap()
+            .into_inner();
+        let actual = ReadOptions::new(|_| panic!())
+            .open_reader(Cursor::new(bytes))
+            .unwrap()
+            .dictionary;
+
+        assert!(display_parameters(&expected).eq(display_parameters(&actual)));
+    }
+
+    /// Writes a dictionary holding 64 variables whose names are the prefixes of
+    /// length 1 through 64 of a 64-character string, reads it back, and checks
+    /// that the names come back unchanged and in the same order.
+    #[test]
+    fn long_variable_names() {
+        /// Yields the name of each variable in `dictionary`, in dictionary
+        /// order.
+        fn names(dictionary: &Dictionary) -> impl Iterator<Item = &Identifier> {
+            dictionary.variables.iter().map(|var| &var.name)
+        }
+
+        let long_name = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789@$";
+
+        let mut expected = Dictionary::new(UTF_8);
+        for len in 1..=64 {
+            let name = Identifier::new(long_name[..len].to_string()).unwrap();
+            let var = Variable::new(name, VarWidth::Numeric, UTF_8);
+            expected.add_var(var).unwrap();
+        }
+
+        // Round trip: serialize into an in-memory buffer, then parse it again.
+        let bytes = WriteOptions::new()
+            .write_writer(&expected, Cursor::new(Vec::new()))
+            .unwrap()
+            .finish()
+            .unwrap()
+            .unwrap()
+            .into_inner();
+        let actual = ReadOptions::new(|_| panic!())
+            .open_reader(Cursor::new(bytes))
+            .unwrap()
+            .dictionary;
+
+        assert!(names(&expected).eq(names(&actual)));
+    }
}