/// string variables is allocated.
pub const SEGMENT_SIZE: usize = 252;
- /// Returns the number of "segments" used for writing case data for a
+ /// Returns an iterator over the "segments" used for writing case data for a
/// variable with this width. A segment is a physical variable in the
/// system file that represents some piece of a logical variable as seen by
- /// a PSPP user. Only very long string variables have more than one
- /// segment.
- pub fn n_segments(&self) -> usize {
- if self.is_very_long_string() {
- self.as_string_width().unwrap().div_ceil(Self::SEGMENT_SIZE)
- } else {
- 1
- }
+ /// a PSPP user. Most variables have one segment whose width is their own
+ /// width, but very long string variables, with width greater than 255, have
+ /// multiple segments each with width 255 or less.
+ pub fn segments(&self) -> Segments {
+ Segments::new(*self)
}
/// Returns the number of 8-byte chunks used for writing case data for a
/// the system file that represents some piece of a logical variable as seen
/// by a PSPP user.
pub fn segment_alloc_width(&self, segment_idx: usize) -> usize {
- debug_assert!(segment_idx < self.n_segments());
+ debug_assert!(segment_idx < self.segments().len());
debug_assert!(self.is_very_long_string());
- if segment_idx < self.n_segments() - 1 {
+ if segment_idx < self.segments().len() - 1 {
255
} else {
self.as_string_width().unwrap() - segment_idx * Self::SEGMENT_SIZE
}
}
+/// Iterator over the segments of a variable, yielding the [`VarWidth`] of
+/// each segment in order.
+///
+/// Returned by [`VarWidth::segments`].  The number of remaining segments is
+/// always known exactly, so `len()` is available through
+/// [`ExactSizeIterator`].
+#[derive(Clone)]
+pub struct Segments {
+    /// Width of the whole logical variable that is being split up.
+    width: VarWidth,
+    /// Index of the next segment to yield.
+    i: usize,
+    /// Total number of segments in the variable.
+    n: usize,
+}
+impl Segments {
+    /// Creates an iterator over the segments of a variable with the given
+    /// `width`.
+    ///
+    /// A very long string is divided into `VarWidth::SEGMENT_SIZE`-byte
+    /// pieces, rounding up; every other width has exactly one segment.
+    pub fn new(width: VarWidth) -> Self {
+        let n = if width.is_very_long_string() {
+            let total = width.as_string_width().unwrap();
+            total.div_ceil(VarWidth::SEGMENT_SIZE)
+        } else {
+            1
+        };
+        Self { width, i: 0, n }
+    }
+}
+
+impl Iterator for Segments {
+    type Item = VarWidth;
+
+    /// Yields the width of the next segment, or `None` once every segment
+    /// has been produced.
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.i >= self.n {
+            return None;
+        }
+        let is_last = self.i + 1 == self.n;
+        self.i += 1;
+
+        let segment = match self.width {
+            VarWidth::Numeric => VarWidth::Numeric,
+            // Every string segment except the final one is 255 bytes wide.
+            VarWidth::String(_) if !is_last => VarWidth::String(255),
+            // The final segment holds whatever is left after the preceding
+            // segments have each accounted for `SEGMENT_SIZE` bytes.
+            VarWidth::String(total) => {
+                let preceding = (self.n as u16 - 1) * VarWidth::SEGMENT_SIZE as u16;
+                VarWidth::String(total - preceding)
+            }
+        };
+        Some(segment)
+    }
+
+    /// Reports the exact number of segments not yet yielded.
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let remaining = self.n - self.i;
+        (remaining, Some(remaining))
+    }
+}
+
+// `size_hint` above is exact, so `len()` can be provided.
+impl ExactSizeIterator for Segments {}
+
impl From<VarWidth> for VarType {
fn from(source: VarWidth) -> Self {
match source {
.variables
.iter()
.map(|variable| {
- let n = variable.width.n_segments();
+ let n = variable.width.segments().len();
let mut names = SmallVec::with_capacity(n);
if self.encoding.encode(variable.name.as_str()).0.len() <= 8 {
used_names.insert(variable.name.clone());
/// Output format used on the `WRITE` command.
pub write_format: Format,
- /// Value labels, to associate a number (or a string) with a more meaningful
- /// description, e.g. 1 -> Apple, 2 -> Banana, ...
+ /// Value labels.
pub value_labels: ValueLabels,
/// Variable label, an optional meaningful description for the variable
}
}
+/// Associates values of a variable with meaningful labels.
+///
+/// For example, 1 => strongly disagree, 2 => disagree, 3 => neither agree nor
+/// disagree, ...
#[derive(Clone, Default, PartialEq, Eq, Serialize)]
pub struct ValueLabels(pub HashMap<Datum<ByteString>, String>);
self.0.is_empty()
}
- pub fn get(&self, datum: &Datum<ByteString>) -> Option<&str> {
- self.0.get(datum).map(|s| s.as_str())
+ pub fn get(&self, value: &Datum<ByteString>) -> Option<&str> {
+ self.0.get(value).map(|s| s.as_str())
}
- pub fn insert(&mut self, datum: Datum<ByteString>, label: String) -> Option<String> {
- self.0.insert(datum, label)
+ pub fn insert(&mut self, value: Datum<ByteString>, label: String) -> Option<String> {
+ self.0.insert(value, label)
}
pub fn is_resizable(&self, width: VarWidth) -> bool {
continue;
};
let width = VarWidth::String(record.length);
- let n_segments = width.n_segments();
+ let n_segments = width.segments().len();
if n_segments == 1 {
warn(Error::ShortVeryLongString {
short_name: record.short_name.clone(),
{
let rec_type: u32 = endian.parse(read_bytes(reader)?);
match rec_type {
- 2 => Ok(Some(Record::Variable(VariableRecord::read(reader, endian, warn)?))),
- 3 => Ok(ValueLabelRecord::read(reader, endian, var_types, warn)?),
- 6 => Ok(Some(DocumentRecord::read(reader, endian)?)),
+ 2 => Ok(Some(Record::Variable(VariableRecord::read(
+ reader, endian, warn,
+ )?))),
+ 3 => Ok(
+ ValueLabelRecord::read(reader, endian, var_types, warn)?.map(Record::ValueLabel)
+ ),
+ 6 => Ok(Some(Record::Document(DocumentRecord::read(
+ reader, endian,
+ )?))),
7 => Extension::read(reader, endian, var_types, warn),
999 => Ok(Some(Record::EndOfHeaders(
endian.parse(read_bytes(reader)?),
}
};
match record {
- Record::Variable(VariableRecord { width, .. }) => self.0.var_types.push(width),
+ Record::Variable(VariableRecord { width, .. }) => {
+ if let Ok(width) = width.try_into() {
+ self.0.var_types.push(width)
+ }
+ }
Record::EndOfHeaders(_) => {
self.0.state = if let Some(Compression::ZLib) = self.0.header.compression {
ReaderState::ZlibHeader
}
#[derive(Default)]
-struct VarTypes {
+pub struct VarTypes {
types: Vec<Option<VarWidth>>,
}
Self::default()
}
- pub fn push(&mut self, width: RawWidth) {
- if let Ok(var_width) = VarWidth::try_from(width) {
- self.types.push(Some(var_width));
- for _ in 1..width.n_values().unwrap() {
- self.types.push(None);
- }
+ pub fn push(&mut self, width: VarWidth) {
+ self.types.push(Some(width));
+ for _ in 1..width.n_chunks().unwrap() {
+ self.types.push(None);
}
}
}
impl ValueLabelRecord<RawDatum, ByteString> {
- pub(super) fn read<R: Read + Seek>(
+ pub fn read<R: Read + Seek>(
r: &mut R,
endian: Endian,
var_types: &VarTypes,
warn: &mut dyn FnMut(Warning),
- ) -> Result<Option<Record>, Error> {
+ ) -> Result<Option<Self>, Error> {
let label_offset = r.stream_position()?;
let n: u32 = endian.parse(read_bytes(r)?);
if n > Self::MAX_LABELS {
.collect();
let end_offset = r.stream_position()?;
- Ok(Some(Record::ValueLabel(ValueLabelRecord {
+ Ok(Some(ValueLabelRecord {
offsets: label_offset..end_offset,
labels,
dict_indexes,
var_type,
- })))
+ }))
}
/// Decodes a value label record using `decoder`.
pub const MAX_LINES: usize = i32::MAX as usize / DOC_LINE_LEN;
/// Reads a document record from `r`.
- pub fn read<R>(r: &mut R, endian: Endian) -> Result<Record, Error>
+ pub fn read<R>(r: &mut R, endian: Endian) -> Result<Self, Error>
where
R: Read + Seek,
{
read_bytes(r).map_err(|e| Error::new(Some(offsets.clone()), e.into()))?,
));
}
- Ok(Record::Document(DocumentRecord { offsets, lines }))
+ Ok(DocumentRecord { offsets, lines })
}
}
.iter()
.zip(self.short_names.iter())
{
- let mut segment_widths = SegmentWidths::new(variable.width);
+ let mut segments = variable.width.segments();
let mut short_names = short_names.iter();
- let seg0_width = segment_widths.next().unwrap();
+ let seg0_width = segments.next().unwrap();
let name0 = short_names.next().unwrap();
let record = RawVariableRecord {
width: seg0_width.as_string_width().unwrap_or(0) as i32,
write_variable_continuation_records(&mut self.writer, seg0_width)?;
// Write additional segments for very long string variables.
- for (width, name) in segment_widths.zip(short_names) {
+ for (width, name) in segments.zip(short_names) {
let format: RawFormat = Format::default_for_width(width).try_into().unwrap();
(
2u32,
.or_default()
.push(index as u32);
}
- index += SegmentWidths::new(variable.width)
+ index += variable
+ .width
+ .segments()
.map(|w| w.n_chunks().unwrap())
.sum::<usize>();
}
Alignment::Right => 1,
Alignment::Center => 2,
};
- for (index, segment) in SegmentWidths::new(variable.width).enumerate() {
+ for (index, segment) in variable.width.segments().enumerate() {
let display_width = match index {
0 => variable.display_width,
_ => segment.default_display_width(),
padding_bytes: usize,
}
-struct SegmentWidths {
- width: VarWidth,
- i: usize,
- n: usize,
-}
-impl SegmentWidths {
- pub fn new(width: VarWidth) -> Self {
- Self {
- width,
- i: 0,
- n: width.n_segments(),
- }
- }
-}
-
-impl Iterator for SegmentWidths {
- type Item = VarWidth;
-
- fn next(&mut self) -> Option<Self::Item> {
- let i = self.i;
- if i >= self.n {
- None
- } else {
- self.i += 1;
- match self.width {
- VarWidth::Numeric => Some(VarWidth::Numeric),
- VarWidth::String(_) if i < self.n - 1 => Some(VarWidth::String(255)),
- VarWidth::String(width) => Some(VarWidth::String(
- width - (self.n as u16 - 1) * VarWidth::SEGMENT_SIZE as u16,
- )),
- }
- }
- }
-}
-
enum CaseVar {
Numeric,
String(SmallVec<[StringSegment; 1]>),
VarWidth::String(w) => {
let mut encoding = SmallVec::<[StringSegment; 1]>::new();
let mut remaining = w as usize;
- for segment in SegmentWidths::new(width) {
+ for segment in width.segments() {
let segment = segment.as_string_width().unwrap().next_multiple_of(8);
let data_bytes = remaining.min(segment).min(255);
let padding_bytes = segment - data_bytes;
dictionary::{Dictionary, MissingValueRange, VarWidth, Variable},
identifier::Identifier,
sys::{
- raw::records::{RawHeader, RawVariableRecord, VariableRecord},
+ raw::records::{RawHeader, RawVariableRecord, ValueLabelRecord, VariableRecord},
write::DictionaryWriter,
WriteOptions,
},
)
.write_variables()
.unwrap();
- println!("{}", HexView::new(&raw));
let mut cursor = Cursor::new(&raw[4..]);
let record =
VariableRecord::read(&mut cursor, Endian::Little, &mut |_| panic!()).unwrap();
- dbg!(&record);
if !width.is_long_string() {
assert_eq!(&record.missing_values.values, &values);
} else {
assert_eq!(&record.missing_values.range, &range);
}
}
+
+    /// Checks that value labels are written correctly.
+    ///
+    /// For now this only verifies that writing the value label records
+    /// succeeds; the written bytes are printed but not yet parsed back (see
+    /// the TODO at the end of the loop body).
+    #[test]
+    fn variables_value_labels() {
+        // Each case is (variable width, chunk count, value labels).  The
+        // chunk count is not used yet; presumably it is meant for checking
+        // the record once it is read back.
+        let test_cases = [(
+            VarWidth::Numeric,
+            1,
+            vec![(Datum::Number(Some(1.0)), "One")],
+        )];
+
+        for (width, _n_chunks, value_labels) in test_cases {
+            let mut dictionary = Dictionary::new(UTF_8);
+            let mut variable = Variable::new(Identifier::new("var").unwrap(), width, UTF_8);
+            for (value, label) in &value_labels {
+                // `insert` returns the label previously stored for `value`.
+                // On a fresh variable that is `None`, so unwrapping it would
+                // panic; instead require that no label was replaced.
+                let previous = variable
+                    .value_labels
+                    .insert(value.clone(), (*label).into());
+                assert!(previous.is_none(), "duplicate value in test case");
+            }
+            dictionary.add_var(variable).unwrap();
+
+            let mut raw = Vec::new();
+            DictionaryWriter::new(
+                &WriteOptions::reproducible(None),
+                &mut Cursor::new(&mut raw),
+                &dictionary,
+            )
+            .write_value_labels()
+            .unwrap();
+            println!("{}", HexView::new(&raw));
+
+            // TODO: parse `raw[4..]` back with `ValueLabelRecord::read` (which
+            // also needs a `VarTypes` argument) and compare the result with
+            // `value_labels`.
+        }
+    }
}