minor cleanup
author Ben Pfaff <blp@cs.stanford.edu>
Mon, 7 Jul 2025 00:08:36 +0000 (17:08 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Mon, 7 Jul 2025 00:08:57 +0000 (17:08 -0700)
rust/pspp/src/main.rs
rust/pspp/src/output/cairo/fsm.rs
rust/pspp/src/output/cairo/pager.rs
rust/pspp/src/sys/cooked.rs
rust/pspp/src/sys/raw.rs
rust/pspp/src/sys/test.rs

index 57ca1bb260d03afc514a9f3818232ee586e17741..b8fffb3c64ad9372c6a1a9e4de66767304f8a25c 100644 (file)
@@ -17,7 +17,7 @@
 use anyhow::Result;
 use clap::{Args, Parser, Subcommand, ValueEnum};
 use encoding_rs::Encoding;
-use pspp::sys::cooked::{decode, Error, Headers};
+use pspp::sys::cooked::{Error, Headers};
 use pspp::sys::raw::{encoding_from_headers, Decoder, Magic, Reader, Record, Warning};
 use std::fs::File;
 use std::io::{stdout, BufReader, Write};
@@ -72,20 +72,17 @@ impl Convert {
     }
 
     fn run(self) -> Result<()> {
-        let input = BufReader::new(File::open(&self.input)?);
-        let mut reader = Reader::new(input, Self::warn)?;
+        let mut reader = Reader::new(BufReader::new(File::open(&self.input)?), Self::warn)?;
         let headers = reader.headers().collect::<Result<Vec<_>, _>>()?;
-        let cases = reader.cases();
         let encoding = encoding_from_headers(&headers, &mut |w| Self::warn(w))?;
         let mut decoder = Decoder::new(encoding, |w| Self::warn(w));
         let mut decoded_records = Vec::new();
         for header in headers {
             decoded_records.push(header.decode(&mut decoder)?);
         }
-        drop(decoder);
-
         let headers = Headers::new(decoded_records, &mut |e| Self::err(e))?;
-        let (dictionary, metadata, cases) = decode(headers, cases, encoding, |e| Self::err(e))?;
+        let (dictionary, _metadata, cases) =
+            headers.decode(reader.cases(), encoding, |e| Self::err(e))?;
         let writer = match self.output {
             Some(path) => Box::new(File::create(path)?) as Box<dyn Write>,
             None => Box::new(stdout()),
@@ -245,7 +242,7 @@ fn dissect(
             }
             let headers = Headers::new(decoded_records, &mut |e| eprintln!("{e}"))?;
             let (dictionary, metadata, _cases) =
-                decode(headers, None, encoding, |e| eprintln!("{e}"))?;
+                headers.decode(None, encoding, |e| eprintln!("{e}"))?;
             println!("{dictionary:#?}");
             println!("{metadata:#?}");
         }
index e35b7c95b165fcef6ffb50dc58daecdec47c4617..7295347908018e61ef614ae43da161c44cd6b3f2 100644 (file)
@@ -532,7 +532,7 @@ impl Device for CairoDevice<'_> {
         self.layout_cell(cell, bb, &Rect2::default()).y() + margin(cell, Axis2::Y)
     }
 
-    fn adjust_break(&self, cell: &Content, size: Coord2) -> usize {
+    fn adjust_break(&self, _cell: &Content, _size: Coord2) -> usize {
         todo!()
     }
 
index e26a99d46fd74ea7fce9774f4e67998928496e46..2a53c2e3cd9264591af3b386ef0ef84909e6d8e1 100644 (file)
@@ -192,7 +192,7 @@ fn render_heading(
     context: &Context,
     font: &FontDescription,
     heading: &Heading,
-    page_number: i32,
+    _page_number: i32,
     width: usize,
     base_y: usize,
     font_resolution: f64,
index c513c20934142b4b867cd6cb862ae02d3ec50018..36c3e80b9983b3b01551329cc394a44208f0ed16 100644 (file)
@@ -454,6 +454,529 @@ impl Headers {
             z_trailer: take_first(z_trailer, "z_trailer", warn),
         })
     }
+
+    pub fn decode(
+        mut self,
+        mut cases: Option<Cases>,
+        encoding: &'static Encoding,
+        mut warn: impl FnMut(Error),
+    ) -> Result<(Dictionary, Metadata, Option<Cases>), Error> {
+        let mut dictionary = Dictionary::new(encoding);
+
+        let file_label = fix_line_ends(self.header.file_label.trim_end_matches(' '));
+        if !file_label.is_empty() {
+            dictionary.file_label = Some(file_label);
+        }
+
+        for mut attributes in self.file_attributes.drain(..) {
+            dictionary.attributes.append(&mut attributes.0)
+        }
+
+        // Concatenate all the document records (really there should only be one)
+        // and trim off the trailing spaces that pad them to 80 bytes.
+        dictionary.documents = self
+            .document
+            .drain(..)
+            .flat_map(|record| record.lines)
+            .map(trim_end_spaces)
+            .collect();
+
+        if let Some(integer_info) = &self.integer_info {
+            let floating_point_rep = integer_info.floating_point_rep;
+            if floating_point_rep != 1 {
+                warn(Error::UnexpectedFloatFormat(floating_point_rep))
+            }
+
+            let expected = match self.header.endian {
+                Endian::Big => 1,
+                Endian::Little => 2,
+            };
+            let actual = integer_info.endianness;
+            if actual != expected {
+                warn(Error::UnexpectedEndianess { actual, expected });
+            }
+        };
+
+        if let Some(float_info) = &self.float_info {
+            for (expected, expected2, actual, name) in [
+                (f64::MIN, None, float_info.sysmis, "SYSMIS"),
+                (f64::MAX, None, float_info.highest, "HIGHEST"),
+                (
+                    f64::MIN,
+                    Some(f64::MIN.next_up()),
+                    float_info.lowest,
+                    "LOWEST",
+                ),
+            ] {
+                if actual != expected && expected2.is_none_or(|expected2| expected2 != actual) {
+                    warn(Error::UnexpectedFloatValue {
+                        expected,
+                        actual,
+                        name,
+                    });
+                }
+            }
+        }
+
+        if let Some(nominal_case_size) = self.header.nominal_case_size {
+            let n_vars = self.variable.len();
+            if n_vars != nominal_case_size as usize
+                && self
+                    .integer_info
+                    .as_ref()
+                    .is_none_or(|info| info.version.0 != 13)
+            {
+                warn(Error::WrongVariablePositions {
+                    actual: n_vars,
+                    expected: nominal_case_size as usize,
+                });
+            }
+        }
+
+        let mut decoder = Decoder {
+            encoding,
+            n_generated_names: 0,
+        };
+
+        let mut var_index_map = BTreeMap::new();
+        let mut value_index = 0;
+        for (index, input) in self
+            .variable
+            .iter()
+            .enumerate()
+            .filter(|(_index, record)| record.width != RawWidth::Continuation)
+        {
+            let name = trim_end_spaces(input.name.to_string());
+            let name = match Identifier::from_encoding(name, encoding)
+                .and_then(Identifier::must_be_ordinary)
+            {
+                Ok(name) => {
+                    if !dictionary.variables.contains(&name.0) {
+                        name
+                    } else {
+                        let new_name = decoder.generate_name(&dictionary);
+                        warn(Error::DuplicateVariableName {
+                            duplicate_name: name.clone(),
+                            new_name: new_name.clone(),
+                        });
+                        new_name
+                    }
+                }
+                Err(id_error) => {
+                    let new_name = decoder.generate_name(&dictionary);
+                    warn(Error::InvalidVariableName {
+                        id_error,
+                        new_name: new_name.clone(),
+                    });
+                    new_name
+                }
+            };
+            let mut variable = Variable::new(
+                name.clone(),
+                VarWidth::try_from(input.width).unwrap(),
+                encoding,
+            );
+
+            // Set the short name the same as the long name (even if we renamed it).
+            variable.short_names = vec![name];
+
+            variable.label = input.label.clone();
+
+            variable.missing_values = input.missing_values.clone();
+
+            variable.print_format = decode_format(
+                input.print_format,
+                variable.width,
+                |new_spec, format_error| {
+                    warn(Error::InvalidPrintFormat {
+                        new_spec,
+                        variable: variable.name.clone(),
+                        format_error,
+                    })
+                },
+            );
+            variable.write_format = decode_format(
+                input.write_format,
+                variable.width,
+                |new_spec, format_error| {
+                    warn(Error::InvalidWriteFormat {
+                        new_spec,
+                        variable: variable.name.clone(),
+                        format_error,
+                    })
+                },
+            );
+
+            // Check for long string continuation records.
+            let n_values = input.width.n_values().unwrap();
+            for offset in 1..n_values {
+                if self
+                    .variable
+                    .get(index + offset)
+                    .is_none_or(|record| record.width != RawWidth::Continuation)
+                {
+                    warn(Error::MissingLongStringContinuation {
+                        width: input.width,
+                        start_index: index,
+                        end_index: index + n_values - 1,
+                        error_index: index + offset,
+                    });
+                    break;
+                }
+            }
+
+            let dict_index = dictionary.add_var(variable).unwrap();
+            assert_eq!(var_index_map.insert(value_index, dict_index), None);
+            value_index += n_values;
+        }
+
+        if let Some(weight_index) = self.header.weight_index {
+            let index = weight_index as usize - 1;
+            if index >= value_index {
+                warn(Error::WeightIndexOutOfRange {
+                    index: weight_index,
+                    max_index: var_index_map.len(),
+                });
+            } else {
+                let (var_index, dict_index) = var_index_map.range(..=&index).last().unwrap();
+                let variable = &dictionary.variables[*dict_index];
+                if *var_index == index {
+                    if variable.is_numeric() {
+                        dictionary.weight = Some(*dict_index);
+                    } else {
+                        warn(Error::InvalidWeightVar {
+                            index: weight_index,
+                            name: variable.name.clone(),
+                        });
+                    }
+                } else {
+                    warn(Error::WeightIndexStringContinuation {
+                        index: weight_index,
+                        name: variable.name.clone(),
+                    });
+                }
+            }
+        }
+
+        for record in self.value_label.drain(..) {
+            let mut dict_indexes = Vec::with_capacity(record.dict_indexes.len());
+            let mut long_string_variables = Vec::new();
+            for value_index in record.dict_indexes.iter() {
+                let Some(dict_index) = var_index_map.get(&(*value_index as usize - 1)) else {
+                    unreachable!()
+                };
+                let variable = &dictionary.variables[*dict_index];
+                if variable.width.is_long_string() {
+                    long_string_variables.push(variable.name.clone());
+                } else {
+                    dict_indexes.push(*dict_index);
+                }
+            }
+            if !long_string_variables.is_empty() {
+                warn(Error::InvalidLongStringValueLabels {
+                    offsets: record.offsets.clone(),
+                    variables: long_string_variables,
+                });
+            }
+
+            let written_by_readstat = self.header.eye_catcher.contains("ReadStat");
+            for dict_index in dict_indexes {
+                let variable = dictionary.variables.get_index_mut2(dict_index).unwrap();
+                let mut duplicates = Vec::new();
+                for ValueLabel {
+                    datum: value,
+                    label,
+                } in record.labels.iter().cloned()
+                {
+                    let datum = value.decode(variable.width);
+                    if variable.value_labels.insert(datum, label).is_some() {
+                        duplicates.push(value);
+                    }
+                }
+                if written_by_readstat {
+                    // Ignore any possible duplicates.  ReadStat is buggy and emits
+                    // value labels whose values are longer than string variables'
+                    // widths, that are identical in the actual width of the
+                    // variable, e.g. both values "ABC123" and "ABC456" for a string
+                    // variable with width 3.
+                } else if !duplicates.is_empty() {
+                    warn(Error::DuplicateValueLabels {
+                        variable: variable.name.clone(),
+                        values: duplicates
+                            .iter()
+                            .map(|value| {
+                                value
+                                    .decode(variable.width)
+                                    .display(variable.print_format, variable.encoding)
+                                    .with_trimming()
+                                    .with_quoted_string()
+                                    .to_string()
+                            })
+                            .collect(),
+                    });
+                }
+            }
+        }
+
+        if let Some(display) = &self.var_display {
+            for (index, display) in display.0.iter().enumerate() {
+                if let Some(variable) = dictionary.variables.get_index_mut2(index) {
+                    if let Some(width) = display.width {
+                        variable.display_width = width;
+                    }
+                    if let Some(alignment) = display.alignment {
+                        variable.alignment = alignment;
+                    }
+                    if let Some(measure) = display.measure {
+                        variable.measure = Some(measure);
+                    }
+                } else {
+                    warn(dbg!(Error::TBD));
+                }
+            }
+        }
+
+        for record in self
+            .multiple_response
+            .iter()
+            .flat_map(|record| record.0.iter())
+        {
+            match MultipleResponseSet::decode(&dictionary, record, &mut warn) {
+                Ok(mrset) => {
+                    dictionary.mrsets.insert(ByIdentifier::new(mrset));
+                }
+                Err(error) => warn(error),
+            }
+        }
+
+        if !self.very_long_strings.is_empty() {
+            'outer: for record in self
+                .very_long_strings
+                .drain(..)
+                .flat_map(|record| record.0.into_iter())
+            {
+                let Some(index) = dictionary.variables.get_index_of(&record.short_name.0) else {
+                    warn(dbg!(Error::TBD));
+                    continue;
+                };
+                let width = VarWidth::String(record.length);
+                let n_segments = width.n_segments();
+                if n_segments == 1 {
+                    warn(dbg!(Error::ShortVeryLongString {
+                        short_name: record.short_name.clone(),
+                        width: record.length
+                    }));
+                    continue;
+                }
+                if index + n_segments > dictionary.variables.len() {
+                    warn(dbg!(Error::VeryLongStringOverflow {
+                        short_name: record.short_name.clone(),
+                        width: record.length,
+                        index,
+                        n_segments,
+                        len: dictionary.variables.len()
+                    }));
+                    continue;
+                }
+                let mut short_names = Vec::with_capacity(n_segments);
+                for i in 0..n_segments {
+                    let alloc_width = width.segment_alloc_width(i);
+                    let segment = &dictionary.variables[index + i];
+                    short_names.push(segment.short_names[0].clone());
+                    let segment_width = segment.width.as_string_width().unwrap_or(0);
+                    if segment_width.next_multiple_of(8) != alloc_width.next_multiple_of(8) {
+                        warn(Error::VeryLongStringInvalidSegmentWidth {
+                            short_name: record.short_name.clone(),
+                            width: record.length,
+                            index: i,
+                            actual: segment_width,
+                            expected: alloc_width,
+                        });
+                        continue 'outer;
+                    }
+                }
+                dictionary.delete_vars(index + 1..index + n_segments);
+                let variable = dictionary.variables.get_index_mut2(index).unwrap();
+                variable.short_names = short_names;
+                variable.resize(width);
+            }
+            cases = cases
+                .take()
+                .map(|cases| cases.with_widths(dictionary.variables.iter().map(|var| var.width)));
+        }
+
+        if self.long_names.is_empty() {
+            // There are no long variable names.  Use the short variable names,
+            // converted to lowercase, as the long variable names.
+            for index in 0..dictionary.variables.len() {
+                let lower = dictionary.variables[index].name.0.as_ref().to_lowercase();
+                if let Ok(new_name) = Identifier::from_encoding(lower, dictionary.encoding) {
+                    let _ = dictionary.try_rename_var(index, new_name);
+                }
+            }
+        } else {
+            // Rename each of the variables, one by one.  (In a correctly
+            // constructed system file, this cannot create any intermediate
+            // duplicate variable names, because all of the new variable names are
+            // longer than any of the old variable names and thus there cannot be
+            // any overlaps.)
+            for renaming in self
+                .long_names
+                .iter()
+                .flat_map(|record| record.0.iter().cloned())
+            {
+                let LongName {
+                    short_name,
+                    long_name,
+                } = renaming;
+                if let Some(index) = dictionary.variables.get_index_of(&short_name.0) {
+                    if let Err(long_name) = dictionary.try_rename_var(index, long_name) {
+                        warn(Error::DuplicateLongName(long_name));
+                    }
+                    dictionary
+                        .variables
+                        .get_index_mut2(index)
+                        .unwrap()
+                        .short_names = vec![short_name];
+                } else {
+                    warn(dbg!(Error::TBD));
+                }
+            }
+        }
+
+        for mut attr_set in self
+            .variable_attributes
+            .drain(..)
+            .flat_map(|record| record.0.into_iter())
+        {
+            if let Some((_, variable)) = dictionary
+                .variables
+                .get_full_mut2(&attr_set.long_var_name.0)
+            {
+                variable.attributes.append(&mut attr_set.attributes);
+            } else {
+                warn(dbg!(Error::TBD));
+            }
+        }
+
+        // Assign variable roles.
+        for index in 0..dictionary.variables.len() {
+            let variable = dictionary.variables.get_index_mut2(index).unwrap();
+            match variable.attributes.role() {
+                Ok(Some(role)) => variable.role = role,
+                Ok(None) => (),
+                Err(error) => warn(Error::InvalidRole(error)),
+            }
+        }
+
+        // Long string value labels.
+        for record in self
+            .long_string_value_labels
+            .drain(..)
+            .flat_map(|record| record.0.into_iter())
+        {
+            let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else {
+                warn(Error::UnknownLongStringValueLabelVariable(
+                    record.var_name.clone(),
+                ));
+                continue;
+            };
+            let Some(width) = variable.width.as_string_width() else {
+                warn(Error::LongStringValueLabelNumericVariable(
+                    record.var_name.clone(),
+                ));
+                continue;
+            };
+            for (mut value, label) in record.labels.into_iter() {
+                // XXX warn about too-long value?
+                value.0.resize(width, b' ');
+                // XXX warn about duplicate value labels?
+                variable.value_labels.insert(Datum::String(value), label);
+            }
+        }
+
+        for mut record in self
+            .long_string_missing_values
+            .drain(..)
+            .flat_map(|record| record.0.into_iter())
+        {
+            let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else {
+                warn(Error::LongStringMissingValueUnknownVariable {
+                    name: record.var_name.clone(),
+                });
+                continue;
+            };
+            if !variable.width.is_long_string() {
+                warn(Error::LongStringMissingValueBadWdith {
+                    name: record.var_name.clone(),
+                    width: variable.width,
+                });
+                continue;
+            }
+            if record.missing_values.len() > 3 {
+                warn(Error::LongStringMissingValueInvalidCount {
+                    name: record.var_name.clone(),
+                    count: record.missing_values.len(),
+                });
+                record.missing_values.truncate(3);
+            }
+            let values = record
+                .missing_values
+                .into_iter()
+                .map(|v| {
+                    let mut value = RawString::from(v.0.as_slice());
+                    value.resize(variable.width.as_string_width().unwrap());
+                    Datum::String(value)
+                })
+                .collect::<Vec<_>>();
+            match MissingValues::new(values, None) {
+                Ok(missing_values) => variable.missing_values = missing_values,
+                Err(MissingValuesError::TooWide) => warn(dbg!(Error::TBD)),
+                Err(MissingValuesError::TooMany) | Err(MissingValuesError::MixedTypes) => {
+                    unreachable!()
+                }
+            }
+        }
+
+        for record in self
+            .variable_sets
+            .drain(..)
+            .flat_map(|record| record.sets.into_iter())
+        {
+            let mut variables = Vec::with_capacity(record.variable_names.len());
+            for variable_name in record.variable_names {
+                let Some((dict_index, _)) = dictionary.variables.get_full_mut2(&variable_name.0)
+                else {
+                    warn(Error::UnknownVariableSetVariable {
+                        variable_set: record.name.clone(),
+                        variable: variable_name.clone(),
+                    });
+                    continue;
+                };
+                variables.push(dict_index);
+            }
+            let variable_set = VariableSet {
+                name: record.name,
+                variables,
+            };
+            dictionary.variable_sets.push(variable_set);
+        }
+
+        for record in self.other_extension.drain(..) {
+            warn(Error::UnknownExtensionRecord {
+                offset: record.offsets.start,
+                subtype: record.subtype,
+                size: record.size,
+                count: record.count,
+            });
+        }
+
+        let metadata = Metadata::decode(&self, warn);
+        if let Some(n_cases) = metadata.n_cases {
+            cases = cases.take().map(|cases| cases.with_expected_cases(n_cases))
+        }
+        Ok((dictionary, metadata, cases))
+    }
 }
 
 #[derive(Clone, Debug, PartialEq, Eq)]
@@ -580,528 +1103,6 @@ impl Decoder {
     }
 }
 
-pub fn decode(
-    mut headers: Headers,
-    mut cases: Option<Cases>,
-    encoding: &'static Encoding,
-    mut warn: impl FnMut(Error),
-) -> Result<(Dictionary, Metadata, Option<Cases>), Error> {
-    let mut dictionary = Dictionary::new(encoding);
-
-    let file_label = fix_line_ends(headers.header.file_label.trim_end_matches(' '));
-    if !file_label.is_empty() {
-        dictionary.file_label = Some(file_label);
-    }
-
-    for mut attributes in headers.file_attributes.drain(..) {
-        dictionary.attributes.append(&mut attributes.0)
-    }
-
-    // Concatenate all the document records (really there should only be one)
-    // and trim off the trailing spaces that pad them to 80 bytes.
-    dictionary.documents = headers
-        .document
-        .drain(..)
-        .flat_map(|record| record.lines)
-        .map(trim_end_spaces)
-        .collect();
-
-    if let Some(integer_info) = &headers.integer_info {
-        let floating_point_rep = integer_info.floating_point_rep;
-        if floating_point_rep != 1 {
-            warn(Error::UnexpectedFloatFormat(floating_point_rep))
-        }
-
-        let expected = match headers.header.endian {
-            Endian::Big => 1,
-            Endian::Little => 2,
-        };
-        let actual = integer_info.endianness;
-        if actual != expected {
-            warn(Error::UnexpectedEndianess { actual, expected });
-        }
-    };
-
-    if let Some(float_info) = &headers.float_info {
-        for (expected, expected2, actual, name) in [
-            (f64::MIN, None, float_info.sysmis, "SYSMIS"),
-            (f64::MAX, None, float_info.highest, "HIGHEST"),
-            (
-                f64::MIN,
-                Some(f64::MIN.next_up()),
-                float_info.lowest,
-                "LOWEST",
-            ),
-        ] {
-            if actual != expected && expected2.is_none_or(|expected2| expected2 != actual) {
-                warn(Error::UnexpectedFloatValue {
-                    expected,
-                    actual,
-                    name,
-                });
-            }
-        }
-    }
-
-    if let Some(nominal_case_size) = headers.header.nominal_case_size {
-        let n_vars = headers.variable.len();
-        if n_vars != nominal_case_size as usize
-            && headers
-                .integer_info
-                .as_ref()
-                .is_none_or(|info| info.version.0 != 13)
-        {
-            warn(Error::WrongVariablePositions {
-                actual: n_vars,
-                expected: nominal_case_size as usize,
-            });
-        }
-    }
-
-    let mut decoder = Decoder {
-        encoding,
-        n_generated_names: 0,
-    };
-
-    let mut var_index_map = BTreeMap::new();
-    let mut value_index = 0;
-    for (index, input) in headers
-        .variable
-        .iter()
-        .enumerate()
-        .filter(|(_index, record)| record.width != RawWidth::Continuation)
-    {
-        let name = trim_end_spaces(input.name.to_string());
-        let name = match Identifier::from_encoding(name, encoding)
-            .and_then(Identifier::must_be_ordinary)
-        {
-            Ok(name) => {
-                if !dictionary.variables.contains(&name.0) {
-                    name
-                } else {
-                    let new_name = decoder.generate_name(&dictionary);
-                    warn(Error::DuplicateVariableName {
-                        duplicate_name: name.clone(),
-                        new_name: new_name.clone(),
-                    });
-                    new_name
-                }
-            }
-            Err(id_error) => {
-                let new_name = decoder.generate_name(&dictionary);
-                warn(Error::InvalidVariableName {
-                    id_error,
-                    new_name: new_name.clone(),
-                });
-                new_name
-            }
-        };
-        let mut variable = Variable::new(
-            name.clone(),
-            VarWidth::try_from(input.width).unwrap(),
-            encoding,
-        );
-
-        // Set the short name the same as the long name (even if we renamed it).
-        variable.short_names = vec![name];
-
-        variable.label = input.label.clone();
-
-        variable.missing_values = input.missing_values.clone();
-
-        variable.print_format = decode_format(
-            input.print_format,
-            variable.width,
-            |new_spec, format_error| {
-                warn(Error::InvalidPrintFormat {
-                    new_spec,
-                    variable: variable.name.clone(),
-                    format_error,
-                })
-            },
-        );
-        variable.write_format = decode_format(
-            input.write_format,
-            variable.width,
-            |new_spec, format_error| {
-                warn(Error::InvalidWriteFormat {
-                    new_spec,
-                    variable: variable.name.clone(),
-                    format_error,
-                })
-            },
-        );
-
-        // Check for long string continuation records.
-        let n_values = input.width.n_values().unwrap();
-        for offset in 1..n_values {
-            if headers
-                .variable
-                .get(index + offset)
-                .is_none_or(|record| record.width != RawWidth::Continuation)
-            {
-                warn(Error::MissingLongStringContinuation {
-                    width: input.width,
-                    start_index: index,
-                    end_index: index + n_values - 1,
-                    error_index: index + offset,
-                });
-                break;
-            }
-        }
-
-        let dict_index = dictionary.add_var(variable).unwrap();
-        assert_eq!(var_index_map.insert(value_index, dict_index), None);
-        value_index += n_values;
-    }
-
-    if let Some(weight_index) = headers.header.weight_index {
-        let index = weight_index as usize - 1;
-        if index >= value_index {
-            warn(Error::WeightIndexOutOfRange {
-                index: weight_index,
-                max_index: var_index_map.len(),
-            });
-        } else {
-            let (var_index, dict_index) = var_index_map.range(..=&index).last().unwrap();
-            let variable = &dictionary.variables[*dict_index];
-            if *var_index == index {
-                if variable.is_numeric() {
-                    dictionary.weight = Some(*dict_index);
-                } else {
-                    warn(Error::InvalidWeightVar {
-                        index: weight_index,
-                        name: variable.name.clone(),
-                    });
-                }
-            } else {
-                warn(Error::WeightIndexStringContinuation {
-                    index: weight_index,
-                    name: variable.name.clone(),
-                });
-            }
-        }
-    }
-
-    for record in headers.value_label.drain(..) {
-        let mut dict_indexes = Vec::with_capacity(record.dict_indexes.len());
-        let mut long_string_variables = Vec::new();
-        for value_index in record.dict_indexes.iter() {
-            let Some(dict_index) = var_index_map.get(&(*value_index as usize - 1)) else {
-                unreachable!()
-            };
-            let variable = &dictionary.variables[*dict_index];
-            if variable.width.is_long_string() {
-                long_string_variables.push(variable.name.clone());
-            } else {
-                dict_indexes.push(*dict_index);
-            }
-        }
-        if !long_string_variables.is_empty() {
-            warn(Error::InvalidLongStringValueLabels {
-                offsets: record.offsets.clone(),
-                variables: long_string_variables,
-            });
-        }
-
-        let written_by_readstat = headers.header.eye_catcher.contains("ReadStat");
-        for dict_index in dict_indexes {
-            let variable = dictionary.variables.get_index_mut2(dict_index).unwrap();
-            let mut duplicates = Vec::new();
-            for ValueLabel {
-                datum: value,
-                label,
-            } in record.labels.iter().cloned()
-            {
-                let datum = value.decode(variable.width);
-                if variable.value_labels.insert(datum, label).is_some() {
-                    duplicates.push(value);
-                }
-            }
-            if written_by_readstat {
-                // Ignore any possible duplicates.  ReadStat is buggy and emits
-                // value labels whose values are longer than string variables'
-                // widths, that are identical in the actual width of the
-                // variable, e.g. both values "ABC123" and "ABC456" for a string
-                // variable with width 3.
-            } else if !duplicates.is_empty() {
-                warn(Error::DuplicateValueLabels {
-                    variable: variable.name.clone(),
-                    values: duplicates
-                        .iter()
-                        .map(|value| {
-                            value
-                                .decode(variable.width)
-                                .display(variable.print_format, variable.encoding)
-                                .with_trimming()
-                                .with_quoted_string()
-                                .to_string()
-                        })
-                        .collect(),
-                });
-            }
-        }
-    }
-
-    if let Some(display) = &headers.var_display {
-        for (index, display) in display.0.iter().enumerate() {
-            if let Some(variable) = dictionary.variables.get_index_mut2(index) {
-                if let Some(width) = display.width {
-                    variable.display_width = width;
-                }
-                if let Some(alignment) = display.alignment {
-                    variable.alignment = alignment;
-                }
-                if let Some(measure) = display.measure {
-                    variable.measure = Some(measure);
-                }
-            } else {
-                warn(dbg!(Error::TBD));
-            }
-        }
-    }
-
-    for record in headers
-        .multiple_response
-        .iter()
-        .flat_map(|record| record.0.iter())
-    {
-        match MultipleResponseSet::decode(&dictionary, record, &mut warn) {
-            Ok(mrset) => {
-                dictionary.mrsets.insert(ByIdentifier::new(mrset));
-            }
-            Err(error) => warn(error),
-        }
-    }
-
-    if !headers.very_long_strings.is_empty() {
-        'outer: for record in headers
-            .very_long_strings
-            .drain(..)
-            .flat_map(|record| record.0.into_iter())
-        {
-            let Some(index) = dictionary.variables.get_index_of(&record.short_name.0) else {
-                warn(dbg!(Error::TBD));
-                continue;
-            };
-            let width = VarWidth::String(record.length);
-            let n_segments = width.n_segments();
-            if n_segments == 1 {
-                warn(dbg!(Error::ShortVeryLongString {
-                    short_name: record.short_name.clone(),
-                    width: record.length
-                }));
-                continue;
-            }
-            if index + n_segments > dictionary.variables.len() {
-                warn(dbg!(Error::VeryLongStringOverflow {
-                    short_name: record.short_name.clone(),
-                    width: record.length,
-                    index,
-                    n_segments,
-                    len: dictionary.variables.len()
-                }));
-                continue;
-            }
-            let mut short_names = Vec::with_capacity(n_segments);
-            for i in 0..n_segments {
-                let alloc_width = width.segment_alloc_width(i);
-                let segment = &dictionary.variables[index + i];
-                short_names.push(segment.short_names[0].clone());
-                let segment_width = segment.width.as_string_width().unwrap_or(0);
-                if segment_width.next_multiple_of(8) != alloc_width.next_multiple_of(8) {
-                    warn(Error::VeryLongStringInvalidSegmentWidth {
-                        short_name: record.short_name.clone(),
-                        width: record.length,
-                        index: i,
-                        actual: segment_width,
-                        expected: alloc_width,
-                    });
-                    continue 'outer;
-                }
-            }
-            dictionary.delete_vars(index + 1..index + n_segments);
-            let variable = dictionary.variables.get_index_mut2(index).unwrap();
-            variable.short_names = short_names;
-            variable.resize(width);
-        }
-        cases = cases
-            .take()
-            .map(|cases| cases.with_widths(dictionary.variables.iter().map(|var| var.width)));
-    }
-
-    if headers.long_names.is_empty() {
-        // There are no long variable names.  Use the short variable names,
-        // converted to lowercase, as the long variable names.
-        for index in 0..dictionary.variables.len() {
-            let lower = dictionary.variables[index].name.0.as_ref().to_lowercase();
-            if let Ok(new_name) = Identifier::from_encoding(lower, dictionary.encoding) {
-                let _ = dictionary.try_rename_var(index, new_name);
-            }
-        }
-    } else {
-        // Rename each of the variables, one by one.  (In a correctly
-        // constructed system file, this cannot create any intermediate
-        // duplicate variable names, because all of the new variable names are
-        // longer than any of the old variable names and thus there cannot be
-        // any overlaps.)
-        for renaming in headers
-            .long_names
-            .iter()
-            .flat_map(|record| record.0.iter().cloned())
-        {
-            let LongName {
-                short_name,
-                long_name,
-            } = renaming;
-            if let Some(index) = dictionary.variables.get_index_of(&short_name.0) {
-                if let Err(long_name) = dictionary.try_rename_var(index, long_name) {
-                    warn(Error::DuplicateLongName(long_name));
-                }
-                dictionary
-                    .variables
-                    .get_index_mut2(index)
-                    .unwrap()
-                    .short_names = vec![short_name];
-            } else {
-                warn(dbg!(Error::TBD));
-            }
-        }
-    }
-
-    for mut attr_set in headers
-        .variable_attributes
-        .drain(..)
-        .flat_map(|record| record.0.into_iter())
-    {
-        if let Some((_, variable)) = dictionary
-            .variables
-            .get_full_mut2(&attr_set.long_var_name.0)
-        {
-            variable.attributes.append(&mut attr_set.attributes);
-        } else {
-            warn(dbg!(Error::TBD));
-        }
-    }
-
-    // Assign variable roles.
-    for index in 0..dictionary.variables.len() {
-        let variable = dictionary.variables.get_index_mut2(index).unwrap();
-        match variable.attributes.role() {
-            Ok(Some(role)) => variable.role = role,
-            Ok(None) => (),
-            Err(error) => warn(Error::InvalidRole(error)),
-        }
-    }
-
-    // Long string value labels.
-    for record in headers
-        .long_string_value_labels
-        .drain(..)
-        .flat_map(|record| record.0.into_iter())
-    {
-        let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else {
-            warn(Error::UnknownLongStringValueLabelVariable(
-                record.var_name.clone(),
-            ));
-            continue;
-        };
-        let Some(width) = variable.width.as_string_width() else {
-            warn(Error::LongStringValueLabelNumericVariable(
-                record.var_name.clone(),
-            ));
-            continue;
-        };
-        for (mut value, label) in record.labels.into_iter() {
-            // XXX warn about too-long value?
-            value.0.resize(width, b' ');
-            // XXX warn abouat duplicate value labels?
-            variable.value_labels.insert(Datum::String(value), label);
-        }
-    }
-
-    for mut record in headers
-        .long_string_missing_values
-        .drain(..)
-        .flat_map(|record| record.0.into_iter())
-    {
-        let Some((_, variable)) = dictionary.variables.get_full_mut2(&record.var_name.0) else {
-            warn(Error::LongStringMissingValueUnknownVariable {
-                name: record.var_name.clone(),
-            });
-            continue;
-        };
-        if !variable.width.is_long_string() {
-            warn(Error::LongStringMissingValueBadWdith {
-                name: record.var_name.clone(),
-                width: variable.width,
-            });
-            continue;
-        }
-        if record.missing_values.len() > 3 {
-            warn(Error::LongStringMissingValueInvalidCount {
-                name: record.var_name.clone(),
-                count: record.missing_values.len(),
-            });
-            record.missing_values.truncate(3);
-        }
-        let values = record
-            .missing_values
-            .into_iter()
-            .map(|v| {
-                let mut value = RawString::from(v.0.as_slice());
-                value.resize(variable.width.as_string_width().unwrap());
-                Datum::String(value)
-            })
-            .collect::<Vec<_>>();
-        match MissingValues::new(values, None) {
-            Ok(missing_values) => variable.missing_values = missing_values,
-            Err(MissingValuesError::TooWide) => warn(dbg!(Error::TBD)),
-            Err(MissingValuesError::TooMany) | Err(MissingValuesError::MixedTypes) => {
-                unreachable!()
-            }
-        }
-    }
-
-    for record in headers
-        .variable_sets
-        .drain(..)
-        .flat_map(|record| record.sets.into_iter())
-    {
-        let mut variables = Vec::with_capacity(record.variable_names.len());
-        for variable_name in record.variable_names {
-            let Some((dict_index, _)) = dictionary.variables.get_full_mut2(&variable_name.0) else {
-                warn(Error::UnknownVariableSetVariable {
-                    variable_set: record.name.clone(),
-                    variable: variable_name.clone(),
-                });
-                continue;
-            };
-            variables.push(dict_index);
-        }
-        let variable_set = VariableSet {
-            name: record.name,
-            variables,
-        };
-        dictionary.variable_sets.push(variable_set);
-    }
-
-    for record in headers.other_extension.drain(..) {
-        warn(Error::UnknownExtensionRecord {
-            offset: record.offsets.start,
-            subtype: record.subtype,
-            size: record.size,
-            count: record.count,
-        });
-    }
-
-    let metadata = Metadata::decode(&headers, warn);
-    if let Some(n_cases) = metadata.n_cases {
-        cases = cases.take().map(|cases| cases.with_expected_cases(n_cases))
-    }
-    Ok((dictionary, metadata, cases))
-}
-
 impl MultipleResponseSet {
     fn decode(
         dictionary: &Dictionary,
index b1e0528459b2d325375ce4520628ce0c26482b0e..94067aaafa1b8b05c41bfeea62f2d70f39f1368b 100644 (file)
@@ -915,7 +915,6 @@ impl Datum {
     ) -> Result<Option<Vec<Self>>, Error> {
         fn eof<R: Seek>(
             reader: &mut R,
-            case_vars: &[CaseVar],
             case_start: u64,
             n_chunks: usize,
         ) -> Result<Option<Vec<Datum>>, Error> {
@@ -939,7 +938,7 @@ impl Datum {
                 CaseVar::Numeric => {
                     let Some(raw) = Self::read_compressed_chunk(reader, codes, endian, bias)?
                     else {
-                        return eof(reader, case_vars, case_start, n_chunks);
+                        return eof(reader, case_start, n_chunks);
                     };
                     n_chunks += 1;
                     values.push(Datum::Number(endian.parse(raw)));
@@ -953,7 +952,7 @@ impl Datum {
                             let Some(raw) =
                                 Self::read_compressed_chunk(reader, codes, endian, bias)?
                             else {
-                                return eof(reader, case_vars, case_start, n_chunks);
+                                return eof(reader, case_start, n_chunks);
                             };
                             let n_data = data_bytes.min(8);
                             datum.extend_from_slice(&raw[..n_data]);
@@ -1122,14 +1121,11 @@ where
                 Some(Ok(record))
             }
             ReaderState::ZlibHeader => {
-                let zheader = match ZHeader::read(
-                    self.0.reader.as_mut().unwrap(),
-                    self.0.header.endian,
-                    &mut self.0.warn,
-                ) {
-                    Ok(zheader) => zheader,
-                    Err(error) => return Some(Err(error)),
-                };
+                let zheader =
+                    match ZHeader::read(self.0.reader.as_mut().unwrap(), self.0.header.endian) {
+                        Ok(zheader) => zheader,
+                        Err(error) => return Some(Err(error)),
+                    };
                 self.0.state = ReaderState::ZlibTrailer(zheader.clone());
                 Some(Ok(Record::ZHeader(zheader)))
             }
@@ -3408,11 +3404,7 @@ pub struct ZHeader {
 }
 
 impl ZHeader {
-    fn read<R: Read + Seek>(
-        r: &mut R,
-        endian: Endian,
-        warn: &mut dyn FnMut(Warning),
-    ) -> Result<ZHeader, Error> {
+    fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<ZHeader, Error> {
         let offset = r.stream_position()?;
         let zheader_offset: u64 = endian.parse(read_bytes(r)?);
         let ztrailer_offset: u64 = endian.parse(read_bytes(r)?);
index 075c0384ece9db73b88b03ecb4d8c331ebe9a2cf..304bea70b6168af45c5ce274591ff188a6aa7ba2 100644 (file)
@@ -7,7 +7,7 @@ use crate::{
         Details, Item, Text,
     },
     sys::{
-        cooked::{decode, Headers},
+        cooked::Headers,
         raw::{encoding_from_headers, Decoder, Reader},
         sack::sack,
     },
@@ -534,7 +534,7 @@ fn test_raw_sysfile(name: &str) {
     let sysfile = std::fs::read(&input_filename).unwrap();
     let expected_filename = input_filename.with_extension("expected");
     let expected = String::from_utf8(std::fs::read(&expected_filename).unwrap()).unwrap();
-    test_sysfile(name, sysfile, &expected, &expected_filename);
+    test_sysfile(sysfile, &expected, &expected_filename);
 }
 
 fn test_sack_sysfile(name: &str) {
@@ -554,11 +554,11 @@ fn test_sack_sysfile(name: &str) {
             },
         );
         let sysfile = sack(&input, Some(&input_filename), endian).unwrap();
-        test_sysfile(name, sysfile, &expected, &expected_filename);
+        test_sysfile(sysfile, &expected, &expected_filename);
     }
 }
 
-fn test_sysfile(name: &str, sysfile: Vec<u8>, expected: &str, expected_filename: &Path) {
+fn test_sysfile(sysfile: Vec<u8>, expected: &str, expected_filename: &Path) {
     let cursor = Cursor::new(sysfile);
     let mut warnings = Vec::new();
     let mut reader = Reader::new(cursor, |warning| warnings.push(warning)).unwrap();
@@ -577,7 +577,7 @@ fn test_sysfile(name: &str, sysfile: Vec<u8>, expected: &str, expected_filename:
             let mut errors = Vec::new();
             let headers = Headers::new(decoded_records, &mut |e| errors.push(e)).unwrap();
             let (dictionary, metadata, cases) =
-                decode(headers, cases, encoding, |e| errors.push(e)).unwrap();
+                headers.decode(cases, encoding, |e| errors.push(e)).unwrap();
             let (group, data) = metadata.to_pivot_rows();
             let metadata_table = PivotTable::new([(Axis3::Y, Dimension::new(group))]).with_data(
                 data.into_iter()