/// Naming scheme from which a code page number was taken.
///
/// `process_converter` records `Codepage` for names with a `cp` prefix,
/// `Ibm` for names with an `ibm-` prefix, and `Windows` for names with a
/// `windows-` prefix.  The ordering derives are needed because `Source` is
/// used as a `BTreeMap` key.
#[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd)]
enum Source {
- CP,
- IBM,
+ Codepage,
+ Ibm,
Windows,
}
// Code page number, parsed from the digits that follow a `cp`, `windows-`,
// or `ibm-` prefix in a converter name.
-type CPNumber = usize;
+type CodepageNumber = usize;
fn process_converter<'a>(
fields: &Vec<&'a str>,
- codepages: &mut BTreeMap<CPNumber, BTreeMap<Source, Vec<&'a str>>>,
+ codepages: &mut BTreeMap<CodepageNumber, BTreeMap<Source, Vec<&'a str>>>,
) {
if fields.is_empty() || fields[0] == "{" {
return;
}
- let mut cps: BTreeMap<Source, CPNumber> = BTreeMap::new();
+ let mut cps: BTreeMap<Source, CodepageNumber> = BTreeMap::new();
let mut iana = VecDeque::new();
let mut other = VecDeque::new();
}
if let Some(number) = name.strip_prefix("cp") {
- if let Ok(number) = number.parse::<CPNumber>() {
- cps.insert(Source::CP, number);
+ if let Ok(number) = number.parse::<CodepageNumber>() {
+ cps.insert(Source::Codepage, number);
}
}
if let Some(number) = name.strip_prefix("windows-") {
- if let Ok(number) = number.parse::<CPNumber>() {
+ if let Ok(number) = number.parse::<CodepageNumber>() {
cps.insert(Source::Windows, number);
}
}
if let Some(number) = name.strip_prefix("ibm-") {
- if let Ok(number) = number.parse::<CPNumber>() {
- cps.insert(Source::IBM, number);
+ if let Ok(number) = number.parse::<CodepageNumber>() {
+ cps.insert(Source::Ibm, number);
}
}
}
return;
}
- let all: Vec<&str> = iana.into_iter().chain(other.into_iter()).collect();
+ let all: Vec<&str> = iana.into_iter().chain(other).collect();
for (source, number) in cps {
codepages
.entry(number)
}
fn write_output(
- codepages: &BTreeMap<CPNumber, BTreeMap<Source, Vec<&str>>>,
+ codepages: &BTreeMap<CodepageNumber, BTreeMap<Source, Vec<&str>>>,
file_name: &PathBuf,
) -> Result<(), IoError> {
let mut file = File::create(file_name)?;
- write!(
- file,
- "{}",
+ file.write_all(
"\
use lazy_static::lazy_static;
use std::collections::HashMap;
static ref CODEPAGE_NUMBER_TO_NAME: HashMap<i32, &'static str> = {
let mut map = HashMap::new();
"
+ .as_bytes(),
)?;
for (&cpnumber, value) in codepages.iter() {
let name = value[source][0];
writeln!(file, " map.insert({cpnumber}, \"{name}\");")?;
}
- write!(
- file,
- "{}",
+ file.write_all(
" map
};
static ref CODEPAGE_NAME_TO_NUMBER: HashMap<&'static str, u32> = {
let mut map = HashMap::new();
"
+ .as_bytes(),
)?;
- let mut names: BTreeMap<String, BTreeMap<Source, Vec<CPNumber>>> = BTreeMap::new();
+ let mut names: BTreeMap<String, BTreeMap<Source, Vec<CodepageNumber>>> = BTreeMap::new();
for (&cpnumber, value) in codepages.iter() {
for (&source, value2) in value.iter() {
for name in value2.iter().map(|name| name.to_ascii_lowercase()) {
writeln!(file, " map.insert(\"{name}\", {});", numbers[0])?;
}
}
- write!(
- file,
- "{}",
+ file.write_all(
" map
};
}
"
+ .as_bytes(),
)?;
Ok(())
let input = read_to_string(&input_file)
.map_err(|e| anyhow!("{}: read failed ({e})", input_file.to_string_lossy()))?;
- let mut codepages: BTreeMap<CPNumber, BTreeMap<Source, Vec<&str>>> = BTreeMap::new();
+ let mut codepages: BTreeMap<CodepageNumber, BTreeMap<Source, Vec<&str>>> = BTreeMap::new();
let mut converter: Vec<&str> = Vec::new();
for line in input.lines() {
let line = line
.map(|position| &line[..position])
.unwrap_or(line)
.trim_end();
- if !line.starts_with(&[' ', '\t']) {
+ if !line.starts_with([' ', '\t']) {
process_converter(&converter, &mut codepages);
converter.clear();
}
-use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat};
+use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat, ops::Range};
use crate::{
encoding::{default_encoding, get_encoding, Error as EncodingError},
#[error("Using default encoding {0}.")]
UsingDefaultEncoding(String),
// The record's start and end offsets are printed with `#x` so that they carry
// a `0x` prefix, like the offsets in the other messages.
- #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")]
- InvalidVariableWidth { offset: u64, width: i32 },
+ #[error("Variable record from offset {:#x} to {:#x} specifies width {width} not in valid range [-1,255).", offsets.start, offsets.end)]
+ InvalidVariableWidth { offsets: Range<u64>, width: i32 },
#[error("This file has corrupted metadata written by a buggy version of PSPP. To ensure that other software can read it correctly, save a new copy of the file.")]
InvalidLongMissingValueFormat,
raw::Record::VeryLongStrings(ref input) => {
let s = decoder.decode_string_cow(&input.text.0, warn);
output.push(Record::VeryLongStrings(VeryLongStringRecord::parse(
- &mut decoder,
- &s,
- warn,
+ &decoder, &s, warn,
)?));
}
raw::Record::FileAttributes(ref input) => {
fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> {
if let (s, false) = self.encoding.decode_without_bom_handling(input) {
// This is the common case. Usually there will be no errors.
- s.into()
+ s
} else {
// Unusual case. Don't bother to optimize it much.
let mut decoder = self.encoding.new_decoder_without_bom_handling();
-1 => return Ok(None),
_ => {
return Err(Error::InvalidVariableWidth {
- offset: input.offset,
+ offsets: input.offsets.clone(),
width: input.width,
})
}
let label = decoder.decode_string(&label.0, &warn);
let value = Value::decode(
raw::Value::from_raw(*value, var_type, decoder.endian),
- &decoder,
+ decoder,
);
ValueLabel { value, label }
})
impl LongName {
fn new(decoder: &mut Decoder, short_name: &str, long_name: &str) -> Result<LongName, Error> {
- let short_name = Identifier::new(short_name, decoder.encoding)
- .map_err(|e| Error::InvalidShortName(e))?;
+ let short_name =
+ Identifier::new(short_name, decoder.encoding).map_err(Error::InvalidShortName)?;
let long_name =
- Identifier::new(long_name, decoder.encoding).map_err(|e| Error::InvalidLongName(e))?;
+ Identifier::new(long_name, decoder.encoding).map_err(Error::InvalidLongName)?;
Ok(LongName {
short_name,
long_name,
let Some((short_name, length)) = input.split_once('=') else {
return Err(Error::TBD);
};
- let short_name = Identifier::new(short_name, decoder.encoding)
- .map_err(|e| Error::InvalidLongStringName(e))?;
+ let short_name =
+ Identifier::new(short_name, decoder.encoding).map_err(Error::InvalidLongStringName)?;
let length: u16 = length.parse().map_err(|_| Error::TBD)?;
if length > VarWidth::MAX_STRING {
return Err(Error::TBD);
}
- Ok(VeryLongString {
- short_name: short_name.into(),
- length,
- })
+ Ok(VeryLongString { short_name, length })
}
}
}
if let Some(rest) = rest.strip_prefix(')') {
let attribute = Identifier::new(name, decoder.encoding)
- .map_err(|e| Error::InvalidAttributeName(e))
+ .map_err(Error::InvalidAttributeName)
.warn_on_error(warn)
.map(|name| Attribute { name, values });
return Ok((attribute, rest));
};
let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'), warn)?;
let var_attribute = Identifier::new(long_var_name, decoder.encoding)
- .map_err(|e| Error::InvalidAttributeVariableName(e))
+ .map_err(Error::InvalidAttributeVariableName)
.warn_on_error(warn)
.map(|name| VarAttributeSet {
long_var_name: name,
) -> Result<Self, Error> {
let mr_set_name = decoder
.decode_identifier(&input.name.0, warn)
- .map_err(|error| Error::InvalidMrSetName(error))?;
+ .map_err(Error::InvalidMrSetName)?;
let label = decoder.decode_string(&input.label.0, warn);
) -> Result<Self, Error> {
let var_name = decoder.decode_string(&input.var_name.0, warn);
let var_name = Identifier::new(var_name.trim_end(), decoder.encoding)
- .map_err(|e| Error::InvalidLongStringValueLabelName(e))?;
+ .map_err(Error::InvalidLongStringValueLabelName)?;
let min_width = 9;
let max_width = VarWidth::MAX_STRING;
if input.width < 9 || input.width > max_width as u32 {
return Err(Error::InvalidLongValueLabelWidth {
- name: var_name.into(),
+ name: var_name,
width: input.width,
min_width,
max_width,
/// Returns the encoding whose label matches `locale_charset()`, or UTF-8 if
/// `Encoding::for_label` does not recognize that label.
///
/// The lookup result is held in a `lazy_static!` static and a reference to
/// it is returned.
pub fn default_encoding() -> &'static Encoding {
lazy_static! {
static ref DEFAULT_ENCODING: &'static Encoding =
- Encoding::for_label(locale_charset().as_bytes()).unwrap_or(&UTF_8);
+ Encoding::for_label(locale_charset().as_bytes()).unwrap_or(UTF_8);
}
&DEFAULT_ENCODING
}
return Err(Error::NoEncoding);
};
- Ok(Encoding::for_label(label.as_bytes()).ok_or(Error::UnknownEncoding(label.into()))?)
+ Encoding::for_label(label.as_bytes()).ok_or(Error::UnknownEncoding(label.into()))
}
let saved_locale = set_locale(LC_CTYPE, None);
set_locale(LC_CTYPE, Some(""));
let codeset = string_from_pointer(nl_langinfo(CODESET));
- set_locale(LC_CTYPE, saved_locale.as_ref().map(|x| x.as_str()));
+ set_locale(LC_CTYPE, saved_locale.as_deref());
codeset
}
}
use flate2::read::ZlibDecoder;
use num::Integer;
use std::borrow::Cow;
+use std::cmp::Ordering;
use std::fmt::{Debug, Formatter, Result as FmtResult};
+use std::ops::Range;
use std::str::from_utf8;
use std::{
collections::VecDeque,
#[error("At offset {offset:#x}, unrecognized record type {rec_type}.")]
BadRecordType { offset: u64, rec_type: u32 },
- #[error("At offset {offset:#x}, variable label code ({code}) is not 0 or 1.")]
- BadVariableLabelCode { offset: u64, code: u32 },
+ #[error("In variable record starting at offset {start_offset:#x}, variable label code {code} at offset {code_offset:#x} is not 0 or 1.")]
+ BadVariableLabelCode { start_offset: u64, code_offset: u64, code: u32 },
#[error(
"At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3."
// If `s` is valid UTF-8, returns it borrowed as a `str` without copying;
// otherwise returns it decoded as Latin-1 (actually bytes interpreted as
// Unicode code points) by `decode_latin1`.
//
// The returned `Cow` borrows from `s`; the `'_` makes that elided lifetime
// visible in the signature.
-fn default_decode<'a>(s: &'a [u8]) -> Cow<'a, str> {
+fn default_decode(s: &[u8]) -> Cow<'_, str> {
from_utf8(s).map_or_else(|_| decode_latin1(s), Cow::from)
}
ZLib,
}
// Interface for reporting a range of file offsets.  `HeaderRecord`
// implements it by returning a copy of its `offsets` field.
+trait Header {
// Returns the range of offsets as an owned `Range<u64>`.
+ fn offsets(&self) -> Range<u64>;
+}
+
#[derive(Clone)]
pub struct HeaderRecord {
+ /// Offset in file.
+ pub offsets: Range<u64>,
+
/// Magic number.
pub magic: Magic,
fn fmt(&self, f: &mut Formatter) -> FmtResult {
writeln!(f, "File header record:")?;
self.debug_field(f, "Magic", self.magic)?;
- self.debug_field(f, "Product name", &self.eye_catcher)?;
+ self.debug_field(f, "Product name", self.eye_catcher)?;
self.debug_field(f, "Layout code", self.layout_code)?;
self.debug_field(f, "Nominal case size", self.nominal_case_size)?;
self.debug_field(f, "Compression", self.compression)?;
self.debug_field(f, "Weight index", self.weight_index)?;
self.debug_field(f, "Number of cases", self.n_cases)?;
self.debug_field(f, "Compression bias", self.bias)?;
- self.debug_field(f, "Creation date", &self.creation_date)?;
- self.debug_field(f, "Creation time", &self.creation_time)?;
- self.debug_field(f, "File label", &self.file_label)?;
+ self.debug_field(f, "Creation date", self.creation_date)?;
+ self.debug_field(f, "Creation time", self.creation_time)?;
+ self.debug_field(f, "File label", self.file_label)?;
self.debug_field(f, "Endianness", self.endian)
}
}
impl HeaderRecord {
- fn read<R: Read>(r: &mut R) -> Result<HeaderRecord, Error> {
+ fn read<R: Read + Seek>(r: &mut R) -> Result<HeaderRecord, Error> {
+ let start = r.stream_position()?;
+
let magic: [u8; 4] = read_bytes(r)?;
let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?;
let _: [u8; 3] = read_bytes(r)?;
Ok(HeaderRecord {
+ offsets: start..r.stream_position()?,
magic,
layout_code,
nominal_case_size,
}
}
// Exposes the header's stored `offsets` range through the `Header` trait.
+impl Header for HeaderRecord {
+ fn offsets(&self) -> Range<u64> {
// `Range<u64>` is not `Copy`, so hand back a clone of the stored range.
+ self.offsets.clone()
+ }
+}
+
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
pub struct Magic([u8; 4]);
impl Debug for Magic {
fn fmt(&self, f: &mut Formatter) -> FmtResult {
- let s = match self {
- &Magic::SAV => "$FL2",
- &Magic::ZSAV => "$FL3",
- &Magic::EBCDIC => "($FL2 in EBCDIC)",
+ let s = match *self {
+ Magic::SAV => "$FL2",
+ Magic::ZSAV => "$FL3",
+ Magic::EBCDIC => "($FL2 in EBCDIC)",
_ => return write!(f, "{:?}", self.0),
};
write!(f, "{s}")
#[derive(Clone)]
pub struct VariableRecord {
- /// Offset from the start of the file to the start of the record.
- pub offset: u64,
+ /// Range of offsets in file.
+ pub offsets: Range<u64>,
/// Variable width, in the range -1..=255.
pub width: i32,
f,
"Width: {} ({})",
self.width,
- if self.width > 0 {
- "string"
- } else if self.width == 0 {
- "numeric"
- } else {
- "long string continuation record"
+ match self.width.cmp(&0) {
+ Ordering::Greater => "string",
+ Ordering::Equal => "numeric",
+ Ordering::Less => "long string continuation record",
}
)?;
writeln!(f, "Print format: {:?}", self.print_format)?;
impl VariableRecord {
fn read<R: Read + Seek>(r: &mut R, endian: Endian) -> Result<VariableRecord, Error> {
- let offset = r.stream_position()?;
+ let start_offset = r.stream_position()?;
let width: i32 = endian.parse(read_bytes(r)?);
+ let code_offset = r.stream_position()?;
let has_variable_label: u32 = endian.parse(read_bytes(r)?);
let missing_value_code: i32 = endian.parse(read_bytes(r)?);
let print_format = Spec(endian.parse(read_bytes(r)?));
}
_ => {
return Err(Error::BadVariableLabelCode {
- offset,
+ start_offset,
+ code_offset,
code: has_variable_label,
})
}
};
- let missing_values = MissingValues::read(r, offset, width, missing_value_code, endian)?;
+ let missing_values = MissingValues::read(r, start_offset, width, missing_value_code, endian)?;
+
+ let end_offset = r.stream_position()?;
Ok(VariableRecord {
- offset,
+ offsets: start_offset..end_offset,
width,
name,
print_format,
impl MultipleResponseType {
fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), Error> {
- let (mr_type, input) = match input.get(0) {
+ let (mr_type, input) = match input.first() {
Some(b'C') => (MultipleResponseType::MultipleCategory, &input[1..]),
Some(b'D') => {
let (value, input) = parse_counted_string(&input[1..])?;
(
MultipleResponseType::MultipleDichotomy {
- value: value.into(),
+ value,
labels: CategoryLabels::VarLabels,
},
input,
let (value, input) = parse_counted_string(input)?;
(
MultipleResponseType::MultipleDichotomy {
- value: value.into(),
+ value,
labels,
},
input,
};
let (name, input) = input.split_at(equals);
let (mr_type, input) = MultipleResponseType::parse(input)?;
- let Some(b' ') = input.get(0) else {
+ let Some(b' ') = input.first() else {
return Err(Error::TBD);
};
let (label, mut input) = parse_counted_string(&input[1..])?;
let mut vars = Vec::new();
- while input.get(0) == Some(&b' ') {
+ while input.first() == Some(&b' ') {
input = &input[1..];
let Some(length) = input.iter().position(|b| b" \n".contains(b)) else {
return Err(Error::TBD);
}
input = &input[length..];
}
- if input.get(0) != Some(&b'\n') {
+ if input.first() != Some(&b'\n') {
return Err(Error::TBD);
}
- while input.get(0) == Some(&b'\n') {
+ while input.first() == Some(&b'\n') {
input = &input[1..];
}
Ok((
MultipleResponseSet {
name: name.into(),
- label: label.into(),
+ label,
mr_type,
short_names: vars,
},
.find(|c: char| {
!(c.is_ascii_digit() || c.is_alphabetic() || c == '.' || c == '-')
})
- .unwrap_or_else(|| s.len());
+ .unwrap_or(s.len());
let (number, rest) = s.split_at(len);
let token = if number == "-" {
Token::Minus
|| c == '.'
|| c == '_')
})
- .unwrap_or_else(|| s.len());
+ .unwrap_or(s.len());
let (s, rest) = s.split_at(len);
if let Some(rest) = rest.strip_prefix(':') {
(Token::Label(s.into()), rest)
})?);
(token, rest)
} else {
- let token = match &s[..] {
+ let token = match s {
"i8" => Token::I8,
"i16" => Token::I16,
"i64" => Token::I64,
let output = sack(&input, Some(&input_file_str), endian)?;
let output_file_str = output_file_name.to_string_lossy();
- std::fs::write(&output_file_name, &output)
+ std::fs::write(&output_file_name, output)
.map_err(|err| anyhow!("{output_file_str}: write failed ({err})"))?;
Ok(())