From dc4af933f65a9f9d0ae2931ce9cca1ffd5353ac4 Mon Sep 17 00:00:00 2001 From: Ben Pfaff Date: Wed, 15 Nov 2023 19:45:28 -0800 Subject: [PATCH] clippy --- rust/build.rs | 45 ++++++++++---------- rust/src/cooked.rs | 41 ++++++++----------- rust/src/encoding.rs | 4 +- rust/src/locale_charset.rs | 2 +- rust/src/raw.rs | 84 +++++++++++++++++++++++--------------- rust/src/sack.rs | 6 +-- rust/tests/sack.rs | 2 +- 7 files changed, 98 insertions(+), 86 deletions(-) diff --git a/rust/build.rs b/rust/build.rs index 112f3d3da8..baaacc5d4f 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -9,23 +9,23 @@ use std::{ #[derive(Copy, Clone, PartialEq, Eq, Ord, PartialOrd)] enum Source { - CP, - IBM, + Codepage, + Ibm, Windows, } // Code page number. -type CPNumber = usize; +type CodepageNumber = usize; fn process_converter<'a>( fields: &Vec<&'a str>, - codepages: &mut BTreeMap>>, + codepages: &mut BTreeMap>>, ) { if fields.is_empty() || fields[0] == "{" { return; } - let mut cps: BTreeMap = BTreeMap::new(); + let mut cps: BTreeMap = BTreeMap::new(); let mut iana = VecDeque::new(); let mut other = VecDeque::new(); @@ -56,20 +56,20 @@ fn process_converter<'a>( } if let Some(number) = name.strip_prefix("cp") { - if let Ok(number) = number.parse::() { - cps.insert(Source::CP, number); + if let Ok(number) = number.parse::() { + cps.insert(Source::Codepage, number); } } if let Some(number) = name.strip_prefix("windows-") { - if let Ok(number) = number.parse::() { + if let Ok(number) = number.parse::() { cps.insert(Source::Windows, number); } } if let Some(number) = name.strip_prefix("ibm-") { - if let Ok(number) = number.parse::() { - cps.insert(Source::IBM, number); + if let Ok(number) = number.parse::() { + cps.insert(Source::Ibm, number); } } } @@ -79,7 +79,7 @@ fn process_converter<'a>( return; } - let all: Vec<&str> = iana.into_iter().chain(other.into_iter()).collect(); + let all: Vec<&str> = iana.into_iter().chain(other).collect(); for (source, number) in cps { codepages .entry(number) @@ -89,14 +89,12 @@ fn process_converter<'a>( } fn write_output( - codepages: &BTreeMap>>, + codepages: &BTreeMap>>, file_name: &PathBuf, ) -> Result<(), IoError> { let mut file = File::create(file_name)?; - write!( - file, - "{}", + file.write_all( "\ use lazy_static::lazy_static; use std::collections::HashMap; @@ -105,6 +103,7 @@ lazy_static! { static ref CODEPAGE_NUMBER_TO_NAME: HashMap = { let mut map = HashMap::new(); " + .as_bytes(), )?; for (&cpnumber, value) in codepages.iter() { @@ -112,18 +111,17 @@ lazy_static! { let name = value[source][0]; writeln!(file, " map.insert({cpnumber}, \"{name}\");")?; } - write!( - file, - "{}", + file.write_all( " map }; static ref CODEPAGE_NAME_TO_NUMBER: HashMap<&'static str, u32> = { let mut map = HashMap::new(); " + .as_bytes(), )?; - let mut names: BTreeMap>> = BTreeMap::new(); + let mut names: BTreeMap>> = BTreeMap::new(); for (&cpnumber, value) in codepages.iter() { for (&source, value2) in value.iter() { for name in value2.iter().map(|name| name.to_ascii_lowercase()) { @@ -142,13 +140,12 @@ lazy_static! { writeln!(file, " map.insert(\"{name}\", {});", numbers[0])?; } } - write!( - file, - "{}", + file.write_all( " map }; } " + .as_bytes(), )?; Ok(()) @@ -162,7 +159,7 @@ fn main() -> AnyResult<()> { let input = read_to_string(&input_file) .map_err(|e| anyhow!("{}: read failed ({e})", input_file.to_string_lossy()))?; - let mut codepages: BTreeMap>> = BTreeMap::new(); + let mut codepages: BTreeMap>> = BTreeMap::new(); let mut converter: Vec<&str> = Vec::new(); for line in input.lines() { let line = line @@ -170,7 +167,7 @@ fn main() -> AnyResult<()> { .map(|position| &line[..position]) .unwrap_or(line) .trim_end(); - if !line.starts_with(&[' ', '\t']) { + if !line.starts_with([' ', '\t']) { process_converter(&converter, &mut codepages); converter.clear(); } diff --git a/rust/src/cooked.rs b/rust/src/cooked.rs index 2e67965e41..f3c6adbbc4 100644 --- a/rust/src/cooked.rs +++ b/rust/src/cooked.rs @@ -1,4 +1,4 @@ -use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat}; +use std::{borrow::Cow, cmp::Ordering, collections::HashMap, iter::repeat, ops::Range}; use crate::{ encoding::{default_encoding, get_encoding, Error as EncodingError}, @@ -28,8 +28,8 @@ pub enum Error { #[error("Using default encoding {0}.")] UsingDefaultEncoding(String), - #[error("Variable record at offset {offset:#x} specifies width {width} not in valid range [-1,255).")] - InvalidVariableWidth { offset: u64, width: i32 }, + #[error("Variable record from offset {:x} to {:x} specifies width {width} not in valid range [-1,255).", offsets.start, offsets.end)] + InvalidVariableWidth { offsets: Range, width: i32 }, #[error("This file has corrupted metadata written by a buggy version of PSPP. To ensure that other software can read it correctly, save a new copy of the file.")] InvalidLongMissingValueFormat, @@ -328,9 +328,7 @@ pub fn decode( raw::Record::VeryLongStrings(ref input) => { let s = decoder.decode_string_cow(&input.text.0, warn); output.push(Record::VeryLongStrings(VeryLongStringRecord::parse( - &mut decoder, - &s, - warn, + &decoder, &s, warn, )?)); } raw::Record::FileAttributes(ref input) => { @@ -412,7 +410,7 @@ impl Decoder { fn decode_exact_length<'a>(&self, input: &'a [u8]) -> Cow<'a, str> { if let (s, false) = self.encoding.decode_without_bom_handling(input) { // This is the common case. Usually there will be no errors. - s.into() + s } else { // Unusual case. Don't bother to optimize it much. let mut decoder = self.encoding.new_decoder_without_bom_handling(); @@ -607,7 +605,7 @@ impl TryDecode for VariableRecord { -1 => return Ok(None), _ => { return Err(Error::InvalidVariableWidth { - offset: input.offset, + offsets: input.offsets.clone(), width: input.width, }) } @@ -820,7 +818,7 @@ impl TryDecode for ValueLabelRecord { let label = decoder.decode_string(&label.0, &warn); let value = Value::decode( raw::Value::from_raw(*value, var_type, decoder.endian), - &decoder, + decoder, ); ValueLabel { value, label } }) @@ -871,10 +869,10 @@ pub struct LongName { impl LongName { fn new(decoder: &mut Decoder, short_name: &str, long_name: &str) -> Result { - let short_name = Identifier::new(short_name, decoder.encoding) - .map_err(|e| Error::InvalidShortName(e))?; + let short_name = + Identifier::new(short_name, decoder.encoding).map_err(Error::InvalidShortName)?; let long_name = - Identifier::new(long_name, decoder.encoding).map_err(|e| Error::InvalidLongName(e))?; + Identifier::new(long_name, decoder.encoding).map_err(Error::InvalidLongName)?; Ok(LongName { short_name, long_name, @@ -914,16 +912,13 @@ impl VeryLongString { let Some((short_name, length)) = input.split_once('=') else { return Err(Error::TBD); }; - let short_name = Identifier::new(short_name, decoder.encoding) - .map_err(|e| Error::InvalidLongStringName(e))?; + let short_name = + Identifier::new(short_name, decoder.encoding).map_err(Error::InvalidLongStringName)?; let length: u16 = length.parse().map_err(|_| Error::TBD)?; if length > VarWidth::MAX_STRING { return Err(Error::TBD); } - Ok(VeryLongString { - short_name: short_name.into(), - length, - }) + Ok(VeryLongString { short_name, length }) } } @@ -977,7 +972,7 @@ impl Attribute { } if let Some(rest) = rest.strip_prefix(')') { let attribute = Identifier::new(name, decoder.encoding) - .map_err(|e| Error::InvalidAttributeName(e)) + .map_err(Error::InvalidAttributeName) .warn_on_error(warn) .map(|name| Attribute { name, values }); return Ok((attribute, rest)); @@ -1045,7 +1040,7 @@ impl VarAttributeSet { }; let (attributes, rest) = AttributeSet::parse(decoder, rest, Some('/'), warn)?; let var_attribute = Identifier::new(long_var_name, decoder.encoding) - .map_err(|e| Error::InvalidAttributeVariableName(e)) + .map_err(Error::InvalidAttributeVariableName) .warn_on_error(warn) .map(|name| VarAttributeSet { long_var_name: name, @@ -1236,7 +1231,7 @@ impl MultipleResponseSet { ) -> Result { let mr_set_name = decoder .decode_identifier(&input.name.0, warn) - .map_err(|error| Error::InvalidMrSetName(error))?; + .map_err(Error::InvalidMrSetName)?; let label = decoder.decode_string(&input.label.0, warn); @@ -1325,13 +1320,13 @@ impl LongStringValueLabels { ) -> Result { let var_name = decoder.decode_string(&input.var_name.0, warn); let var_name = Identifier::new(var_name.trim_end(), decoder.encoding) - .map_err(|e| Error::InvalidLongStringValueLabelName(e))?; + .map_err(Error::InvalidLongStringValueLabelName)?; let min_width = 9; let max_width = VarWidth::MAX_STRING; if input.width < 9 || input.width > max_width as u32 { return Err(Error::InvalidLongValueLabelWidth { - name: var_name.into(), + name: var_name, width: input.width, min_width, max_width, diff --git a/rust/src/encoding.rs b/rust/src/encoding.rs index 8fd13f3ea3..aaed5fd4ca 100644 --- a/rust/src/encoding.rs +++ b/rust/src/encoding.rs @@ -29,7 +29,7 @@ pub enum Error { pub fn default_encoding() -> &'static Encoding { lazy_static! { static ref DEFAULT_ENCODING: &'static Encoding = - Encoding::for_label(locale_charset().as_bytes()).unwrap_or(&UTF_8); + Encoding::for_label(locale_charset().as_bytes()).unwrap_or(UTF_8); } &DEFAULT_ENCODING } @@ -60,5 +60,5 @@ pub fn get_encoding( return Err(Error::NoEncoding); }; - Ok(Encoding::for_label(label.as_bytes()).ok_or(Error::UnknownEncoding(label.into()))?) + Encoding::for_label(label.as_bytes()).ok_or(Error::UnknownEncoding(label.into())) } diff --git a/rust/src/locale_charset.rs b/rust/src/locale_charset.rs index 8b3de74daf..596fd62406 100644 --- a/rust/src/locale_charset.rs +++ b/rust/src/locale_charset.rs @@ -248,7 +248,7 @@ mod inner { let saved_locale = set_locale(LC_CTYPE, None); set_locale(LC_CTYPE, Some("")); let codeset = string_from_pointer(nl_langinfo(CODESET)); - set_locale(LC_CTYPE, saved_locale.as_ref().map(|x| x.as_str())); + set_locale(LC_CTYPE, saved_locale.as_deref()); codeset } } diff --git a/rust/src/raw.rs b/rust/src/raw.rs index ac8b3a0570..345a719e47 100644 --- a/rust/src/raw.rs +++ b/rust/src/raw.rs @@ -4,7 +4,9 @@ use encoding_rs::mem::decode_latin1; use flate2::read::ZlibDecoder; use num::Integer; use std::borrow::Cow; +use std::cmp::Ordering; use std::fmt::{Debug, Formatter, Result as FmtResult}; +use std::ops::Range; use std::str::from_utf8; use std::{ collections::VecDeque, @@ -41,8 +43,8 @@ pub enum Error { #[error("At offset {offset:#x}, unrecognized record type {rec_type}.")] BadRecordType { offset: u64, rec_type: u32 }, - #[error("At offset {offset:#x}, variable label code ({code}) is not 0 or 1.")] - BadVariableLabelCode { offset: u64, code: u32 }, + #[error("In variable record starting at offset {start_offset:#x}, variable label code {code} at offset {code_offset:#x} is not 0 or 1.")] + BadVariableLabelCode { start_offset: u64, code_offset: u64, code: u32 }, #[error( "At offset {offset:#x}, numeric missing value code ({code}) is not -3, -2, 0, 1, 2, or 3." @@ -173,7 +175,7 @@ impl Record { // If `s` is valid UTF-8, returns it decoded as UTF-8, otherwise returns it // decoded as Latin-1 (actually bytes interpreted as Unicode code points). -fn default_decode<'a>(s: &'a [u8]) -> Cow<'a, str> { +fn default_decode<>(s: &[u8]) -> Cow { from_utf8(s).map_or_else(|_| decode_latin1(s), Cow::from) } @@ -183,8 +185,15 @@ pub enum Compression { ZLib, } +trait Header { + fn offsets(&self) -> Range; +} + #[derive(Clone)] pub struct HeaderRecord { + /// Offset in file. + pub offsets: Range, + /// Magic number. pub magic: Magic, @@ -235,22 +244,24 @@ impl Debug for HeaderRecord { fn fmt(&self, f: &mut Formatter) -> FmtResult { writeln!(f, "File header record:")?; self.debug_field(f, "Magic", self.magic)?; - self.debug_field(f, "Product name", &self.eye_catcher)?; + self.debug_field(f, "Product name", self.eye_catcher)?; self.debug_field(f, "Layout code", self.layout_code)?; self.debug_field(f, "Nominal case size", self.nominal_case_size)?; self.debug_field(f, "Compression", self.compression)?; self.debug_field(f, "Weight index", self.weight_index)?; self.debug_field(f, "Number of cases", self.n_cases)?; self.debug_field(f, "Compression bias", self.bias)?; - self.debug_field(f, "Creation date", &self.creation_date)?; - self.debug_field(f, "Creation time", &self.creation_time)?; - self.debug_field(f, "File label", &self.file_label)?; + self.debug_field(f, "Creation date", self.creation_date)?; + self.debug_field(f, "Creation time", self.creation_time)?; + self.debug_field(f, "File label", self.file_label)?; self.debug_field(f, "Endianness", self.endian) } } impl HeaderRecord { - fn read(r: &mut R) -> Result { + fn read(r: &mut R) -> Result { + let start = r.stream_position()?; + let magic: [u8; 4] = read_bytes(r)?; let magic: Magic = magic.try_into().map_err(|_| Error::NotASystemFile)?; @@ -288,6 +299,7 @@ impl HeaderRecord { let _: [u8; 3] = read_bytes(r)?; Ok(HeaderRecord { + offsets: start..r.stream_position()?, magic, layout_code, nominal_case_size, @@ -304,6 +316,12 @@ impl HeaderRecord { } } +impl Header for HeaderRecord { + fn offsets(&self) -> Range { + self.offsets.clone() + } +} + #[derive(Copy, Clone, PartialEq, Eq, Hash)] pub struct Magic([u8; 4]); @@ -321,10 +339,10 @@ impl Magic { impl Debug for Magic { fn fmt(&self, f: &mut Formatter) -> FmtResult { - let s = match self { - &Magic::SAV => "$FL2", - &Magic::ZSAV => "$FL3", - &Magic::EBCDIC => "($FL2 in EBCDIC)", + let s = match *self { + Magic::SAV => "$FL2", + Magic::ZSAV => "$FL3", + Magic::EBCDIC => "($FL2 in EBCDIC)", _ => return write!(f, "{:?}", self.0), }; write!(f, "{s}") @@ -846,8 +864,8 @@ impl MissingValues { #[derive(Clone)] pub struct VariableRecord { - /// Offset from the start of the file to the start of the record. - pub offset: u64, + /// Range of offsets in file. + pub offsets: Range, /// Variable width, in the range -1..=255. pub width: i32, @@ -874,12 +892,10 @@ impl Debug for VariableRecord { f, "Width: {} ({})", self.width, - if self.width > 0 { - "string" - } else if self.width == 0 { - "numeric" - } else { - "long string continuation record" + match self.width.cmp(&0) { + Ordering::Greater => "string", + Ordering::Equal => "numeric", + Ordering::Less => "long string continuation record", } )?; writeln!(f, "Print format: {:?}", self.print_format)?; @@ -892,8 +908,9 @@ impl Debug for VariableRecord { impl VariableRecord { fn read(r: &mut R, endian: Endian) -> Result { - let offset = r.stream_position()?; + let start_offset = r.stream_position()?; let width: i32 = endian.parse(read_bytes(r)?); + let code_offset = r.stream_position()?; let has_variable_label: u32 = endian.parse(read_bytes(r)?); let missing_value_code: i32 = endian.parse(read_bytes(r)?); let print_format = Spec(endian.parse(read_bytes(r)?)); @@ -914,16 +931,19 @@ impl VariableRecord { } _ => { return Err(Error::BadVariableLabelCode { - offset, + start_offset, + code_offset, code: has_variable_label, }) } }; - let missing_values = MissingValues::read(r, offset, width, missing_value_code, endian)?; + let missing_values = MissingValues::read(r, start_offset, width, missing_value_code, endian)?; + + let end_offset = r.stream_position()?; Ok(VariableRecord { - offset, + offsets: start_offset..end_offset, width, name, print_format, @@ -1219,13 +1239,13 @@ pub enum MultipleResponseType { impl MultipleResponseType { fn parse(input: &[u8]) -> Result<(MultipleResponseType, &[u8]), Error> { - let (mr_type, input) = match input.get(0) { + let (mr_type, input) = match input.first() { Some(b'C') => (MultipleResponseType::MultipleCategory, &input[1..]), Some(b'D') => { let (value, input) = parse_counted_string(&input[1..])?; ( MultipleResponseType::MultipleDichotomy { - value: value.into(), + value, labels: CategoryLabels::VarLabels, }, input, @@ -1246,7 +1266,7 @@ impl MultipleResponseType { let (value, input) = parse_counted_string(input)?; ( MultipleResponseType::MultipleDichotomy { - value: value.into(), + value, labels, }, input, @@ -1273,12 +1293,12 @@ impl MultipleResponseSet { }; let (name, input) = input.split_at(equals); let (mr_type, input) = MultipleResponseType::parse(input)?; - let Some(b' ') = input.get(0) else { + let Some(b' ') = input.first() else { return Err(Error::TBD); }; let (label, mut input) = parse_counted_string(&input[1..])?; let mut vars = Vec::new(); - while input.get(0) == Some(&b' ') { + while input.first() == Some(&b' ') { input = &input[1..]; let Some(length) = input.iter().position(|b| b" \n".contains(b)) else { return Err(Error::TBD); @@ -1288,16 +1308,16 @@ impl MultipleResponseSet { } input = &input[length..]; } - if input.get(0) != Some(&b'\n') { + if input.first() != Some(&b'\n') { return Err(Error::TBD); } - while input.get(0) == Some(&b'\n') { + while input.first() == Some(&b'\n') { input = &input[1..]; } Ok(( MultipleResponseSet { name: name.into(), - label: label.into(), + label, mr_type, short_names: vars, }, diff --git a/rust/src/sack.rs b/rust/src/sack.rs index ab35f8661e..c6be5d1eef 100644 --- a/rust/src/sack.rs +++ b/rust/src/sack.rs @@ -437,7 +437,7 @@ impl<'a> Lexer<'a> { .find(|c: char| { !(c.is_ascii_digit() || c.is_alphabetic() || c == '.' || c == '-') }) - .unwrap_or_else(|| s.len()); + .unwrap_or(s.len()); let (number, rest) = s.split_at(len); let token = if number == "-" { Token::Minus @@ -481,7 +481,7 @@ impl<'a> Lexer<'a> { || c == '.' || c == '_') }) - .unwrap_or_else(|| s.len()); + .unwrap_or(s.len()); let (s, rest) = s.split_at(len); if let Some(rest) = rest.strip_prefix(':') { (Token::Label(s.into()), rest) @@ -494,7 +494,7 @@ impl<'a> Lexer<'a> { })?); (token, rest) } else { - let token = match &s[..] { + let token = match s { "i8" => Token::I8, "i16" => Token::I16, "i64" => Token::I64, diff --git a/rust/tests/sack.rs b/rust/tests/sack.rs index 5db8d43bb3..49b10e77ac 100644 --- a/rust/tests/sack.rs +++ b/rust/tests/sack.rs @@ -86,7 +86,7 @@ fn main() -> Result<()> { let output = sack(&input, Some(&input_file_str), endian)?; let output_file_str = output_file_name.to_string_lossy(); - std::fs::write(&output_file_name, &output) + std::fs::write(&output_file_name, output) .map_err(|err| anyhow!("{output_file_str}: write failed ({err})"))?; Ok(()) -- 2.30.2