From: Ben Pfaff Date: Sun, 20 Aug 2023 01:39:36 +0000 (-0700) Subject: Revert "find bad utf8" X-Git-Url: https://pintos-os.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1df337b1bd33d1e362e7c1a66efbf68c39b3ab2e;p=pspp Revert "find bad utf8" This reverts commit 33820c6420e31b9b7e878eeda38708cc447e4ca2. --- diff --git a/rust/src/main.rs b/rust/src/main.rs index 56b007e05b..6024b67f2d 100644 --- a/rust/src/main.rs +++ b/rust/src/main.rs @@ -16,11 +16,11 @@ use anyhow::Result; use clap::Parser; -use pspp::raw::{Reader, Record, UnencodedStr, Value}; +use pspp::raw::{Reader, Record}; use std::fs::File; use std::io::BufReader; use std::path::{Path, PathBuf}; -use std::str::{self, from_utf8}; +use std::str; /// A utility to dissect SPSS system files. #[derive(Parser, Debug)] @@ -39,70 +39,29 @@ fn main() -> Result<()> { let Args { max_cases, files } = Args::parse(); for file in files { - if let Err(error) = dissect(&file, max_cases) { - println!("{}: {error}", file.display()); - } + dissect(&file, max_cases)?; } Ok(()) } -fn dissect(file_name: &Path, _max_cases: u64) -> Result<()> { +fn dissect(file_name: &Path, max_cases: u64) -> Result<()> { let reader = File::open(file_name)?; let reader = BufReader::new(reader); let mut reader = Reader::new(reader)?; let records: Vec<Record> = reader.collect_headers()?; - let mut character_code = None; for record in records { - //println!("{record:?}"); - if let Record::IntegerInfo(ref info) = record { - character_code = Some(info.character_code); - } + println!("{record:?}"); if let Record::EndOfHeaders(_) = record { break; }; } - if character_code != Some(65001) { - return Ok(()); - } - let mut n = 0; - while let Some(Ok(Record::Case(data))) = reader.next() { - n += 1; - let mut strings = Vec::new(); - for value in data.iter() { - if let Value::String(UnencodedStr(s)) = value { - strings.extend_from_slice(&s[..]); - } - } - - let mut rest = &strings[..]; - let mut any_errors = false; - while let Err(error) = from_utf8(&rest) { - if !any_errors 
{ - print!("{}: UTF-8 error", file_name.display()); - any_errors = true; - } - let start = error.valid_up_to(); - let len = match error.error_len() { - Some(len) => len, - None => rest.len() - start - }; -// print!(" {}", (start + len) % 8); - print!("["); - for i in 0..len { - print!("{:02x}", rest[i + start]); - } - print!("]"); - rest = &rest[start + len..]; - } - if any_errors { - println!(); - println!("Lossy: {}", String::from_utf8_lossy(&strings[..]).replace(char::REPLACEMENT_CHARACTER, "??????").replace(&[' ', '\0'], "")); - return Ok(()) - } - //println!("{:?}", data); + for _ in 0..max_cases { + let Some(Ok(Record::Case(data))) = reader.next() else { + break; + }; + println!("{:?}", data); } - println!("{}: read {n} records", file_name.display()); Ok(()) }