use std::{cell::RefCell, collections::HashMap, ops::Range, rc::Rc};
use crate::{
- dictionary::{Dictionary, EncodedString, Value, VarWidth, Variable},
+ dictionary::{Dictionary, Value, VarWidth, Variable},
encoding::Error as EncodingError,
endian::Endian,
format::{Error as FormatError, Format, UncheckedFormat},
}
}
+#[derive(Debug)]
pub struct Metadata {
creation: NaiveDateTime,
endian: Endian,
for ValueLabel { value, label } in record.labels.iter().cloned() {
let value = match value {
raw::Value::Number(number) => Value::Number(number.map(|n| n.into())),
- raw::Value::String(string) => Value::String(EncodedString::from_raw(
- &string.0[..variable.width.as_string_width().unwrap()],
- encoding,
- )),
+ raw::Value::String(string) => {
+ string.0[..variable.width.as_string_width().unwrap()].into()
+ }
};
}
}
use core::str;
use std::{
- borrow::Cow,
cmp::Ordering,
collections::{HashMap, HashSet},
fmt::Debug,
- hash::{Hash, Hasher},
+ hash::Hash,
ops::{Bound, RangeBounds},
};
-use encoding_rs::{Encoding, UTF_8};
+use encoding_rs::Encoding;
use indexmap::IndexSet;
use num::integer::div_ceil;
use ordered_float::OrderedFloat;
}
}
-#[derive(Debug)]
+#[derive(Clone, Debug)]
pub enum Value {
Number(Option<f64>),
- String(ValueString),
+ String(Box<[u8]>),
}
impl PartialEq for Value {
OrderedFloat(*l0) == OrderedFloat(*r0)
}
(Self::Number(None), Self::Number(None)) => true,
- (Self::Number(_), Self::Number(_)) => false,
(Self::String(l0), Self::String(r0)) => l0 == r0,
+ _ => false,
}
}
}
}
}
-impl Hash for Value {
- fn hash<H>(&self, state: &mut H)
- where
- H: Hasher,
- {
- match self {
- Value::Number(Some(a)) => OrderedFloat(*a).hash(state),
- Value::Number(None) => (),
- Value::String(string) => string.hash(state),
- }
- }
-}
-
-impl Clone for Value {
- fn clone(&self) -> Self {
- match self {
- Self::Number(number) => Self::Number(*number),
- Self::String(string) => Self::String(string.clone_boxed()),
- }
- }
-}
-
impl Value {
fn sysmis() -> Self {
Self::Number(None)
}
-
- fn for_string<S>(s: S) -> Self
- where
- S: AsRef<str>,
- {
- Self::String(ValueString::new(s))
- }
}
impl From<f64> for Value {
fn from(value: f64) -> Self {
- Self::Number(Some(value.into()))
- }
-}
-
-#[derive(Debug)]
-pub struct ValueString {
- nonutf8: Option<Box<EncodedString>>,
- utf8: Box<str>
-}
-
-impl ValueString {
- fn clone_boxed(&self) -> Box<Self> {
- Box::new(ValueString {
- nonutf8: self.nonutf8.map(|s| s.clone_boxed()),
- utf8: self.utf8,
- })
- }
-
- fn new<S>(s: S) -> Box<Self>
- where
- S: AsRef<str>,
- {
- Box::new(Self {
- nonutf8: None,
- utf8: s,
- })
- }
-
- fn new_encoded(s: &[u8], encoding: &'static Encoding) -> Box<Self> {
- if encoding == &UTF_8 {
- if let Some(utf8) = str::from_utf8(s) {
- return Self::new(utf8);
- }
- }
- todo!()
- }
-}
-
-impl PartialEq for ValueString {
- fn eq(&self, other: &Self) -> bool {
- self.utf8 == other.utf8
- }
-}
-
-impl Eq for ValueString {}
-
-impl PartialOrd for ValueString {
- fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
- Some(self.cmp(other))
+ Some(value).into()
}
}
-impl Ord for ValueString {
- fn cmp(&self, other: &Self) -> Ordering {
- self.utf8.cmp(&other.utf8)
- }
-}
-
-impl Hash for ValueString {
- fn hash<H>(&self, state: &mut H)
- where
- H: Hasher,
- {
- self.utf8.hash(state);
- }
-}
-
-#[derive(Debug, Hash)]
-pub struct EncodedString {
- encoding: &'static Encoding,
- s: Box<[u8]>,
-}
-
-impl PartialEq for EncodedString {
- fn eq(&self, other: &Self) -> bool {
- self.as_str().eq(&other.as_str())
- }
-}
-
-impl Eq for EncodedString {}
-
-impl PartialOrd for EncodedString {
- fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
- Some(self.cmp(other))
- }
-}
-
-impl Ord for EncodedString {
- fn cmp(&self, other: &Self) -> Ordering {
- self.as_str().cmp(&other.as_str())
+impl From<Option<f64>> for Value {
+ fn from(value: Option<f64>) -> Self {
+ Self::Number(value)
}
}
-impl EncodedString {
- fn clone_boxed(&self) -> Box<Self> {
- todo!()
- }
- fn as_str(&self) -> EncodedStr {
- EncodedStr {
- s: &*self.s,
- encoding: self.encoding,
- }
+impl From<&str> for Value {
+ fn from(value: &str) -> Self {
+ value.as_bytes().into()
}
}
-#[derive(Clone, Debug, Hash)]
-pub struct EncodedStr<'a> {
- s: &'a [u8],
- encoding: &'static Encoding,
-}
-
-impl<'a> PartialOrd for EncodedStr<'a> {
- fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
- Some(self.cmp(other))
- }
-}
-
-impl<'a> Ord for EncodedStr<'a> {
- fn cmp(&self, other: &Self) -> Ordering {
- if self.encoding == other.encoding {
- self.s.cmp(&other.s)
- } else {
- // Get an arbitary but stable ordering for strings with different
- // encodings. It would be nice to do something like
- // `self.as_utf8().partial_cmp(other.as_utf8())` but it's likely that
- // this would violate transitivity.
- let this = self.encoding as *const Encoding;
- let other = other.encoding as *const Encoding;
- this.cmp(&other)
- }
- }
-}
-
-impl<'a> Eq for EncodedStr<'a> {}
-
-impl<'a> EncodedStr<'a> {
- fn as_utf8(&self) -> Cow<'a, str> {
- self.encoding.decode_without_bom_handling(self.s).0
- }
-}
-
-impl<'a> PartialEq for EncodedStr<'a> {
- fn eq(&self, other: &Self) -> bool {
- self.encoding == other.encoding && self.s == other.s
+impl From<&[u8]> for Value {
+ fn from(value: &[u8]) -> Self {
+ Self::String(value.into())
}
}
use anyhow::Result;
use clap::{Parser, ValueEnum};
use encoding_rs::Encoding;
+use pspp::cooked::{decode, Headers};
use pspp::raw::{encoding_from_headers, Decoder, Magic, Reader, Record};
use std::fs::File;
use std::io::BufReader;
}
}
Mode::Cooked => {
- /*
- let headers: Vec<Record> = reader.collect::<Result<Vec<_>, _>>()?;
- let encoding = encoding_from_headers(&headers, &|e| eprintln!("{e}"))?;
- let (headers, _) = decode(headers, encoding, &|e| eprintln!("{e}"))?;
- for header in headers {
- println!("{header:?}");
+ let headers: Vec<Record> = reader.collect::<Result<Vec<_>, _>>()?;
+ let encoding = match encoding {
+ Some(encoding) => encoding,
+ None => encoding_from_headers(&headers, &|e| eprintln!("{e}"))?,
+ };
+ let decoder = Decoder::new(encoding, |e| eprintln!("{e}"));
+ let mut decoded_records = Vec::new();
+ for header in headers {
+ decoded_records.push(header.decode(&decoder)?);
}
- */
+ let headers = Headers::new(decoded_records, &|e| eprintln!("{e}"))?;
+ let (dictionary, metadata) = decode(headers, encoding, |e| eprintln!("{e}"))?;
+ println!("{dictionary:?}");
+ println!("{metadata:?}");
}
}