projects
/
pspp
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
168475a
)
work
author
Ben Pfaff
<blp@cs.stanford.edu>
Sat, 19 Aug 2023 21:06:50 +0000
(14:06 -0700)
committer
Ben Pfaff
<blp@cs.stanford.edu>
Sat, 19 Aug 2023 21:06:50 +0000
(14:06 -0700)
rust/src/cooked.rs
patch
|
blob
|
history
diff --git a/rust/src/cooked.rs b/rust/src/cooked.rs
index e0a7ef1fea7da5d22ad473ca3500734ef6abf322..3c0d2e4ff60af1210c0c3a01640a18835bf06aa2 100644
(file)
--- a/rust/src/cooked.rs
+++ b/rust/src/cooked.rs
@@ -1,13 +1,13 @@
-use std::{borrow::Cow, collections::HashSet};
+use std::{borrow::Cow, collections::{HashSet, HashMap}};
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
use encoding_rs::Encoding;
use chrono::{NaiveDate, NaiveDateTime, NaiveTime};
use encoding_rs::Encoding;
-
+use num::integer::div_ceil;
use crate::{
format::{Spec, UncheckedSpec, Width},
identifier::{Error as IdError, Identifier},
raw::{self, MissingValues},
use crate::{
format::{Spec, UncheckedSpec, Width},
identifier::{Error as IdError, Identifier},
raw::{self, MissingValues},
- {endian::Endian, CategoryLabels, Compression},
+ {endian::Endian, Compression},
};
use thiserror::Error as ThisError;
};
use thiserror::Error as ThisError;
@@ -37,6 +37,8 @@ pub struct Decoder {
pub endian: Endian,
pub encoding: &'static Encoding,
pub var_names: HashSet<Identifier>,
pub endian: Endian,
pub encoding: &'static Encoding,
pub var_names: HashSet<Identifier>,
+ pub dict_indexes: HashMap<usize, Identifier>,
+ n_dict_indexes: usize,
n_generated_names: usize,
}
n_generated_names: usize,
}
@@ -55,6 +57,17 @@ impl Decoder {
assert!(self.n_generated_names < usize::MAX);
}
}
assert!(self.n_generated_names < usize::MAX);
}
}
+ fn take_dict_indexes(&mut self, id: &Identifier, width: Width) -> usize {
+ let n = match width {
+ 0 => 1,
+ w => div_ceil(w, 8) as usize,
+ };
+ let dict_index = self.n_dict_indexes;
+ self.dict_indexes.insert(self.n_dict_indexes, id.clone());
+ self.n_dict_indexes += n;
+ dict_index
+
+ }
fn decode_string<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
let (output, malformed) = self.encoding.decode_without_bom_handling(input);
if malformed {
fn decode_string<'a>(&self, input: &'a [u8], warn: &impl Fn(Error)) -> Cow<'a, str> {
let (output, malformed) = self.encoding.decode_without_bom_handling(input);
if malformed {
@@ -165,6 +178,7 @@ impl Variable {
.label
.as_ref()
.map(|label| decoder.decode_string(&label.0, &warn).into());
.label
.as_ref()
.map(|label| decoder.decode_string(&label.0, &warn).into());
+ decoder.take_dict_indexes(&name, width);
Ok(Some(Variable {
width,
name,
Ok(Some(Variable {
width,
name,
@@ -430,25 +444,6 @@ impl TextRecord for VariableAttributeRecord {
}
}
}
}
-#[derive(Clone, Debug)]
-pub enum MultipleResponseType {
- MultipleDichotomy {
- value: String,
- labels: CategoryLabels,
- },
- MultipleCategory,
-}
-#[derive(Clone, Debug)]
-pub struct MultipleResponseSet {
- pub name: String,
- pub label: String,
- pub mr_type: MultipleResponseType,
- pub vars: Vec<String>,
-}
-
-#[derive(Clone, Debug)]
-pub struct MultipleResponseRecord(Vec<MultipleResponseSet>);
-
pub enum Measure {
Nominal,
Ordinal,
pub enum Measure {
Nominal,
Ordinal,