encodings display pretty well now

author Ben Pfaff <blp@cs.stanford.edu>

Sun, 24 Aug 2025 00:36:59 +0000 (17:36 -0700)

committer Ben Pfaff <blp@cs.stanford.edu>

Sun, 24 Aug 2025 00:36:59 +0000 (17:36 -0700)
author Ben Pfaff <blp@cs.stanford.edu>
Sun, 24 Aug 2025 00:36:59 +0000 (17:36 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Sun, 24 Aug 2025 00:36:59 +0000 (17:36 -0700)
diff --git a/rust/pspp/src/main.rs b/rust/pspp/src/main.rs

index f388f89c6edc940f4ccf5c02bbd9e37b880e06a3..606733daf5b8efe3be98e5c7543a7bdfa149dc19 100644 (file)
--- a/rust/pspp/src/main.rs
+++ b/rust/pspp/src/main.rs
@@ -26,8 +26,7 @@ use pspp::{
      sys::{
          self,
          raw::{
-            get_encoding_info, infer_encoding, records::Compression, Decoder, EncodingReport,
-            Magic, Reader, Record,
+            infer_encoding, records::Compression, Decoder, EncodingReport, Magic, Reader, Record,
          },
          ReadOptions, Records,
      },
@@ -278,6 +277,37 @@ enum Output {
  }
  
  impl Output {
+    /*
+    fn show_metadata(&self, metadata: MetadataEntry) -> Result<()> {
+        match self {
+            Self::Driver { driver, .. } => {
+                driver
+                    .borrow_mut()
+                    .write(&Arc::new(Item::new(metadata.into_pivot_table())));
+                Ok(())
+            }
+            Self::Json { .. } => self.show_json(&metadata),
+            Self::Discard => Ok(()),
+        }
+    }*/
+
+    /// Shows `value` on this output.
+    ///
+    /// - `Self::Driver`: converts `value` into [Details], wraps it in an
+    ///   [Item], and writes it to the driver.
+    /// - `Self::Json`: hands `value` to [Self::show_json].
+    /// - `Self::Discard`: does nothing and returns `Ok(())`.
+    fn show<T>(&self, value: &T) -> Result<()>
+    where
+        T: Serialize,
+        for<'a> &'a T: Into<Details>,
+    {
+        match self {
+            Self::Driver { driver, .. } => {
+                driver
+                    .borrow_mut()
+                    .write(&Arc::new(Item::new(value.into())));
+                Ok(())
+            }
+            Self::Json { .. } => self.show_json(value),
+            Self::Discard => Ok(()),
+        }
+    }
+
      fn show_json<T>(&self, value: &T) -> Result<()>
      where
          T: Serialize,
@@ -336,8 +366,6 @@ impl Show {
                  "ndjson" => ShowFormat::Ndjson,
                  _ => ShowFormat::Output,
              }
-        } else if self.mode == Mode::Encodings {
-            ShowFormat::Output
          } else {
              ShowFormat::Json
          };
@@ -474,29 +502,8 @@ impl Show {
              }
              Mode::Encodings => {
                  let records: Vec<Record> = reader.records().collect::<Result<Vec<_>, _>>()?;
-                let (encoding, character_code) = get_encoding_info(&records);
-
-                let mut record_strings = reader.header().get_strings();
-                for record in &records {
-                    record_strings.append(&mut record.get_strings());
-                }
-                let Some(encoding_report) = EncodingReport::new(&record_strings) else {
-                    output.warn(&"No valid encodings found.");
-                    return Ok(());
-                };
-                match &output {
-                    Output::Driver { driver, .. } => {
-                        driver
-                            .borrow_mut()
-                            .write(&Arc::new(Item::new(encoding_report.valid_encodings)));
-                        if let Some(interpretations) = encoding_report.interpretations {
-                            driver
-                                .borrow_mut()
-                                .write(&Arc::new(Item::new(interpretations)));
-                        }
-                    }
-                    _ => todo!(),
-                }
+                let encoding_report = EncodingReport::new(reader.header(), &records);
+                output.show(&encoding_report)?;
              }
          }
  
diff --git a/rust/pspp/src/output/mod.rs b/rust/pspp/src/output/mod.rs

index 0779129f8e5a41c51a53e2193330bc3bb5431f57..28ab4efdd6eac1cf08a5a714b2bde2c0c2f19640 100644 (file)
--- a/rust/pspp/src/output/mod.rs
+++ b/rust/pspp/src/output/mod.rs
@@ -148,6 +148,18 @@ impl Details {
      }
  }
  
+/// Collects an iterator into a [Details::Group], converting each element
+/// into an `Arc<Item>` along the way.
+impl<A> FromIterator<A> for Details
+where
+    A: Into<Arc<Item>>,
+{
+    fn from_iter<T>(iter: T) -> Self
+    where
+        T: IntoIterator<Item = A>,
+    {
+        Self::Group(iter.into_iter().map(|value| value.into()).collect())
+    }
+}
+
  impl From<Diagnostic> for Details {
      fn from(value: Diagnostic) -> Self {
          Self::Message(Box::new(value))
@@ -192,10 +204,10 @@ pub struct Text {
  }
  
  impl Text {
-    pub fn new_log(s: impl Into<String>) -> Self {
+    pub fn new_log(value: impl Into<Value>) -> Self {
          Self {
              type_: TextType::Log,
-            content: Value::new_user_text(s),
+            content: value.into(),
          }
      }
  }
diff --git a/rust/pspp/src/output/pivot/mod.rs b/rust/pspp/src/output/pivot/mod.rs

index bd6c4242fd04e3d3f8236bec41a32bc4c65ce32f..387e0f3d14837d3fbb4db2fbb5c2a250a15b3c1a 100644 (file)
--- a/rust/pspp/src/output/pivot/mod.rs
+++ b/rust/pspp/src/output/pivot/mod.rs
@@ -1986,20 +1986,20 @@ impl Serialize for Value {
  
  /// Wrapper for [Value] that uses [Value::serialize_bare] for serialization.
  #[derive(Serialize)]
-struct BareValue<'a>(#[serde(serialize_with = "Value::serialize_bare")] &'a Value);
+struct BareValue<'a>(#[serde(serialize_with = "Value::serialize_bare")] pub &'a Value);
  
  impl Value {
-    fn serialize_bare<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    pub fn serialize_bare<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
      where
          S: Serializer,
      {
          match &self.inner {
-            ValueInner::Number(number_value) => number_value.value.serialize(serializer),
+            ValueInner::Number(number_value) => number_value.serialize_bare(serializer),
              ValueInner::String(string_value) => string_value.s.serialize(serializer),
              ValueInner::Variable(variable_value) => variable_value.var_name.serialize(serializer),
              ValueInner::Text(text_value) => text_value.localized.serialize(serializer),
              ValueInner::Template(template_value) => template_value.localized.serialize(serializer),
-            ValueInner::Empty => ().serialize(serializer),
+            ValueInner::Empty => serializer.serialize_none(),
          }
      }
  
@@ -2499,6 +2499,28 @@ impl Serialize for NumberValue {
      }
  }
  
+impl NumberValue {
+    /// Serializes only the numeric value of this [NumberValue].
+    ///
+    /// A whole number in the range `-2^53..=2^53` (where every integer is
+    /// exactly representable as an `f64`) is serialized as an integer.
+    /// Anything else, including a missing value, is serialized as the
+    /// underlying `Option<f64>`.
+    pub fn serialize_bare<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        if let Some(number) = self.value
+            && number.trunc() == number
+            && number >= -(1i64 << 53) as f64
+            && number <= (1i64 << 53) as f64
+        {
+            // Cast to a signed type: `as u64` saturates negative values to
+            // 0, which would serialize every negative whole number as 0.
+            (number as i64).serialize(serializer)
+        } else {
+            self.value.serialize(serializer)
+        }
+    }
+}
+
+/// Wrapper for [NumberValue] that uses [NumberValue::serialize_bare] for
+/// serialization.
+#[derive(Serialize)]
+pub struct BareNumberValue<'a>(
+    #[serde(serialize_with = "NumberValue::serialize_bare")] pub &'a NumberValue,
+);
+
  #[derive(Clone, Debug, Serialize)]
  pub struct StringValue {
      /// The string value.
@@ -2686,15 +2708,16 @@ pub enum MetadataValue {
  }
  
  impl MetadataEntry {
-    fn into_pivot_table(self) -> PivotTable {
+    pub fn into_pivot_table(self) -> PivotTable {
          let mut data = Vec::new();
          let group = match self.visit(&mut data) {
              Category::Group(group) => group,
-            Category::Leaf(leaf) => Group::new("Metadata").with(leaf),
+            Category::Leaf(leaf) => Group::new("Metadata").with(leaf).with_label_shown(),
          };
          PivotTable::new([(Axis3::Y, Dimension::new(group))]).with_data(
              data.into_iter()
                  .enumerate()
+                .filter(|(_row, value)| !value.is_empty())
                  .map(|(row, value)| ([row], value)),
          )
      }
diff --git a/rust/pspp/src/output/text.rs b/rust/pspp/src/output/text.rs

index 61b49a357c507b793544a4f1d1bf731f3a17b232..1a5aad6d8e2bf3c527477dec2ed85a5d3c63b551 100644 (file)
--- a/rust/pspp/src/output/text.rs
+++ b/rust/pspp/src/output/text.rs
@@ -60,7 +60,7 @@ impl Boxes {
  #[derive(Clone, Debug, Deserialize, Serialize)]
  pub struct TextConfig {
      /// Output file name.
-    file: PathBuf,
+    file: Option<PathBuf>,
  
      /// Renderer config.
      #[serde(flatten)]
@@ -365,7 +365,10 @@ pub struct TextDriver {
  impl TextDriver {
      pub fn new(config: &TextConfig) -> std::io::Result<TextDriver> {
          Ok(Self {
-            file: BufWriter::new(File::create(&config.file)?),
+            file: BufWriter::new(match &config.file {
+                Some(file) => File::create(&file)?,
+                None => File::options().write(true).open("/dev/stdout")?,
+            }),
              renderer: TextRenderer::new(&config.options),
          })
      }
diff --git a/rust/pspp/src/sys/encoding.rs b/rust/pspp/src/sys/encoding.rs

index 29a4f9e45f1486633b1cbe31aaeab5ac552cb3b1..4e7468829b39bc0b210b8a2058769ef6b221523f 100644 (file)
--- a/rust/pspp/src/sys/encoding.rs
+++ b/rust/pspp/src/sys/encoding.rs
@@ -22,6 +22,7 @@ use std::sync::LazyLock;
  
  use crate::locale_charset::locale_charset;
  use encoding_rs::{Encoding, UTF_8};
+use serde::Serialize;
  use thiserror::Error as ThisError;
  
  include!(concat!(env!("OUT_DIR"), "/encodings.rs"));
@@ -42,7 +43,8 @@ pub fn codepage_from_encoding(encoding: &'static Encoding) -> u32 {
  }
  
  /// An error or warning related to encodings.
-#[derive(Clone, ThisError, Debug, PartialEq, Eq)]
+#[derive(Clone, ThisError, Debug, PartialEq, Eq, Serialize)]
+#[serde(rename_all = "snake_case")]
  pub enum Error {
      /// Warning that the system file doesn't indicate its own encoding.
      #[error("This system file does not indicate its own character encoding.  For best results, specify an encoding explicitly.  Use SYSFILE INFO with ENCODING=\"DETECT\" to analyze the possible encodings.")]
@@ -88,29 +90,29 @@ pub fn get_encoding(
      encoding: Option<&str>,
      character_code: Option<i32>,
  ) -> Result<&'static Encoding, Error> {
-    let label = if let Some(encoding) = encoding {
-        encoding
-    } else if let Some(codepage) = character_code {
-        match codepage {
-            1 => return Err(Error::Ebcdic),
-            2 | 3 => {
-                // These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
-                // respectively.  However, many files have character code 2 but
-                // data which are clearly not ASCII.  Therefore, ignore these
-                // values.
-                return Err(Error::NoEncoding);
-            }
-            4 => "MS_KANJI",
-            _ => CODEPAGE_NUMBER_TO_NAME
+    fn inner(label: &str) -> Result<&'static Encoding, Error> {
+        Encoding::for_label(label.as_bytes()).ok_or(Error::UnknownEncoding(label.into()))
+    }
+
+    match (encoding, character_code) {
+        (Some(encoding), _) => inner(encoding),
+        (None, Some(1)) => Err(Error::Ebcdic),
+        (None, Some(2 | 3)) => {
+            // These ostensibly mean "7-bit ASCII" and "8-bit ASCII"[sic]
+            // respectively.  However, many files have character code 2 but
+            // data which are clearly not ASCII.  Therefore, ignore these
+            // values.
+            Err(Error::NoEncoding)
+        }
+        (None, Some(4)) => inner("MS_KANJI"),
+        (None, Some(codepage)) => inner(
+            CODEPAGE_NUMBER_TO_NAME
                  .get(&codepage)
                  .copied()
                  .ok_or(Error::UnknownCodepage(codepage))?,
-        }
-    } else {
-        return Err(Error::NoEncoding);
-    };
-
-    Encoding::for_label(label.as_bytes()).ok_or(Error::UnknownEncoding(label.into()))
+        ),
+        (None, None) => Err(Error::NoEncoding),
+    }
  }
  
  #[cfg(test)]
diff --git a/rust/pspp/src/sys/raw.rs b/rust/pspp/src/sys/raw.rs

index 75218b794f2031c664f3203b7a76ab70c364731a..747c34761c3615d0b5d7f897eaba241b0ed973f7 100644 (file)
--- a/rust/pspp/src/sys/raw.rs
+++ b/rust/pspp/src/sys/raw.rs
@@ -23,7 +23,10 @@ use crate::{
      data::{ByteStr, ByteString, Datum, RawCase, RawString},
      endian::{FromBytes, ToBytes},
      identifier::{Error as IdError, Identifier},
-    output::pivot::{Axis3, Dimension, Group, PivotTable, Value},
+    output::{
+        pivot::{Axis3, Dimension, Group, PivotTable, Value},
+        Details, Item, Text,
+    },
      sys::{
          encoding::{default_encoding, get_encoding, Error as EncodingError},
          raw::records::{
@@ -1873,13 +1876,225 @@ static ENCODINGS: [&Encoding; 32] = [
      EUC_KR,
  ];
  
+/// Where the overall encoding in an [EncodingReport] came from.
+#[derive(Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum EncodingSource {
+    /// The encoding name recorded in the file.
+    Name,
+    /// The codepage number recorded in the file.
+    Codepage,
+    /// Fallback: the default encoding was used.
+    Default,
+}
+
+impl EncodingSource {
+    /// Returns a lowercase label for this source (`"name"`, `"codepage"`, or
+    /// `"default"`).
+    fn as_str(&self) -> &'static str {
+        match self {
+            EncodingSource::Name => "name",
+            EncodingSource::Codepage => "codepage",
+            EncodingSource::Default => "default",
+        }
+    }
+}
+
+#[derive(Serialize)]
  pub struct EncodingReport {
-    pub valid_encodings: PivotTable,
-    pub interpretations: Option<PivotTable>,
+    /// If the file includes a record that names its encoding, then this is the
+    /// name and how PSPP interprets that as an encoding.
+    pub name: Option<(String, Result<&'static Encoding, EncodingError>)>,
+
+    /// If the file includes a record that identifies its encoding as a code
+    /// page number, then this is the number and how PSPP interprets that as an
+    /// encoding.
+    pub codepage: Option<(i32, Result<&'static Encoding, EncodingError>)>,
+
+    /// The overall encoding chosen.
+    pub inferred_encoding: Result<&'static Encoding, EncodingError>,
+
+    /// Why the overall encoding was chosen.
+    pub inferred_encoding_source: EncodingSource,
+
+    /// The encodings that are valid for this file, based on looking at all the
+    /// text data in the file headers.  Each array element is a group of
+    /// encodings that yield the same text data.  If there is only one element,
+    /// then all valid encodings yield the same text data.
+    pub valid_encodings: Vec<Vec<&'static Encoding>>,
+
+    /// Individual strings in the file headers, together with their
+    /// interpretations for each group of valid encodings.  Only strings that
+    /// don't have the same interpretation for every valid encoding are
+    /// included.
+    ///
+    /// If this is empty, then either:
+    ///
+    /// - `valid_encodings` is also empty.  In this case, there are no valid
+    ///   encodings, so there are no strings in the valid encodings.
+    ///
+    /// - `valid_encodings` has one element (one group of valid encodings).  In
+    ///   this case, every valid encoding interprets every string the same way.
+    pub strings: Vec<EncodingReportString>,
+}
+
+impl EncodingReport {
+    /// Builds a pivot table summarizing the encoding information in this
+    /// report: the encoding name and codepage found in the file (or
+    /// `"(none)"` where absent), how each one was interpreted, and the
+    /// overall inferred encoding together with its source.
+    fn metadata_pivot_table(&self) -> PivotTable {
+        /// Converts an encoding lookup result into a table value: the
+        /// encoding's name on success, or the error's message on failure.
+        fn result_to_value(result: &Result<&'static Encoding, EncodingError>) -> Value {
+            match result {
+                Ok(encoding) => encoding.name().into(),
+                Err(error) => error.to_string().into(),
+            }
+        }
+
+        let cols = Group::new("Distinctions")
+            .with("Value")
+            .with("Interpretation");
+        let rows = Group::new("Category")
+            .with("Name")
+            .with("Codepage")
+            .with("Overall");
+        let mut table = PivotTable::new([
+            (Axis3::X, Dimension::new(cols)),
+            (Axis3::Y, Dimension::new(rows)),
+        ])
+        .with_title("Character encoding information found in system file and its interpretation")
+        .with_caption("A system file may identify its character encoding by name or by codepage number or both.  This table states which were found, how each was interpreted, and the overall interpretation.");
+        // Cell indexes below are written as [column, row], presumably
+        // following the order in which the dimensions were passed above.
+        if let Some((label, result)) = &self.name {
+            table.insert(&[0, 0], label.as_str());
+            table.insert(&[1, 0], result_to_value(result));
+        } else {
+            table.insert(&[0, 0], "(none)");
+        }
+        if let Some((codepage, result)) = &self.codepage {
+            table.insert(&[0, 1], Value::new_integer(Some((*codepage) as f64)));
+            table.insert(&[1, 1], result_to_value(result));
+        } else {
+            table.insert(&[0, 1], "(none)");
+        }
+        table.insert(&[0, 2], self.inferred_encoding_source.as_str());
+        table.insert(&[1, 2], result_to_value(&self.inferred_encoding));
+        table
+    }
+}
+
+/// Renders an [EncodingReport] as a group of output items.
+///
+/// The group always starts with the metadata table.  If the report has any
+/// valid encodings, it is followed by a table listing each group of
+/// encodings (numbered from 1) and, if any strings are interpreted
+/// differently, a table of those interpretations.  If there are no valid
+/// encodings, a log text item saying so is emitted instead.
+impl From<&EncodingReport> for Details {
+    fn from(value: &EncodingReport) -> Self {
+        let mut output: Vec<Item> = vec![value.metadata_pivot_table().into()];
+
+        if !value.valid_encodings.is_empty() {
+            // One row per group of encodings; each cell is the comma-separated
+            // names of the encodings in that group.
+            let numbers = Group::new("#")
+                .with_multiple((1..=value.valid_encodings.len()).map(|i| format!("{i}")));
+            output.push(
+                PivotTable::new([(Axis3::Y, Dimension::new(numbers))])
+                    .with_data(
+                        value
+                            .valid_encodings
+                            .iter()
+                            .map(|encodings| {
+                                Value::new_user_text(encodings.iter().map(|e| e.name()).join(", "))
+                            })
+                            .enumerate()
+                            .map(|(index, datum)| ([index], datum)),
+                    )
+                    .into(),
+            );
+
+            if !value.strings.is_empty() {
+                let purposes = Group::with_capacity("Purpose", value.strings.len())
+                    .with_label_shown()
+                    .with_multiple(value.strings.iter().map(|rs| &rs.name));
+                let number = Group::new("Text")
+                    .with_label_shown()
+                    .with_multiple((1..=value.valid_encodings.len()).map(|i| format!("{i}")));
+                // Each cell is indexed by [text column, encoding group, string].
+                output.push(
+                        PivotTable::new([
+                            (Axis3::X, Dimension::new(Group::new("Text").with("Text"))),
+                            (Axis3::Y, Dimension::new(number)),
+                            (Axis3::Y, Dimension::new(purposes)),
+                        ])
+                            .with_title("Alternate Encoded Text Strings")
+                            .with_caption("Text strings in the file dictionary that the previously listed encodings interpret differently, along with the interpretations.")
+                            .with_data(value
+                    .strings
+                    .iter()
+                    .enumerate()
+                    .map(|(purpose, rs)| {
+                        rs.interpretations
+                            .iter()
+                            .enumerate()
+                            .map(move |(encoding, s)| {
+                                (
+                                    [0, encoding, purpose],
+                                    Value::new_user_text(rs.ellipsize(s.as_str())),
+                                )
+                            })
+                    })
+                    .flatten()
+                    .collect::<Vec<_>>()).into(),
+                    );
+            }
+        } else {
+            output.push(Text::new_log("No valid encodings were found.").into());
+        };
+
+        output.into_iter().collect()
+    }
+}
+
+/// All of the (valid) interpretations of a given string in a system file.
+#[derive(Serialize)]
+pub struct EncodingReportString {
+    /// Name for the string, something like "variable name 1".
+    name: String,
+
+    /// If the string's interpretations all start with a common prefix, this is
+    /// it.  Only whole words are considered to be common.  Empty if there is
+    /// no common prefix.
+    common_prefix: String,
+
+    /// All of the interpretations of the string, one per valid encoding, in the
+    /// order of [EncodingReport::valid_encodings].  The common prefix and
+    /// common suffix are not included in these strings.
+    interpretations: Vec<String>,
+
+    /// If the string's interpretations all end with a common suffix, this is
+    /// it.  Only whole words are considered to be common.  Empty if there is
+    /// no common suffix.
+    common_suffix: String,
+}
+
+impl EncodingReportString {
+    /// Returns `s` with `"..."` prepended if this string has a nonempty
+    /// common prefix and `"..."` appended if it has a nonempty common suffix.
+    ///
+    /// If both are empty, `s` is returned borrowed, without allocating.
+    fn ellipsize<'a>(&self, s: &'a str) -> Cow<'a, str> {
+        if self.common_prefix.is_empty() && self.common_suffix.is_empty() {
+            Cow::from(s)
+        } else {
+            // Room for `s` plus up to two three-character ellipses.
+            let mut result = String::with_capacity(s.len() + 6);
+            if !self.common_prefix.is_empty() {
+                result.push_str("...");
+            }
+            result.push_str(s);
+            if !self.common_suffix.is_empty() {
+                result.push_str("...");
+            }
+            Cow::from(result)
+        }
+    }
  }
  
  impl EncodingReport {
-    pub fn new(record_strings: &[RecordString]) -> Option<Self> {
+    pub fn new(header: &FileHeader<ByteString>, records: &[Record]) -> Self {
+        let (encoding, codepage) = get_encoding_info(&records);
+        let label =
+            encoding.map(|encoding| (String::from(encoding), get_encoding(Some(encoding), None)));
+        let codepage = codepage.map(|codepage| (codepage, get_encoding(None, Some(codepage))));
+        let (inferred_encoding_source, inferred_encoding) = match label
+            .as_ref()
+            .map(|(_string, result)| (EncodingSource::Name, result.clone()))
+            .or(codepage
+                .as_ref()
+                .map(|(_codepage, result)| (EncodingSource::Codepage, result.clone())))
+        {
+            Some((source, Ok(encoding))) => (source, Ok(encoding)),
+            Some((source, Err(EncodingError::Ebcdic))) => (source, Err(EncodingError::Ebcdic)),
+            _ => (EncodingSource::Default, Ok(default_encoding())),
+        };
+
+        let mut record_strings = header.get_strings();
+        for record in records {
+            record_strings.append(&mut record.get_strings());
+        }
          let mut encodings: IndexMap<Vec<String>, Vec<&'static Encoding>> = IndexMap::new();
          for encoding in ENCODINGS {
              fn recode_as(
@@ -1899,89 +2114,59 @@ impl EncodingReport {
                  }
                  Some(output)
              }
-            if let Some(strings) = recode_as(record_strings, encoding) {
+            if let Some(strings) = recode_as(&record_strings, encoding) {
                  encodings.entry(strings).or_default().push(encoding);
              }
          }
-        if encodings.is_empty() {
-            return None;
-        }
-
-        let numbers = Group::new("#").with_multiple((1..=encodings.len()).map(|i| format!("{i}")));
-        let valid_encodings = PivotTable::new([(Axis3::Y, Dimension::new(numbers))]).with_data(
-            encodings
-                .values()
-                .map(|encodings| {
-                    Value::new_user_text(encodings.iter().map(|e| e.name()).join(", "))
-                })
-                .enumerate()
-                .map(|(index, datum)| ([index], datum)),
-        );
-
-        let mut purposes = Group::new("Purpose").with_label_shown();
-        let mut data = Vec::new();
-        for (index, rs) in record_strings.iter().enumerate() {
-            // Skip strings that decode the same way from every encoding.
-            if encodings.keys().map(|strings| &strings[index]).all_equal() {
-                continue;
-            }
  
-            /// Returns an iterator for the decoded strings for the given
-            /// `index`.
-            fn decoded_index<'a>(
-                encodings: &'a IndexMap<Vec<String>, Vec<&'static Encoding>>,
-                index: usize,
-            ) -> impl Iterator<Item = &'a str> {
-                encodings.keys().map(move |strings| strings[index].as_str())
-            }
-
-            let common_prefix = decoded_index(&encodings, index)
-                .reduce(common_prefix)
-                .unwrap()
-                .trim_end_matches(|c| c != ' ')
-                .len();
-            let common_suffix = decoded_index(&encodings, index)
-                .reduce(common_suffix)
-                .unwrap()
-                .trim_start_matches(|c| c != ' ')
-                .len();
-
-            let purpose = purposes.push(&rs.title);
-
-            for (j, s) in decoded_index(&encodings, index).enumerate() {
-                let s = &s[common_prefix..s.len() - common_suffix];
-                let mut entry = String::with_capacity(s.len() + 6);
-                if common_prefix > 0 {
-                    entry.push_str("...");
+        let mut strings = Vec::with_capacity(record_strings.len());
+        if !encodings.is_empty() {
+            for (index, rs) in record_strings.iter().enumerate() {
+                // Skip strings that decode the same way from every encoding.
+                if encodings.keys().map(|strings| &strings[index]).all_equal() {
+                    continue;
                  }
-                entry.push_str(s);
-                if common_suffix > 0 {
-                    entry.push_str("...");
+
+                /// Returns an iterator for the decoded strings for the given
+                /// `index`.
+                fn decoded_index<'a>(
+                    encodings: &'a IndexMap<Vec<String>, Vec<&'static Encoding>>,
+                    index: usize,
+                ) -> impl Iterator<Item = &'a str> {
+                    encodings.keys().map(move |strings| strings[index].as_str())
                  }
-                data.push(([0, j, purpose], Value::new_user_text(entry)));
+
+                let common_prefix: String = decoded_index(&encodings, index)
+                    .reduce(common_prefix)
+                    .unwrap()
+                    .trim_end_matches(|c| c != ' ')
+                    .into();
+                let common_suffix: String = decoded_index(&encodings, index)
+                    .reduce(common_suffix)
+                    .unwrap()
+                    .trim_start_matches(|c| c != ' ')
+                    .into();
+
+                let interpretations = decoded_index(&encodings, index)
+                    .map(|s| s[common_prefix.len()..s.len() - common_suffix.len()].into())
+                    .collect();
+
+                strings.push(EncodingReportString {
+                    name: rs.title.clone(),
+                    common_prefix,
+                    interpretations,
+                    common_suffix,
+                });
              }
          }
-        let number = Group::new("Text")
-            .with_label_shown()
-            .with_multiple((1..=encodings.len()).map(|i| format!("{i}")));
-        let interpretations = if !data.is_empty() {
-            Some(
-                PivotTable::new([
-                    (Axis3::X, Dimension::new(Group::new("Text").with("Text"))),
-                    (Axis3::Y, Dimension::new(number)),
-                    (Axis3::Y, Dimension::new(purposes)),
-                ])
-                .with_title("Alternate Encoded Text Strings")
-                .with_caption("Text strings in the file dictionary that the previously listed encodings interpret differently, along with the interpretations.")
-                .with_data(data),
-            )
-        } else {
-            None
-        };
-        Some(Self {
-            valid_encodings,
-            interpretations,
-        })
+        EncodingReport {
+            valid_encodings: encodings.values().cloned().collect(),
+            strings,
+            name: label,
+            codepage,
+            inferred_encoding,
+            inferred_encoding_source,
+        }
      }
  }
author	Ben Pfaff <blp@cs.stanford.edu>
	Sun, 24 Aug 2025 00:36:59 +0000 (17:36 -0700)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Sun, 24 Aug 2025 00:36:59 +0000 (17:36 -0700)
rust/pspp/src/main.rs		patch \| blob \| history
rust/pspp/src/output/mod.rs		patch \| blob \| history
rust/pspp/src/output/pivot/mod.rs		patch \| blob \| history
rust/pspp/src/output/text.rs		patch \| blob \| history
rust/pspp/src/sys/encoding.rs		patch \| blob \| history
rust/pspp/src/sys/raw.rs		patch \| blob \| history