rust: Rename `mod.rs` files to reflect module names.

author Ben Pfaff <blp@cs.stanford.edu>

Wed, 3 Sep 2025 18:24:28 +0000 (11:24 -0700)

committer Ben Pfaff <blp@cs.stanford.edu>

Wed, 3 Sep 2025 18:24:28 +0000 (11:24 -0700)
author Ben Pfaff <blp@cs.stanford.edu>
Wed, 3 Sep 2025 18:24:28 +0000 (11:24 -0700)
committer Ben Pfaff <blp@cs.stanford.edu>
Wed, 3 Sep 2025 18:24:28 +0000 (11:24 -0700)
diff --git a/rust/pspp/src/command.rs b/rust/pspp/src/command.rs

new file mode 100644 (file)

index 0000000..5f0d1ec
--- /dev/null
+++ b/rust/pspp/src/command.rs
@@ -0,0 +1,959 @@
+// PSPP - a program for statistical analysis.
+// Copyright (C) 2025 Free Software Foundation, Inc.
+//
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free Software
+// Foundation, either version 3 of the License, or (at your option) any later
+// version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+// details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program.  If not, see <http://www.gnu.org/licenses/>.
+
+#![allow(dead_code)]
+use std::{
+    fmt::{Debug, Write},
+    ops::RangeFrom,
+    sync::OnceLock,
+};
+
+use crosstabs::crosstabs_command;
+use ctables::ctables_command;
+use data_list::data_list_command;
+use descriptives::descriptives_command;
+use either::Either;
+use flagset::{flags, FlagSet};
+use pspp_derive::FromTokens;
+
+use crate::{
+    format::AbstractFormat,
+    identifier::Identifier,
+    integer::ToInteger,
+    lex::{
+        command_name::CommandMatcher,
+        lexer::{LexToken, TokenSlice},
+        Punct, Token,
+    },
+    message::{Diagnostic, Diagnostics},
+};
+
+pub mod crosstabs;
+pub mod ctables;
+pub mod data_list;
+pub mod descriptives;
+
+// The states that command parsing can be in.  They are declared through
+// `flags!` so that several of them can be combined into a `FlagSet<State>`,
+// which is how `Command::allowed_states` stores them.
+flags! {
+    enum State: u8 {
+        /// No active dataset yet defined.
+        Initial,
+
+        /// Active dataset has been defined.
+        Data,
+
+        /// Inside `INPUT PROGRAM`.
+        InputProgram,
+
+        /// Inside `FILE TYPE`.
+        FileType,
+
+        /// State nested inside `LOOP` or `DO IF`, inside [State::Data].
+        NestedData,
+
+        /// State nested inside `LOOP` or `DO IF`, inside [State::InputProgram].
+        NestedInputProgram,
+    }
+}
+
+/// One entry in the table of commands returned by `commands()`.
+///
+/// NOTE(review): within this file only `name` and `run` are read; the other
+/// fields are merely initialized, so their descriptions below are inferred
+/// from their names and should be confirmed against their eventual users.
+struct Command {
+    /// Set of [State]s attached to the command (presumably the states in which
+    /// it may be invoked).
+    allowed_states: FlagSet<State>,
+    /// Presumably marks commands available only with enhanced syntax.
+    enhanced_only: bool,
+    /// Presumably marks commands available only in testing mode.
+    testing_only: bool,
+    /// Presumably forbids abbreviating the command name.
+    no_abbrev: bool,
+    /// Full command name, as handed to `CommandMatcher` for matching.
+    name: &'static str,
+    /// Callback invoked with the parsing [Context] once the command name has
+    /// been matched.
+    run: Box<dyn Fn(&mut Context) + Send + Sync>, //-> Box<dyn ParsedCommand> + Send + Sync>,
+}
+
+/// Failure result of a [FromTokens] parser.
+#[derive(Debug)]
+enum ParseError {
+    /// Parsing failed and the failure is propagated unchanged by the
+    /// combinators in this file (`Option`, `Either`, `Punctuated`, `Seq0`, ...).
+    Error(Diagnostics),
+    /// The input did not match this parser.  The combinators in this file
+    /// treat this as recoverable: they fall back to an alternative or simply
+    /// stop repeating.
+    Mismatch(Diagnostics),
+}
+
+/// Successful result of a [FromTokens] parser.
+#[derive(Debug)]
+struct Parsed<T> {
+    /// The value that was parsed.
+    value: T,
+    /// The tokens that remain after the parsed value.
+    rest: TokenSlice,
+    /// Diagnostics collected while parsing even though parsing succeeded.
+    diagnostics: Diagnostics,
+}
+
+impl<T> Parsed<T> {
+    /// Builds a result from its three parts; `warnings` becomes the stored
+    /// diagnostics.
+    pub fn new(value: T, rest: TokenSlice, warnings: Diagnostics) -> Self {
+        let diagnostics = warnings;
+        Self {
+            value,
+            rest,
+            diagnostics,
+        }
+    }
+
+    /// Builds a result that carries no diagnostics.
+    pub fn ok(value: T, rest: TokenSlice) -> Self {
+        let diagnostics = Diagnostics::default();
+        Self {
+            value,
+            rest,
+            diagnostics,
+        }
+    }
+
+    /// Splits the result into `(value, rest, diagnostics)`.
+    pub fn into_tuple(self) -> (T, TokenSlice, Diagnostics) {
+        let Self {
+            value,
+            rest,
+            diagnostics,
+        } = self;
+        (value, rest, diagnostics)
+    }
+
+    /// Moves this result's diagnostics onto the end of `d` and returns the
+    /// remaining `(value, rest)` pair.
+    pub fn take_diagnostics(self, d: &mut Diagnostics) -> (T, TokenSlice) {
+        let Self {
+            value,
+            rest,
+            diagnostics: mut taken,
+        } = self;
+        d.0.append(&mut taken.0);
+        (value, rest)
+    }
+
+    /// Transforms the parsed value with `f`, leaving the remaining tokens and
+    /// the diagnostics as they are.
+    pub fn map<F, R>(self, f: F) -> Parsed<R>
+    where
+        F: FnOnce(T) -> R,
+    {
+        let Self {
+            value,
+            rest,
+            diagnostics,
+        } = self;
+        Parsed {
+            value: f(value),
+            rest,
+            diagnostics,
+        }
+    }
+
+    /// Appends `warnings` after the diagnostics already attached to this
+    /// result.
+    pub fn warn(self, mut warnings: Diagnostics) -> Self {
+        let Self {
+            value,
+            rest,
+            diagnostics,
+        } = self;
+        let mut combined = diagnostics.0;
+        combined.append(&mut warnings.0);
+        Self {
+            value,
+            rest,
+            diagnostics: Diagnostics(combined),
+        }
+    }
+}
+
+/// What every parser in this module returns: a [Parsed] value on success or a
+/// [ParseError] on failure.
+type ParseResult<T> = Result<Parsed<T>, ParseError>;
+
+/// Extension trait for upgrading a recoverable mismatch into a hard error.
+trait MismatchToError {
+    /// Turns [ParseError::Mismatch] into [ParseError::Error], keeping the
+    /// diagnostics; every other value is returned unchanged.
+    fn mismatch_to_error(self) -> Self;
+}
+
+impl<T> MismatchToError for ParseResult<T> {
+    /// Rewrites a `Mismatch` failure as an `Error` failure carrying the same
+    /// diagnostics.  Successes and existing `Error`s pass through untouched.
+    fn mismatch_to_error(self) -> Self {
+        self.map_err(|failure| match failure {
+            ParseError::Mismatch(diagnostics) => ParseError::Error(diagnostics),
+            already_error @ ParseError::Error(_) => already_error,
+        })
+    }
+}
+
+/// A type that can be parsed from the front of a [TokenSlice].
+trait FromTokens {
+    /// Attempts to parse `Self` starting at the first token of `input`.
+    ///
+    /// On success the returned [Parsed] holds the value, the tokens that
+    /// follow it, and any diagnostics collected along the way.  On failure the
+    /// [ParseError] variant tells the caller whether the input merely did not
+    /// match (`Mismatch`) or parsing failed outright (`Error`).
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized;
+}
+
+/// Optional syntax: a mismatch of the inner parser yields `None` without
+/// consuming any tokens.
+impl<T> FromTokens for Option<T>
+where
+    T: FromTokens,
+{
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        T::from_tokens(input)
+            .map(|parsed| parsed.map(Some))
+            .or_else(|failure| match failure {
+                // Not present: succeed with `None` and leave `input` intact.
+                ParseError::Mismatch(_) => Ok(Parsed::ok(None, input.clone())),
+                // Hard errors are passed on unchanged.
+                ParseError::Error(error) => Err(ParseError::Error(error)),
+            })
+    }
+}
+
+/// Alternation: tries `L` first and falls back to `R` only if `L` reports a
+/// mismatch.
+impl<L, R> FromTokens for Either<L, R>
+where
+    L: FromTokens,
+    R: FromTokens,
+{
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        let left_failure = match L::from_tokens(input) {
+            Ok(left) => return Ok(left.map(Either::Left)),
+            Err(failure) => failure,
+        };
+        match left_failure {
+            // A hard error from the left alternative is final.
+            ParseError::Error(error) => Err(ParseError::Error(error)),
+            // The left alternative did not match, so the right alternative
+            // decides the outcome (including any failure it reports).
+            ParseError::Mismatch(_) => {
+                R::from_tokens(input).map(|right| right.map(Either::Right))
+            }
+        }
+    }
+}
+
+/// Sequence of two parsers: `A` followed immediately by `B`.
+impl<A, B> FromTokens for (A, B)
+where
+    A: FromTokens,
+    B: FromTokens,
+{
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        // Diagnostics accumulate in parse order: first A's, then B's.
+        let mut collected = Diagnostics::default();
+        let (first, after_first) = A::from_tokens(input)?.take_diagnostics(&mut collected);
+        let (second, rest) = B::from_tokens(&after_first)?.take_diagnostics(&mut collected);
+        Ok(Parsed::new((first, second), rest, collected))
+    }
+}
+
+/// Sequence of three parsers: `A`, then `B`, then `C`.
+impl<A, B, C> FromTokens for (A, B, C)
+where
+    A: FromTokens,
+    B: FromTokens,
+    C: FromTokens,
+{
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        // All three parsers run before any diagnostics are merged, and the
+        // merge keeps parse order: A's, then B's, then C's.
+        let first = A::from_tokens(input)?;
+        let second = B::from_tokens(&first.rest)?;
+        let third = C::from_tokens(&second.rest)?;
+
+        let mut collected = first.diagnostics;
+        let mut from_second = second.diagnostics;
+        let mut from_third = third.diagnostics;
+        collected.0.append(&mut from_second.0);
+        collected.0.append(&mut from_third.0);
+
+        let value = (first.value, second.value, third.value);
+        Ok(Parsed::new(value, third.rest, collected))
+    }
+}
+
+/// Marker for the `/` token.  Its [FromTokens] implementation is generated by
+/// the derive macro from the `syntax` attribute.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(syntax = "/")]
+pub struct Slash;
+
+/// Marker for the `,` token.  Unlike its siblings, its [FromTokens]
+/// implementation is written by hand.
+#[derive(Debug)]
+pub struct Comma;
+
+impl FromTokens for Comma {
+    /// Succeeds if `input` starts with a comma token, consuming it; otherwise
+    /// reports whatever failure `_parse_token` produces.
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        let comma = Token::Punct(Punct::Comma);
+        let matched = _parse_token(input, &comma)?;
+        Ok(matched.map(|_token| Comma))
+    }
+}
+
+// Marker types for individual punctuation tokens.  Each one gets its
+// `FromTokens` implementation from `pspp_derive::FromTokens`; the
+// `#[pspp(syntax = ...)]` attribute presumably names the token to match
+// (the derive macro itself is defined outside this file).
+
+/// Marker for the `=` token.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(syntax = "=")]
+pub struct Equals;
+
+/// Marker for the `&` token.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(syntax = "&")]
+pub struct And;
+
+/// Marker for the `>` token.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(syntax = ">")]
+pub struct Gt;
+
+/// Marker for the `+` token.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(syntax = "+")]
+pub struct Plus;
+
+/// Marker for the `-` token.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(syntax = "-")]
+pub struct Dash;
+
+/// Marker for the `*` token.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(syntax = "*")]
+pub struct Asterisk;
+
+/// Marker for the `**` token.
+#[derive(Debug, pspp_derive::FromTokens)]
+#[pspp(syntax = "**")]
+pub struct Exp;
+
+/// Marker with no explicit `syntax` attribute.
+/// NOTE(review): presumably the derive falls back to the type name, i.e. the
+/// `BY` keyword -- confirm against `pspp_derive`.
+#[derive(Debug, pspp_derive::FromTokens)]
+struct By;
+
+/// A list of `T` items, each optionally followed by a separator `P`.
+///
+/// With the default `P = Option<Comma>`, commas between items are optional.
+pub struct Punctuated<T, P = Option<Comma>> {
+    /// Items that were followed by a successfully parsed separator, paired
+    /// with that separator.
+    head: Vec<(T, P)>,
+    /// Final item that was not followed by a separator, if any.
+    tail: Option<T>,
+}
+
+/// Formats the list as `[a, b, c]`, showing only the items; separators are
+/// left out.
+impl<T, P> Debug for Punctuated<T, P>
+where
+    T: Debug,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        // Items from `head` come first, then the optional `tail`.
+        let mut items = self
+            .head
+            .iter()
+            .map(|(item, _separator)| item)
+            .chain(self.tail.iter());
+
+        write!(f, "[")?;
+        if let Some(first) = items.next() {
+            write!(f, "{first:?}")?;
+            for item in items {
+                write!(f, ", ")?;
+                write!(f, "{item:?}")?;
+            }
+        }
+        write!(f, "]")
+    }
+}
+
+/// Parses alternating items and separators until one of them fails to match.
+///
+/// The list may be empty, so this never fails with a mismatch; only a hard
+/// error from `T` or `P` is propagated.
+impl<T, P> FromTokens for Punctuated<T, P>
+where
+    T: FromTokens,
+    P: FromTokens,
+{
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        let mut head = Vec::new();
+        let mut collected = Diagnostics::default();
+        let mut remaining = input.clone();
+        let tail = loop {
+            // An item that does not match ends the list with no dangling tail.
+            let item = match T::from_tokens(&remaining) {
+                Ok(parsed) => {
+                    let (item, rest) = parsed.take_diagnostics(&mut collected);
+                    remaining = rest;
+                    item
+                }
+                Err(ParseError::Mismatch(_)) => break None,
+                Err(ParseError::Error(error)) => return Err(ParseError::Error(error)),
+            };
+            // A separator that does not match ends the list; the item just
+            // parsed becomes the tail.
+            let separator = match P::from_tokens(&remaining) {
+                Ok(parsed) => {
+                    let (separator, rest) = parsed.take_diagnostics(&mut collected);
+                    remaining = rest;
+                    separator
+                }
+                Err(ParseError::Mismatch(_)) => break Some(item),
+                Err(ParseError::Error(error)) => return Err(ParseError::Error(error)),
+            };
+            head.push((item, separator));
+        };
+        Ok(Parsed::new(Punctuated { head, tail }, remaining, collected))
+    }
+}
+
+/// Parses a `T` and moves the result into a `Box`.
+impl<T> FromTokens for Box<T>
+where
+    T: FromTokens,
+{
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        let unboxed = T::from_tokens(input)?;
+        Ok(unboxed.map(Box::new))
+    }
+}
+
+/// The subcommands of a command, each parsed as a `T`.
+pub struct Subcommands<T>(Vec<T>);
+
+/// Formats as `Subcommands[...]`, separating items with a comma and a newline.
+impl<T> Debug for Subcommands<T>
+where
+    T: Debug,
+{
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let Self(items) = self;
+        let mut items = items.iter();
+
+        write!(f, "Subcommands[")?;
+        if let Some(first) = items.next() {
+            write!(f, "{first:?}")?;
+            for item in items {
+                writeln!(f, ",")?;
+                write!(f, "{item:?}")?;
+            }
+        }
+        write!(f, "]")
+    }
+}
+
+/// Splits the input at `/` tokens and parses each piece as a `T`.
+///
+/// This parser always succeeds.  A subcommand that fails to parse (with either
+/// kind of [ParseError]) is dropped from the result and its diagnostics are
+/// kept; a subcommand that parses but leaves tokens unconsumed is kept and a
+/// warning is added for the leftover tokens.
+impl<T> FromTokens for Subcommands<T>
+where
+    T: FromTokens,
+{
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        let mut items = Vec::new();
+        let mut diagnostics = Vec::new();
+        let mut input = input.clone();
+        loop {
+            // Skip the slash (or slashes) that introduce the next subcommand.
+            let start = input.skip_until(|token| token != &Token::Punct(Punct::Slash));
+            if start.is_empty() {
+                break;
+            }
+            // The subcommand runs up to, but not including, the next slash.
+            let end = start.skip_to(&Token::Punct(Punct::Slash));
+            let subcommand = start.subslice(0..start.len() - end.len());
+            match T::from_tokens(&subcommand) {
+                Ok(p) => {
+                    let (value, rest, mut d) = p.into_tuple();
+                    items.push(value);
+                    diagnostics.append(&mut d.0);
+                    if !rest.is_empty() {
+                        diagnostics.push(rest.warning("Syntax error expecting end of subcommand."));
+                    }
+                }
+                Err(ParseError::Error(mut d) | ParseError::Mismatch(mut d)) => {
+                    diagnostics.append(&mut d.0);
+                }
+            }
+            input = end;
+        }
+        // The diagnostics are handed back to the caller in the result, which
+        // decides how to report them; nothing is printed from here.
+        Ok(Parsed {
+            value: Subcommands(items),
+            rest: input,
+            diagnostics: Diagnostics(diagnostics),
+        })
+    }
+}
+
+/// Zero or more consecutive `T`s.
+#[derive(Debug)]
+pub struct Seq0<T>(Vec<T>);
+
+/// Repeats `T` until the input runs out, `T` stops matching, or `T` succeeds
+/// without consuming anything.  Only a hard error from `T` makes this fail.
+impl<T> FromTokens for Seq0<T>
+where
+    T: FromTokens,
+{
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        let mut values = Vec::new();
+        let mut collected = Diagnostics::default();
+        let mut remaining = input.clone();
+        while !remaining.is_empty() {
+            let parsed = match T::from_tokens(&remaining) {
+                Ok(parsed) => parsed,
+                Err(ParseError::Mismatch(_)) => break,
+                Err(ParseError::Error(error)) => return Err(ParseError::Error(error)),
+            };
+            let (value, rest) = parsed.take_diagnostics(&mut collected);
+            // A parse that made no progress would repeat forever; stop and
+            // discard its value (its diagnostics were already collected).
+            if rest.len() == remaining.len() {
+                break;
+            }
+            values.push(value);
+            remaining = rest;
+        }
+        Ok(Parsed::new(Seq0(values), remaining, collected))
+    }
+}
+
+/// One or more consecutive `T`s.
+#[derive(Debug)]
+pub struct Seq1<T>(Vec<T>);
+
+/// Repeats `T` like [Seq0] does, but reports a mismatch if not even one value
+/// was parsed.
+impl<T> FromTokens for Seq1<T>
+where
+    T: FromTokens,
+{
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        let mut values = Vec::new();
+        let mut collected = Diagnostics::default();
+        let mut remaining = input.clone();
+        while !remaining.is_empty() {
+            let parsed = match T::from_tokens(&remaining) {
+                Ok(parsed) => parsed,
+                Err(ParseError::Mismatch(_)) => break,
+                Err(ParseError::Error(error)) => return Err(ParseError::Error(error)),
+            };
+            let (value, rest) = parsed.take_diagnostics(&mut collected);
+            // Stop on a parse that consumed nothing, dropping its value.
+            if rest.len() == remaining.len() {
+                break;
+            }
+            values.push(value);
+            remaining = rest;
+        }
+        match values.is_empty() {
+            // Nothing matched: the collected diagnostics are discarded and a
+            // generic mismatch is reported at the (unadvanced) position.
+            true => Err(ParseError::Mismatch(remaining.error("Syntax error.").into())),
+            false => Ok(Parsed::new(Seq1(values), remaining, collected)),
+        }
+    }
+}
+
+/*
+impl<T> FromTokens for Vec<T>
+where
+    T: FromTokens,
+{
+    fn from_tokens(mut input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        let mut values_vec = Vec::new();
+        let mut warnings_vec = Vec::new();
+        while !input.is_empty() {
+            match T::from_tokens(input) {
+                Ok(Parsed {
+                    value,
+                    rest,
+                    diagnostics: mut warnings,
+                }) => {
+                    values_vec.push(value);
+                    warnings_vec.append(&mut warnings.0);
+                    input = rest;
+                }
+                Err(ParseError::Mismatch(_)) => break,
+                Err(ParseError::Error(e)) => return Err(ParseError::Error(e)),
+            }
+        }
+        Ok(Parsed {
+            value: values_vec,
+            rest: input,
+            diagnostics: Diagnostics(warnings_vec),
+        })
+    }
+}*/
+
+/// Catch-all parser: takes the whole input as its value and leaves
+/// `input.end()` as the remainder.  It cannot fail.
+impl FromTokens for TokenSlice {
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        let everything = input.clone();
+        let remainder = input.end();
+        Ok(Parsed::ok(everything, remainder))
+    }
+}
+
+/// A single subcommand, parsed as a `T`.
+#[derive(Debug)]
+struct Subcommand<T>(pub T);
+
+/// Parses the next `/`-delimited subcommand.
+///
+/// Fails with a hard error if nothing but slashes remains.  Failures of `T`
+/// are propagated as they are.  Tokens that `T` leaves unconsumed inside the
+/// subcommand produce a warning, and the remainder handed back to the caller
+/// starts at the next slash.
+impl<T> FromTokens for Subcommand<T>
+where
+    T: FromTokens,
+{
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        let start = input.skip_until(|token| token != &Token::Punct(Punct::Slash));
+        if start.is_empty() {
+            let diagnostic = input.error("Syntax error at end of input.");
+            return Err(ParseError::Error(diagnostic.into()));
+        }
+
+        // Isolate the tokens between here and the next slash.
+        let end = start.skip_to(&Token::Punct(Punct::Slash));
+        let length = start.len() - end.len();
+        let subcommand = start.subslice(0..length);
+
+        let Parsed {
+            value,
+            rest: leftover,
+            diagnostics: mut warnings,
+        } = T::from_tokens(&subcommand)?;
+        if !leftover.is_empty() {
+            let warning = leftover.warning("Syntax error expecting end of subcommand.");
+            warnings.0.push(warning);
+        }
+        Ok(Parsed::new(Self(value), end, warnings))
+    }
+}
+
+/// A `T` enclosed in parentheses.
+#[derive(Debug)]
+struct InParens<T>(pub T);
+
+/// Parses `(`, then a `T`, then `)`.  A failure of any of the three steps is
+/// propagated unchanged; only the diagnostics of `T` are kept.
+impl<T> FromTokens for InParens<T>
+where
+    T: FromTokens,
+{
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        let opened = parse_token(input, &Token::Punct(Punct::LParen))?;
+        let inner = T::from_tokens(&opened.rest)?;
+        let closed = parse_token(&inner.rest, &Token::Punct(Punct::RParen))?;
+        Ok(Parsed::new(Self(inner.value), closed.rest, inner.diagnostics))
+    }
+}
+
+/// A `T` enclosed in square brackets.
+#[derive(Debug)]
+struct InSquares<T>(pub T);
+
+/// Parses `[`, then a `T`, then `]`.  A failure of any of the three steps is
+/// propagated unchanged; only the diagnostics of `T` are kept.
+impl<T> FromTokens for InSquares<T>
+where
+    T: FromTokens,
+{
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        let opened = parse_token(input, &Token::Punct(Punct::LSquare))?;
+        let inner = T::from_tokens(&opened.rest)?;
+        let closed = parse_token(&inner.rest, &Token::Punct(Punct::RSquare))?;
+        Ok(Parsed::new(Self(inner.value), closed.rest, inner.diagnostics))
+    }
+}
+
+/// Applies `parse` to the first token of `input`.
+///
+/// If there is a first token and `parse` returns `Some`, that value is
+/// returned and one token is consumed.  Otherwise the result is a
+/// [ParseError::Mismatch] with empty diagnostics, which callers are expected
+/// to replace with a more specific message.
+fn parse_token_if<F, R>(input: &TokenSlice, parse: F) -> ParseResult<R>
+where
+    F: Fn(&Token) -> Option<R>,
+{
+    let accepted = match input.get_token(0) {
+        Some(token) => parse(token),
+        None => None,
+    };
+    match accepted {
+        Some(result) => Ok(Parsed::ok(result, input.subslice(1..input.len()))),
+        None => Err(ParseError::Mismatch(Diagnostics::default())),
+    }
+}
+
+/// Like [parse_token], but on success the value is a clone of the token that
+/// was matched instead of `()`.
+fn _parse_token(input: &TokenSlice, token: &Token) -> ParseResult<Token> {
+    match input.skip(token) {
+        Some(rest) => {
+            let matched = input.first().token.clone();
+            Ok(Parsed::ok(matched, rest))
+        }
+        None => {
+            let diagnostic = input.error(format!("expecting {token}"));
+            Err(ParseError::Mismatch(diagnostic.into()))
+        }
+    }
+}
+
+/// Consumes `token` from the front of `input`, or reports a mismatch whose
+/// message names the expected token.
+fn parse_token(input: &TokenSlice, token: &Token) -> ParseResult<()> {
+    match input.skip(token) {
+        Some(rest) => Ok(Parsed::ok((), rest)),
+        None => {
+            let diagnostic = input.error(format!("expecting {token}"));
+            Err(ParseError::Mismatch(diagnostic.into()))
+        }
+    }
+}
+
+/// Consumes the given `syntax` from the front of `input` via
+/// `TokenSlice::skip_syntax`, or reports a mismatch whose message names the
+/// expected syntax.
+fn parse_syntax(input: &TokenSlice, syntax: &str) -> ParseResult<()> {
+    match input.skip_syntax(syntax) {
+        Some(rest) => Ok(Parsed::ok((), rest)),
+        None => {
+            let diagnostic = input.error(format!("expecting {syntax}"));
+            Err(ParseError::Mismatch(diagnostic.into()))
+        }
+    }
+}
+
+/// A list of variable ranges.  It uses the default separator of [Punctuated],
+/// so commas between entries are optional.
+pub type VarList = Punctuated<VarRange>;
+
+/// A numeric token value.
+pub struct Number(f64);
+
+/// Formats as the bare `f64`, without a `Number(...)` wrapper.
+impl Debug for Number {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let Self(value) = self;
+        write!(f, "{value:?}")
+    }
+}
+
+impl FromTokens for Number {
+    /// Accepts a first token for which `Token::as_number` returns a value;
+    /// anything else is a mismatch reported as "expecting number".
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        match parse_token_if(input, |token| token.as_number().map(Number)) {
+            Ok(number) => Ok(number),
+            // `parse_token_if` fails with empty diagnostics; substitute a
+            // message that points at the offending input.
+            Err(_) => Err(ParseError::Mismatch(
+                input.error(String::from("expecting number")).into(),
+            )),
+        }
+    }
+}
+
+/// An integer token value.
+#[derive(Debug)]
+pub struct Integer(i64);
+
+impl FromTokens for Integer {
+    /// Accepts a first token for which `Token::as_integer` returns a value;
+    /// anything else is a mismatch reported as "expecting integer".
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        match parse_token_if(input, |token| token.as_integer().map(Integer)) {
+            Ok(integer) => Ok(integer),
+            // `parse_token_if` fails with empty diagnostics; substitute a
+            // message that points at the offending input.
+            Err(_) => Err(ParseError::Mismatch(
+                input.error(String::from("expecting integer")).into(),
+            )),
+        }
+    }
+}
+
+/// One entry in a variable list.
+pub enum VarRange {
+    /// A single identifier.
+    Single(Identifier),
+    /// Two identifiers joined by `TO`.
+    Range(Identifier, Identifier),
+    /// The `ALL` token.
+    All,
+}
+
+/// Formats the entry roughly as it is written in syntax: `x`, `x TO y`, or
+/// `ALL`.
+impl Debug for VarRange {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::All => write!(f, "ALL"),
+            Self::Single(name) => write!(f, "{name:?}"),
+            Self::Range(first, last) => write!(f, "{first:?} TO {last:?}"),
+        }
+    }
+}
+
+impl FromTokens for VarRange {
+    /// Parses `ALL`, `id TO id`, or a lone `id`, tried in that order.
+    ///
+    /// If `TO` is present but not followed by an identifier, the result is the
+    /// single leading identifier and `TO` is left unconsumed.
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        if let Ok(all) = parse_token(input, &Token::Punct(Punct::All)) {
+            return Ok(Parsed::ok(Self::All, all.rest));
+        }
+
+        let (from, after_from, _) = parse_id(input)?.into_tuple();
+        let upper_bound = parse_token(&after_from, &Token::Punct(Punct::To))
+            .and_then(|to_keyword| parse_id(&to_keyword.rest));
+        match upper_bound {
+            Ok(parsed) => Ok(parsed.map(|to| Self::Range(from, to))),
+            Err(_) => Ok(Parsed::ok(Self::Single(from), after_from)),
+        }
+    }
+}
+
+/// Parses a single identifier token, returning a clone of the identifier, or
+/// reports a mismatch if `input` does not start with one.
+fn parse_id(input: &TokenSlice) -> ParseResult<Identifier> {
+    let mut tokens = input.iter();
+    match tokens.next() {
+        Some(LexToken {
+            token: Token::Id(id),
+            ..
+        }) => Ok(Parsed::ok(id.clone(), tokens.remainder())),
+        _ => {
+            let diagnostic = input.error("Syntax error expecting identifier.");
+            Err(ParseError::Mismatch(diagnostic.into()))
+        }
+    }
+}
+
+/// Parses a format specification.
+///
+/// The first token must be an identifier whose text parses as an
+/// [AbstractFormat].  Anything else -- no token, a non-identifier token, or an
+/// identifier that is not a valid format -- is a mismatch.
+fn parse_format(input: &TokenSlice) -> ParseResult<AbstractFormat> {
+    let mut iter = input.iter();
+    if let Some(LexToken {
+        token: Token::Id(id),
+        ..
+    }) = iter.next()
+    {
+        if let Ok(format) = id.0.as_ref().parse() {
+            return Ok(Parsed::ok(format, iter.remainder()));
+        }
+    }
+    // The message names what was actually expected: an identifier may well be
+    // present here and simply not be a valid format.
+    Err(ParseError::Mismatch(
+        input
+            .error("Syntax error expecting format specification.")
+            .into(),
+    ))
+}
+
+/// Parses a single string token, returning a clone of its contents, or
+/// reports a mismatch if `input` does not start with one.
+fn parse_string(input: &TokenSlice) -> ParseResult<String> {
+    let mut iter = input.iter();
+    if let Some(LexToken {
+        token: Token::String(s),
+        ..
+    }) = iter.next()
+    {
+        Ok(Parsed::ok(s.clone(), iter.remainder()))
+    } else {
+        // Report the token kind that is really expected here (a string, not
+        // an identifier).
+        Err(ParseError::Mismatch(
+            input.error("Syntax error expecting string.").into(),
+        ))
+    }
+}
+
+/// Identifiers parse via [parse_id].
+impl FromTokens for Identifier {
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        parse_id(input)
+    }
+}
+
+/// Strings parse via [parse_string], i.e. from a single string token.
+impl FromTokens for String {
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        parse_string(input)
+    }
+}
+
+/// Formats parse via [parse_format].
+impl FromTokens for AbstractFormat {
+    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
+    where
+        Self: Sized,
+    {
+        parse_format(input)
+    }
+}
+
+/// Splits `src` at every `/` token and returns the non-empty pieces in order.
+fn collect_subcommands(src: TokenSlice) -> Vec<TokenSlice> {
+    let mut subcommands = Vec::new();
+    for piece in src.split(|token| token.token == Token::Punct(Punct::Slash)) {
+        // Leading, trailing, and doubled slashes produce empty pieces.
+        if !piece.is_empty() {
+            subcommands.push(piece);
+        }
+    }
+    subcommands
+}
+
+/// Returns the table of known commands.
+///
+/// The table is built on first use and stored in a `OnceLock`, so every later
+/// call returns the same slice.
+fn commands() -> &'static [Command] {
+    static COMMANDS: OnceLock<Vec<Command>> = OnceLock::new();
+
+    let table = COMMANDS.get_or_init(|| {
+        let mut table = Vec::new();
+        table.push(descriptives_command());
+        table.push(crosstabs_command());
+        table.push(ctables_command());
+        table.push(data_list_command());
+        // `ECHO` is registered so that its name matches, but running it is
+        // still unimplemented.
+        table.push(Command {
+            allowed_states: FlagSet::full(),
+            enhanced_only: false,
+            testing_only: false,
+            no_abbrev: false,
+            name: "ECHO",
+            run: Box::new(|_context| todo!()),
+        });
+        table
+    });
+    table.as_slice()
+}
+
+/// Appends the `n`th token of `lexer` to the candidate command name `s`.
+///
+/// A dash is appended directly.  An identifier, or a sign-positive number that
+/// converts exactly to `usize`, is appended after a space, unless `s` is empty
+/// or ends in a dash.  Returns `true` if something was appended and `false` if
+/// the token cannot be part of a command name or there is no such token.
+fn parse_command_word(lexer: &mut TokenSlice, s: &mut String, n: usize) -> bool {
+    // Decide on the separator before `s` is modified below.
+    let needs_space = matches!(s.chars().next_back(), Some(last) if last != '-');
+    let separator = if needs_space { " " } else { "" };
+
+    match lexer.get_token(n) {
+        Some(Token::Punct(Punct::Dash)) => s.push('-'),
+        Some(Token::Id(id)) => write!(s, "{separator}{id}").unwrap(),
+        Some(Token::Number(number)) if number.is_sign_positive() => {
+            match number.to_exact_usize() {
+                Some(integer) => write!(s, "{separator}{integer}").unwrap(),
+                None => return false,
+            }
+        }
+        _ => return false,
+    }
+    true
+}
+
+/// Feeds every known command name to a `CommandMatcher` built for `s` and
+/// returns the matcher's verdict: the matched command, if any, and the
+/// matcher's missing-word count.
+fn find_best_match(s: &str) -> (Option<&'static Command>, isize) {
+    let mut matcher = CommandMatcher::new(s);
+    commands().iter().for_each(|candidate| {
+        matcher.add(candidate.name, candidate);
+    });
+    matcher.get_match()
+}
+
+/// Identifies the command named at the start of `lexer`.
+///
+/// Tokens are appended one at a time to a candidate name, which is looked up
+/// with [find_best_match] after each word, until the matcher no longer reports
+/// missing words or the next token cannot be part of a command name.
+///
+/// On success, returns the command and a token count, which the caller uses as
+/// the number of leading tokens to skip.  On failure, a diagnostic is passed
+/// to `error` and `Err(())` is returned.
+fn parse_command_name(
+    lexer: &mut TokenSlice,
+    error: &dyn Fn(Diagnostic),
+) -> Result<(&'static Command, usize), ()> {
+    let mut s = String::new();
+    let mut word = 0;
+    let mut missing_words = 0;
+    let mut command = None;
+    while parse_command_word(lexer, &mut s, word) {
+        (command, missing_words) = find_best_match(&s);
+        if missing_words <= 0 {
+            break;
+        }
+        word += 1;
+    }
+    if command.is_none() && missing_words > 0 {
+        // Retry with a trailing " ." appended to the candidate, then strip it
+        // again so that `s` holds only what the user wrote.
+        s.push_str(" .");
+        (command, missing_words) = find_best_match(&s);
+        s.truncate(s.len() - 2);
+    }
+
+    match command {
+        Some(command) => Ok((command, ((word as isize + 1) + missing_words) as usize)),
+        None => {
+            if word == 0 {
+                error(
+                    lexer
+                        .subslice(0..1)
+                        .error("Syntax error expecting command name"),
+                )
+            } else {
+                // `format!` is needed here: in a plain string literal `{s}` is
+                // emitted verbatim instead of being replaced by the name.
+                error(
+                    lexer
+                        .subslice(0..word + 1)
+                        .error(format!("Unknown command `{s}`.")),
+                )
+            };
+            Err(())
+        }
+    }
+}
+
+/// Successful outcome of running a command.
+///
+/// NOTE(review): only `Success` is produced in this file (by
+/// `end_of_command`); the meanings given for the other variants are inferred
+/// from their names and should be confirmed.
+pub enum Success {
+    /// The command completed normally.
+    Success,
+    /// Presumably: end of input was reached.
+    Eof,
+    /// Presumably: processing should stop.
+    Finish,
+}
+
+/// Checks that nothing but the end of the command follows position
+/// `range.start` in `context.lexer`.
+///
+/// Returns `Ok(Success::Success)` if there is no token at that position or the
+/// token is `Token::End`.  Otherwise reports a syntax error covering the rest
+/// of the tokens through `context` and returns `Err(())`.
+pub fn end_of_command(context: &Context, range: RangeFrom<usize>) -> Result<Success, ()> {
+    let start = range.start;
+    if matches!(context.lexer.get_token(start), None | Some(Token::End)) {
+        return Ok(Success::Success);
+    }
+
+    let trailing = context.lexer.subslice(start..context.lexer.len());
+    context.error(trailing.error("Syntax error expecting end of command."));
+    Err(())
+}
+
+/// Parses and runs the single command at the start of `lexer`.
+///
+/// Empty input, or input that starts with `Token::End`, is accepted silently.
+/// Otherwise the command name is matched and the command's `run` callback is
+/// invoked with a [Context] whose lexer starts after the command name.
+/// `_state` is currently unused.
+fn parse_in_state(mut lexer: TokenSlice, error: &dyn Fn(Diagnostic), _state: State) {
+    match lexer.get_token(0) {
+        None | Some(Token::End) => (),
+        _ => {
+            // A failed name lookup has already been reported through `error`
+            // by `parse_command_name`, and its `Err` payload is just `()`, so
+            // there is nothing further to report or print here.
+            if let Ok((command, n_tokens)) = parse_command_name(&mut lexer, error) {
+                let mut context = Context {
+                    error,
+                    lexer: lexer.subslice(n_tokens..lexer.len()),
+                    command_name: Some(command.name),
+                };
+                (command.run)(&mut context);
+            }
+        }
+    }
+}
+
+/// Parses and runs the command at the start of `lexer`, starting from
+/// [State::Initial].  Diagnostics are delivered through the `error` callback.
+pub fn parse_command(lexer: TokenSlice, error: &dyn Fn(Diagnostic)) {
+    parse_in_state(lexer, error, State::Initial)
+}
+
+/// State handed to a command's `run` callback.
+pub struct Context<'a> {
+    /// Callback that receives every diagnostic the command reports.
+    error: &'a dyn Fn(Diagnostic),
+    /// The command's tokens, starting after the command name.
+    lexer: TokenSlice,
+    /// Name of the command being run, if known.
+    command_name: Option<&'static str>,
+}
+
+impl Context<'_> {
+    /// Reports `diagnostic` through the context's error callback.
+    pub fn error(&self, diagnostic: Diagnostic) {
+        (self.error)(diagnostic);
+    }
+}
diff --git a/rust/pspp/src/command/mod.rs b/rust/pspp/src/command/mod.rs

deleted file mode 100644 (file)

index 5f0d1ec..0000000
--- a/rust/pspp/src/command/mod.rs
+++ /dev/null
@@ -1,959 +0,0 @@
-// PSPP - a program for statistical analysis.
-// Copyright (C) 2025 Free Software Foundation, Inc.
-//
-// This program is free software: you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free Software
-// Foundation, either version 3 of the License, or (at your option) any later
-// version.
-//
-// This program is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
-// details.
-//
-// You should have received a copy of the GNU General Public License along with
-// this program.  If not, see <http://www.gnu.org/licenses/>.
-
-#![allow(dead_code)]
-use std::{
-    fmt::{Debug, Write},
-    ops::RangeFrom,
-    sync::OnceLock,
-};
-
-use crosstabs::crosstabs_command;
-use ctables::ctables_command;
-use data_list::data_list_command;
-use descriptives::descriptives_command;
-use either::Either;
-use flagset::{flags, FlagSet};
-use pspp_derive::FromTokens;
-
-use crate::{
-    format::AbstractFormat,
-    identifier::Identifier,
-    integer::ToInteger,
-    lex::{
-        command_name::CommandMatcher,
-        lexer::{LexToken, TokenSlice},
-        Punct, Token,
-    },
-    message::{Diagnostic, Diagnostics},
-};
-
-pub mod crosstabs;
-pub mod ctables;
-pub mod data_list;
-pub mod descriptives;
-
-flags! {
-    enum State: u8 {
-        /// No active dataset yet defined.
-        Initial,
-
-        /// Active dataset has been defined.
-        Data,
-
-        /// Inside `INPUT PROGRAM`.
-        InputProgram,
-
-        /// Inside `FILE TYPE`.
-        FileType,
-
-        /// State nested inside `LOOP` or `DO IF`, inside [State::Data].
-        NestedData,
-
-        /// State nested inside `LOOP` or `DO IF`, inside [State::InputProgram].
-        NestedInputProgram,
-    }
-}
-
-struct Command {
-    allowed_states: FlagSet<State>,
-    enhanced_only: bool,
-    testing_only: bool,
-    no_abbrev: bool,
-    name: &'static str,
-    run: Box<dyn Fn(&mut Context) + Send + Sync>, //-> Box<dyn ParsedCommand> + Send + Sync>,
-}
-
-#[derive(Debug)]
-enum ParseError {
-    Error(Diagnostics),
-    Mismatch(Diagnostics),
-}
-
-#[derive(Debug)]
-struct Parsed<T> {
-    value: T,
-    rest: TokenSlice,
-    diagnostics: Diagnostics,
-}
-
-impl<T> Parsed<T> {
-    pub fn new(value: T, rest: TokenSlice, warnings: Diagnostics) -> Self {
-        Self {
-            value,
-            rest,
-            diagnostics: warnings,
-        }
-    }
-    pub fn ok(value: T, rest: TokenSlice) -> Self {
-        Self {
-            value,
-            rest,
-            diagnostics: Diagnostics::default(),
-        }
-    }
-    pub fn into_tuple(self) -> (T, TokenSlice, Diagnostics) {
-        (self.value, self.rest, self.diagnostics)
-    }
-    pub fn take_diagnostics(self, d: &mut Diagnostics) -> (T, TokenSlice) {
-        let (value, rest, mut diagnostics) = self.into_tuple();
-        d.0.append(&mut diagnostics.0);
-        (value, rest)
-    }
-    pub fn map<F, R>(self, f: F) -> Parsed<R>
-    where
-        F: FnOnce(T) -> R,
-    {
-        Parsed {
-            value: f(self.value),
-            rest: self.rest,
-            diagnostics: self.diagnostics,
-        }
-    }
-    pub fn warn(self, mut warnings: Diagnostics) -> Self {
-        Self {
-            value: self.value,
-            rest: self.rest,
-            diagnostics: {
-                let mut vec = self.diagnostics.0;
-                vec.append(&mut warnings.0);
-                Diagnostics(vec)
-            },
-        }
-    }
-}
-
-type ParseResult<T> = Result<Parsed<T>, ParseError>;
-
-trait MismatchToError {
-    fn mismatch_to_error(self) -> Self;
-}
-
-impl<T> MismatchToError for ParseResult<T> {
-    fn mismatch_to_error(self) -> Self {
-        match self {
-            Err(ParseError::Mismatch(diagnostic)) => Err(ParseError::Error(diagnostic)),
-            rest => rest,
-        }
-    }
-}
-
-trait FromTokens {
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized;
-}
-
-impl<T> FromTokens for Option<T>
-where
-    T: FromTokens,
-{
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        match T::from_tokens(input) {
-            Ok(p) => Ok(p.map(Some)),
-            Err(ParseError::Mismatch(_)) => Ok(Parsed::ok(None, input.clone())),
-            Err(ParseError::Error(error)) => Err(ParseError::Error(error)),
-        }
-    }
-}
-
-impl<L, R> FromTokens for Either<L, R>
-where
-    L: FromTokens,
-    R: FromTokens,
-{
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        match L::from_tokens(input) {
-            Ok(p) => Ok(p.map(Either::Left)),
-            Err(ParseError::Mismatch(_)) => Ok(R::from_tokens(input)?.map(Either::Right)),
-            Err(ParseError::Error(error)) => Err(ParseError::Error(error)),
-        }
-    }
-}
-
-impl<A, B> FromTokens for (A, B)
-where
-    A: FromTokens,
-    B: FromTokens,
-{
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        let (a, input, mut diagnostics) = A::from_tokens(input)?.into_tuple();
-        let (b, rest, mut diagnostics2) = B::from_tokens(&input)?.into_tuple();
-        diagnostics.0.append(&mut diagnostics2.0);
-        Ok(Parsed::new((a, b), rest, diagnostics))
-    }
-}
-
-impl<A, B, C> FromTokens for (A, B, C)
-where
-    A: FromTokens,
-    B: FromTokens,
-    C: FromTokens,
-{
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        let (a, input, mut diagnostics) = A::from_tokens(input)?.into_tuple();
-        let (b, input, mut diagnostics2) = B::from_tokens(&input)?.into_tuple();
-        let (c, rest, mut diagnostics3) = C::from_tokens(&input)?.into_tuple();
-        diagnostics.0.append(&mut diagnostics2.0);
-        diagnostics.0.append(&mut diagnostics3.0);
-        Ok(Parsed::new((a, b, c), rest, diagnostics))
-    }
-}
-
-#[derive(Debug, pspp_derive::FromTokens)]
-#[pspp(syntax = "/")]
-pub struct Slash;
-
-#[derive(Debug)]
-pub struct Comma;
-
-impl FromTokens for Comma {
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        _parse_token(input, &Token::Punct(Punct::Comma)).map(|p| p.map(|_| Comma))
-    }
-}
-
-#[derive(Debug, pspp_derive::FromTokens)]
-#[pspp(syntax = "=")]
-pub struct Equals;
-
-#[derive(Debug, pspp_derive::FromTokens)]
-#[pspp(syntax = "&")]
-pub struct And;
-
-#[derive(Debug, pspp_derive::FromTokens)]
-#[pspp(syntax = ">")]
-pub struct Gt;
-
-#[derive(Debug, pspp_derive::FromTokens)]
-#[pspp(syntax = "+")]
-pub struct Plus;
-
-#[derive(Debug, pspp_derive::FromTokens)]
-#[pspp(syntax = "-")]
-pub struct Dash;
-
-#[derive(Debug, pspp_derive::FromTokens)]
-#[pspp(syntax = "*")]
-pub struct Asterisk;
-
-#[derive(Debug, pspp_derive::FromTokens)]
-#[pspp(syntax = "**")]
-pub struct Exp;
-
-#[derive(Debug, pspp_derive::FromTokens)]
-struct By;
-
-pub struct Punctuated<T, P = Option<Comma>> {
-    head: Vec<(T, P)>,
-    tail: Option<T>,
-}
-
-impl<T, P> Debug for Punctuated<T, P>
-where
-    T: Debug,
-{
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "[")?;
-        for (index, item) in self
-            .head
-            .iter()
-            .map(|(t, _p)| t)
-            .chain(self.tail.iter())
-            .enumerate()
-        {
-            if index > 0 {
-                write!(f, ", ")?;
-            }
-            write!(f, "{item:?}")?;
-        }
-        write!(f, "]")
-    }
-}
-
-impl<T, P> FromTokens for Punctuated<T, P>
-where
-    T: FromTokens,
-    P: FromTokens,
-{
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        let mut head = Vec::new();
-        let mut warnings_vec = Vec::new();
-        let mut input = input.clone();
-        let tail = loop {
-            let t = match T::from_tokens(&input) {
-                Ok(Parsed {
-                    value,
-                    rest,
-                    diagnostics: mut warnings,
-                }) => {
-                    warnings_vec.append(&mut warnings.0);
-                    input = rest;
-                    value
-                }
-                Err(ParseError::Mismatch(_)) => break None,
-                Err(ParseError::Error(e)) => return Err(ParseError::Error(e)),
-            };
-            let p = match P::from_tokens(&input) {
-                Ok(Parsed {
-                    value,
-                    rest,
-                    diagnostics: mut warnings,
-                }) => {
-                    warnings_vec.append(&mut warnings.0);
-                    input = rest;
-                    value
-                }
-                Err(ParseError::Mismatch(_)) => break Some(t),
-                Err(ParseError::Error(e)) => return Err(ParseError::Error(e)),
-            };
-            head.push((t, p));
-        };
-        Ok(Parsed {
-            value: Punctuated { head, tail },
-            rest: input,
-            diagnostics: Diagnostics(warnings_vec),
-        })
-    }
-}
-
-impl<T> FromTokens for Box<T>
-where
-    T: FromTokens,
-{
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        T::from_tokens(input).map(|p| p.map(|value| Box::new(value)))
-    }
-}
-
-pub struct Subcommands<T>(Vec<T>);
-
-impl<T> Debug for Subcommands<T>
-where
-    T: Debug,
-{
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "Subcommands[")?;
-        for (index, item) in self.0.iter().enumerate() {
-            if index > 0 {
-                writeln!(f, ",")?;
-            }
-            write!(f, "{item:?}")?;
-        }
-        write!(f, "]")
-    }
-}
-
-impl<T> FromTokens for Subcommands<T>
-where
-    T: FromTokens,
-{
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        let mut items = Vec::new();
-        let mut diagnostics = Vec::new();
-        let mut input = input.clone();
-        loop {
-            let start = input.skip_until(|token| token != &Token::Punct(Punct::Slash));
-            if start.is_empty() {
-                break;
-            }
-            let end = start.skip_to(&Token::Punct(Punct::Slash));
-            let subcommand = start.subslice(0..start.len() - end.len());
-            match T::from_tokens(&subcommand) {
-                Ok(p) => {
-                    let (value, rest, mut d) = p.into_tuple();
-                    items.push(value);
-                    diagnostics.append(&mut d.0);
-                    if !rest.is_empty() {
-                        diagnostics.push(rest.warning("Syntax error expecting end of subcommand."));
-                    }
-                }
-                Err(ParseError::Error(mut d) | ParseError::Mismatch(mut d)) => {
-                    diagnostics.append(&mut d.0);
-                }
-            }
-            input = end;
-        }
-        println!("{diagnostics:?}");
-        Ok(Parsed {
-            value: Subcommands(items),
-            rest: input,
-            diagnostics: Diagnostics(diagnostics),
-        })
-    }
-}
-
-#[derive(Debug)]
-pub struct Seq0<T>(Vec<T>);
-
-impl<T> FromTokens for Seq0<T>
-where
-    T: FromTokens,
-{
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        let mut values_vec = Vec::new();
-        let mut warnings_vec = Vec::new();
-        let mut input = input.clone();
-        while !input.is_empty() {
-            match T::from_tokens(&input) {
-                Ok(Parsed {
-                    value,
-                    rest,
-                    diagnostics: mut warnings,
-                }) => {
-                    warnings_vec.append(&mut warnings.0);
-                    if input.len() == rest.len() {
-                        break;
-                    }
-                    values_vec.push(value);
-                    input = rest;
-                }
-                Err(ParseError::Mismatch(_)) => break,
-                Err(ParseError::Error(e)) => return Err(ParseError::Error(e)),
-            }
-        }
-        Ok(Parsed {
-            value: Seq0(values_vec),
-            rest: input,
-            diagnostics: Diagnostics(warnings_vec),
-        })
-    }
-}
-
-#[derive(Debug)]
-pub struct Seq1<T>(Vec<T>);
-
-impl<T> FromTokens for Seq1<T>
-where
-    T: FromTokens,
-{
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        let mut values_vec = Vec::new();
-        let mut warnings_vec = Vec::new();
-        let mut input = input.clone();
-        while !input.is_empty() {
-            match T::from_tokens(&input) {
-                Ok(Parsed {
-                    value,
-                    rest,
-                    diagnostics: mut warnings,
-                }) => {
-                    warnings_vec.append(&mut warnings.0);
-                    if input.len() == rest.len() {
-                        break;
-                    }
-                    values_vec.push(value);
-                    input = rest;
-                }
-                Err(ParseError::Mismatch(_)) => break,
-                Err(ParseError::Error(e)) => return Err(ParseError::Error(e)),
-            }
-        }
-        if values_vec.is_empty() {
-            return Err(ParseError::Mismatch(input.error("Syntax error.").into()));
-        }
-        Ok(Parsed {
-            value: Seq1(values_vec),
-            rest: input,
-            diagnostics: Diagnostics(warnings_vec),
-        })
-    }
-}
-
-/*
-impl<T> FromTokens for Vec<T>
-where
-    T: FromTokens,
-{
-    fn from_tokens(mut input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        let mut values_vec = Vec::new();
-        let mut warnings_vec = Vec::new();
-        while !input.is_empty() {
-            match T::from_tokens(input) {
-                Ok(Parsed {
-                    value,
-                    rest,
-                    diagnostics: mut warnings,
-                }) => {
-                    values_vec.push(value);
-                    warnings_vec.append(&mut warnings.0);
-                    input = rest;
-                }
-                Err(ParseError::Mismatch(_)) => break,
-                Err(ParseError::Error(e)) => return Err(ParseError::Error(e)),
-            }
-        }
-        Ok(Parsed {
-            value: values_vec,
-            rest: input,
-            diagnostics: Diagnostics(warnings_vec),
-        })
-    }
-}*/
-
-impl FromTokens for TokenSlice {
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        Ok(Parsed::ok(input.clone(), input.end()))
-    }
-}
-
-#[derive(Debug)]
-struct Subcommand<T>(pub T);
-
-impl<T> FromTokens for Subcommand<T>
-where
-    T: FromTokens,
-{
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        let start = input.skip_until(|token| token != &Token::Punct(Punct::Slash));
-        if start.is_empty() {
-            return Err(ParseError::Error(
-                input.error("Syntax error at end of input.").into(),
-            ));
-        }
-        let end = start.skip_to(&Token::Punct(Punct::Slash));
-        let subcommand = start.subslice(0..start.len() - end.len());
-        let (value, rest, mut warnings) = T::from_tokens(&subcommand)?.into_tuple();
-        if !rest.is_empty() {
-            warnings
-                .0
-                .push(rest.warning("Syntax error expecting end of subcommand."));
-        }
-        Ok(Parsed::new(Self(value), end, warnings))
-    }
-}
-
-#[derive(Debug)]
-struct InParens<T>(pub T);
-
-impl<T> FromTokens for InParens<T>
-where
-    T: FromTokens,
-{
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        let ((), rest, _) = parse_token(input, &Token::Punct(Punct::LParen))?.into_tuple();
-        let (value, rest, warnings) = T::from_tokens(&rest)?.into_tuple();
-        let ((), rest, _) = parse_token(&rest, &Token::Punct(Punct::RParen))?.into_tuple();
-        Ok(Parsed {
-            value: Self(value),
-            rest,
-            diagnostics: warnings,
-        })
-    }
-}
-
-#[derive(Debug)]
-struct InSquares<T>(pub T);
-
-impl<T> FromTokens for InSquares<T>
-where
-    T: FromTokens,
-{
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        let ((), rest, _) = parse_token(input, &Token::Punct(Punct::LSquare))?.into_tuple();
-        let (value, rest, warnings) = T::from_tokens(&rest)?.into_tuple();
-        let ((), rest, _) = parse_token(&rest, &Token::Punct(Punct::RSquare))?.into_tuple();
-        Ok(Parsed {
-            value: Self(value),
-            rest,
-            diagnostics: warnings,
-        })
-    }
-}
-
-fn parse_token_if<F, R>(input: &TokenSlice, parse: F) -> ParseResult<R>
-where
-    F: Fn(&Token) -> Option<R>,
-{
-    if let Some(token) = input.get_token(0) {
-        if let Some(result) = parse(token) {
-            return Ok(Parsed::ok(result, input.subslice(1..input.len())));
-        }
-    }
-    Err(ParseError::Mismatch(Diagnostics::default()))
-}
-
-fn _parse_token(input: &TokenSlice, token: &Token) -> ParseResult<Token> {
-    if let Some(rest) = input.skip(token) {
-        Ok(Parsed::ok(input.first().token.clone(), rest))
-    } else {
-        Err(ParseError::Mismatch(
-            input.error(format!("expecting {token}")).into(),
-        ))
-    }
-}
-
-fn parse_token(input: &TokenSlice, token: &Token) -> ParseResult<()> {
-    if let Some(rest) = input.skip(token) {
-        Ok(Parsed::ok((), rest))
-    } else {
-        Err(ParseError::Mismatch(
-            input.error(format!("expecting {token}")).into(),
-        ))
-    }
-}
-
-fn parse_syntax(input: &TokenSlice, syntax: &str) -> ParseResult<()> {
-    if let Some(rest) = input.skip_syntax(syntax) {
-        Ok(Parsed::ok((), rest))
-    } else {
-        Err(ParseError::Mismatch(
-            input.error(format!("expecting {syntax}")).into(),
-        ))
-    }
-}
-
-pub type VarList = Punctuated<VarRange>;
-
-pub struct Number(f64);
-
-impl Debug for Number {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{:?}", self.0)
-    }
-}
-
-impl FromTokens for Number {
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        parse_token_if(input, |token| token.as_number().map(Number))
-            .map_err(|_| ParseError::Mismatch(input.error(String::from("expecting number")).into()))
-    }
-}
-
-#[derive(Debug)]
-pub struct Integer(i64);
-
-impl FromTokens for Integer {
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        parse_token_if(input, |token| token.as_integer().map(Integer)).map_err(|_| {
-            ParseError::Mismatch(input.error(String::from("expecting integer")).into())
-        })
-    }
-}
-
-pub enum VarRange {
-    Single(Identifier),
-    Range(Identifier, Identifier),
-    All,
-}
-
-impl Debug for VarRange {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Self::Single(var) => write!(f, "{var:?}"),
-            Self::Range(from, to) => write!(f, "{from:?} TO {to:?}"),
-            Self::All => write!(f, "ALL"),
-        }
-    }
-}
-
-impl FromTokens for VarRange {
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        if let Ok(Parsed { rest, .. }) = parse_token(input, &Token::Punct(Punct::All)) {
-            Ok(Parsed::ok(Self::All, rest))
-        } else {
-            let (from, rest, _) = parse_id(input)?.into_tuple();
-            if let Ok(Parsed { rest, .. }) = parse_token(&rest, &Token::Punct(Punct::To)) {
-                if let Ok(p) = parse_id(&rest) {
-                    return Ok(p.map(|to| Self::Range(from, to)));
-                }
-            }
-            Ok(Parsed::ok(Self::Single(from), rest))
-        }
-    }
-}
-
-fn parse_id(input: &TokenSlice) -> ParseResult<Identifier> {
-    let mut iter = input.iter();
-    if let Some(LexToken {
-        token: Token::Id(id),
-        ..
-    }) = iter.next()
-    {
-        Ok(Parsed::ok(id.clone(), iter.remainder()))
-    } else {
-        Err(ParseError::Mismatch(
-            input.error("Syntax error expecting identifier.").into(),
-        ))
-    }
-}
-
-fn parse_format(input: &TokenSlice) -> ParseResult<AbstractFormat> {
-    let mut iter = input.iter();
-    if let Some(LexToken {
-        token: Token::Id(id),
-        ..
-    }) = iter.next()
-    {
-        if let Ok(format) = id.0.as_ref().parse() {
-            return Ok(Parsed::ok(format, iter.remainder()));
-        }
-    }
-    Err(ParseError::Mismatch(
-        input.error("Syntax error expecting identifier.").into(),
-    ))
-}
-
-fn parse_string(input: &TokenSlice) -> ParseResult<String> {
-    let mut iter = input.iter();
-    if let Some(LexToken {
-        token: Token::String(s),
-        ..
-    }) = iter.next()
-    {
-        Ok(Parsed::ok(s.clone(), iter.remainder()))
-    } else {
-        Err(ParseError::Mismatch(
-            input.error("Syntax error expecting identifier.").into(),
-        ))
-    }
-}
-
-impl FromTokens for Identifier {
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        parse_id(input)
-    }
-}
-
-impl FromTokens for String {
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        parse_string(input)
-    }
-}
-
-impl FromTokens for AbstractFormat {
-    fn from_tokens(input: &TokenSlice) -> ParseResult<Self>
-    where
-        Self: Sized,
-    {
-        parse_format(input)
-    }
-}
-
-fn collect_subcommands(src: TokenSlice) -> Vec<TokenSlice> {
-    src.split(|token| token.token == Token::Punct(Punct::Slash))
-        .filter(|slice| !slice.is_empty())
-        .collect()
-}
-
-fn commands() -> &'static [Command] {
-    fn new_commands() -> Vec<Command> {
-        vec![
-            descriptives_command(),
-            crosstabs_command(),
-            ctables_command(),
-            data_list_command(),
-            Command {
-                allowed_states: FlagSet::full(),
-                enhanced_only: false,
-                testing_only: false,
-                no_abbrev: false,
-                name: "ECHO",
-                run: Box::new(|_context| todo!()),
-            },
-        ]
-    }
-
-    static COMMANDS: OnceLock<Vec<Command>> = OnceLock::new();
-    COMMANDS.get_or_init(new_commands).as_slice()
-}
-
-fn parse_command_word(lexer: &mut TokenSlice, s: &mut String, n: usize) -> bool {
-    let separator = match s.chars().next_back() {
-        Some(c) if c != '-' => " ",
-        _ => "",
-    };
-
-    match lexer.get_token(n) {
-        Some(Token::Punct(Punct::Dash)) => {
-            s.push('-');
-            true
-        }
-        Some(Token::Id(id)) => {
-            write!(s, "{separator}{id}").unwrap();
-            true
-        }
-        Some(Token::Number(number)) if number.is_sign_positive() => {
-            if let Some(integer) = number.to_exact_usize() {
-                write!(s, "{separator}{integer}").unwrap();
-                true
-            } else {
-                false
-            }
-        }
-        _ => false,
-    }
-}
-
-fn find_best_match(s: &str) -> (Option<&'static Command>, isize) {
-    let mut cm = CommandMatcher::new(s);
-    for command in commands() {
-        cm.add(command.name, command);
-    }
-    cm.get_match()
-}
-
-fn parse_command_name(
-    lexer: &mut TokenSlice,
-    error: &dyn Fn(Diagnostic),
-) -> Result<(&'static Command, usize), ()> {
-    let mut s = String::new();
-    let mut word = 0;
-    let mut missing_words = 0;
-    let mut command = None;
-    while parse_command_word(lexer, &mut s, word) {
-        (command, missing_words) = find_best_match(&s);
-        if missing_words <= 0 {
-            break;
-        }
-        word += 1;
-    }
-    if command.is_none() && missing_words > 0 {
-        s.push_str(" .");
-        (command, missing_words) = find_best_match(&s);
-        s.truncate(s.len() - 2);
-    }
-
-    match command {
-        Some(command) => Ok((command, ((word as isize + 1) + missing_words) as usize)),
-        None => {
-            if word == 0 {
-                error(
-                    lexer
-                        .subslice(0..1)
-                        .error("Syntax error expecting command name"),
-                )
-            } else {
-                error(lexer.subslice(0..word + 1).error("Unknown command `{s}`."))
-            };
-            Err(())
-        }
-    }
-}
-
-pub enum Success {
-    Success,
-    Eof,
-    Finish,
-}
-
-pub fn end_of_command(context: &Context, range: RangeFrom<usize>) -> Result<Success, ()> {
-    match context.lexer.get_token(range.start) {
-        None | Some(Token::End) => Ok(Success::Success),
-        _ => {
-            context.error(
-                context
-                    .lexer
-                    .subslice(range.start..context.lexer.len())
-                    .error("Syntax error expecting end of command."),
-            );
-            Err(())
-        }
-    }
-}
-
-fn parse_in_state(mut lexer: TokenSlice, error: &dyn Fn(Diagnostic), _state: State) {
-    match lexer.get_token(0) {
-        None | Some(Token::End) => (),
-        _ => match parse_command_name(&mut lexer, error) {
-            Ok((command, n_tokens)) => {
-                let mut context = Context {
-                    error,
-                    lexer: lexer.subslice(n_tokens..lexer.len()),
-                    command_name: Some(command.name),
-                };
-                (command.run)(&mut context);
-            }
-            Err(error) => println!("{error:?}"),
-        },
-    }
-}
-
-pub fn parse_command(lexer: TokenSlice, error: &dyn Fn(Diagnostic)) {
-    parse_in_state(lexer, error, State::Initial)
-}
-
-pub struct Context<'a> {
-    error: &'a dyn Fn(Diagnostic),
-    lexer: TokenSlice,
-    command_name: Option<&'static str>,
-}
-
-impl Context<'_> {
-    pub fn error(&self, diagnostic: Diagnostic) {
-        (self.error)(diagnostic);
-    }
-}
diff --git a/rust/pspp/src/crypto.rs b/rust/pspp/src/crypto.rs

new file mode 100644 (file)

index 0000000..c2e86cd
--- /dev/null
+++ b/rust/pspp/src/crypto.rs
@@ -0,0 +1,668 @@
+//! # Decryption for SPSS encrypted files
+//!
+//! SPSS supports encryption using a password for data, viewer, and syntax
+//! files.  The encryption mechanism is poorly designed, so this module provides
+//! support for decrypting, but not encrypting, the SPSS format.
+//! Use [EncryptedFile] as the starting point for reading an encrypted file.
+//!
+//! SPSS also supports what it calls "encrypted passwords".  Use [EncodedPassword]
+//! to encode and decode these passwords.
+
+// Warn about missing docs, but not for items declared with `#[cfg(test)]`.
+#![cfg_attr(not(test), warn(missing_docs))]
+
+use aes::{
+    cipher::{generic_array::GenericArray, BlockDecrypt, KeyInit},
+    Aes256, Aes256Dec,
+};
+use cmac::{Cmac, Mac};
+use smallvec::SmallVec;
+use std::{
+    fmt::Debug,
+    io::{BufRead, Error as IoError, ErrorKind, Read, Seek, SeekFrom},
+};
+use thiserror::Error as ThisError;
+
+use binrw::{io::NoSeek, BinRead};
+
+/// Error reading an encrypted file.
+///
+/// This is the error type returned by [EncryptedFile::new].
+#[derive(Clone, Debug, ThisError)]
+pub enum Error {
+    /// I/O error.
+    ///
+    /// Only the [ErrorKind] of the underlying [std::io::Error] is kept.
+    #[error("I/O error reading encrypted file wrapper ({0})")]
+    IoError(ErrorKind),
+
+    /// Invalid padding in final encrypted data block.
+    // NOTE(review): nothing in this file constructs this variant.  When the
+    // padding of the final block is bad, `unlock_literal` hands back the
+    // `EncryptedFile` as its error value instead.
+    #[error("Invalid padding in final encrypted data block")]
+    InvalidPadding,
+
+    /// Not an encrypted file.
+    ///
+    /// Reported when the header does not carry the expected magic string.
+    #[error("Not an encrypted file")]
+    NotEncrypted,
+
+    /// Encrypted file has invalid length.
+    ///
+    /// The payload is the length shown in the message.
+    #[error("Encrypted file has invalid length {0} (expected 4 more than a multiple of 16).")]
+    InvalidLength(u64),
+
+    /// Unknown file type.
+    ///
+    /// The payload is the three-byte file type tag from the header, with each
+    /// byte converted to a `char`.
+    #[error("Unknown file type {0:?}.")]
+    UnknownFileType(String),
+}
+
+impl From<std::io::Error> for Error {
+    /// Converts an I/O error into [Error::IoError], retaining only its kind.
+    fn from(io_error: std::io::Error) -> Self {
+        let kind = io_error.kind();
+        Error::IoError(kind)
+    }
+}
+
+/// Fixed-size header at the start of an encrypted file, as parsed by
+/// [EncryptedFile::new].
+///
+/// The fields plus the magic string add up to 8 + 9 + 3 + 16 = 36 bytes.
+#[derive(BinRead)]
+struct EncryptedHeader {
+    /// Fixed as `1c 00 00 00 00 00 00 00` in practice.
+    _ignore: [u8; 8],
+
+    /// File type tag: `SAV`, `SPV`, or `SPS`.  The `magic` directive requires
+    /// the literal bytes `ENCRYPTED` immediately before it.
+    #[br(magic = b"ENCRYPTED")]
+    file_type: [u8; 3],
+
+    /// Fixed as `15 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00` in practice.
+    _ignore2: [u8; 16],
+}
+
+/// An encrypted file.
+///
+/// Created by [EncryptedFile::new] and turned into an [EncryptedReader] by
+/// [EncryptedFile::unlock] or [EncryptedFile::unlock_literal].
+pub struct EncryptedFile<R> {
+    /// Underlying reader.  [EncryptedFile::new] leaves it positioned at offset
+    /// 36, just past the header.
+    reader: R,
+
+    /// Type of file, taken from the header.
+    file_type: FileType,
+
+    /// Total length of the file in bytes.  This includes the 36-byte header:
+    /// `new` obtains it by seeking relative to the end of the file, and
+    /// `unlock_literal` subtracts 36 from it.
+    length: u64,
+
+    /// First block of ciphertext, for verifying that any password the user
+    /// tries is correct.
+    first_block: [u8; 16],
+
+    /// Last block of ciphertext, for checking padding and determining the
+    /// plaintext length.
+    last_block: [u8; 16],
+}
+
+/// Type of encrypted file.
+///
+/// [EncryptedFile::new] determines this from the three-byte tag in the file
+/// header.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum FileType {
+    /// A `.sps` syntax file (header tag `SPS`).
+    Syntax,
+
+    /// A `.spv` viewer file (header tag `SPV`).
+    Viewer,
+
+    /// A `.sav` data file (header tag `SAV`).
+    Data,
+}
+
+impl<R> EncryptedFile<R>
+where
+    R: Read + Seek,
+{
+    /// Opens `reader` as an encrypted file.
+    ///
+    /// This reads enough of the file to verify that it is in the expected
+    /// format and returns an error if it cannot be read or is not the expected
+    /// format.  On success, `reader` is left positioned just past the header,
+    /// at the start of the ciphertext.
+    ///
+    /// `reader` doesn't need to be [BufRead], and probably should not be.  The
+    /// [EncryptedReader] returned by [unlock] or [unlock_literal] will be
+    /// [BufRead].
+    ///
+    /// # Errors
+    ///
+    /// - [Error::NotEncrypted] if the header lacks the `ENCRYPTED` magic.
+    /// - [Error::UnknownFileType] if the type tag is not `SAV`, `SPV`, or
+    ///   `SPS`.
+    /// - [Error::InvalidLength] if the file is not a 36-byte header followed
+    ///   by a whole, nonzero number of 16-byte blocks.
+    /// - [Error::IoError] if reading or seeking fails.
+    ///
+    /// [unlock]: Self::unlock
+    /// [unlock_literal]: Self::unlock_literal
+    pub fn new(mut reader: R) -> Result<Self, Error> {
+        // Size of the file header, in bytes.
+        const HEADER_LEN: u64 = 36;
+        // Size of one AES block, in bytes.
+        const BLOCK_LEN: u64 = 16;
+
+        let header =
+            EncryptedHeader::read_le(&mut NoSeek::new(&mut reader)).map_err(
+                |error| match error {
+                    binrw::Error::BadMagic { .. } => Error::NotEncrypted,
+                    binrw::Error::Io(error) => Error::IoError(error.kind()),
+                    _ => unreachable!(),
+                },
+            )?;
+        let file_type = match &header.file_type {
+            b"SAV" => FileType::Data,
+            b"SPV" => FileType::Viewer,
+            b"SPS" => FileType::Syntax,
+            _ => {
+                return Err(Error::UnknownFileType(
+                    header.file_type.iter().map(|b| *b as char).collect(),
+                ))
+            }
+        };
+
+        // The first ciphertext block immediately follows the header.
+        let mut first_block = [0; 16];
+        reader.read_exact(&mut first_block)?;
+
+        // Seeking to the start of the last block yields that block's offset;
+        // adding the block size gives the total file length, header included.
+        let length = reader.seek(SeekFrom::End(-16))? + BLOCK_LEN;
+        if length < HEADER_LEN + BLOCK_LEN || (length - HEADER_LEN) % BLOCK_LEN != 0 {
+            // `length` already counts the header, so report it unchanged.
+            return Err(Error::InvalidLength(length));
+        }
+        let mut last_block = [0; 16];
+        reader.read_exact(&mut last_block)?;
+
+        // Rewind to the start of the ciphertext for the eventual reader.
+        reader.seek(SeekFrom::Start(HEADER_LEN))?;
+        Ok(Self {
+            reader,
+            file_type,
+            length,
+            first_block,
+            last_block,
+        })
+    }
+
+    /// Attempts to unlock the file with `password`, first taken literally and
+    /// then, if that fails, decoded with [EncodedPassword::decode].
+    ///
+    /// On success, returns an [EncryptedReader] for the file.  On failure,
+    /// gives the [EncryptedFile] back so that the caller can try again.
+    pub fn unlock(self, password: &[u8]) -> Result<EncryptedReader<R>, Self> {
+        // First attempt: the password exactly as given.
+        let this = match self.unlock_literal(password) {
+            Ok(reader) => return Ok(reader),
+            Err(this) => this,
+        };
+
+        // Second attempt: treat the password as an encoded one, if it has
+        // the right form.
+        let Some(encoded) = EncodedPassword::from_encoded(password) else {
+            return Err(this);
+        };
+        this.unlock_literal(&encoded.decode())
+    }
+
+    /// Tries to unlock the encrypted file using just `password`.  If
+    /// successful, returns an [EncryptedReader] for the file; on failure,
+    /// returns the [EncryptedFile] again for another try.
+    ///
+    /// Only the first 10 bytes of `password` are used.
+    ///
+    /// If the password itself might be encoded ("encrypted"), instead use
+    /// [Self::unlock] to try it both ways.
+    pub fn unlock_literal(self, password: &[u8]) -> Result<EncryptedReader<R>, Self> {
+        // NIST SP 800-108 fixed data.
+        #[rustfmt::skip]
+        static  FIXED: &[u8] = &[
+            // i
+            0x00, 0x00, 0x00, 0x01,
+
+            // label
+            0x35, 0x27, 0x13, 0xcc, 0x53, 0xa7, 0x78, 0x89,
+            0x87, 0x53, 0x22, 0x11, 0xd6, 0x5b, 0x31, 0x58,
+            0xdc, 0xfe, 0x2e, 0x7e, 0x94, 0xda, 0x2f, 0x00,
+            0xcc, 0x15, 0x71, 0x80, 0x0a, 0x6c, 0x63, 0x53,
+
+            // delimiter
+            0x00,
+
+            // context
+            0x38, 0xc3, 0x38, 0xac, 0x22, 0xf3, 0x63, 0x62,
+            0x0e, 0xce, 0x85, 0x3f, 0xb8, 0x07, 0x4c, 0x4e,
+            0x2b, 0x77, 0xc7, 0x21, 0xf5, 0x1a, 0x80, 0x1d,
+            0x67, 0xfb, 0xe1, 0xe1, 0x83, 0x07, 0xd8, 0x0d,
+
+            // L
+            0x00, 0x00, 0x01, 0x00,
+        ];
+
+        // Truncate password to at most 10 bytes.
+        let password = password.get(..10).unwrap_or(password);
+        let n = password.len();
+
+        //  padded_password = password padded with zeros to 32 bytes.
+        let mut padded_password = [0; 32];
+        padded_password[..n].copy_from_slice(password);
+
+        // cmac = CMAC(padded_password, fixed).  The key passed here is always
+        // 32 bytes, the AES-256 key size, so the `unwrap` is not expected to
+        // fail.
+        let mut cmac = <Cmac<Aes256> as Mac>::new_from_slice(&padded_password).unwrap();
+        cmac.update(FIXED);
+        let cmac = cmac.finalize().into_bytes();
+
+        // The key is the cmac repeated twice.
+        let mut key = [0; 32];
+        key[..16].copy_from_slice(cmac.as_slice());
+        key[16..].copy_from_slice(cmac.as_slice());
+
+        // Use key to initialize AES.
+        let aes = <Aes256Dec as KeyInit>::new_from_slice(&key).unwrap();
+
+        // Decrypt first block to verify password.
+        let mut out = [0; 16];
+        aes.decrypt_block_b2b(
+            GenericArray::from_slice(&self.first_block),
+            GenericArray::from_mut_slice(&mut out),
+        );
+        // The password is accepted only if the decrypted first block starts
+        // with one of these prefixes.
+        // NOTE(review): presumably these are the leading bytes of `.sav`,
+        // `.sps`, and `.spv` plaintext; confirm against the file formats.
+        static MAGIC: &[&[u8]] = &[
+            b"$FL2@(#)",
+            b"$FL3@(#)",
+            b"* Encoding",
+            b"PK\x03\x04\x14\0\x08",
+        ];
+        if !MAGIC.iter().any(|magic| out.starts_with(magic)) {
+            return Err(self);
+        }
+
+        // Decrypt last block to check padding and get final length.
+        aes.decrypt_block_b2b(
+            GenericArray::from_slice(&self.last_block),
+            GenericArray::from_mut_slice(&mut out),
+        );
+        let Some(padding_length) = parse_padding(&out) else {
+            return Err(self);
+        };
+
+        // `self.length` is the total file length, so the plaintext length is
+        // what remains after removing the 36-byte header and the padding.
+        Ok(EncryptedReader::new(
+            self.reader,
+            aes,
+            self.file_type,
+            self.length - 36 - padding_length as u64,
+        ))
+    }
+
+    /// Reports which kind of file this is, as recorded in its header.
+    pub fn file_type(&self) -> FileType {
+        let Self { file_type, .. } = self;
+        *file_type
+    }
+}
+
+/// Checks the padding at the end of the decrypted final `block`.
+///
+/// The last byte gives the padding length, which must be between 1 and 16,
+/// and every padding byte must have that same value.  Returns the padding
+/// length if the padding is well formed, otherwise `None`.
+fn parse_padding(block: &[u8; 16]) -> Option<usize> {
+    let last = block[15];
+    let count = usize::from(last);
+    if count == 0 || count > block.len() {
+        return None;
+    }
+
+    // Everything from `16 - count` onward must repeat the length byte.
+    let (_, padding) = block.split_at(block.len() - count);
+    padding.iter().all(|&byte| byte == last).then_some(count)
+}
+
+impl<R> Debug for EncryptedFile<R>
+where
+    R: Read,
+{
+    /// Formats as `EncryptedFile(<file type>)`, omitting the reader and the
+    /// saved ciphertext blocks.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let kind = &self.file_type;
+        write!(f, "EncryptedFile({kind:?})")
+    }
+}
+
+/// Encrypted file reader.
+///
+/// This implements [Read], [BufRead], and [Seek] for SPSS encrypted files.  To
+/// construct an [EncryptedReader], call [EncryptedFile::new], then
+/// [EncryptedFile::unlock].
+pub struct EncryptedReader<R> {
+    /// Underlying reader.
+    reader: R,
+
+    /// AES-256 decryption key.
+    aes: Aes256Dec,
+
+    /// Type of file.
+    file_type: FileType,
+
+    /// Plaintext file length (not including the file header or padding).
+    length: u64,
+
+    /// Plaintext data buffer.
+    buffer: Box<[u8; 4096]>,
+
+    /// Plaintext offset of the byte in `buffer[0]`.  A multiple of 16 less than
+    /// or equal to `length`.
+    start: u64,
+
+    /// Number of bytes in buffer (`0 <= head <= 4096`).
+    head: usize,
+
+    /// Offset in buffer of the next byte to read (`tail <= head`).  The
+    /// current plaintext position is `start + tail`.
+    tail: usize,
+}
+
+impl<R> EncryptedReader<R> {
+    /// Builds a reader for `length` bytes of plaintext that decrypts with
+    /// `aes`.  The buffer starts out empty, at plaintext offset 0.
+    fn new(reader: R, aes: Aes256Dec, file_type: FileType, length: u64) -> Self {
+        let buffer = Box::new([0; 4096]);
+        Self {
+            reader,
+            aes,
+            file_type,
+            length,
+            buffer,
+            start: 0,
+            head: 0,
+            tail: 0,
+        }
+    }
+
+    /// Copies as many already-decrypted bytes as fit from the buffer into
+    /// `buf`, advances past them, and returns how many were copied.
+    fn read_buffer(&mut self, buf: &mut [u8]) -> Result<usize, IoError> {
+        let available = self.head - self.tail;
+        let count = available.min(buf.len());
+        let end = self.tail + count;
+        buf[..count].copy_from_slice(&self.buffer[self.tail..end]);
+        self.tail = end;
+        Ok(count)
+    }
+
+    /// Reports which kind of file is being read.
+    pub fn file_type(&self) -> FileType {
+        let Self { file_type, .. } = self;
+        *file_type
+    }
+}
+
+impl<R> EncryptedReader<R>
+where
+    R: Read,
+{
+    /// Refills the buffer so that the next byte read is the plaintext byte at
+    /// `offset`.
+    ///
+    /// The caller must already have positioned the underlying reader at the
+    /// ciphertext block that contains `offset`.  Up to a buffer's worth of
+    /// whole 16-byte blocks is read from there and decrypted in place.
+    ///
+    /// If the block containing `offset` starts beyond the end of the
+    /// plaintext, nothing is read and the buffer is left empty.
+    ///
+    /// # Errors
+    ///
+    /// Returns any error from reading the underlying reader.  In that case
+    /// the buffer is left empty.
+    fn fill_buffer(&mut self, offset: u64) -> Result<(), IoError> {
+        self.start = offset / 16 * 16;
+        self.head = 0;
+        self.tail = (offset % 16) as usize;
+
+        // Number of plaintext bytes to expose.  The subtraction saturates so
+        // that an offset past the end cannot underflow, and the minimum is
+        // taken in `u64` so that a large remainder is not truncated on
+        // targets where `usize` is narrower than 64 bits.
+        let remaining = self.length.saturating_sub(self.start);
+        let n = remaining.min(self.buffer.len() as u64) as usize;
+
+        // Ciphertext comes in whole blocks, so the final block is read and
+        // decrypted in full even when only part of it is plaintext.
+        let blocks = &mut self.buffer[..n.next_multiple_of(16)];
+        self.reader.read_exact(blocks)?;
+        for block in blocks.chunks_exact_mut(16) {
+            self.aes.decrypt_block(GenericArray::from_mut_slice(block));
+        }
+        self.head = n;
+        Ok(())
+    }
+}
+
+impl<R> Read for EncryptedReader<R>
+where
+    R: Read,
+{
+    /// Reads decrypted bytes into `buf`, refilling the internal buffer from
+    /// the underlying reader when it has been used up.  Returns 0 at the end
+    /// of the plaintext.
+    fn read(&mut self, buf: &mut [u8]) -> Result<usize, IoError> {
+        if self.tail >= self.head {
+            // Buffer exhausted: continue from the plaintext offset just past
+            // it, unless that is the end of the file.
+            let next = self.start + self.head as u64;
+            if next >= self.length {
+                return Ok(0);
+            }
+            self.fill_buffer(next)?;
+        }
+        self.read_buffer(buf)
+    }
+}
+
+impl<R> Seek for EncryptedReader<R>
+where
+    R: Read + Seek,
+{
+    /// Seeks to a plaintext offset and returns it.
+    ///
+    /// Unless the target is the current position, this repositions the
+    /// underlying reader at the ciphertext block containing the target and
+    /// refills the buffer from there.
+    ///
+    /// # Errors
+    ///
+    /// Returns [ErrorKind::InvalidInput] if the target offset would be
+    /// negative, would overflow, or lies beyond the end of the plaintext.
+    /// Seeking exactly to the end is allowed.  Errors from the underlying
+    /// reader are passed through.
+    fn seek(&mut self, pos: SeekFrom) -> Result<u64, IoError> {
+        let current = self.start + self.tail as u64;
+        let offset = match pos {
+            SeekFrom::Start(offset) => Some(offset),
+            SeekFrom::End(relative) => self.length.checked_add_signed(relative),
+            SeekFrom::Current(relative) => current.checked_add_signed(relative),
+        }
+        // An offset past the end of the plaintext cannot be buffered: the
+        // refill would compute a negative remaining length, or leave `tail`
+        // beyond `head`.  Reject it up front.  This bound also keeps the
+        // `+ 36` below from overflowing.
+        .filter(|offset| *offset <= self.length)
+        .ok_or_else(|| IoError::from(ErrorKind::InvalidInput))?;
+        if offset != current {
+            // The ciphertext block holding `offset` sits 36 bytes (the file
+            // header) further into the underlying file.
+            self.reader.seek(SeekFrom::Start(offset / 16 * 16 + 36))?;
+            self.fill_buffer(offset)?;
+        }
+        Ok(offset)
+    }
+}
+
+impl<R> BufRead for EncryptedReader<R>
+where
+    R: Read + Seek,
+{
+    /// Returns the decrypted bytes that are buffered but not yet consumed,
+    /// refilling the buffer first if it is exhausted and more plaintext
+    /// remains.
+    fn fill_buf(&mut self) -> std::io::Result<&[u8]> {
+        let needs_refill =
+            self.tail >= self.head && self.start + self.head as u64 < self.length;
+        if needs_refill {
+            let next = self.start + self.head as u64;
+            self.fill_buffer(next)?;
+        }
+        let (tail, head) = (self.tail, self.head);
+        Ok(&self.buffer[tail..head])
+    }
+
+    /// Marks `amount` buffered bytes as read.
+    fn consume(&mut self, amount: usize) {
+        self.tail += amount;
+        debug_assert!(self.tail <= self.head);
+    }
+}
+
+/// Returns a 16-bit mask with only bit `x` set.  Used to build the password
+/// coding tables below.
+const fn b(x: i32) -> u16 {
+    1u16 << x
+}
+
+// Password coding tables.
+//
+// Each table has four rows.  In each row, element 0 is a bit mask of encoded
+// nibble values and element 1 is a bit mask of the plaintext nibble values
+// they may stand for (bit `i` set means value `i`).  `decode_nibble` maps an
+// encoded nibble to the candidates in element 1; `encode_nibble` goes the
+// other way.  `decode_pair` intersects the candidates obtained from the first
+// character of a pair (the `A` tables) with those from the second character
+// (the `B` tables) to pin down a single plaintext nibble.
+
+/// Table for the high nibble of the first character of each encoded pair.
+static AH: [[u16; 2]; 4] = [
+    [b(2), b(2) | b(3) | b(6) | b(7)],
+    [b(3), b(0) | b(1) | b(4) | b(5)],
+    [b(4) | b(7), b(8) | b(9) | b(12) | b(13)],
+    [b(5) | b(6), b(10) | b(11) | b(14) | b(15)],
+];
+
+/// Table for the low nibble of the first character of each encoded pair.
+static AL: [[u16; 2]; 4] = [
+    [b(0) | b(3) | b(12) | b(15), b(0) | b(1) | b(4) | b(5)],
+    [b(1) | b(2) | b(13) | b(14), b(2) | b(3) | b(6) | b(7)],
+    [b(4) | b(7) | b(8) | b(11), b(8) | b(9) | b(12) | b(13)],
+    [b(5) | b(6) | b(9) | b(10), b(10) | b(11) | b(14) | b(15)],
+];
+
+/// Table for the high nibble of the second character of each encoded pair.
+static BH: [[u16; 2]; 4] = [
+    [b(2), b(1) | b(3) | b(9) | b(11)],
+    [b(3), b(0) | b(2) | b(8) | b(10)],
+    [b(4) | b(7), b(4) | b(6) | b(12) | b(14)],
+    [b(5) | b(6), b(5) | b(7) | b(13) | b(15)],
+];
+
+/// Table for the low nibble of the second character of each encoded pair.
+static BL: [[u16; 2]; 4] = [
+    [b(0) | b(3) | b(12) | b(15), b(0) | b(2) | b(8) | b(10)],
+    [b(1) | b(2) | b(13) | b(14), b(1) | b(3) | b(9) | b(11)],
+    [b(4) | b(7) | b(8) | b(11), b(4) | b(6) | b(12) | b(14)],
+    [b(5) | b(6) | b(9) | b(10), b(5) | b(7) | b(13) | b(15)],
+];
+
+/// Looks up the encoded `nibble` in `table`.
+///
+/// Returns the second element of the first row whose first element has bit
+/// `nibble` set, or 0 if no row does.
+fn decode_nibble(table: &[[u16; 2]; 4], nibble: u8) -> u16 {
+    table
+        .iter()
+        .find(|section| section[0] & (1 << nibble) != 0)
+        .map_or(0, |section| section[1])
+}
+
+/// If exactly one bit is set in `x`, returns that bit's index; otherwise
+/// returns `None`.
+fn find_1bit(x: u16) -> Option<u8> {
+    if x.count_ones() == 1 {
+        Some(x.trailing_zeros() as u8)
+    } else {
+        None
+    }
+}
+
+/// Decodes the pair of encoded characters `a` and `b` into one plaintext
+/// byte.
+///
+/// Each nibble of the result is the single value allowed by both characters'
+/// tables.  Returns `None` if either nibble is not pinned down to exactly one
+/// value.
+fn decode_pair(a: u8, b: u8) -> Option<u8> {
+    let hi_candidates = decode_nibble(&AH, a >> 4) & decode_nibble(&BH, b >> 4);
+    let lo_candidates = decode_nibble(&AL, a & 15) & decode_nibble(&BL, b & 15);
+    let hi = find_1bit(hi_candidates)?;
+    let lo = find_1bit(lo_candidates)?;
+    Some((hi << 4) | lo)
+}
+
+/// Lists, in ascending order, the encoded nibble values that `table` allows
+/// for the plaintext `nibble`.
+///
+/// # Panics
+///
+/// Panics if no row of `table` covers `nibble`.
+fn encode_nibble(table: &[[u16; 2]; 4], nibble: u8) -> Vec<u8> {
+    let wanted: u16 = 1 << nibble;
+    let Some(section) = table.iter().find(|section| section[1] & wanted != 0) else {
+        unreachable!()
+    };
+
+    // Collect the index of every bit set in the row's encoded-value mask.
+    let mut outputs = Vec::with_capacity(4);
+    outputs.extend((0..16u8).filter(|&code| section[0] & (1u16 << code) != 0));
+    outputs
+}
+
+/// Lists every character that may encode the plaintext `byte` at one position
+/// of a pair, using `hi_table` for the high nibble and `lo_table` for the low
+/// nibble.
+///
+/// Every combination of an allowed high nibble with an allowed low nibble is
+/// produced, except that the character with code 127 is left out.
+fn encode_byte(hi_table: &[[u16; 2]; 4], lo_table: &[[u16; 2]; 4], byte: u8) -> Vec<char> {
+    let hi_variants = encode_nibble(hi_table, byte >> 4);
+    let lo_variants = encode_nibble(lo_table, byte & 15);
+
+    let mut variants = Vec::with_capacity(hi_variants.len() * lo_variants.len());
+    variants.extend(
+        hi_variants
+            .iter()
+            .flat_map(|&hi| lo_variants.iter().map(move |&lo| (hi << 4) | lo))
+            .filter(|&code| code != 127)
+            .map(char::from),
+    );
+    variants
+}
+
+/// An encoded password.
+///
+/// SPSS calls these "encrypted passwords", but they are not encrypted.  They
+/// are encoded with a simple scheme, analogous to base64 encoding but
+/// one-to-many: any plaintext password maps to many possible encoded passwords.
+///
+/// The encoding scheme maps each plaintext password byte to 2 ASCII characters,
+/// using only at most the first 10 bytes of the plaintext password.  Thus, an
+/// encoded password is always a multiple of 2 characters long, and never longer
+/// than 20 characters.  The characters that the encoder produces are always in
+/// the ASCII range 32 (space) through 126.  Each successive pair of characters
+/// in the password encodes a single byte in the plaintext password.
+///
+/// This struct supports both encoding and decoding passwords.
+///
+/// Internally, this holds one list per encoded character position, giving the
+/// characters that may appear at that position.
+#[derive(Clone, Debug)]
+pub struct EncodedPassword(Vec<Vec<char>>);
+
+impl EncodedPassword {
+    /// Wraps the already-encoded password `encoded`.
+    ///
+    /// Returns `None` unless `encoded` is an even number of bytes, no more
+    /// than 20, each in the range 32 through 127.
+    pub fn from_encoded(encoded: &[u8]) -> Option<Self> {
+        let well_formed = encoded.len() <= 20
+            && encoded.len() % 2 == 0
+            && encoded.iter().all(|&byte| matches!(byte, 32..=127));
+        well_formed.then(|| {
+            // Each position has exactly one candidate: the character given.
+            EncodedPassword(
+                encoded
+                    .iter()
+                    .map(|&byte| vec![char::from(byte)])
+                    .collect(),
+            )
+        })
+    }
+
+    /// Encodes the `plaintext` password.  No more than its first 10 bytes are
+    /// used.
+    pub fn from_plaintext(plaintext: &[u8]) -> EncodedPassword {
+        let input = &plaintext[..plaintext.len().min(10)];
+
+        // Every plaintext byte yields two positions: candidates for the first
+        // character of its pair, then candidates for the second.
+        let mut positions = Vec::with_capacity(2 * input.len());
+        for &byte in input {
+            positions.push(encode_byte(&AH, &AL, byte));
+            positions.push(encode_byte(&BH, &BL, byte));
+        }
+        EncodedPassword(positions)
+    }
+
+    /// Returns the number of variations of this encoded password.
+    ///
+    /// An [EncodedPassword] created by [EncodedPassword::from_plaintext] has
+    /// many variations.  Each of the two characters for a plaintext byte has
+    /// between 4 and 8 candidates, so an `n`-byte plaintext password has at
+    /// least `16**n` and at most `64**n` variations.  Even `64**10` (2 to the
+    /// 60th power) fits in a `u64`.
+    ///
+    /// An [EncodedPassword] created by [EncodedPassword::from_encoded] has only
+    /// a single variation, the one passed in to that function.
+    pub fn n_variants(&self) -> u64 {
+        self.0
+            .iter()
+            .fold(1, |total, variants| total * variants.len() as u64)
+    }
+
+    /// Returns the variation of this encoded password numbered `index`.  All
+    /// variations decode the same way.
+    ///
+    /// `index` is treated as a mixed-radix number whose digits, least
+    /// significant first, select the character at each position.
+    pub fn variant(&self, mut index: u64) -> String {
+        self.0
+            .iter()
+            .map(|variants| {
+                let n = variants.len() as u64;
+                let choice = variants[(index % n) as usize];
+                index /= n;
+                choice
+            })
+            .collect()
+    }
+
+    /// Returns the decoded (plaintext) version of this encoded password.
+    ///
+    /// Decoding uses the first candidate at each position; any trailing
+    /// unpaired position is ignored.
+    pub fn decode(&self) -> SmallVec<[u8; 10]> {
+        self.0
+            .chunks_exact(2)
+            .map(|pair| decode_pair(pair[0][0] as u8, pair[1][0] as u8).unwrap())
+            .collect()
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::{io::Cursor, path::Path};
+
+    use crate::crypto::{EncodedPassword, EncryptedFile, FileType};
+
+    /// Decrypts `input_name` with the literal `password` and checks that the
+    /// file is reported as `file_type` and that the plaintext matches the
+    /// contents of `expected_name`.  Both names are relative to
+    /// `src/crypto/testdata`.
+    fn test_decrypt(input_name: &Path, expected_name: &Path, password: &str, file_type: FileType) {
+        let input_filename = Path::new("src/crypto/testdata").join(input_name);
+        let input = std::fs::read(&input_filename).unwrap();
+        let mut cursor = Cursor::new(&input);
+        let file = EncryptedFile::new(&mut cursor).unwrap();
+        assert_eq!(file.file_type(), file_type);
+        let mut reader = file.unlock_literal(password.as_bytes()).unwrap();
+        assert_eq!(reader.file_type(), file_type);
+        let mut actual = Vec::new();
+        std::io::copy(&mut reader, &mut actual).unwrap();
+
+        let expected_filename = Path::new("src/crypto/testdata").join(expected_name);
+        let expected = std::fs::read(&expected_filename).unwrap();
+
+        // Compare with `assert!` rather than `assert_eq!` so that a failure
+        // names the files without dumping both byte vectors.
+        assert!(
+            actual == expected,
+            "decrypted contents of {} differ from {}",
+            input_filename.display(),
+            expected_filename.display()
+        );
+    }
+
+    #[test]
+    fn sys_file() {
+        test_decrypt(
+            Path::new("test-encrypted.sav"),
+            Path::new("test.sav"),
+            "pspp",
+            FileType::Data,
+        );
+    }
+
+    #[test]
+    fn syntax_file() {
+        test_decrypt(
+            Path::new("test-encrypted.sps"),
+            Path::new("test.sps"),
+            "password",
+            FileType::Syntax,
+        );
+    }
+
+    #[test]
+    fn spv_file() {
+        test_decrypt(
+            Path::new("test-encrypted.spv"),
+            Path::new("test.spv"),
+            "Password1",
+            FileType::Viewer,
+        );
+    }
+
+    #[test]
+    fn password_encoding() {
+        // Decode a few specific passwords.
+        assert_eq!(
+            EncodedPassword::from_encoded(b"-|")
+                .unwrap()
+                .decode()
+                .as_slice(),
+            b"b"
+        );
+        assert_eq!(
+            EncodedPassword::from_encoded(b" A")
+                .unwrap()
+                .decode()
+                .as_slice(),
+            b"a"
+        );
+
+        // Check that the encoding and decoding algorithms are inverses
+        // for individual characters at least.
+        for plaintext in 0..=255 {
+            let encoded = EncodedPassword::from_plaintext(&[plaintext]);
+            for variant in 0..encoded.n_variants() {
+                let encoded_variant = encoded.variant(variant);
+                let decoded = EncodedPassword::from_encoded(encoded_variant.as_bytes())
+                    .unwrap()
+                    .decode();
+                assert_eq!(&[plaintext], decoded.as_slice());
+            }
+        }
+    }
+}
diff --git a/rust/pspp/src/crypto/mod.rs b/rust/pspp/src/crypto/mod.rs

deleted file mode 100644 (file)

index c2e86cd..0000000
--- a/rust/pspp/src/crypto/mod.rs
+++ /dev/null
@@ -1,668 +0,0 @@
-//! # Decryption for SPSS encrypted files
-//!
-//! SPSS supports encryption using a password for data, viewer, and syntax
-//! files.  The encryption mechanism is poorly designed, so this module provides
-//! support for decrypting, but not encrypting, the SPSS format.
-//! Use [EncryptedFile] as the starting point for reading an encrypted file.
-//!
-//! SPSS also supports what calls "encrypted passwords".  Use [EncodedPassword]
-//! to encode and decode these passwords.
-
-// Warn about missing docs, but not for items declared with `#[cfg(test)]`.
-#![cfg_attr(not(test), warn(missing_docs))]
-
-use aes::{
-    cipher::{generic_array::GenericArray, BlockDecrypt, KeyInit},
-    Aes256, Aes256Dec,
-};
-use cmac::{Cmac, Mac};
-use smallvec::SmallVec;
-use std::{
-    fmt::Debug,
-    io::{BufRead, Error as IoError, ErrorKind, Read, Seek, SeekFrom},
-};
-use thiserror::Error as ThisError;
-
-use binrw::{io::NoSeek, BinRead};
-
-/// Error reading an encrypted file.
-#[derive(Clone, Debug, ThisError)]
-pub enum Error {
-    /// I/O error.
-    #[error("I/O error reading encrypted file wrapper ({0})")]
-    IoError(ErrorKind),
-
-    /// Invalid padding in final encrypted data block.
-    #[error("Invalid padding in final encrypted data block")]
-    InvalidPadding,
-
-    /// Not an encrypted file.
-    #[error("Not an encrypted file")]
-    NotEncrypted,
-
-    /// Encrypted file has invalid length.
-    #[error("Encrypted file has invalid length {0} (expected 4 more than a multiple of 16).")]
-    InvalidLength(u64),
-
-    /// Unknown file type.
-    #[error("Unknown file type {0:?}.")]
-    UnknownFileType(String),
-}
-
-impl From<std::io::Error> for Error {
-    fn from(value: std::io::Error) -> Self {
-        Self::IoError(value.kind())
-    }
-}
-
-#[derive(BinRead)]
-struct EncryptedHeader {
-    /// Fixed as `1c 00 00 00 00 00 00 00` in practice.
-    _ignore: [u8; 8],
-
-    /// File type.
-    #[br(magic = b"ENCRYPTED")]
-    file_type: [u8; 3],
-
-    /// Fixed as `15 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00` in practice.
-    _ignore2: [u8; 16],
-}
-
-/// An encrypted file.
-pub struct EncryptedFile<R> {
-    reader: R,
-    file_type: FileType,
-
-    /// Length of the ciphertext (excluding the 36-byte header).
-    length: u64,
-
-    /// First block of ciphertext, for verifying that any password the user
-    /// tries is correct.
-    first_block: [u8; 16],
-
-    /// Last block of ciphertext, for checking padding and determining the
-    /// plaintext length.
-    last_block: [u8; 16],
-}
-
-/// Type of encrypted file.
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub enum FileType {
-    /// A `.sps` syntax file.
-    Syntax,
-
-    /// A `.spv` viewer file.
-    Viewer,
-
-    /// A `.sav` data file.
-    Data,
-}
-
-impl<R> EncryptedFile<R>
-where
-    R: Read + Seek,
-{
-    /// Opens `reader` as an encrypted file.
-    ///
-    /// This reads enough of the file to verify that it is in the expected
-    /// format and returns an error if it cannot be read or is not the expected
-    /// format.
-    ///
-    /// `reader` doesn't need to be [BufRead], and probably should not be.  The
-    /// [EncryptedReader] returned by [unlock] or [unlock_literal] will be
-    /// [BufRead].
-    ///
-    /// [unlock]: Self::unlock
-    /// [unlock_literal]: Self::unlock_literal
-    pub fn new(mut reader: R) -> Result<Self, Error> {
-        let header =
-            EncryptedHeader::read_le(&mut NoSeek::new(&mut reader)).map_err(
-                |error| match error {
-                    binrw::Error::BadMagic { .. } => Error::NotEncrypted,
-                    binrw::Error::Io(error) => Error::IoError(error.kind()),
-                    _ => unreachable!(),
-                },
-            )?;
-        let file_type = match &header.file_type {
-            b"SAV" => FileType::Data,
-            b"SPV" => FileType::Viewer,
-            b"SPS" => FileType::Syntax,
-            _ => {
-                return Err(Error::UnknownFileType(
-                    header.file_type.iter().map(|b| *b as char).collect(),
-                ))
-            }
-        };
-        let mut first_block = [0; 16];
-        reader.read_exact(&mut first_block)?;
-        let length = reader.seek(SeekFrom::End(-16))? + 16;
-        if length < 36 + 16 || (length - 36) % 16 != 0 {
-            return Err(Error::InvalidLength(length + 36));
-        }
-        let mut last_block = [0; 16];
-        reader.read_exact(&mut last_block)?;
-        reader.seek(SeekFrom::Start(36))?;
-        Ok(Self {
-            reader,
-            file_type,
-            length,
-            first_block,
-            last_block,
-        })
-    }
-
-    /// Tries to unlock the encrypted file using both `password` and with
-    /// `password` decoded with [EncodedPassword::decode].  If successful,
-    /// returns an [EncryptedReader] for the file; on failure, returns the
-    /// [EncryptedFile] again for another try.
-    pub fn unlock(self, password: &[u8]) -> Result<EncryptedReader<R>, Self> {
-        self.unlock_literal(password).or_else(|this| {
-            match EncodedPassword::from_encoded(password) {
-                Some(encoded) => this.unlock_literal(&encoded.decode()),
-                None => Err(this),
-            }
-        })
-    }
-
-    /// Tries to unlock the encrypted file using just `password`.  If
-    /// successful, returns an [EncryptedReader] for the file; on failure,
-    /// returns the [EncryptedFile] again for another try.
-    ///
-    /// If the password itself might be encoded ("encrypted"), instead use
-    /// [Self::unlock] to try it both ways.
-    pub fn unlock_literal(self, password: &[u8]) -> Result<EncryptedReader<R>, Self> {
-        // NIST SP 800-108 fixed data.
-        #[rustfmt::skip]
-        static  FIXED: &[u8] = &[
-            // i
-            0x00, 0x00, 0x00, 0x01,
-
-            // label
-            0x35, 0x27, 0x13, 0xcc, 0x53, 0xa7, 0x78, 0x89,
-            0x87, 0x53, 0x22, 0x11, 0xd6, 0x5b, 0x31, 0x58,
-            0xdc, 0xfe, 0x2e, 0x7e, 0x94, 0xda, 0x2f, 0x00,
-            0xcc, 0x15, 0x71, 0x80, 0x0a, 0x6c, 0x63, 0x53,
-
-            // delimiter
-            0x00,
-
-            // context
-            0x38, 0xc3, 0x38, 0xac, 0x22, 0xf3, 0x63, 0x62,
-            0x0e, 0xce, 0x85, 0x3f, 0xb8, 0x07, 0x4c, 0x4e,
-            0x2b, 0x77, 0xc7, 0x21, 0xf5, 0x1a, 0x80, 0x1d,
-            0x67, 0xfb, 0xe1, 0xe1, 0x83, 0x07, 0xd8, 0x0d,
-
-            // L
-            0x00, 0x00, 0x01, 0x00,
-        ];
-
-        // Truncate password to at most 10 bytes.
-        let password = password.get(..10).unwrap_or(password);
-        let n = password.len();
-
-        //  padded_password = password padded with zeros to 32 bytes.
-        let mut padded_password = [0; 32];
-        padded_password[..n].copy_from_slice(password);
-
-        // cmac = CMAC(padded_password, fixed).
-        let mut cmac = <Cmac<Aes256> as Mac>::new_from_slice(&padded_password).unwrap();
-        cmac.update(FIXED);
-        let cmac = cmac.finalize().into_bytes();
-
-        // The key is the cmac repeated twice.
-        let mut key = [0; 32];
-        key[..16].copy_from_slice(cmac.as_slice());
-        key[16..].copy_from_slice(cmac.as_slice());
-
-        // Use key to initialize AES.
-        let aes = <Aes256Dec as KeyInit>::new_from_slice(&key).unwrap();
-
-        // Decrypt first block to verify password.
-        let mut out = [0; 16];
-        aes.decrypt_block_b2b(
-            GenericArray::from_slice(&self.first_block),
-            GenericArray::from_mut_slice(&mut out),
-        );
-        static MAGIC: &[&[u8]] = &[
-            b"$FL2@(#)",
-            b"$FL3@(#)",
-            b"* Encoding",
-            b"PK\x03\x04\x14\0\x08",
-        ];
-        if !MAGIC.iter().any(|magic| out.starts_with(magic)) {
-            return Err(self);
-        }
-
-        // Decrypt last block to check padding and get final length.
-        aes.decrypt_block_b2b(
-            GenericArray::from_slice(&self.last_block),
-            GenericArray::from_mut_slice(&mut out),
-        );
-        let Some(padding_length) = parse_padding(&out) else {
-            return Err(self);
-        };
-
-        Ok(EncryptedReader::new(
-            self.reader,
-            aes,
-            self.file_type,
-            self.length - 36 - padding_length as u64,
-        ))
-    }
-
-    /// Returns the type of encrypted file.
-    pub fn file_type(&self) -> FileType {
-        self.file_type
-    }
-}
-
-fn parse_padding(block: &[u8; 16]) -> Option<usize> {
-    let pad = block[15] as usize;
-    if (1..=16).contains(&pad) && block[16 - pad..].iter().all(|b| *b == pad as u8) {
-        Some(pad)
-    } else {
-        None
-    }
-}
-
-impl<R> Debug for EncryptedFile<R>
-where
-    R: Read,
-{
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "EncryptedFile({:?})", &self.file_type)
-    }
-}
-
-/// Encrypted file reader.
-///
-/// This implements [Read] and [Seek] for SPSS encrypted files.  To construct an
-/// [EncryptedReader], call [EncryptedFile::new], then [EncryptedFile::unlock].
-pub struct EncryptedReader<R> {
-    /// Underlying reader.
-    reader: R,
-
-    /// AES-256 decryption key.
-    aes: Aes256Dec,
-
-    /// Type of file.
-    file_type: FileType,
-
-    /// Plaintext file length (not including the file header or padding).
-    length: u64,
-
-    /// Plaintext data buffer.
-    buffer: Box<[u8; 4096]>,
-
-    /// Plaintext offset of the byte in `buffer[0]`.  A multiple of 16 less than
-    /// or equal to `length`.
-    start: u64,
-
-    /// Number of bytes in buffer (`0 <= head <= 4096`).
-    head: usize,
-
-    /// Offset in buffer of the next byte to read (`head <= tail`).
-    tail: usize,
-}
-
-impl<R> EncryptedReader<R> {
-    fn new(reader: R, aes: Aes256Dec, file_type: FileType, length: u64) -> Self {
-        Self {
-            reader,
-            aes,
-            file_type,
-            length,
-            buffer: Box::new([0; 4096]),
-            start: 0,
-            head: 0,
-            tail: 0,
-        }
-    }
-
-    fn read_buffer(&mut self, buf: &mut [u8]) -> Result<usize, IoError> {
-        let n = buf.len().min(self.head - self.tail);
-        buf[..n].copy_from_slice(&self.buffer[self.tail..n + self.tail]);
-        self.tail += n;
-        Ok(n)
-    }
-
-    /// Returns the type of encrypted file.
-    pub fn file_type(&self) -> FileType {
-        self.file_type
-    }
-}
-
-impl<R> EncryptedReader<R>
-where
-    R: Read,
-{
-    fn fill_buffer(&mut self, offset: u64) -> Result<(), IoError> {
-        self.start = offset / 16 * 16;
-        self.head = 0;
-        self.tail = (offset % 16) as usize;
-        let n = self.buffer.len().min((self.length - self.start) as usize);
-        self.reader
-            .read_exact(&mut self.buffer[..n.next_multiple_of(16)])?;
-        for offset in (0..n).step_by(16) {
-            self.aes.decrypt_block(GenericArray::from_mut_slice(
-                &mut self.buffer[offset..offset + 16],
-            ));
-        }
-        self.head = n;
-        Ok(())
-    }
-}
-
-impl<R> Read for EncryptedReader<R>
-where
-    R: Read,
-{
-    fn read(&mut self, buf: &mut [u8]) -> Result<usize, IoError> {
-        if self.tail < self.head {
-            self.read_buffer(buf)
-        } else {
-            let offset = self.start + self.head as u64;
-            if offset < self.length {
-                self.fill_buffer(offset)?;
-                self.read_buffer(buf)
-            } else {
-                Ok(0)
-            }
-        }
-    }
-}
-
-impl<R> Seek for EncryptedReader<R>
-where
-    R: Read + Seek,
-{
-    fn seek(&mut self, pos: SeekFrom) -> Result<u64, IoError> {
-        let offset = match pos {
-            SeekFrom::Start(offset) => Some(offset),
-            SeekFrom::End(relative) => self.length.checked_add_signed(relative),
-            SeekFrom::Current(relative) => {
-                (self.start + self.tail as u64).checked_add_signed(relative)
-            }
-        }
-        .filter(|offset| *offset < u64::MAX - 36)
-        .ok_or(IoError::from(ErrorKind::InvalidInput))?;
-        if offset != self.start + self.tail as u64 {
-            self.reader.seek(SeekFrom::Start(offset / 16 * 16 + 36))?;
-            self.fill_buffer(offset)?;
-        }
-        Ok(offset)
-    }
-}
-
-impl<R> BufRead for EncryptedReader<R>
-where
-    R: Read + Seek,
-{
-    fn fill_buf(&mut self) -> std::io::Result<&[u8]> {
-        if self.tail >= self.head {
-            let offset = self.start + self.head as u64;
-            if offset < self.length {
-                self.fill_buffer(offset)?;
-            }
-        }
-        Ok(&self.buffer[self.tail..self.head])
-    }
-
-    fn consume(&mut self, amount: usize) {
-        self.tail += amount;
-        debug_assert!(self.tail <= self.head);
-    }
-}
-
-const fn b(x: i32) -> u16 {
-    1 << x
-}
-
-static AH: [[u16; 2]; 4] = [
-    [b(2), b(2) | b(3) | b(6) | b(7)],
-    [b(3), b(0) | b(1) | b(4) | b(5)],
-    [b(4) | b(7), b(8) | b(9) | b(12) | b(13)],
-    [b(5) | b(6), b(10) | b(11) | b(14) | b(15)],
-];
-
-static AL: [[u16; 2]; 4] = [
-    [b(0) | b(3) | b(12) | b(15), b(0) | b(1) | b(4) | b(5)],
-    [b(1) | b(2) | b(13) | b(14), b(2) | b(3) | b(6) | b(7)],
-    [b(4) | b(7) | b(8) | b(11), b(8) | b(9) | b(12) | b(13)],
-    [b(5) | b(6) | b(9) | b(10), b(10) | b(11) | b(14) | b(15)],
-];
-
-static BH: [[u16; 2]; 4] = [
-    [b(2), b(1) | b(3) | b(9) | b(11)],
-    [b(3), b(0) | b(2) | b(8) | b(10)],
-    [b(4) | b(7), b(4) | b(6) | b(12) | b(14)],
-    [b(5) | b(6), b(5) | b(7) | b(13) | b(15)],
-];
-
-static BL: [[u16; 2]; 4] = [
-    [b(0) | b(3) | b(12) | b(15), b(0) | b(2) | b(8) | b(10)],
-    [b(1) | b(2) | b(13) | b(14), b(1) | b(3) | b(9) | b(11)],
-    [b(4) | b(7) | b(8) | b(11), b(4) | b(6) | b(12) | b(14)],
-    [b(5) | b(6) | b(9) | b(10), b(5) | b(7) | b(13) | b(15)],
-];
-
-fn decode_nibble(table: &[[u16; 2]; 4], nibble: u8) -> u16 {
-    for section in table.iter() {
-        if section[0] & (1 << nibble) != 0 {
-            return section[1];
-        }
-    }
-    0
-}
-
-fn find_1bit(x: u16) -> Option<u8> {
-    x.is_power_of_two().then(|| x.trailing_zeros() as u8)
-}
-
-fn decode_pair(a: u8, b: u8) -> Option<u8> {
-    let x = find_1bit(decode_nibble(&AH, a >> 4) & decode_nibble(&BH, b >> 4))?;
-    let y = find_1bit(decode_nibble(&AL, a & 15) & decode_nibble(&BL, b & 15))?;
-    Some((x << 4) | y)
-}
-
-fn encode_nibble(table: &[[u16; 2]; 4], nibble: u8) -> Vec<u8> {
-    for section in table.iter() {
-        if section[1] & (1 << nibble) != 0 {
-            let mut outputs = Vec::with_capacity(4);
-            let mut bits = section[0];
-            while bits != 0 {
-                outputs.push(bits.trailing_zeros() as u8);
-                bits &= bits - 1;
-            }
-            return outputs;
-        }
-    }
-    unreachable!()
-}
-
-fn encode_byte(hi_table: &[[u16; 2]; 4], lo_table: &[[u16; 2]; 4], byte: u8) -> Vec<char> {
-    let hi_variants = encode_nibble(hi_table, byte >> 4);
-    let lo_variants = encode_nibble(lo_table, byte & 15);
-    let mut variants = Vec::with_capacity(hi_variants.len() * lo_variants.len());
-    for hi in hi_variants.iter().copied() {
-        for lo in lo_variants.iter().copied() {
-            let byte = (hi << 4) | lo;
-            if byte != 127 {
-                variants.push(byte as char);
-            }
-        }
-    }
-    variants
-}
-
-/// An encoded password.
-///
-/// SPSS calls these "encrypted passwords", but they are not encrypted.  They
-/// are encoded with a simple scheme, analogous to base64 encoding but
-/// one-to-many: any plaintext password maps to many possible encoded passwords.
-///
-/// The encoding scheme maps each plaintext password byte to 2 ASCII characters,
-/// using only at most the first 10 bytes of the plaintext password.  Thus, an
-/// encoded password is always a multiple of 2 characters long, and never longer
-/// than 20 characters.  The characters in an encoded password are always in the
-/// graphic ASCII range 33 through 126.  Each successive pair of characters in
-/// the password encodes a single byte in the plaintext password.
-///
-/// This struct supports both encoding and decoding passwords.
-#[derive(Clone, Debug)]
-pub struct EncodedPassword(Vec<Vec<char>>);
-
-impl EncodedPassword {
-    /// Creates an [EncodedPassword] from an already-encoded password `encoded`.
-    /// Returns `None` if `encoded` is not a valid encoded password.
-    pub fn from_encoded(encoded: &[u8]) -> Option<Self> {
-        if encoded.len() > 20
-            || encoded.len() % 2 != 0
-            || !encoded.iter().all(|byte| (32..=127).contains(byte))
-        {
-            return None;
-        }
-
-        Some(EncodedPassword(
-            encoded.iter().map(|byte| vec![*byte as char]).collect(),
-        ))
-    }
-
-    /// Returns an [EncodedPassword] as an encoded version of the given
-    /// `plaintext` password.  Only the first 10 bytes, at most, of the
-    /// plaintext password is used.
-    pub fn from_plaintext(plaintext: &[u8]) -> EncodedPassword {
-        let input = plaintext.get(..10).unwrap_or(plaintext);
-        EncodedPassword(
-            input
-                .iter()
-                .copied()
-                .flat_map(|byte| [encode_byte(&AH, &AL, byte), encode_byte(&BH, &BL, byte)])
-                .collect(),
-        )
-    }
-
-    /// Returns the number of variations of this encoded password.
-    ///
-    /// An [EncodedPassword] created by [EncodedPassword::from_plaintext] has
-    /// many variations: between `16**n` and `32**n` for an `n`-byte plaintext
-    /// password, so up to `32**10` (about 1e15) for the 10-byte longest
-    /// plaintext passwords.
-    ///
-    /// An [EncodedPassword] created by [EncodedPassword::from_encoded] has only
-    /// a single variation, the one passed in by that function.
-    pub fn n_variants(&self) -> u64 {
-        self.0
-            .iter()
-            .map(|variants| variants.len() as u64)
-            .product()
-    }
-
-    /// Returns one variation of this encoded password, numbered `index`.  All
-    /// variations decode the same way.
-    pub fn variant(&self, mut index: u64) -> String {
-        let mut output = String::with_capacity(20);
-        for variants in &self.0 {
-            let n = variants.len() as u64;
-            output.push(variants[(index % n) as usize]);
-            index /= n;
-        }
-        output
-    }
-
-    /// Returns the decoded version of this encoded password.
-    pub fn decode(&self) -> SmallVec<[u8; 10]> {
-        let mut output = SmallVec::new();
-        for [a, b] in self.0.as_chunks::<2>().0 {
-            output.push(decode_pair(a[0] as u8, b[0] as u8).unwrap());
-        }
-        output
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use std::{io::Cursor, path::Path};
-
-    use crate::crypto::{EncodedPassword, EncryptedFile, FileType};
-
-    fn test_decrypt(input_name: &Path, expected_name: &Path, password: &str, file_type: FileType) {
-        let input_filename = Path::new("src/crypto/testdata").join(input_name);
-        let input = std::fs::read(&input_filename).unwrap();
-        let mut cursor = Cursor::new(&input);
-        let file = EncryptedFile::new(&mut cursor).unwrap();
-        assert_eq!(file.file_type(), file_type);
-        let mut reader = file.unlock_literal(password.as_bytes()).unwrap();
-        assert_eq!(reader.file_type(), file_type);
-        let mut actual = Vec::new();
-        std::io::copy(&mut reader, &mut actual).unwrap();
-
-        let expected_filename = Path::new("src/crypto/testdata").join(expected_name);
-        let expected = std::fs::read(&expected_filename).unwrap();
-        if actual != expected {
-            panic!();
-        }
-    }
-
-    #[test]
-    fn sys_file() {
-        test_decrypt(
-            Path::new("test-encrypted.sav"),
-            Path::new("test.sav"),
-            "pspp",
-            FileType::Data,
-        );
-    }
-
-    #[test]
-    fn syntax_file() {
-        test_decrypt(
-            Path::new("test-encrypted.sps"),
-            Path::new("test.sps"),
-            "password",
-            FileType::Syntax,
-        );
-    }
-
-    #[test]
-    fn spv_file() {
-        test_decrypt(
-            Path::new("test-encrypted.spv"),
-            Path::new("test.spv"),
-            "Password1",
-            FileType::Viewer,
-        );
-    }
-
-    #[test]
-    fn password_encoding() {
-        // Decode a few specific passwords.
-        assert_eq!(
-            EncodedPassword::from_encoded(b"-|")
-                .unwrap()
-                .decode()
-                .as_slice(),
-            b"b"
-        );
-        assert_eq!(
-            EncodedPassword::from_encoded(b" A")
-                .unwrap()
-                .decode()
-                .as_slice(),
-            b"a"
-        );
-
-        // Check that the encoding and decoding algorithms are inverses
-        // for individual characters at least.
-        for plaintext in 0..=255 {
-            let encoded = EncodedPassword::from_plaintext(&[plaintext]);
-            for variant in 0..encoded.n_variants() {
-                let encoded_variant = encoded.variant(variant);
-                let decoded = EncodedPassword::from_encoded(encoded_variant.as_bytes())
-                    .unwrap()
-                    .decode();
-                assert_eq!(&[plaintext], decoded.as_slice());
-            }
-        }
-    }
-}
diff --git a/rust/pspp/src/format.rs b/rust/pspp/src/format.rs

new file mode 100644 (file)

index 0000000..43ba519
--- /dev/null
+++ b/rust/pspp/src/format.rs
@@ -0,0 +1,1390 @@
+// PSPP - a program for statistical analysis.
+// Copyright (C) 2025 Free Software Foundation, Inc.
+//
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free Software
+// Foundation, either version 3 of the License, or (at your option) any later
+// version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+// details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program.  If not, see <http://www.gnu.org/licenses/>.
+
+use std::{
+    fmt::{Debug, Display, Formatter, Result as FmtResult, Write},
+    ops::{Not, RangeInclusive},
+    str::{Chars, FromStr},
+    sync::LazyLock,
+};
+
+use chrono::{Datelike, Local};
+use enum_iterator::{all, Sequence};
+use enum_map::{Enum, EnumMap};
+use serde::{Deserialize, Serialize};
+use thiserror::Error as ThisError;
+use unicode_width::UnicodeWidthStr;
+
+use crate::{
+    data::{ByteString, Datum},
+    sys::raw,
+    util::ToSmallString,
+    variable::{VarType, VarWidth},
+};
+
+mod display;
+mod parse;
+pub use display::{DisplayDatum, DisplayPlain, DisplayPlainF64};
+
+/// Errors reported when a format specification is invalid or does not suit
+/// the variable it is applied to.
+///
+/// The `#[error(...)]` strings are the user-visible messages.
+#[derive(Clone, ThisError, Debug, PartialEq, Eq)]
+pub enum Error {
+    /// A numeric format type code was not recognized.
+    #[error("Unknown format type {value}.")]
+    UnknownFormat { value: u16 },
+
+    /// The width is odd but the format type requires an even width.
+    #[error("Output format {0} specifies width {}, but {} requires an even width.", .0.w, .0.type_)]
+    OddWidthNotAllowed(UncheckedFormat),
+
+    /// The width lies outside the type's `min_width()..=max_width()` range.
+    #[error("Output format {0} specifies width {}, but {} requires a width between {} and {}.", .0.w, .0.type_, .0.type_.min_width(), .0.type_.max_width())]
+    BadWidth(UncheckedFormat),
+
+    /// Decimals were given for a format type that never allows them.
+    #[error("Output format {0} specifies decimal places, but {} format does not allow any decimals.", .0.type_)]
+    DecimalsNotAllowedForFormat(UncheckedFormat),
+
+    /// Decimals were given, but at this width the type allows none.
+    #[error("Output format {0} specifies {} decimal places, but with a width of {}, {} does not allow any decimal places.", .0.d, .0.w, .0.type_)]
+    DecimalsNotAllowedForWidth(UncheckedFormat),
+
+    /// More decimals were given than the type allows at this width; `max_d`
+    /// is the largest count that would have been accepted.
+    #[error("Output format {spec} specifies {} decimal places but, with a width of {}, {} allows at most {max_d} decimal places.", .spec.d, .spec.w, .spec.type_)]
+    TooManyDecimalsForWidth {
+        spec: UncheckedFormat,
+        max_d: Decimals,
+    },
+
+    /// A numeric format type was applied to a string variable.
+    #[error("String variable is not compatible with numeric format {0}.")]
+    UnnamedVariableNotCompatibleWithNumericFormat(Type),
+
+    /// A string format type was applied to a numeric variable.
+    #[error("Numeric variable is not compatible with string format {0}.")]
+    UnnamedVariableNotCompatibleWithStringFormat(Type),
+
+    /// A string format's width does not match the width of the named string
+    /// variable; `good_spec` is the format suggested in its place.
+    #[error("String variable {variable} with width {width} is not compatible with format {bad_spec}.  Use format {good_spec} instead.")]
+    NamedStringVariableBadSpecWidth {
+        variable: String,
+        width: Width,
+        bad_spec: Format,
+        good_spec: Format,
+    },
+
+    /// Same as [Error::NamedStringVariableBadSpecWidth], but without a
+    /// variable name in the message.
+    #[error("String variable with width {width} is not compatible with format {bad_spec}.  Use format {good_spec} instead.")]
+    UnnamedStringVariableBadSpecWidth {
+        width: Width,
+        bad_spec: Format,
+        good_spec: Format,
+    },
+}
+
+/// Broad grouping of format [Type]s.
+///
+/// The mapping from each [Type] to its category is defined by
+/// `impl From<Type> for Category`.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum Category {
+    // Numeric formats.
+    /// `F`, `COMMA`, `DOT`, `DOLLAR`, `PCT`, and `E`.
+    Basic,
+    /// The custom currency formats `CCA` through `CCE`.
+    Custom,
+    /// `N` and `Z`.
+    Legacy,
+    /// `P`, `PK`, `IB`, `PIB`, and `RB`.
+    Binary,
+    /// `PIBHEX` and `RBHEX`.
+    Hex,
+    /// Date formats, including `DATETIME` and `YMDHMS`.
+    Date,
+    /// `MTIME`, `TIME`, and `DTIME`.
+    Time,
+    /// `WKDAY` and `MONTH`.
+    DateComponent,
+
+    // String formats.
+    /// `A` and `AHEX`.
+    String,
+}
+
+impl From<Type> for Category {
+    /// Classifies the format type `source` into its [Category].
+    fn from(source: Type) -> Self {
+        match source {
+            // String formats.
+            Type::A | Type::AHex => Category::String,
+
+            // Date- and time-related formats.
+            Type::WkDay | Type::Month => Category::DateComponent,
+            Type::MTime | Type::Time | Type::DTime => Category::Time,
+            Type::Date
+            | Type::ADate
+            | Type::EDate
+            | Type::JDate
+            | Type::SDate
+            | Type::QYr
+            | Type::MoYr
+            | Type::WkYr
+            | Type::DateTime
+            | Type::YmdHms => Category::Date,
+
+            // Binary and hexadecimal formats.
+            Type::PIBHex | Type::RBHex => Category::Hex,
+            Type::P | Type::PK | Type::IB | Type::PIB | Type::RB => Category::Binary,
+
+            // Remaining numeric formats.
+            Type::N | Type::Z => Category::Legacy,
+            Type::CC(_) => Category::Custom,
+            Type::F | Type::Comma | Type::Dot | Type::Dollar | Type::Pct | Type::E => {
+                Category::Basic
+            }
+        }
+    }
+}
+
+/// Identifies one of the five custom currency formats (`CCA` through `CCE`);
+/// used as the payload of [Type::CC].
+#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq, Hash, Sequence, Serialize)]
+pub enum CC {
+    A,
+    B,
+    C,
+    D,
+    E,
+}
+
+impl CC {
+    /// Returns the one-letter uppercase name of this custom currency format
+    /// (`"A"` through `"E"`).
+    pub fn as_string(&self) -> &'static str {
+        match *self {
+            Self::A => "A",
+            Self::B => "B",
+            Self::C => "C",
+            Self::D => "D",
+            Self::E => "E",
+        }
+    }
+}
+
+impl Display for CC {
+    /// Writes the one-letter name from [CC::as_string], without applying any
+    /// width or padding options from `f`.
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        f.write_str(self.as_string())
+    }
+}
+
+/// A format type, without width or decimals.
+///
+/// [Type::as_str] gives the uppercase name of each type, and the `FromStr`
+/// implementation accepts those names case-insensitively.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Sequence, Serialize)]
+pub enum Type {
+    // Basic numeric formats.
+    F,
+    Comma,
+    Dot,
+    Dollar,
+    Pct,
+    E,
+
+    // Custom currency formats.
+    CC(CC),
+
+    // Legacy numeric formats.
+    N,
+    Z,
+
+    // Binary and hexadecimal formats.
+    P,
+    PK,
+    IB,
+    PIB,
+    PIBHex,
+    RB,
+    RBHex,
+
+    // Time and date formats.
+    Date,
+    ADate,
+    EDate,
+    JDate,
+    SDate,
+    QYr,
+    MoYr,
+    WkYr,
+    DateTime,
+    YmdHms,
+    MTime,
+    Time,
+    DTime,
+
+    // Date component formats.
+    WkDay,
+    Month,
+
+    // String formats.
+    A,
+    AHex,
+}
+
+/// Integer type of the width (`w`) part of a format specification.
+pub type Width = u16;
+/// Signed counterpart of [Width], used for width arithmetic whose
+/// intermediate results may be negative.
+pub type SignedWidth = i16;
+
+/// Integer type of the decimals (`d`) part of a format specification.
+pub type Decimals = u8;
+
+impl Type {
+    /// Returns the largest width allowed for this format type.
+    pub fn max_width(self) -> Width {
+        match self {
+            Self::P | Self::PK | Self::PIBHex | Self::RBHex => 16,
+            Self::IB | Self::PIB | Self::RB => 8,
+            Self::A => 32767,
+            Self::AHex => 32767 * 2,
+            _ => 40,
+        }
+    }
+
+    /// Returns the smallest width allowed for this format type.
+    pub fn min_width(self) -> Width {
+        match self {
+            // Basic numeric formats.
+            Self::F => 1,
+            Self::Comma => 1,
+            Self::Dot => 1,
+            Self::Dollar => 2,
+            Self::Pct => 2,
+            Self::E => 6,
+
+            // Custom currency formats.
+            Self::CC(_) => 2,
+
+            // Legacy numeric formats.
+            Self::N => 1,
+            Self::Z => 1,
+
+            // Binary and hexadecimal formats.
+            Self::P => 1,
+            Self::PK => 1,
+            Self::IB => 1,
+            Self::PIB => 1,
+            Self::PIBHex => 2,
+            Self::RB => 2,
+            Self::RBHex => 4,
+
+            // Time and date formats.
+            Self::Date => 9,
+            Self::ADate => 8,
+            Self::EDate => 8,
+            Self::JDate => 5,
+            Self::SDate => 8,
+            Self::QYr => 6,
+            Self::MoYr => 6,
+            Self::WkYr => 8,
+            Self::DateTime => 17,
+            Self::YmdHms => 16,
+            Self::MTime => 5,
+            Self::Time => 5,
+            Self::DTime => 8,
+
+            // Date component formats.
+            Self::WkDay => 2,
+            Self::Month => 3,
+
+            // String formats.
+            Self::A => 1,
+            Self::AHex => 2,
+        }
+    }
+
+    /// Returns the inclusive range from [Type::min_width] to
+    /// [Type::max_width].
+    pub fn width_range(self) -> RangeInclusive<Width> {
+        self.min_width()..=self.max_width()
+    }
+
+    /// Returns the largest number of decimals this format type allows at the
+    /// given `width`.
+    ///
+    /// `width` is clamped to `1..=40` before the computation, and the result
+    /// is clamped to `0..=16`.
+    pub fn max_decimals(self, width: Width) -> Decimals {
+        // Work in a signed type so that `width - k` can go negative; the
+        // final clamp turns any negative result into 0.
+        let width = width.clamp(1, 40) as SignedWidth;
+        let max = match self {
+            Self::F | Self::Comma | Self::Dot | Self::CC(_) => width - 1,
+            Self::Dollar | Self::Pct => width - 2,
+            Self::E => width - 7,
+            Self::N | Self::Z => width,
+            Self::P => width * 2 - 1,
+            Self::PK => width * 2,
+            Self::IB | Self::PIB => max_digits_for_bytes(width as usize) as SignedWidth,
+            Self::PIBHex => 0,
+            Self::RB | Self::RBHex => 16,
+            Self::Date
+            | Self::ADate
+            | Self::EDate
+            | Self::JDate
+            | Self::SDate
+            | Self::QYr
+            | Self::MoYr
+            | Self::WkYr => 0,
+            Self::DateTime => width - 21,
+            Self::YmdHms => width - 20,
+            Self::MTime => width - 6,
+            Self::Time => width - 9,
+            Self::DTime => width - 12,
+            Self::WkDay | Self::Month | Self::A | Self::AHex => 0,
+        };
+        max.clamp(0, 16) as Decimals
+    }
+
+    /// Returns true if this format type allows a nonzero number of decimals
+    /// at the largest width that [Type::max_decimals] considers.
+    pub fn takes_decimals(self) -> bool {
+        self.max_decimals(Width::MAX) > 0
+    }
+
+    /// Returns the [Category] that this format type belongs to.
+    pub fn category(self) -> Category {
+        self.into()
+    }
+
+    /// Returns the granularity of valid widths: 2 for the hexadecimal types
+    /// (`PIBHEX`, `RBHEX`, `AHEX`) and 1 for every other type.
+    pub fn width_step(self) -> Width {
+        if self.category() == Category::Hex || self == Self::AHex {
+            2
+        } else {
+            1
+        }
+    }
+
+    /// Returns `width` forced into [Type::width_range] and, for types whose
+    /// [Type::width_step] is 2, rounded down to an even number.
+    pub fn clamp_width(self, width: Width) -> Width {
+        let (min, max) = self.width_range().into_inner();
+        let width = width.clamp(min, max);
+        if self.width_step() == 2 {
+            width / 2 * 2
+        } else {
+            width
+        }
+    }
+
+    /// Returns [VarType::String] for `A` and `AHEX` and [VarType::Numeric]
+    /// for every other type.
+    pub fn var_type(self) -> VarType {
+        match self {
+            Self::A | Self::AHex => VarType::String,
+            _ => VarType::Numeric,
+        }
+    }
+
+    /// Checks whether this format is valid for a variable with the given
+    /// `var_type`.
+    ///
+    /// # Errors
+    ///
+    /// Returns [Error::UnnamedVariableNotCompatibleWithNumericFormat] for a
+    /// numeric format on a string variable, and
+    /// [Error::UnnamedVariableNotCompatibleWithStringFormat] for a string
+    /// format on a numeric variable.
+    pub fn check_type_compatibility(self, var_type: VarType) -> Result<(), Error> {
+        let my_type = self.var_type();
+        match (my_type, var_type) {
+            (VarType::Numeric, VarType::String) => {
+                Err(Error::UnnamedVariableNotCompatibleWithNumericFormat(self))
+            }
+            (VarType::String, VarType::Numeric) => {
+                Err(Error::UnnamedVariableNotCompatibleWithStringFormat(self))
+            }
+            _ => Ok(()),
+        }
+    }
+
+    /// Returns the uppercase name of this format type, e.g. `"DATETIME"`.
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Self::F => "F",
+            Self::Comma => "COMMA",
+            Self::Dot => "DOT",
+            Self::Dollar => "DOLLAR",
+            Self::Pct => "PCT",
+            Self::E => "E",
+            Self::CC(CC::A) => "CCA",
+            Self::CC(CC::B) => "CCB",
+            Self::CC(CC::C) => "CCC",
+            Self::CC(CC::D) => "CCD",
+            Self::CC(CC::E) => "CCE",
+            Self::N => "N",
+            Self::Z => "Z",
+            Self::P => "P",
+            Self::PK => "PK",
+            Self::IB => "IB",
+            Self::PIB => "PIB",
+            Self::PIBHex => "PIBHEX",
+            Self::RB => "RB",
+            Self::RBHex => "RBHEX",
+            Self::Date => "DATE",
+            Self::ADate => "ADATE",
+            Self::EDate => "EDATE",
+            Self::JDate => "JDATE",
+            Self::SDate => "SDATE",
+            Self::QYr => "QYR",
+            Self::MoYr => "MOYR",
+            Self::WkYr => "WKYR",
+            Self::DateTime => "DATETIME",
+            Self::YmdHms => "YMDHMS",
+            Self::MTime => "MTIME",
+            Self::Time => "TIME",
+            Self::DTime => "DTIME",
+            Self::WkDay => "WKDAY",
+            Self::Month => "MONTH",
+            Self::A => "A",
+            Self::AHex => "AHEX",
+        }
+    }
+
+    /// Returns a default datum for this format type: `Datum::sysmis()` for
+    /// numeric types, or a `Datum::String` holding `ByteString::default()`
+    /// for string types.
+    pub fn default_value(&self) -> Datum<ByteString> {
+        match self.var_type() {
+            VarType::Numeric => Datum::sysmis(),
+            VarType::String => Datum::String(ByteString::default()),
+        }
+    }
+}
+
+impl Display for Type {
+    /// Writes the uppercase name from [Type::as_str], without applying any
+    /// width or padding options from `f`.
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        f.write_str(self.as_str())
+    }
+}
+
+impl FromStr for Type {
+    type Err = ();
+
+    /// Looks up the format type whose [Type::as_str] name equals `s`,
+    /// ignoring ASCII case.  Fails with `()` if no type matches.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        all::<Type>()
+            .find(|candidate| candidate.as_str().eq_ignore_ascii_case(s))
+            .ok_or(())
+    }
+}
+
+/// Returns the table entry for `bytes` (0 through 7), or 20 for any larger
+/// value.
+///
+/// [Type::max_decimals] uses this as the decimal limit for `IB` and `PIB`.
+fn max_digits_for_bytes(bytes: usize) -> usize {
+    const DIGITS: [usize; 8] = [0, 3, 5, 8, 10, 13, 15, 17];
+    DIGITS.get(bytes).copied().unwrap_or(20)
+}
+
+/// A format specification whose type is still an unresolved name.
+///
+/// The `FromStr` implementation splits text of the form `NAMEw` or `NAMEw.d`
+/// into these three parts without checking that `name` is a known format
+/// type; converting to `UncheckedFormat` resolves the name.
+#[derive(Debug, PartialEq, Eq, Hash)]
+pub struct AbstractFormat {
+    /// Format type name, exactly as written.
+    pub name: String,
+    /// Width.
+    w: Width,
+    /// Number of decimals; 0 if the text had no `.d` part.
+    d: Decimals,
+}
+
+/// Splits `s` into its longest leading run of characters that satisfy
+/// `predicate` and the remainder that follows.
+fn split<F>(s: &str, predicate: F) -> (&str, &str)
+where
+    F: Fn(&char) -> bool,
+{
+    // Byte index of the first character that fails `predicate`, or the end of
+    // the string if every character passes.
+    let boundary = s.find(|c: char| !predicate(&c)).unwrap_or(s.len());
+    s.split_at(boundary)
+}
+
+impl FromStr for AbstractFormat {
+    type Err = ();
+
+    /// Parses `s` as `NAMEw` or `NAMEw.d`, where `NAME` is one or more ASCII
+    /// letters and `w` and `d` are decimal numbers.
+    ///
+    /// Fails with `()` if the name is empty, if `w` is missing or does not
+    /// fit in [Width], if a `.` is not followed by a `d` that fits in
+    /// [Decimals], or if any text is left over.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let (name, tail) = split(s, char::is_ascii_alphabetic);
+        if name.is_empty() {
+            return Err(());
+        }
+
+        let (width_digits, tail) = split(tail, char::is_ascii_digit);
+        let w = width_digits.parse::<Width>().map_err(|_| ())?;
+
+        // The decimals part is optional and defaults to 0.
+        let (d, tail) = match tail.strip_prefix('.') {
+            Some(after_dot) => {
+                let (decimal_digits, tail) = split(after_dot, char::is_ascii_digit);
+                (decimal_digits.parse::<Decimals>().map_err(|_| ())?, tail)
+            }
+            None => (0, tail),
+        };
+
+        if tail.is_empty() {
+            Ok(Self {
+                name: name.into(),
+                w,
+                d,
+            })
+        } else {
+            Err(())
+        }
+    }
+}
+
+impl TryFrom<AbstractFormat> for UncheckedFormat {
+    type Error = ();
+
+    /// Resolves the format name in `value` to a [Type], keeping the width and
+    /// decimals as they are.  Fails with `()` if the name is not a known
+    /// format type.
+    fn try_from(value: AbstractFormat) -> Result<Self, Self::Error> {
+        let AbstractFormat { name, w, d } = value;
+        let type_ = name.parse::<Type>()?;
+        Ok(UncheckedFormat::new(type_, w, d))
+    }
+}
+
+/// A format specification: a format type together with a width and a number
+/// of decimals.
+///
+/// The fields are private.  [Format::new] and `TryFrom<UncheckedFormat>`
+/// build a `Format` only after validating the combination.
+#[derive(Copy, Clone, PartialEq, Eq, Hash)]
+pub struct Format {
+    /// Format type.
+    type_: Type,
+    /// Width.
+    w: Width,
+    /// Number of decimals.
+    d: Decimals,
+}
+
+impl Serialize for Format {
+    /// Serializes the format through its string form, as produced by
+    /// `to_small_string::<16>()`.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        let text = self.to_small_string::<16>();
+        text.serialize(serializer)
+    }
+}
+
+impl Format {
+    /// The format `F40.0`.
+    pub const F40: Format = Format {
+        type_: Type::F,
+        w: 40,
+        d: 0,
+    };
+
+    /// The format `F40.1`.
+    pub const F40_1: Format = Format {
+        type_: Type::F,
+        w: 40,
+        d: 1,
+    };
+
+    /// The format `F40.2`.
+    pub const F40_2: Format = Format {
+        type_: Type::F,
+        w: 40,
+        d: 2,
+    };
+
+    /// The format `F40.3`.
+    pub const F40_3: Format = Format {
+        type_: Type::F,
+        w: 40,
+        d: 3,
+    };
+
+    /// The format `PCT40.1`.
+    pub const PCT40_1: Format = Format {
+        type_: Type::Pct,
+        w: 40,
+        d: 1,
+    };
+
+    /// The format `F8.2`.
+    pub const F8_2: Format = Format {
+        type_: Type::F,
+        w: 8,
+        d: 2,
+    };
+
+    /// The format `DATETIME40.0`.
+    pub const DATETIME40_0: Format = Format {
+        type_: Type::DateTime,
+        w: 40,
+        d: 0,
+    };
+
+    /// Returns the format type.
+    pub fn type_(self) -> Type {
+        self.type_
+    }
+
+    /// Returns the width.
+    pub fn w(self) -> usize {
+        self.w as usize
+    }
+
+    /// Returns the number of decimals.
+    pub fn d(self) -> usize {
+        self.d as usize
+    }
+
+    /// Builds a format from its parts, returning `None` if the combination
+    /// fails the validation done by `TryFrom<UncheckedFormat>`.
+    pub fn new(type_: Type, w: Width, d: Decimals) -> Option<Self> {
+        UncheckedFormat { type_, w, d }.try_into().ok()
+    }
+
+    /// Returns the default format for a variable of the given width: `F8.2`
+    /// for a numeric variable, or `A` with the variable's own width for a
+    /// string variable.
+    pub fn default_for_width(var_width: VarWidth) -> Self {
+        match var_width {
+            VarWidth::Numeric => Self::F8_2,
+            VarWidth::String(w) => Format {
+                type_: Type::A,
+                w,
+                d: 0,
+            },
+        }
+    }
+
+    /// Converts `source` into a valid format by adjusting its width and
+    /// decimals instead of rejecting it.
+    ///
+    /// The width is forced into the type's allowed range and, for types that
+    /// require an even width, rounded down to an even number.  Then, if the
+    /// requested decimals are allowed for the type at some width, the width
+    /// is increased until they fit.  Finally the decimals are limited to what
+    /// the resulting width allows.
+    pub fn fixed_from(source: &UncheckedFormat) -> Self {
+        let UncheckedFormat {
+            type_: format,
+            w,
+            d,
+        } = *source;
+        // `clamp_width` also rounds to the type's width step.  A plain range
+        // clamp could leave an odd width on a hexadecimal type, producing a
+        // format that `TryFrom<UncheckedFormat>` would reject.
+        let mut w = format.clamp_width(w);
+        if d <= format.max_decimals(Width::MAX) {
+            while d > format.max_decimals(w) {
+                w += 1;
+                assert!(w <= 40);
+            }
+        }
+        let d = d.clamp(0, format.max_decimals(w));
+        Self {
+            type_: format,
+            w,
+            d,
+        }
+    }
+
+    /// Returns the width of variable that this format suits: numeric for
+    /// numeric types, a string of `w` bytes for `A`, or a string of `w / 2`
+    /// bytes for `AHEX`.
+    pub fn var_width(self) -> VarWidth {
+        match self.type_ {
+            Type::A => VarWidth::String(self.w),
+            Type::AHex => VarWidth::String(self.w / 2),
+            _ => VarWidth::Numeric,
+        }
+    }
+
+    /// Returns whether this is a string or a numeric format.
+    pub fn var_type(self) -> VarType {
+        self.type_.var_type()
+    }
+
+    /// Checks whether this format specification is valid for a variable with
+    /// width `var_width`, returning the format itself on success.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error from [Type::check_type_compatibility] if the format
+    /// and the variable differ in type, or
+    /// [Error::UnnamedStringVariableBadSpecWidth] if a string format's width
+    /// does not match the variable's width.
+    pub fn check_width_compatibility(self, var_width: VarWidth) -> Result<Self, Error> {
+        // Verify that the format is right for the variable's type.
+        self.type_.check_type_compatibility(var_width.into())?;
+
+        if let VarWidth::String(w) = var_width {
+            if var_width != self.var_width() {
+                let bad_spec = self;
+                // Suggest the same type with the width that matches: `AHEX`
+                // takes two characters per byte.
+                let good_spec = if self.type_ == Type::A {
+                    Format { w, ..self }
+                } else {
+                    Format { w: w * 2, ..self }
+                };
+                return Err(Error::UnnamedStringVariableBadSpecWidth {
+                    width: w,
+                    bad_spec,
+                    good_spec,
+                });
+            }
+        }
+
+        Ok(self)
+    }
+
+    /// Returns a default datum for this format: `Datum::sysmis()` for a
+    /// numeric format, or a string built by `ByteString::spaces` with the
+    /// variable width for a string format.
+    pub fn default_value(&self) -> Datum<ByteString> {
+        match self.var_width() {
+            VarWidth::Numeric => Datum::sysmis(),
+            VarWidth::String(width) => Datum::String(ByteString::spaces(width as usize)),
+        }
+    }
+
+    /// Adjusts this format to suit a variable of the new `width`.
+    ///
+    /// A numeric format on a numeric variable is left alone.  A string format
+    /// on a string variable keeps its type and takes the new width (doubled
+    /// for `AHEX`).  If the variable changes between numeric and string, the
+    /// format is replaced by [Format::default_for_width].
+    pub fn resize(&mut self, width: VarWidth) {
+        match (self.var_width(), width) {
+            (VarWidth::Numeric, VarWidth::Numeric) => {}
+            (VarWidth::String(_), VarWidth::String(new_width)) => {
+                self.w = if self.type_ == Type::AHex {
+                    new_width * 2
+                } else {
+                    new_width
+                };
+            }
+            _ => *self = Self::default_for_width(width),
+        }
+    }
+
+    /// Applies `VarWidth::codepage_to_unicode` to this format's variable
+    /// width and, when `as_string_width` then yields a width, stores it back
+    /// into `w` (doubled for `AHEX`).
+    ///
+    /// NOTE(review): presumably `as_string_width` returns `None` for numeric
+    /// widths, leaving numeric formats unchanged; confirm against `VarWidth`.
+    pub fn codepage_to_unicode(&mut self) {
+        let mut width = self.var_width();
+        width.codepage_to_unicode();
+        if let Some(width) = width.as_string_width() {
+            if self.type_ == Type::AHex {
+                self.w = width as u16 * 2;
+            } else {
+                self.w = width as u16;
+            }
+        }
+    }
+}
+
+impl Debug for Format {
+    /// Debug output is the same text as the `Display` output, e.g. `F8.2`.
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        f.write_fmt(format_args!("{}", self))
+    }
+}
+
+impl Display for Format {
+    /// Writes the type name followed by the width, then `.d` when the type
+    /// takes decimals or the decimal count is nonzero.
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        let Self { type_, w, d } = *self;
+        write!(f, "{}{}", type_, w)?;
+        if d > 0 || type_.takes_decimals() {
+            write!(f, ".{}", d)
+        } else {
+            Ok(())
+        }
+    }
+}
+
+impl TryFrom<UncheckedFormat> for Format {
+    type Error = Error;
+
+    /// Validates `source` and converts it into a [Format].
+    ///
+    /// # Errors
+    ///
+    /// The checks run in this order and the first failure is returned:
+    ///
+    /// - [Error::OddWidthNotAllowed] if the width is not a multiple of the
+    ///   type's width step.
+    /// - [Error::BadWidth] if the width is outside the type's allowed range.
+    /// - If there are more decimals than the type allows at this width:
+    ///   [Error::DecimalsNotAllowedForFormat] when the type never takes
+    ///   decimals, [Error::TooManyDecimalsForWidth] when this width allows
+    ///   some but fewer, or [Error::DecimalsNotAllowedForWidth] when this
+    ///   width allows none.
+    fn try_from(source: UncheckedFormat) -> Result<Self, Self::Error> {
+        let UncheckedFormat {
+            type_: format,
+            w,
+            d,
+        } = source;
+        let max_d = format.max_decimals(w);
+        if w % format.width_step() != 0 {
+            Err(Error::OddWidthNotAllowed(source))
+        } else if !format.width_range().contains(&w) {
+            Err(Error::BadWidth(source))
+        } else if d > max_d {
+            // "Does not allow any decimals" applies only to types that never
+            // take decimals.  Types that do take them get a width-specific
+            // message instead.
+            if !format.takes_decimals() {
+                Err(Error::DecimalsNotAllowedForFormat(source))
+            } else if max_d > 0 {
+                Err(Error::TooManyDecimalsForWidth {
+                    spec: source,
+                    max_d,
+                })
+            } else {
+                Err(Error::DecimalsNotAllowedForWidth(source))
+            }
+        } else {
+            Ok(Format {
+                type_: format,
+                w,
+                d,
+            })
+        }
+    }
+}
+
+impl From<Type> for u16 {
+    /// Returns the numeric code for the format type `source`.
+    ///
+    /// The codes run from 1 to 41; 13, 14, 18, and 19 are not assigned to any
+    /// type.
+    ///
+    /// NOTE(review): presumably these are the format type codes stored in
+    /// system files; confirm against the file format documentation.
+    fn from(source: Type) -> Self {
+        match source {
+            Type::A => 1,
+            Type::AHex => 2,
+            Type::Comma => 3,
+            Type::Dollar => 4,
+            Type::F => 5,
+            Type::IB => 6,
+            Type::PIBHex => 7,
+            Type::P => 8,
+            Type::PIB => 9,
+            Type::PK => 10,
+            Type::RB => 11,
+            Type::RBHex => 12,
+            Type::Z => 15,
+            Type::N => 16,
+            Type::E => 17,
+            Type::Date => 20,
+            Type::Time => 21,
+            Type::DateTime => 22,
+            Type::ADate => 23,
+            Type::JDate => 24,
+            Type::DTime => 25,
+            Type::WkDay => 26,
+            Type::Month => 27,
+            Type::MoYr => 28,
+            Type::QYr => 29,
+            Type::WkYr => 30,
+            Type::Pct => 31,
+            Type::Dot => 32,
+            Type::CC(CC::A) => 33,
+            Type::CC(CC::B) => 34,
+            Type::CC(CC::C) => 35,
+            Type::CC(CC::D) => 36,
+            Type::CC(CC::E) => 37,
+            Type::EDate => 38,
+            Type::SDate => 39,
+            Type::MTime => 40,
+            Type::YmdHms => 41,
+        }
+    }
+}
+
+impl TryFrom<u16> for Type {
+    type Error = Error;
+
+    /// Converts the numeric code `source` into a [Type].
+    ///
+    /// This is the inverse of `From<Type> for u16`.  Any code that is not
+    /// assigned to a type yields [Error::UnknownFormat] carrying the code.
+    fn try_from(source: u16) -> Result<Self, Self::Error> {
+        match source {
+            1 => Ok(Self::A),
+            2 => Ok(Self::AHex),
+            3 => Ok(Self::Comma),
+            4 => Ok(Self::Dollar),
+            5 => Ok(Self::F),
+            6 => Ok(Self::IB),
+            7 => Ok(Self::PIBHex),
+            8 => Ok(Self::P),
+            9 => Ok(Self::PIB),
+            10 => Ok(Self::PK),
+            11 => Ok(Self::RB),
+            12 => Ok(Self::RBHex),
+            15 => Ok(Self::Z),
+            16 => Ok(Self::N),
+            17 => Ok(Self::E),
+            20 => Ok(Self::Date),
+            21 => Ok(Self::Time),
+            22 => Ok(Self::DateTime),
+            23 => Ok(Self::ADate),
+            24 => Ok(Self::JDate),
+            25 => Ok(Self::DTime),
+            26 => Ok(Self::WkDay),
+            27 => Ok(Self::Month),
+            28 => Ok(Self::MoYr),
+            29 => Ok(Self::QYr),
+            30 => Ok(Self::WkYr),
+            31 => Ok(Self::Pct),
+            32 => Ok(Self::Dot),
+            33 => Ok(Self::CC(CC::A)),
+            34 => Ok(Self::CC(CC::B)),
+            35 => Ok(Self::CC(CC::C)),
+            36 => Ok(Self::CC(CC::D)),
+            37 => Ok(Self::CC(CC::E)),
+            38 => Ok(Self::EDate),
+            39 => Ok(Self::SDate),
+            40 => Ok(Self::MTime),
+            41 => Ok(Self::YmdHms),
+            _ => Err(Error::UnknownFormat { value: source }),
+        }
+    }
+}
+
+/// A format specification whose width and decimals have not been validated
+/// against its type.
+///
+/// Use `Format::try_from` to validate it, or [UncheckedFormat::fix] to obtain
+/// a [Format] from it unconditionally.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub struct UncheckedFormat {
+    /// Format type.
+    pub type_: Type,
+
+    /// Width.
+    pub w: Width,
+
+    /// Number of decimal places.
+    pub d: Decimals,
+}
+
+impl UncheckedFormat {
+    /// Constructs an unchecked format from its parts, without any
+    /// validation.
+    pub fn new(type_: Type, w: Width, d: Decimals) -> Self {
+        Self { type_, w, d }
+    }
+    /// Converts this specification into a [Format] by delegating to
+    /// `Format::fixed_from`.
+    pub fn fix(&self) -> Format {
+        Format::fixed_from(self)
+    }
+}
+
+impl TryFrom<raw::records::RawFormat> for UncheckedFormat {
+    type Error = Error;
+
+    fn try_from(raw: raw::records::RawFormat) -> Result<Self, Self::Error> {
+        let raw = raw.0;
+        let raw_format = (raw >> 16) as u16;
+        let format = raw_format.try_into()?;
+        let w = ((raw >> 8) & 0xff) as Width;
+        let d = (raw & 0xff) as Decimals;
+        Ok(Self {
+            type_: format,
+            w,
+            d,
+        })
+    }
+}
+
+impl Display for UncheckedFormat {
+    fn fmt(&self, f: &mut Formatter) -> FmtResult {
+        write!(f, "{}{}", self.type_, self.w)?;
+        if self.type_.takes_decimals() || self.d > 0 {
+            write!(f, ".{}", self.d)?;
+        }
+        Ok(())
+    }
+}
+
+/// A period or a comma, used as a decimal point or a grouping character.
+#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Enum, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum Decimal {
+    /// `.` (the default).
+    #[default]
+    Dot,
+    /// `,`.
+    Comma,
+}
+
+impl Decimal {
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Decimal::Dot => ".",
+            Decimal::Comma => ",",
+        }
+    }
+}
+
+impl From<Decimal> for char {
+    fn from(value: Decimal) -> Self {
+        u8::from(value).into()
+    }
+}
+
+impl From<Decimal> for u8 {
+    fn from(value: Decimal) -> Self {
+        match value {
+            Decimal::Dot => b'.',
+            Decimal::Comma => b',',
+        }
+    }
+}
+
+impl TryFrom<char> for Decimal {
+    type Error = ();
+
+    fn try_from(c: char) -> Result<Self, Self::Error> {
+        match c {
+            '.' => Ok(Self::Dot),
+            ',' => Ok(Self::Comma),
+            _ => Err(()),
+        }
+    }
+}
+
+impl Not for Decimal {
+    type Output = Self;
+
+    fn not(self) -> Self::Output {
+        match self {
+            Self::Dot => Self::Comma,
+            Self::Comma => Self::Dot,
+        }
+    }
+}
+
+/// The first year of the 100-year window used to interpret 2-digit years
+/// (see [Epoch::apply]).
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)]
+pub struct Epoch(pub i32);
+
+impl Epoch {
+    /// Applies the epoch to `year`:
+    ///
+    /// - If `year` is 2 digits (between 0 and 99, inclusive), returns it
+    ///   converted it to the correct year considering the epoch.
+    ///
+    /// - Otherwise, returns `year` unchanged.
+    pub fn apply(&self, year: i32) -> i32 {
+        match year {
+            0..=99 => {
+                let century = self.0 / 100 * 100;
+                let offset = self.0 - century;
+                if year >= offset {
+                    year + century
+                } else {
+                    year + century + 100
+                }
+            }
+            other => other,
+        }
+    }
+}
+
+impl Default for Epoch {
+    fn default() -> Self {
+        static DEFAULT: LazyLock<Epoch> = LazyLock::new(|| Epoch(Local::now().year() - 69));
+        *DEFAULT
+    }
+}
+
+impl Display for Epoch {
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        write!(f, "{}", self.0)
+    }
+}
+
+/// Settings that control how values are formatted.
+#[derive(Clone, Debug, Default, Serialize)]
+pub struct Settings {
+    /// Epoch for interpreting 2-digit years (see [Epoch::apply]).
+    pub epoch: Epoch,
+
+    /// Either `'.'` or `','`.
+    pub decimal: Decimal,
+
+    /// Format `F`, `E`, `COMMA`, and `DOT` with leading zero (e.g. `0.5`
+    /// instead of `.5`)?
+    pub leading_zero: bool,
+
+    /// Custom currency styles.
+    pub ccs: EnumMap<CC, Option<Box<NumberStyle>>>,
+}
+
+#[derive(Copy, Clone, Enum)]
+struct StyleParams {
+    decimal: Decimal,
+    leading_zero: bool,
+}
+impl From<&Settings> for StyleParams {
+    fn from(value: &Settings) -> Self {
+        Self {
+            decimal: value.decimal,
+            leading_zero: value.leading_zero,
+        }
+    }
+}
+
+struct StyleSet(EnumMap<StyleParams, NumberStyle>);
+
+impl StyleSet {
+    fn new(f: impl Fn(StyleParams) -> NumberStyle) -> Self {
+        Self(EnumMap::from_fn(f))
+    }
+    fn get(&self, settings: &Settings) -> &NumberStyle {
+        &self.0[settings.into()]
+    }
+}
+
+impl Settings {
+    /// Returns these settings with custom currency style `cc` set to `style`.
+    pub fn with_cc(mut self, cc: CC, style: NumberStyle) -> Self {
+        self.ccs[cc] = Some(Box::new(style));
+        self
+    }
+    /// Returns these settings with `leading_zero` replaced.
+    pub fn with_leading_zero(self, leading_zero: bool) -> Self {
+        Self {
+            leading_zero,
+            ..self
+        }
+    }
+    /// Returns these settings with `epoch` replaced.
+    pub fn with_epoch(self, epoch: Epoch) -> Self {
+        Self { epoch, ..self }
+    }
+    /// Returns the [NumberStyle] to use for `type_` under these settings.
+    ///
+    /// - `F`, `E`, `COMMA`, and `DOT` use lazily built styles selected by the
+    ///   `decimal` and `leading_zero` settings.
+    ///
+    /// - `DOLLAR` and `PCT` are selected the same way, but their styles never
+    ///   use a leading zero.
+    ///
+    /// - Custom currency types use the configured style, if any.
+    ///
+    /// - Everything else, including an unconfigured custom currency type,
+    ///   gets a plain default style with no affixes, `.` as decimal point, no
+    ///   grouping, and no leading zero.
+    pub fn number_style(&self, type_: Type) -> &NumberStyle {
+        static DEFAULT: LazyLock<NumberStyle> =
+            LazyLock::new(|| NumberStyle::new("", "", Decimal::Dot, None, false));
+
+        match type_ {
+            Type::F | Type::E => {
+                static F: LazyLock<StyleSet> = LazyLock::new(|| {
+                    StyleSet::new(|p| NumberStyle::new("", "", p.decimal, None, p.leading_zero))
+                });
+                F.get(self)
+            }
+            Type::Comma => {
+                // Grouping character is the opposite of the decimal point.
+                static COMMA: LazyLock<StyleSet> = LazyLock::new(|| {
+                    StyleSet::new(|p| {
+                        NumberStyle::new("", "", p.decimal, Some(!p.decimal), p.leading_zero)
+                    })
+                });
+                COMMA.get(self)
+            }
+            Type::Dot => {
+                // Like `Type::Comma` but with the two characters swapped.
+                static DOT: LazyLock<StyleSet> = LazyLock::new(|| {
+                    StyleSet::new(|p| {
+                        NumberStyle::new("", "", !p.decimal, Some(p.decimal), p.leading_zero)
+                    })
+                });
+                DOT.get(self)
+            }
+            Type::Dollar => {
+                static DOLLAR: LazyLock<StyleSet> = LazyLock::new(|| {
+                    StyleSet::new(|p| NumberStyle::new("$", "", p.decimal, Some(!p.decimal), false))
+                });
+                DOLLAR.get(self)
+            }
+            Type::Pct => {
+                static PCT: LazyLock<StyleSet> = LazyLock::new(|| {
+                    StyleSet::new(|p| NumberStyle::new("", "%", p.decimal, None, false))
+                });
+                PCT.get(self)
+            }
+            Type::CC(cc) => self.ccs[cc].as_deref().unwrap_or(&DEFAULT),
+            Type::N
+            | Type::Z
+            | Type::P
+            | Type::PK
+            | Type::IB
+            | Type::PIB
+            | Type::PIBHex
+            | Type::RB
+            | Type::RBHex
+            | Type::Date
+            | Type::ADate
+            | Type::EDate
+            | Type::JDate
+            | Type::SDate
+            | Type::QYr
+            | Type::MoYr
+            | Type::WkYr
+            | Type::DateTime
+            | Type::YmdHms
+            | Type::MTime
+            | Type::Time
+            | Type::DTime
+            | Type::WkDay
+            | Type::Month
+            | Type::A
+            | Type::AHex => &DEFAULT,
+        }
+    }
+}
+
+/// A numeric output style.  This can express numeric formats in
+/// [Category::Basic] and [Category::Custom].
+#[derive(Clone, Debug, Serialize)]
+pub struct NumberStyle {
+    /// Negative prefix.
+    pub neg_prefix: Affix,
+    /// Prefix.
+    pub prefix: Affix,
+    /// Suffix.
+    pub suffix: Affix,
+    /// Negative suffix.
+    pub neg_suffix: Affix,
+
+    /// Decimal point.
+    pub decimal: Decimal,
+
+    /// Grouping character.
+    pub grouping: Option<Decimal>,
+
+    /// Format as `.5` or `0.5`?
+    pub leading_zero: bool,
+
+    /// An `Affix` may require more bytes than its display width; for example,
+    /// U+00A5 (¥) is 2 bytes in UTF-8 but occupies only one display column.
+    /// This member is the sum of the number of bytes required by all of the
+    /// `Affix` members in this struct, minus their display widths.  Thus, it
+    /// can be used to size memory allocations: for example, the formatted
+    /// result of `CCA20.5` requires no more than `(20 + extra_bytes)` bytes in
+    /// UTF-8.
+    #[serde(skip)]
+    pub extra_bytes: usize,
+}
+
+impl Display for NumberStyle {
+    /// Display this number style in the format used for custom currency.
+    ///
+    /// This format can only accurately represent number styles that include a
+    /// grouping character.  If this number style doesn't, it will pretend that
+    /// the grouping character is the opposite of the decimal point character.
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        let grouping = char::from(!self.decimal);
+        write!(
+            f,
+            "{}{}{}{}{}{}{}",
+            self.neg_prefix.display(grouping),
+            grouping,
+            self.prefix.display(grouping),
+            grouping,
+            self.suffix.display(grouping),
+            grouping,
+            self.neg_suffix.display(grouping),
+        )
+    }
+}
+
+impl NumberStyle {
+    /// Constructs a style with the given `prefix`, `suffix`, `decimal` point,
+    /// `grouping` character, and `leading_zero` setting.  The negative prefix
+    /// is `-` and the negative suffix is empty.
+    ///
+    /// `prefix` and `suffix` must be ASCII (checked in debug builds only).
+    fn new(
+        prefix: &str,
+        suffix: &str,
+        decimal: Decimal,
+        grouping: Option<Decimal>,
+        leading_zero: bool,
+    ) -> Self {
+        // These assertions ensure that zero is correct for `extra_bytes`.
+        debug_assert!(prefix.is_ascii());
+        debug_assert!(suffix.is_ascii());
+
+        Self {
+            neg_prefix: Affix::new("-"),
+            prefix: Affix::new(prefix),
+            suffix: Affix::new(suffix),
+            neg_suffix: Affix::new(""),
+            decimal,
+            grouping,
+            leading_zero,
+            extra_bytes: 0,
+        }
+    }
+
+    /// Returns the combined display width of the prefix and suffix (the
+    /// negative prefix and suffix are not included).
+    fn affix_width(&self) -> usize {
+        self.prefix.width + self.suffix.width
+    }
+}
+
+/// A prefix or suffix string in a [NumberStyle], along with its display
+/// width.
+#[derive(Clone, Debug, Serialize)]
+pub struct Affix {
+    /// String contents of affix.
+    pub s: String,
+
+    #[serde(skip)]
+    /// Display width in columns (see [unicode_width])
+    pub width: usize,
+}
+
+impl Affix {
+    fn new(s: impl Into<String>) -> Self {
+        let s = s.into();
+        Self {
+            width: s.width(),
+            s,
+        }
+    }
+
+    fn extra_bytes(&self) -> usize {
+        self.s.len().checked_sub(self.width).unwrap()
+    }
+
+    fn display(&self, escape: char) -> DisplayAffix<'_> {
+        DisplayAffix {
+            affix: self.s.as_str(),
+            escape,
+        }
+    }
+}
+
+/// Helper returned by `Affix::display` whose [Display] implementation writes
+/// the affix with `escape` characters escaped.
+pub struct DisplayAffix<'a> {
+    /// The affix text.
+    affix: &'a str,
+    /// Character to precede with an apostrophe on output.
+    escape: char,
+}
+
+impl Display for DisplayAffix<'_> {
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        for c in self.affix.chars() {
+            if c == self.escape {
+                f.write_char('\'')?;
+            }
+            f.write_char(c)?;
+        }
+        Ok(())
+    }
+}
+
+impl FromStr for NumberStyle {
+    type Err = ();
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        fn find_separator(s: &str) -> Option<char> {
+            // Count commas and periods.  There must be exactly three of one or
+            // the other, except that an apostrophe escapes a following comma or
+            // period.
+            let mut n_commas = 0;
+            let mut n_periods = 0;
+            let s = s.as_bytes();
+            for i in 0..s.len() {
+                if i > 0 && s[i - 1] == b'\'' {
+                } else if s[i] == b',' {
+                    n_commas += 1;
+                } else if s[i] == b'.' {
+                    n_periods += 1;
+                }
+            }
+
+            if n_commas == 3 && n_periods != 3 {
+                Some(',')
+            } else if n_periods == 3 && n_commas != 3 {
+                Some('.')
+            } else {
+                None
+            }
+        }
+
+        fn take_cc_token(iter: &mut Chars<'_>, grouping: char) -> Affix {
+            let mut s = String::new();
+            let mut quote = false;
+            for c in iter {
+                if c == '\'' && !quote {
+                    quote = true;
+                } else if c == grouping && !quote {
+                    break;
+                } else {
+                    s.push(c);
+                    quote = false;
+                }
+            }
+            Affix::new(s)
+        }
+
+        let Some(grouping) = find_separator(s) else {
+            return Err(());
+        };
+        let mut iter = s.chars();
+        let neg_prefix = take_cc_token(&mut iter, grouping);
+        let prefix = take_cc_token(&mut iter, grouping);
+        let suffix = take_cc_token(&mut iter, grouping);
+        let neg_suffix = take_cc_token(&mut iter, grouping);
+        let grouping: Decimal = grouping.try_into().unwrap();
+        let decimal = !grouping;
+        let extra_bytes = neg_prefix.extra_bytes()
+            + prefix.extra_bytes()
+            + suffix.extra_bytes()
+            + neg_suffix.extra_bytes();
+        Ok(Self {
+            neg_prefix,
+            prefix,
+            suffix,
+            neg_suffix,
+            decimal,
+            grouping: Some(grouping),
+            leading_zero: false,
+            extra_bytes,
+        })
+    }
+}
+
+/// An item within a [DateTemplate]: a run of one or more repetitions of the
+/// same character.
+pub struct TemplateItem {
+    /// Character in the template.
+    pub c: char,
+
+    /// Number of repetitions of the character.
+    pub n: usize,
+}
+
+/// A template for date and time formats.
+///
+/// The wrapped string is the part of the template that has not yet been
+/// consumed by iteration.
+#[derive(Clone)]
+pub struct DateTemplate(&'static str);
+
+impl DateTemplate {
+    /// Returns a [DateTemplate] used for date and time input and output in a
+    /// field of the given `type_` and `width`.
+    ///
+    /// `width` only affects whether a 2-digit year or a 4-digit year is used,
+    /// that is, whether the returned string contains `yy` or `yyyy`, and
+    /// whether seconds are included, that is, whether the returned string
+    /// contains `:SS`.  A caller that doesn't care whether the returned string
+    /// contains `yy` or `yyyy` or `:SS` can just specify 0 to omit them.
+    pub fn new(type_: Type, width: usize) -> Option<Self> {
+        let (short, long) = match type_ {
+            Type::F
+            | Type::Comma
+            | Type::Dot
+            | Type::Dollar
+            | Type::Pct
+            | Type::E
+            | Type::CC(_)
+            | Type::N
+            | Type::Z
+            | Type::P
+            | Type::PK
+            | Type::IB
+            | Type::PIB
+            | Type::PIBHex
+            | Type::RB
+            | Type::RBHex
+            | Type::WkDay
+            | Type::Month
+            | Type::A
+            | Type::AHex => return None,
+            Type::Date => ("dd-mmm-yy", "dd-mmm-yyyy"),
+            Type::ADate => ("mm/dd/yy", "mm/dd/yyyy"),
+            Type::EDate => ("dd.mm.yy", "dd.mm.yyyy"),
+            Type::JDate => ("yyddd", "yyyyddd"),
+            Type::SDate => ("yy/mm/dd", "yyyy/mm/dd"),
+            Type::QYr => ("q Q yy", "q Q yyyy"),
+            Type::MoYr => ("mmm yy", "mmm yyyy"),
+            Type::WkYr => ("ww WK yy", "ww WK yyyy"),
+            Type::DateTime => ("dd-mmm-yyyy HH:MM", "dd-mmm-yyyy HH:MM:SS"),
+            Type::YmdHms => ("yyyy-mm-dd HH:MM", "yyyy-mm-dd HH:MM:SS"),
+            Type::MTime => ("MM", "MM:SS"),
+            Type::Time => ("HH:MM", "HH:MM:SS"),
+            Type::DTime => ("D HH:MM", "D HH:MM:SS"),
+        };
+        if width >= long.len() {
+            Some(DateTemplate(long))
+        } else {
+            Some(DateTemplate(short))
+        }
+    }
+
+    pub fn for_format(format: Format) -> Option<Self> {
+        Self::new(format.type_(), format.w())
+    }
+
+    #[allow(clippy::len_without_is_empty)]
+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+}
+
+impl Iterator for DateTemplate {
+    type Item = TemplateItem;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let mut iter = self.0.chars();
+        let c = iter.next()?;
+        self.0 = iter.as_str();
+        let mut n = 1;
+        while iter.next() == Some(c) {
+            self.0 = iter.as_str();
+            n += 1;
+        }
+        Some(TemplateItem { c, n })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::format::{Format, Type, Width};
+
+    /// Checks the widths that `Format::codepage_to_unicode` produces for
+    /// `A`, `AHEX`, and `F` formats.
+    #[test]
+    fn codepage_to_unicode() {
+        /// Asserts that converting `input` changes only its width, to
+        /// `expected_width`.
+        fn check_format(input: Format, expected_width: Width) {
+            let mut output = input;
+            output.codepage_to_unicode();
+            let expected = Format::new(input.type_, expected_width, input.d).unwrap();
+            assert_eq!(output, expected);
+        }
+        // `A` widths are expected to triple, up to a limit of 32767.
+        check_format(Format::new(Type::A, 1, 0).unwrap(), 3);
+        check_format(Format::new(Type::A, 2, 0).unwrap(), 6);
+        check_format(Format::new(Type::A, 3, 0).unwrap(), 9);
+        check_format(Format::new(Type::A, 1000, 0).unwrap(), 3000);
+        check_format(Format::new(Type::A, 20000, 0).unwrap(), 32767);
+
+        // `AHEX` widths are expected to triple, up to a limit of 65534.
+        check_format(Format::new(Type::AHex, 2, 0).unwrap(), 6);
+        check_format(Format::new(Type::AHex, 4, 0).unwrap(), 12);
+        check_format(Format::new(Type::AHex, 6, 0).unwrap(), 18);
+        check_format(Format::new(Type::AHex, 2000, 0).unwrap(), 6000);
+        check_format(Format::new(Type::AHex, 20000, 0).unwrap(), 60000);
+        check_format(Format::new(Type::AHex, 30000, 0).unwrap(), 65534);
+
+        // The `F` format here is expected to be left unchanged.
+        check_format(Format::new(Type::F, 40, 0).unwrap(), 40);
+    }
+}
diff --git a/rust/pspp/src/format/display.rs b/rust/pspp/src/format/display.rs

new file mode 100644 (file)

index 0000000..5b3bbe2
--- /dev/null
+++ b/rust/pspp/src/format/display.rs
@@ -0,0 +1,1197 @@
+// PSPP - a program for statistical analysis.
+// Copyright (C) 2025 Free Software Foundation, Inc.
+//
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free Software
+// Foundation, either version 3 of the License, or (at your option) any later
+// version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+// details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program.  If not, see <http://www.gnu.org/licenses/>.
+
+use std::{
+    cmp::min,
+    fmt::{Display, Error as FmtError, Formatter, Result as FmtResult, Write as _},
+    io::{Error as IoError, Write as IoWrite},
+    str::from_utf8_unchecked,
+};
+
+use binrw::Endian;
+use chrono::{Datelike, NaiveDate};
+use encoding_rs::{Encoding, UTF_8};
+use libm::frexp;
+use smallstr::SmallString;
+use smallvec::{Array, SmallVec};
+
+use crate::{
+    calendar::{calendar_offset_to_gregorian, day_of_year, month_name, short_month_name},
+    data::{ByteStr, Datum, EncodedString, QuotedDatum, WithEncoding},
+    endian::ToBytes,
+    format::{Category, DateTemplate, Decimal, Format, NumberStyle, Settings, TemplateItem, Type},
+    settings::{EndianSettings, Settings as PsppSettings},
+    util::ToSmallString,
+};
+
+/// Formats a [Datum] according to a [Format] through its [Display]
+/// implementation.
+pub struct DisplayDatum<'b, B> {
+    /// The format to apply to `datum`.
+    format: Format,
+    /// Format settings, such as the decimal point and custom currency styles.
+    settings: &'b Settings,
+    /// Endianness settings.
+    endian: EndianSettings,
+    /// The value to display.
+    datum: Datum<B>,
+
+    /// If true, the output will remove leading and trailing spaces from numeric
+    /// values, and trailing spaces from string values.  (This might make the
+    /// output narrower than the requested width.)
+    trim_spaces: bool,
+
+    /// If true, the output will include a double quote before and after string
+    /// values.
+    quote_strings: bool,
+}
+
+#[cfg(test)]
+mod test;
+
+/// Plain display of a number, without reference to a [Format].
+pub trait DisplayPlain {
+    /// Returns an object that implements [Display] for this value.
+    fn display_plain(&self) -> DisplayPlainF64;
+}
+
+impl DisplayPlain for f64 {
+    /// Returns a [DisplayPlainF64] for this value that uses `.` as the
+    /// decimal point.
+    fn display_plain(&self) -> DisplayPlainF64 {
+        DisplayPlainF64 {
+            value: *self,
+            decimal: '.',
+        }
+    }
+}
+
+pub struct DisplayPlainF64 {
+    pub value: f64,
+    pub decimal: char,
+}
+
+impl DisplayPlainF64 {
+    pub fn with_decimal(self, decimal: char) -> Self {
+        Self { decimal, ..self }
+    }
+}
+
+impl Display for DisplayPlainF64 {
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        struct Inner(f64);
+
+        impl Display for Inner {
+            fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+                let value = self.0;
+                if (value.abs() < 0.0005 && value != 0.0) || value.abs() > 1e15 {
+                    // Print 0s that would otherwise have lots of leading or
+                    // trailing zeros in scientific notation with full precision.
+                    write!(f, "{value:.e}")
+                } else if value == value.trunc() {
+                    // Print integers without decimal places.
+                    write!(f, "{value:.0}")
+                } else {
+                    // Print other numbers with full precision.
+                    write!(f, "{value:.}")
+                }
+            }
+        }
+
+        match self.decimal {
+            '.' => write!(f, "{}", Inner(self.value)),
+            _ => {
+                let tmp = Inner(self.value).to_small_string::<64>();
+                if let Some(position) = tmp.find('.') {
+                    f.write_str(&tmp[..position])?;
+                    f.write_char(self.decimal)?;
+                    f.write_str(&tmp[position + 1..])
+                } else {
+                    f.write_str(&tmp)
+                }
+            }
+        }
+    }
+}
+
+impl<'a, D> Datum<D>
+where
+    D: EncodedString,
+{
+    /// Returns an object that implements [Display] for printing this [Datum] as
+    /// `format`.
+    ///
+    /// [Display]: std::fmt::Display
+    pub fn display(&'a self, format: Format) -> DisplayDatum<'a, WithEncoding<&'a ByteStr>> {
+        DisplayDatum::new(format, self.as_borrowed())
+    }
+
+    /// Returns the quoted form of this [Datum], as produced by `quoted()`.
+    pub fn display_plain(&self) -> QuotedDatum<'_, D> {
+        self.quoted()
+    }
+}
+
+impl<'b, B> Display for DisplayDatum<'b, B>
+where
+    B: EncodedString,
+{
+    /// Formats the datum according to its format.
+    ///
+    /// String values are written directly (as hex digits for `AHEX`).
+    /// Numeric values are dispatched to a helper chosen by the format type; a
+    /// missing numeric value is written by `missing`.
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        let number = match &self.datum {
+            Datum::Number(number) => *number,
+            Datum::String(string) => {
+                if self.format.type_() == Type::AHex {
+                    // Two lowercase hex digits per byte of the raw string.
+                    for byte in string.raw_string_bytes() {
+                        write!(f, "{byte:02x}")?;
+                    }
+                } else {
+                    let quote = if self.quote_strings { "\"" } else { "" };
+                    let s = string.as_str();
+                    let s = if self.trim_spaces {
+                        s.trim_end_matches(' ')
+                    } else {
+                        &s
+                    };
+                    write!(f, "{quote}{s}{quote}")?;
+                }
+                return Ok(());
+            }
+        };
+
+        // `None` is a missing value.
+        let Some(number) = number else {
+            return self.missing(f);
+        };
+
+        match self.format.type_() {
+            Type::F
+            | Type::Comma
+            | Type::Dot
+            | Type::Dollar
+            | Type::Pct
+            | Type::E
+            | Type::CC(_) => self.number(f, number),
+            Type::N => self.n(f, number),
+            Type::Z => self.z(f, number),
+
+            Type::P | Type::PK | Type::IB | Type::PIB | Type::RB => self.fmt_binary(f),
+
+            Type::PIBHex => self.pibhex(f, number),
+            Type::RBHex => self.rbhex(f, number),
+            Type::Date
+            | Type::ADate
+            | Type::EDate
+            | Type::JDate
+            | Type::SDate
+            | Type::QYr
+            | Type::MoYr
+            | Type::WkYr
+            | Type::DateTime
+            | Type::YmdHms
+            | Type::MTime
+            | Type::Time
+            | Type::DTime
+            | Type::WkDay => self.date(f, number),
+            Type::Month => self.month(f, number),
+            // NOTE(review): this assumes that a numeric datum never carries a
+            // string format; such a combination would panic here.
+            Type::A | Type::AHex => unreachable!(),
+        }
+    }
+}
+
+impl<'b, B> DisplayDatum<'b, B>
+where
+    B: EncodedString,
+{
+    /// Constructs a `DisplayDatum` that displays `datum` as `format`.
+    ///
+    /// The format and endian settings are taken from the global settings,
+    /// and space trimming and string quoting are off.
+    pub fn new(format: Format, datum: Datum<B>) -> Self {
+        let settings = PsppSettings::global();
+        Self {
+            format,
+            datum,
+            settings: &settings.formats,
+            endian: settings.endian,
+            trim_spaces: false,
+            quote_strings: false,
+        }
+    }
+    pub fn with_settings(self, settings: &'b Settings) -> Self {
+        Self { settings, ..self }
+    }
+    pub fn with_endian(self, endian: EndianSettings) -> Self {
+        Self { endian, ..self }
+    }
+    pub fn with_trimming(self) -> Self {
+        Self {
+            trim_spaces: true,
+            ..self
+        }
+    }
+    pub fn with_quoted_string(self) -> Self {
+        Self {
+            quote_strings: true,
+            ..self
+        }
+    }
+    fn fmt_binary(&self, f: &mut Formatter) -> FmtResult {
+        let output = self.to_binary().unwrap();
+        for b in output {
+            f.write_char(b as char)?;
+        }
+        Ok(())
+    }
+    /// Formats `number` for the basic and custom numeric formats.
+    ///
+    /// For a finite number whose type is not `E` and whose magnitude is below
+    /// `1.5 * power10(w)`, tries in order: decimal notation with affixes,
+    /// scientific notation with affixes, and decimal notation without
+    /// affixes.  If none of those succeeds (or they were not tried), falls
+    /// back to scientific notation without affixes, and finally to the
+    /// overflow indication.  Non-finite numbers are handled by `infinite`.
+    fn number(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
+        if number.is_finite() {
+            let style = self.settings.number_style(self.format.type_);
+            if self.format.type_ != Type::E && number.abs() < 1.5 * power10(self.format.w()) {
+                let rounder = Rounder::new(style, number, self.format.d);
+                // Each attempt returns true if it wrote the output.
+                if self.decimal(f, &rounder, style, true)?
+                    || self.scientific(f, number, style, true)?
+                    || self.decimal(f, &rounder, style, false)?
+                {
+                    return Ok(());
+                }
+            }
+
+            if !self.scientific(f, number, style, false)? {
+                self.overflow(f)?;
+            }
+            Ok(())
+        } else {
+            self.infinite(f, number)
+        }
+    }
+
+    fn infinite(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
+        if self.format.w >= 3 {
+            let s = if number.is_nan() {
+                "NaN"
+            } else if number.is_infinite() {
+                if number.is_sign_positive() {
+                    "+Infinity"
+                } else {
+                    "-Infinity"
+                }
+            } else {
+                "Unknown"
+            };
+            let w = if self.trim_spaces { 0 } else { self.format.w() };
+            write!(f, "{s:>w$.w$}")
+        } else {
+            self.overflow(f)
+        }
+    }
+
+    fn missing(&self, f: &mut Formatter<'_>) -> FmtResult {
+        match self.format.type_ {
+            Type::P | Type::PK | Type::IB | Type::PIB | Type::RB => return self.fmt_binary(f),
+            Type::RBHex => return self.rbhex(f, -f64::MAX),
+            _ => (),
+        }
+
+        if self.trim_spaces {
+            return write!(f, ".");
+        }
+
+        let w = self.format.w() as isize;
+        let d = self.format.d() as isize;
+        let dot_position = match self.format.type_ {
+            Type::N => w - 1,
+            Type::Pct => w - d - 2,
+            Type::E => w - d - 5,
+            _ => w - d - 1,
+        };
+        let dot_position = dot_position.max(0) as u16;
+
+        for i in 0..self.format.w {
+            if i == dot_position {
+                write!(f, ".")?;
+            } else {
+                write!(f, " ")?;
+            }
+        }
+        Ok(())
+    }
+
+    fn overflow(&self, f: &mut Formatter<'_>) -> FmtResult {
+        if self.trim_spaces {
+            write!(f, "*")?;
+        } else {
+            for _ in 0..self.format.w {
+                write!(f, "*")?;
+            }
+        }
+        Ok(())
+    }
+
+    /// Tries to write the number held by `rounder` in decimal notation using
+    /// `style`.
+    ///
+    /// Starts with the format's number of decimals and drops decimals one at
+    /// a time until the number fits in the format's width.  If
+    /// `require_affixes` is true, only accepts a representation that has room
+    /// for the style's prefix and suffix.
+    ///
+    /// Returns `Ok(true)` if the number was written, `Ok(false)` if no
+    /// representation fits (in which case nothing is written).
+    fn decimal(
+        &self,
+        f: &mut Formatter<'_>,
+        rounder: &Rounder,
+        style: &NumberStyle,
+        require_affixes: bool,
+    ) -> Result<bool, FmtError> {
+        for decimals in (0..=self.format.d).rev() {
+            // Make sure there's room for the number's magnitude, plus the
+            // negative suffix, plus (if negative) the negative prefix.
+            let RounderWidth {
+                mut width,
+                integer_digits,
+                negative,
+            } = rounder.width(decimals as usize);
+            width += style.neg_suffix.width;
+            if negative {
+                width += style.neg_prefix.width;
+            }
+            if width > self.format.w() {
+                continue;
+            }
+
+            // If there's room for the prefix and suffix, allocate
+            // space.  If the affixes are required, but there's no
+            // space, give up.
+            let add_affixes = allocate_space(style.affix_width(), self.format.w(), &mut width);
+            if !add_affixes && require_affixes {
+                continue;
+            }
+
+            // Check whether we should include grouping characters.  We need
+            // room for a complete set or we don't insert any at all.  We don't
+            // include grouping characters if decimal places were requested but
+            // they were all dropped.
+            let grouping = style.grouping.filter(|_| {
+                integer_digits > 3
+                    && (self.format.d == 0 || decimals > 0)
+                    && allocate_space((integer_digits - 1) / 3, self.format.w(), &mut width)
+            });
+
+            // Assemble number.
+            let magnitude = rounder.format(decimals as usize);
+            let mut output = SmallString::<[u8; 40]>::new();
+            if !self.trim_spaces {
+                // Left-pad with spaces to the full field width.
+                for _ in width..self.format.w() {
+                    output.push(' ');
+                }
+            }
+            if negative {
+                output.push_str(&style.neg_prefix.s);
+            }
+            if add_affixes {
+                output.push_str(&style.prefix.s);
+            }
+            if let Some(grouping) = grouping {
+                // Insert a grouping character before every group of three
+                // integer digits, counting from the right.
+                for (i, digit) in magnitude[..integer_digits].bytes().enumerate() {
+                    if i > 0 && (integer_digits - i) % 3 == 0 {
+                        output.push(grouping.into());
+                    }
+                    output.push(digit as char);
+                }
+            } else {
+                output.push_str(&magnitude[..integer_digits]);
+            }
+            if decimals > 0 {
+                output.push(style.decimal.into());
+                // Skip the decimal point in `magnitude` in favor of the
+                // style's own decimal point pushed above.
+                let s = &magnitude[integer_digits + 1..];
+                output.push_str(&s[..decimals as usize]);
+            }
+            if add_affixes {
+                output.push_str(&style.suffix.s);
+            }
+            if negative {
+                output.push_str(&style.neg_suffix.s);
+            } else {
+                // Pad in place of the negative suffix.
+                for _ in 0..style.neg_suffix.width {
+                    output.push(' ');
+                }
+            }
+
+            debug_assert!(self.trim_spaces || output.len() >= self.format.w());
+            debug_assert!(output.len() <= self.format.w() + style.extra_bytes);
+            f.write_str(&output)?;
+            return Ok(true);
+        }
+        Ok(false)
+    }
+
+    /// Tries to write `number` in scientific notation (for example
+    /// `1.23E+004`) within the format's width.
+    ///
+    /// If `require_affixes` is true, the attempt fails unless there is room for
+    /// the style's prefix and suffix.
+    ///
+    /// Returns `Ok(true)` if output was written.  Returns `Ok(false)`, having
+    /// written nothing, if the minimum representation does not fit, if
+    /// required affixes do not fit, or if the exponent needs more than three
+    /// digits.
+    fn scientific(
+        &self,
+        f: &mut Formatter<'_>,
+        number: f64,
+        style: &NumberStyle,
+        require_affixes: bool,
+    ) -> Result<bool, FmtError> {
+        // Decide the sign once, so that the width accounting, the negative
+        // prefix, and the negative suffix always agree (in particular for
+        // `-0.0`, which is not `< 0.0` but has its sign bit set).
+        let negative = number < 0.0;
+
+        // Allocate minimum required space.
+        let mut width = 6 + style.neg_suffix.width;
+        if negative {
+            width += style.neg_prefix.width;
+        }
+        if width > self.format.w() {
+            return Ok(false);
+        }
+
+        // Check for room for prefix and suffix.
+        let add_affixes = allocate_space(style.affix_width(), self.format.w(), &mut width);
+        if require_affixes && !add_affixes {
+            return Ok(false);
+        }
+
+        // Figure out number of characters we can use for the fraction, if any.
+        // (If that turns out to be `1`, then we'll output a decimal point
+        // without any digits following.)
+        let mut fraction_width = min(self.format.d as usize + 1, self.format.w() - width).min(16);
+        if self.format.type_ != Type::E && fraction_width == 1 {
+            fraction_width = 0;
+        }
+        width += fraction_width;
+
+        // Format the magnitude in its own buffer, so that the searches for `E`
+        // and `.` below can only match characters of the number itself, never
+        // characters of the padding or of the style's prefix.
+        let mut magnitude = SmallString::<[u8; 40]>::new();
+        write!(
+            &mut magnitude,
+            "{:.*E}",
+            fraction_width.saturating_sub(1),
+            number.abs()
+        )
+        .unwrap();
+
+        // Split off the exponent; it is re-emitted below with exactly three
+        // digits plus sign.
+        let e = magnitude.find('E').unwrap();
+        let exponent: isize = magnitude[e + 1..].parse().unwrap();
+        if exponent.abs() > 999 {
+            return Ok(false);
+        }
+        magnitude.truncate(e);
+        if fraction_width == 1 {
+            // Add `.` before the `E`, to get a value like "1.E+000".
+            magnitude.push('.');
+        }
+
+        // Rust always uses `.` as the decimal point. Translate to `,` if
+        // necessary.
+        if style.decimal == Decimal::Comma {
+            fix_decimal_point(&mut magnitude);
+        }
+        write!(&mut magnitude, "E{exponent:+04}").unwrap();
+
+        // Assemble padding, prefixes, number, and suffixes.
+        let mut output = SmallString::<[u8; 40]>::new();
+        if !self.trim_spaces {
+            for _ in width..self.format.w() {
+                output.push(' ');
+            }
+        }
+        if negative {
+            output.push_str(&style.neg_prefix.s);
+        }
+        if add_affixes {
+            output.push_str(&style.prefix.s);
+        }
+        output.push_str(&magnitude);
+        if add_affixes {
+            output.push_str(&style.suffix.s);
+        }
+        if negative {
+            output.push_str(&style.neg_suffix.s);
+        } else {
+            // Non-negative output gets spaces in place of the negative suffix.
+            for _ in 0..style.neg_suffix.width {
+                output.push(' ');
+            }
+        }
+
+        debug_assert!(self.trim_spaces || output.len() >= self.format.w());
+        debug_assert!(output.len() <= self.format.w() + style.extra_bytes);
+        f.write_str(&output)?;
+        Ok(true)
+    }
+
+    /// Writes a non-negative `number` as digits, left-padded with zeros to the
+    /// format's width.
+    ///
+    /// A negative `number` is written as missing.  A number whose digits do not
+    /// fit in the width is written as overflow.
+    fn n(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
+        if number < 0.0 {
+            return self.missing(f);
+        }
+
+        let digits = LegacyFormat::new(number, self.format.d());
+        match self.format.w().checked_sub(digits.len()) {
+            // `padding` is the number of leading zeros needed to fill the field.
+            Some(padding) => write!(f, "{}{digits}", Zeros(padding)),
+            // More digits than the field can hold.
+            None => self.overflow(f),
+        }
+    }
+
+    /// Writes `number` as digits, left-padded with zeros to the format's width.
+    ///
+    /// A negative number is marked by replacing its final digit `0..=9` with
+    /// the corresponding character of `}JKLMNOPQR`.  A number whose digits do
+    /// not fit in the width is written as overflow.
+    fn z(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
+        let legacy = LegacyFormat::new(number, self.format.d());
+        let Some(padding) = self.format.w().checked_sub(legacy.len()) else {
+            return self.overflow(f);
+        };
+
+        let mut digits = legacy.to_small_string::<40>();
+        if number < 0.0 {
+            if let Some(last) = digits.pop() {
+                let value = last.to_digit(10).unwrap();
+                digits.push(b"}JKLMNOPQR"[value as usize] as char);
+            }
+        }
+        write!(f, "{}{digits}", Zeros(padding))
+    }
+
+    /// Writes `number`, rounded to an integer, as the uppercase hexadecimal
+    /// digits of a big-endian unsigned integer occupying half the format's
+    /// width in bytes.
+    ///
+    /// Negative numbers, and numbers too large for that many bytes, are written
+    /// as overflow.
+    fn pibhex(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
+        if number < 0.0 {
+            return self.overflow(f);
+        }
+
+        // Two hex digits per byte.
+        let n_bytes = self.format.w / 2;
+        let rounded = number.round();
+        if rounded >= power256(n_bytes) {
+            return self.overflow(f);
+        }
+        output_hex(f, &integer_to_binary(rounded as u64, n_bytes))
+    }
+
+    /// Writes the bytes that [Self::rb] produces for `number`, using half the
+    /// format's width as the byte count, as uppercase hexadecimal digits.
+    fn rbhex(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
+        // Two hex digits per byte.
+        let n_bytes = self.format.w() / 2;
+        output_hex(f, &self.rb(Some(number), n_bytes))
+    }
+
+    /// Writes `number` using the date or time template that
+    /// [DateTemplate::for_format] yields for this format.
+    ///
+    /// The constants below treat `number` as a count of seconds.  For formats
+    /// in [Category::Date], a negative number, or one that
+    /// [calendar_offset_to_gregorian] cannot convert, is written as missing.
+    /// For formats in [Category::Time], the whole number is treated as a time
+    /// interval.  A year or time field that cannot be represented in the
+    /// format causes overflow output.
+    ///
+    /// Unless `trim_spaces` is set, the result is right-justified in the
+    /// format's width.
+    fn date(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
+        const MINUTE: f64 = 60.0;
+        const HOUR: f64 = 60.0 * 60.0;
+        const DAY: f64 = 60.0 * 60.0 * 24.0;
+
+        // Split into a calendar date and a time remainder.  Time formats have
+        // no date, so a placeholder date is used for them.
+        let (date, mut time) = match self.format.type_.category() {
+            Category::Date => {
+                if number < 0.0 {
+                    return self.missing(f);
+                }
+                let Some(date) = calendar_offset_to_gregorian(number / DAY) else {
+                    return self.missing(f);
+                };
+                (date, number % DAY)
+            }
+            Category::Time => (NaiveDate::MIN, number),
+            _ => unreachable!(),
+        };
+
+        let mut output = SmallString::<[u8; 40]>::new();
+        // Each template item is a character `c` repeated `n` times; `n`
+        // selects among the variants of a field (e.g. 2-digit vs. 4-digit
+        // year).
+        for TemplateItem { c, n } in DateTemplate::for_format(self.format).unwrap() {
+            match c {
+                'd' if n < 3 => write!(&mut output, "{:02}", date.day()).unwrap(),
+                'd' => write!(&mut output, "{:03}", day_of_year(date).unwrap_or(1)).unwrap(),
+                'm' if n < 3 => write!(&mut output, "{:02}", date.month()).unwrap(),
+                'm' => write!(&mut output, "{}", short_month_name(date.month()).unwrap()).unwrap(),
+                'y' if n >= 4 => {
+                    let year = date.year();
+                    if year <= 9999 {
+                        write!(&mut output, "{year:04}").unwrap();
+                    } else if self.format.type_ == Type::DateTime
+                        || self.format.type_ == Type::YmdHms
+                    {
+                        // These two formats show asterisks for an oversized
+                        // year instead of failing the whole field.
+                        write!(&mut output, "****").unwrap();
+                    } else {
+                        return self.overflow(f);
+                    }
+                }
+                'y' => {
+                    // A 2-digit year is only usable within the 100 years
+                    // starting at the configured epoch.
+                    let epoch = self.settings.epoch.0;
+                    let offset = date.year() - epoch;
+                    if !(0..=99).contains(&offset) {
+                        return self.overflow(f);
+                    }
+                    write!(&mut output, "{:02}", date.year().abs() % 100).unwrap();
+                }
+                'q' => write!(&mut output, "{}", date.month0() / 3 + 1).unwrap(),
+                'w' => write!(
+                    &mut output,
+                    "{:2}",
+                    (day_of_year(date).unwrap_or(1) - 1) / 7 + 1
+                )
+                .unwrap(),
+                'D' => {
+                    // The sign is emitted by whichever time field comes first;
+                    // after that `time` is non-negative.
+                    if time < 0.0 {
+                        output.push('-');
+                    }
+                    time = time.abs();
+                    write!(&mut output, "{:1$.0}", (time / DAY).floor(), n).unwrap();
+                    time %= DAY;
+                }
+                'H' => {
+                    if time < 0.0 {
+                        output.push('-');
+                    }
+                    time = time.abs();
+                    write!(&mut output, "{:01$.0}", (time / HOUR).floor(), n).unwrap();
+                    time %= HOUR;
+                }
+                'M' => {
+                    if time < 0.0 {
+                        output.push('-');
+                    }
+                    time = time.abs();
+                    write!(&mut output, "{:02.0}", (time / MINUTE).floor()).unwrap();
+                    time %= MINUTE;
+
+                    // Seconds (and fractional seconds) are included only as
+                    // far as the remaining width allows.
+                    let excess_width = self.format.w() as isize - output.len() as isize;
+                    if excess_width < 0 || (self.format.type_ == Type::MTime && excess_width < 3) {
+                        return self.overflow(f);
+                    }
+                    if excess_width == 3
+                        || excess_width == 4
+                        || (excess_width >= 5 && self.format.d == 0)
+                    {
+                        write!(&mut output, ":{:02.0}", time.floor()).unwrap();
+                    } else if excess_width >= 5 {
+                        let d = min(self.format.d(), excess_width as usize - 4);
+                        let w = d + 3;
+                        write!(&mut output, ":{time:0w$.d$}").unwrap();
+                        if self.settings.decimal == Decimal::Comma {
+                            fix_decimal_point(&mut output);
+                        }
+                    }
+                    // The minutes item ends template processing; any later
+                    // template items are not visited.
+                    break;
+                }
+                c if n == 1 => output.push(c),
+                _ => unreachable!(),
+            }
+        }
+        if !self.trim_spaces {
+            write!(f, "{:>1$}", &output, self.format.w())
+        } else {
+            f.write_str(&output)
+        }
+    }
+
+    /// Writes the name of the month numbered `number`, as returned by
+    /// [month_name].
+    ///
+    /// If [month_name] has no name for `number`, the value is written as
+    /// missing.  Unless `trim_spaces` is set, the name is truncated or padded
+    /// with trailing spaces to exactly the format's width; with `trim_spaces`
+    /// set, the bare name is written.
+    fn month(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
+        let Some(month) = month_name(number as u32) else {
+            return self.missing(f);
+        };
+        if self.trim_spaces {
+            f.write_str(month)
+        } else {
+            // The precision truncates names longer than the field; the width
+            // with `<` alignment pads shorter names on the right, so that
+            // untrimmed output always has the requested width.
+            let w = self.format.w();
+            write!(f, "{month:<w$.w$}")
+        }
+    }
+
+    /// Writes this object to `w`.
+    ///
+    /// Binary formats ([Type::P], [Type::PIB], and so on) are written as raw
+    /// binary values; all other output formats are written as text in the
+    /// given `encoding`.
+    ///
+    /// If `dv` is a [DisplayDatum], then `write!(f, "{}", dv)` and
+    /// `dv.write(f, encoding)` differ as follows:
+    ///
+    /// * `write!` always outputs UTF-8. Binary formats are encoded as the
+    ///   Unicode characters corresponding to their bytes.
+    ///
+    /// * `dv.write` outputs the desired `encoding`. Binary formats are not
+    ///   encoded in `encoding` (and thus they might be invalid for the
+    ///   encoding).
+    pub fn write<W>(&self, mut w: W, encoding: &'static Encoding) -> Result<(), IoError>
+    where
+        W: IoWrite,
+    {
+        if let Some(binary) = self.to_binary() {
+            // Binary formats bypass text encoding entirely.
+            w.write_all(&binary)
+        } else if encoding == UTF_8 {
+            // The `Display` output is already UTF-8, so stream it directly.
+            write!(&mut w, "{self}")
+        } else {
+            let text = self.to_small_string::<64>();
+            w.write_all(&encoding.encode(&text).0)
+        }
+    }
+
+    /// Returns the binary representation of this datum if it is numeric and
+    /// its format is one of the binary types `P`, `PK`, `IB`, `PIB`, or `RB`.
+    /// Returns `None` otherwise.
+    fn to_binary(&self) -> Option<SmallVec<[u8; 16]>> {
+        let number = self.datum.as_number()?;
+        let bytes = match self.format.type_() {
+            Type::P => self.p(number),
+            Type::PK => self.pk(number),
+            Type::IB => self.ib(number),
+            Type::PIB => self.pib(number),
+            Type::RB => self.rb(number, self.format.w()),
+            _ => return None,
+        };
+        Some(bytes)
+    }
+
+    /// Packs the digits of `number` (or of zero, if `number` is `None`) two
+    /// per byte, high nibble first, zero-padded on the left to `digits`
+    /// digits.
+    ///
+    /// Returns `(true, bytes)` on success.  If the number needs more than
+    /// `digits` digits, returns `(false, bytes)` with `bytes` all zeros.  In
+    /// both cases `bytes` holds `digits` rounded up to a whole number of
+    /// bytes; with an odd `digits`, the low nibble of the last byte is left
+    /// zero.
+    fn bcd(&self, number: Option<f64>, digits: usize) -> (bool, SmallVec<[u8; 16]>) {
+        let legacy = LegacyFormat::new(number.unwrap_or_default(), self.format.d());
+        let Some(padding) = digits.checked_sub(legacy.len()) else {
+            // Too many digits: report failure with a zero-filled buffer.
+            let mut zeroed = SmallVec::new();
+            zeroed.resize(digits.div_ceil(2), 0);
+            return (false, zeroed);
+        };
+
+        let mut decimal = SmallString::<[u8; 16]>::new();
+        write!(&mut decimal, "{}{legacy}", Zeros(padding)).unwrap();
+
+        let mut packed = SmallVec::new();
+        let mut nibbles = decimal.bytes().map(|digit| digit - b'0');
+        for _ in 0..digits / 2 {
+            let high = nibbles.next().unwrap();
+            let low = nibbles.next().unwrap();
+            packed.push((high << 4) + low);
+        }
+        if digits % 2 != 0 {
+            // A trailing unpaired digit occupies only the high nibble.
+            packed.push(nibbles.next().unwrap() << 4);
+        }
+        (true, packed)
+    }
+
+    /// Returns `number` packed by [Self::bcd] into `2 * w - 1` digits, with
+    /// the low nibble of the last byte set to a sign code: `0xd` for a
+    /// negative number that fit, `0xf` otherwise.
+    fn p(&self, number: Option<f64>) -> SmallVec<[u8; 16]> {
+        let (valid, mut output) = self.bcd(number, self.format.w() * 2 - 1);
+        let negative = valid && number.is_some_and(|value| value < 0.0);
+        let sign_nibble = if negative { 0xd } else { 0xf };
+        *output.last_mut().unwrap() |= sign_nibble;
+        output
+    }
+
+    /// Returns `number` packed by [Self::bcd] into `2 * w` digits.  A negative
+    /// number is treated the same as `None`.
+    fn pk(&self, number: Option<f64>) -> SmallVec<[u8; 16]> {
+        // Keep everything that is not strictly negative (including NaN, for
+        // which `< 0.0` is false).
+        let unsigned = number.filter(|value| !(*value < 0.0));
+        self.bcd(unsigned, self.format.w() * 2).1
+    }
+
+    /// Returns `number`, scaled by `10^d` and rounded, as a `w`-byte
+    /// two's-complement integer in the configured output byte order.
+    ///
+    /// `None`, and values outside the accepted range, are written as zero.
+    fn ib(&self, number: Option<f64>) -> SmallVec<[u8; 16]> {
+        let scaled = number.map_or(0.0, |value| (value * power10(self.format.d())).round());
+
+        // Half the number of values representable in `w` bytes.
+        let limit = power256(self.format.w) / 2.0;
+        let scaled = if scaled >= limit - 1.0 || scaled < -limit {
+            0.0
+        } else {
+            scaled
+        };
+
+        let magnitude = scaled.abs() as u64;
+        let integer = if scaled < 0.0 {
+            // Two's-complement negation.  `wrapping_neg` is used because a
+            // magnitude of 2^63 (the most negative 8-byte value) has no
+            // positive `i64` counterpart, so negating through `i64` would
+            // overflow.
+            magnitude.wrapping_neg()
+        } else {
+            magnitude
+        };
+        endian_to_smallvec(self.endian.output, integer, self.format.w())
+    }
+
+    /// Returns `number`, scaled by `10^d` and rounded, as a `w`-byte unsigned
+    /// integer in the configured output byte order.
+    ///
+    /// `None`, negative values, and values of `256^w` or more are written as
+    /// zero.
+    fn pib(&self, number: Option<f64>) -> SmallVec<[u8; 16]> {
+        let scaled = number.map_or(0.0, |value| (value * power10(self.format.d())).round());
+        let out_of_range = scaled >= power256(self.format.w) || scaled < 0.0;
+        let value = if out_of_range { 0.0 } else { scaled };
+        endian_to_smallvec(self.endian.output, value.abs() as u64, self.format.w())
+    }
+
+    /// Returns the 8-byte floating-point representation of `number` in the
+    /// configured output byte order, truncated or zero-extended to `w` bytes.
+    /// `None` is represented as `-f64::MAX`.
+    fn rb(&self, number: Option<f64>, w: usize) -> SmallVec<[u8; 16]> {
+        let value = number.unwrap_or(-f64::MAX);
+        let raw: [u8; 8] = self.endian.output.to_bytes(value);
+        let mut bytes = SmallVec::from_slice(&raw);
+        bytes.resize(w, 0);
+        bytes
+    }
+}
+
+/// The decimal digits of a number's magnitude, as produced by
+/// [LegacyFormat::new].
+///
+/// The displayed value is `s` followed by `trailing_zeros` zero digits.
+struct LegacyFormat {
+    /// ASCII decimal digits (no sign, decimal point, or exponent).
+    s: SmallVec<[u8; 40]>,
+
+    /// Number of `0` digits to display after `s`.
+    trailing_zeros: usize,
+}
+
+impl LegacyFormat {
+    /// Builds the decimal digits of the magnitude of `number` scaled by
+    /// `10^d` and rounded to an integer, with a following digit of `5` or
+    /// more rounding the magnitude up.
+    ///
+    /// The sign of `number` is discarded.  A magnitude that rounds to zero
+    /// yields no digits at all.
+    fn new(number: f64, d: usize) -> Self {
+        let mut s = SmallVec::<[u8; 40]>::new();
+        write!(&mut s, "{:E}", number.abs()).unwrap();
+        debug_assert!(s.is_ascii());
+
+        // Parse exponent.
+        //
+        // Add 1 because of the transformation we will do just below, and `d` so
+        // that we just need to round to the nearest integer.
+        let e_index = s.iter().position(|c| *c == b'E').unwrap();
+        // SAFETY: `s` holds only ASCII here (see the assertion above), so any
+        // subslice of it is valid UTF-8.
+        let mut exponent = unsafe { from_utf8_unchecked(&s[e_index + 1..]) }
+            .parse::<i32>()
+            .unwrap()
+            + 1
+            + d as i32;
+
+        // Transform `1.234E56` into `1234`.
+        if e_index == 1 {
+            // No decimals, e.g. `1E4` or `0E0`.
+            s.truncate(1)
+        } else {
+            s.remove(1);
+            s.truncate(e_index - 1);
+        };
+        debug_assert!(s.iter().all(|c| c.is_ascii_digit()));
+
+        // After the transformation, `exponent` is the number of digits that
+        // belong to the integer result.
+        if exponent >= 0 && exponent < s.len() as i32 {
+            // The first `exponent` digits are before the decimal point.  We
+            // need to round off there.
+            let exp = exponent as usize;
+
+            // Increments the decimal number in `digits[..position]` by one.
+            // Returns false if every digit was a `9`, in which case the
+            // digits are now all `0` and the carry out was lost.
+            fn round_up(digits: &mut [u8], position: usize) -> bool {
+                for index in (0..position).rev() {
+                    match digits[index] {
+                        b'0'..=b'8' => {
+                            digits[index] += 1;
+                            return true;
+                        }
+                        b'9' => {
+                            digits[index] = b'0';
+                        }
+                        _ => unreachable!(),
+                    }
+                }
+                false
+            }
+
+            if s[exp] >= b'5' && !round_up(&mut s, exp) {
+                // The carry ran off the front (e.g. `999` -> `1000`): the
+                // result is a `1` followed by one more zero than before.
+                s.clear();
+                s.push(b'1');
+                exponent += 1;
+            }
+        }
+
+        let exponent = exponent.max(0) as usize;
+        s.truncate(exponent);
+        // NOTE(review): after this `resize`, `s.len() == exponent`, so
+        // `trailing_zeros` below is always 0 and the zeros are materialized in
+        // `s` itself.  If the intent was to avoid storing them, the `resize`
+        // looks redundant -- confirm before changing.
+        s.resize(exponent, b'0');
+        let trailing_zeros = exponent.saturating_sub(s.len());
+        Self { s, trailing_zeros }
+    }
+    /// Returns the stored digits as a string slice.
+    fn s(&self) -> &str {
+        // SAFETY: `new` only ever stores ASCII digits in `self.s`.
+        unsafe { from_utf8_unchecked(&self.s) }
+    }
+    /// Returns the total number of digits displayed, including trailing
+    /// zeros.
+    fn len(&self) -> usize {
+        self.s.len() + self.trailing_zeros
+    }
+}
+
+/// Displays the stored digits followed by the trailing zeros.
+impl Display for LegacyFormat {
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        f.write_str(self.s())?;
+        Zeros(self.trailing_zeros).fmt(f)
+    }
+}
+
+/// Displays as a run of `self.0` ASCII `0` characters.
+struct Zeros(usize);
+
+impl Display for Zeros {
+    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
+        // Zeros are emitted in chunks borrowed from this constant, so no
+        // buffer has to be allocated.
+        const CHUNK: &str = "0000000000000000000000000000000000000000";
+
+        let full_chunks = self.0 / CHUNK.len();
+        let remainder = self.0 % CHUNK.len();
+        for _ in 0..full_chunks {
+            f.write_str(CHUNK)?;
+        }
+        if remainder > 0 {
+            f.write_str(&CHUNK[..remainder])?;
+        }
+        Ok(())
+    }
+}
+
+/// Returns the low `width` bytes of `number` in big-endian order.
+///
+/// `width` must be in `0..=8`; a `width` of 0 yields an empty vector.  Bits of
+/// `number` above the low `width` bytes are discarded.
+fn integer_to_binary(number: u64, width: u16) -> SmallVec<[u8; 8]> {
+    // Slicing the tail of the big-endian bytes avoids shifting a `u64` by 64
+    // bits, which would overflow when `width` is 0.
+    let bytes = number.to_be_bytes();
+    SmallVec::from_slice(&bytes[8 - width as usize..])
+}
+
+/// Writes each byte of `bytes` to `f` as two uppercase hexadecimal digits,
+/// stopping at the first write error.
+fn output_hex(f: &mut Formatter<'_>, bytes: &[u8]) -> FmtResult {
+    bytes.iter().try_for_each(|byte| write!(f, "{byte:02X}"))
+}
+
+/// Tries to reserve `want` more units out of `capacity`, of which `*used` are
+/// already taken.
+///
+/// On success, adds `want` to `*used` and returns true.  If the reservation
+/// would exceed `capacity`, leaves `*used` unchanged and returns false.
+fn allocate_space(want: usize, capacity: usize, used: &mut usize) -> bool {
+    let total = *used + want;
+    let fits = total <= capacity;
+    if fits {
+        *used = total;
+    }
+    fits
+}
+
+/// A representation of a number that can be quickly rounded to any desired
+/// number of decimal places (up to a specified maximum).
+#[derive(Debug)]
+struct Rounder {
+    /// Magnitude of number with excess precision, as ASCII decimal text
+    /// (without sign).
+    string: SmallString<[u8; 40]>,
+
+    /// Number of digits before decimal point in `string`.
+    integer_digits: usize,
+
+    /// Number of `9`s or `.`s at start of string.  (The decimal point is
+    /// counted, so this is directly comparable to a width in characters.)
+    leading_nines: usize,
+
+    /// Number of `0`s or `.`s at start of string.  (The decimal point is
+    /// counted, so this is directly comparable to a width in characters.)
+    leading_zeros: usize,
+
+    /// Is the number negative?
+    negative: bool,
+}
+
+impl Rounder {
+    /// Prepares `number` for rounding to any number of decimal places from 0
+    /// to `max_decimals`, which must be in `0..=16`.
+    ///
+    /// If `style.leading_zero` is false, a leading `0` digit is dropped from
+    /// the stored representation.
+    fn new(style: &NumberStyle, number: f64, max_decimals: u8) -> Self {
+        debug_assert!(number.abs() < 1e41);
+        debug_assert!((0..=16).contains(&max_decimals));
+
+        let mut string = SmallString::new();
+        if max_decimals == 0 {
+            // Fast path.  No rounding needed.
+            //
+            // We append `.00` to the integer representation because
+            // [Self::should_round_up] assumes that fractional digits are
+            // present.
+            write!(&mut string, "{:.0}.00", number.round().abs()).unwrap()
+        } else {
+            // Slow path.
+            //
+            // This is more difficult than it really should be because we have
+            // to make sure that numbers that are exactly halfway between two
+            // representations are always rounded away from zero.  This is not
+            // what format! normally does (usually it rounds to even), so we
+            // have to fake it as best we can, by formatting with extra
+            // precision and then doing the rounding ourselves.
+            //
+            // We take up to two rounds to format numbers.  In the first round,
+            // we obtain 2 digits of precision beyond those requested by the
+            // user.  If those digits are exactly "50", then in a second round
+            // we format with as many digits as are significant in a "double".
+            //
+            // It might be better to directly implement our own floating-point
+            // formatting routine instead of relying on the system's sprintf
+            // implementation.  But the classic Steele and White paper on
+            // printing floating-point numbers does not hint how to do what we
+            // want, and it's not obvious how to change their algorithms to do
+            // so.  It would also be a lot of work.
+            write!(
+                &mut string,
+                "{:.*}",
+                max_decimals as usize + 2,
+                number.abs()
+            )
+            .unwrap();
+            if string.ends_with("50") {
+                let (_sig, binary_exponent) = frexp(number);
+                let decimal_exponent = binary_exponent * 3 / 10;
+                let format_decimals = (f64::DIGITS as i32 + 1) - decimal_exponent;
+                if format_decimals > max_decimals as i32 + 2 {
+                    string.clear();
+                    write!(&mut string, "{:.*}", format_decimals as usize, number.abs()).unwrap();
+                }
+            }
+        };
+
+        if !style.leading_zero && string.starts_with("0") {
+            string.remove(0);
+        }
+        let leading_zeros = string
+            .bytes()
+            .take_while(|c| *c == b'0' || *c == b'.')
+            .count();
+        let leading_nines = string
+            .bytes()
+            .take_while(|c| *c == b'9' || *c == b'.')
+            .count();
+        let integer_digits = string.bytes().take_while(u8::is_ascii_digit).count();
+        let negative = number.is_sign_negative();
+        Self {
+            string,
+            integer_digits,
+            leading_nines,
+            leading_zeros,
+            negative,
+        }
+    }
+
+    /// Returns a [RounderWidth] for formatting the magnitude to `decimals`
+    /// decimal places. `decimals` must be in `0..=16`.
+    fn width(&self, decimals: usize) -> RounderWidth {
+        // Calculate base measures.
+        let mut width = self.integer_digits;
+        if decimals > 0 {
+            width += decimals + 1;
+        }
+        let mut integer_digits = self.integer_digits;
+        let mut negative = self.negative;
+
+        // Rounding can cause adjustments.
+        if self.should_round_up(decimals) {
+            // Rounding up leading `9s` adds a new digit (a `1`).
+            if self.leading_nines >= width {
+                width += 1;
+                integer_digits += 1;
+            }
+        } else {
+            // Rounding down.
+            if self.leading_zeros >= width {
+                // All digits that remain after rounding are zeros.  Therefore
+                // we drop the negative sign.
+                negative = false;
+                if self.integer_digits == 0 && decimals == 0 {
+                    // No digits at all are left.  We need to display
+                    // at least a single digit (a zero).
+                    debug_assert_eq!(width, 0);
+                    width += 1;
+                    integer_digits = 1;
+                }
+            }
+        }
+        RounderWidth {
+            width,
+            integer_digits,
+            negative,
+        }
+    }
+
+    /// Returns true if the number should be rounded up when chopped off at
+    /// `decimals` decimal places, false if it should be rounded down.
+    fn should_round_up(&self, decimals: usize) -> bool {
+        // The first digit that is dropped: skip the integer digits, the
+        // decimal point, and the `decimals` digits that are kept.
+        let digit = self.string.as_bytes()[self.integer_digits + decimals + 1];
+        debug_assert!(digit.is_ascii_digit());
+        digit >= b'5'
+    }
+
+    /// Formats the number, rounding to `decimals` decimal places.  Exactly as
+    /// many characters as indicated by [Self::width(decimals)] are written.
+    fn format(&self, decimals: usize) -> SmallString<[u8; 40]> {
+        let mut output = SmallString::new();
+        let mut base_width = self.integer_digits;
+        if decimals > 0 {
+            base_width += decimals + 1;
+        }
+
+        if self.should_round_up(decimals) {
+            if self.leading_nines < base_width {
+                // Rounding up.  This is the common case where rounding up
+                // doesn't add an extra digit.
+                output.push_str(&self.string[..base_width]);
+
+                // SAFETY: This loop only changes ASCII characters to other
+                // ASCII characters.
+                unsafe {
+                    for c in output.as_bytes_mut().iter_mut().rev() {
+                        match *c {
+                            b'9' => *c = b'0',
+                            b'0'..=b'8' => {
+                                *c += 1;
+                                break;
+                            }
+                            b'.' => (),
+                            _ => unreachable!(),
+                        }
+                    }
+                }
+            } else {
+                // Rounding up leading 9s causes the result to be a 1 followed
+                // by a number of 0s, plus a decimal point.
+                output.push('1');
+                for _ in 0..self.integer_digits {
+                    output.push('0');
+                }
+                if decimals > 0 {
+                    output.push('.');
+                    for _ in 0..decimals {
+                        output.push('0');
+                    }
+                }
+                debug_assert_eq!(output.len(), base_width + 1);
+            }
+        } else {
+            // Rounding down.
+            if self.integer_digits != 0 || decimals != 0 {
+                // Common case: just copy the digits that are kept, dropping
+                // the excess precision so that the output has exactly the
+                // promised width.
+                output.push_str(&self.string[..base_width]);
+            } else {
+                // No digits remain.  The output is just a zero.
+                output.push('0');
+            }
+        }
+        output
+    }
+}
+
+/// Measurements returned by [Rounder::width] for formatting a number to a
+/// particular number of decimal places.
+struct RounderWidth {
+    /// Number of characters required to format the number to a specified number
+    /// of decimal places.  This includes integer digits and a decimal point and
+    /// fractional digits, if any, but it does not include any negative prefix
+    /// or suffix or other affixes.
+    width: usize,
+
+    /// Number of digits before the decimal point, between 0 and 40.
+    integer_digits: usize,
+
+    /// True if the number is negative and its rounded representation would
+    /// include at least one nonzero digit.
+    negative: bool,
+}
+
+/// Returns `10^x`.
+///
+/// Exponents up to 40 come from a table of literals; larger ones are computed
+/// with `powi`.
+fn power10(x: usize) -> f64 {
+    const POWERS: [f64; 41] = [
+        1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
+        1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31,
+        1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40,
+    ];
+    match POWERS.get(x) {
+        Some(&power) => power,
+        None => 10.0_f64.powi(x as i32),
+    }
+}
+
+/// Returns `256^x`.
+///
+/// Exponents up to 8 come from a table of literals; larger ones are computed
+/// with `powi`.
+fn power256(x: u16) -> f64 {
+    const POWERS: [f64; 9] = [
+        1.0,
+        256.0,
+        65536.0,
+        16777216.0,
+        4294967296.0,
+        1099511627776.0,
+        281474976710656.0,
+        72057594037927936.0,
+        18446744073709551616.0,
+    ];
+    match POWERS.get(x as usize) {
+        Some(&power) => power,
+        None => 256.0_f64.powi(x as i32),
+    }
+}
+
+/// Replaces the first `.` in `s`, if there is one, with `,`.
+fn fix_decimal_point<A>(s: &mut SmallString<A>)
+where
+    A: Array<Item = u8>,
+{
+    // SAFETY: The only modification is replacing one ASCII byte (`.`) by
+    // another ASCII byte (`,`), which keeps the string valid UTF-8.
+    unsafe {
+        for byte in s.as_bytes_mut() {
+            if *byte == b'.' {
+                *byte = b',';
+                break;
+            }
+        }
+    }
+}
+
+/// Returns the low `n` bytes of `value` in the byte order given by `endian`.
+///
+/// `n` must be in `0..=8`; an `n` of 0 yields an empty vector.  Bits of `value`
+/// above the low `n` bytes are discarded.
+pub fn endian_to_smallvec<const N: usize>(
+    endian: Endian,
+    value: u64,
+    n: usize,
+) -> SmallVec<[u8; N]> {
+    debug_assert!(n <= 8);
+
+    // Take the tail of the big-endian representation rather than shifting, so
+    // that `n == 0` does not shift a `u64` by 64 bits (an overflow).
+    let bytes = value.to_be_bytes();
+    let mut vec = SmallVec::new();
+    vec.extend_from_slice(&bytes[8 - n..]);
+    if endian == Endian::Little {
+        vec.reverse();
+    }
+    vec
+}
diff --git a/rust/pspp/src/format/display/mod.rs b/rust/pspp/src/format/display/mod.rs

deleted file mode 100644 (file)

index 5b3bbe2..0000000
--- a/rust/pspp/src/format/display/mod.rs
+++ /dev/null
@@ -1,1197 +0,0 @@
-// PSPP - a program for statistical analysis.
-// Copyright (C) 2025 Free Software Foundation, Inc.
-//
-// This program is free software: you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free Software
-// Foundation, either version 3 of the License, or (at your option) any later
-// version.
-//
-// This program is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
-// details.
-//
-// You should have received a copy of the GNU General Public License along with
-// this program.  If not, see <http://www.gnu.org/licenses/>.
-
-use std::{
-    cmp::min,
-    fmt::{Display, Error as FmtError, Formatter, Result as FmtResult, Write as _},
-    io::{Error as IoError, Write as IoWrite},
-    str::from_utf8_unchecked,
-};
-
-use binrw::Endian;
-use chrono::{Datelike, NaiveDate};
-use encoding_rs::{Encoding, UTF_8};
-use libm::frexp;
-use smallstr::SmallString;
-use smallvec::{Array, SmallVec};
-
-use crate::{
-    calendar::{calendar_offset_to_gregorian, day_of_year, month_name, short_month_name},
-    data::{ByteStr, Datum, EncodedString, QuotedDatum, WithEncoding},
-    endian::ToBytes,
-    format::{Category, DateTemplate, Decimal, Format, NumberStyle, Settings, TemplateItem, Type},
-    settings::{EndianSettings, Settings as PsppSettings},
-    util::ToSmallString,
-};
-
-pub struct DisplayDatum<'b, B> {
-    format: Format,
-    settings: &'b Settings,
-    endian: EndianSettings,
-    datum: Datum<B>,
-
-    /// If true, the output will remove leading and trailing spaces from numeric
-    /// values, and trailing spaces from string values.  (This might make the
-    /// output narrower than the requested width.)
-    trim_spaces: bool,
-
-    /// If true, the output will include a double quote before and after string
-    /// values.
-    quote_strings: bool,
-}
-
-#[cfg(test)]
-mod test;
-
-pub trait DisplayPlain {
-    fn display_plain(&self) -> DisplayPlainF64;
-}
-
-impl DisplayPlain for f64 {
-    fn display_plain(&self) -> DisplayPlainF64 {
-        DisplayPlainF64 {
-            value: *self,
-            decimal: '.',
-        }
-    }
-}
-
-pub struct DisplayPlainF64 {
-    pub value: f64,
-    pub decimal: char,
-}
-
-impl DisplayPlainF64 {
-    pub fn with_decimal(self, decimal: char) -> Self {
-        Self { decimal, ..self }
-    }
-}
-
-impl Display for DisplayPlainF64 {
-    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
-        struct Inner(f64);
-
-        impl Display for Inner {
-            fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
-                let value = self.0;
-                if (value.abs() < 0.0005 && value != 0.0) || value.abs() > 1e15 {
-                    // Print 0s that would otherwise have lots of leading or
-                    // trailing zeros in scientific notation with full precision.
-                    write!(f, "{value:.e}")
-                } else if value == value.trunc() {
-                    // Print integers without decimal places.
-                    write!(f, "{value:.0}")
-                } else {
-                    // Print other numbers with full precision.
-                    write!(f, "{value:.}")
-                }
-            }
-        }
-
-        match self.decimal {
-            '.' => write!(f, "{}", Inner(self.value)),
-            _ => {
-                let tmp = Inner(self.value).to_small_string::<64>();
-                if let Some(position) = tmp.find('.') {
-                    f.write_str(&tmp[..position])?;
-                    f.write_char(self.decimal)?;
-                    f.write_str(&tmp[position + 1..])
-                } else {
-                    f.write_str(&tmp)
-                }
-            }
-        }
-    }
-}
-
-impl<'a, D> Datum<D>
-where
-    D: EncodedString,
-{
-    /// Returns an object that implements [Display] for printing this [Datum] as
-    /// `format`.
-    ///
-    /// [Display]: std::fmt::Display
-    pub fn display(&'a self, format: Format) -> DisplayDatum<'a, WithEncoding<&'a ByteStr>> {
-        DisplayDatum::new(format, self.as_borrowed())
-    }
-
-    pub fn display_plain(&self) -> QuotedDatum<'_, D> {
-        self.quoted()
-    }
-}
-
-impl<'b, B> Display for DisplayDatum<'b, B>
-where
-    B: EncodedString,
-{
-    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
-        let number = match &self.datum {
-            Datum::Number(number) => *number,
-            Datum::String(string) => {
-                if self.format.type_() == Type::AHex {
-                    for byte in string.raw_string_bytes() {
-                        write!(f, "{byte:02x}")?;
-                    }
-                } else {
-                    let quote = if self.quote_strings { "\"" } else { "" };
-                    let s = string.as_str();
-                    let s = if self.trim_spaces {
-                        s.trim_end_matches(' ')
-                    } else {
-                        &s
-                    };
-                    write!(f, "{quote}{s}{quote}")?;
-                }
-                return Ok(());
-            }
-        };
-
-        let Some(number) = number else {
-            return self.missing(f);
-        };
-
-        match self.format.type_() {
-            Type::F
-            | Type::Comma
-            | Type::Dot
-            | Type::Dollar
-            | Type::Pct
-            | Type::E
-            | Type::CC(_) => self.number(f, number),
-            Type::N => self.n(f, number),
-            Type::Z => self.z(f, number),
-
-            Type::P | Type::PK | Type::IB | Type::PIB | Type::RB => self.fmt_binary(f),
-
-            Type::PIBHex => self.pibhex(f, number),
-            Type::RBHex => self.rbhex(f, number),
-            Type::Date
-            | Type::ADate
-            | Type::EDate
-            | Type::JDate
-            | Type::SDate
-            | Type::QYr
-            | Type::MoYr
-            | Type::WkYr
-            | Type::DateTime
-            | Type::YmdHms
-            | Type::MTime
-            | Type::Time
-            | Type::DTime
-            | Type::WkDay => self.date(f, number),
-            Type::Month => self.month(f, number),
-            Type::A | Type::AHex => unreachable!(),
-        }
-    }
-}
-
-impl<'b, B> DisplayDatum<'b, B>
-where
-    B: EncodedString,
-{
-    pub fn new(format: Format, datum: Datum<B>) -> Self {
-        let settings = PsppSettings::global();
-        Self {
-            format,
-            datum,
-            settings: &settings.formats,
-            endian: settings.endian,
-            trim_spaces: false,
-            quote_strings: false,
-        }
-    }
-    pub fn with_settings(self, settings: &'b Settings) -> Self {
-        Self { settings, ..self }
-    }
-    pub fn with_endian(self, endian: EndianSettings) -> Self {
-        Self { endian, ..self }
-    }
-    pub fn with_trimming(self) -> Self {
-        Self {
-            trim_spaces: true,
-            ..self
-        }
-    }
-    pub fn with_quoted_string(self) -> Self {
-        Self {
-            quote_strings: true,
-            ..self
-        }
-    }
-    fn fmt_binary(&self, f: &mut Formatter) -> FmtResult {
-        let output = self.to_binary().unwrap();
-        for b in output {
-            f.write_char(b as char)?;
-        }
-        Ok(())
-    }
-    fn number(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
-        if number.is_finite() {
-            let style = self.settings.number_style(self.format.type_);
-            if self.format.type_ != Type::E && number.abs() < 1.5 * power10(self.format.w()) {
-                let rounder = Rounder::new(style, number, self.format.d);
-                if self.decimal(f, &rounder, style, true)?
-                    || self.scientific(f, number, style, true)?
-                    || self.decimal(f, &rounder, style, false)?
-                {
-                    return Ok(());
-                }
-            }
-
-            if !self.scientific(f, number, style, false)? {
-                self.overflow(f)?;
-            }
-            Ok(())
-        } else {
-            self.infinite(f, number)
-        }
-    }
-
-    fn infinite(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
-        if self.format.w >= 3 {
-            let s = if number.is_nan() {
-                "NaN"
-            } else if number.is_infinite() {
-                if number.is_sign_positive() {
-                    "+Infinity"
-                } else {
-                    "-Infinity"
-                }
-            } else {
-                "Unknown"
-            };
-            let w = if self.trim_spaces { 0 } else { self.format.w() };
-            write!(f, "{s:>w$.w$}")
-        } else {
-            self.overflow(f)
-        }
-    }
-
-    fn missing(&self, f: &mut Formatter<'_>) -> FmtResult {
-        match self.format.type_ {
-            Type::P | Type::PK | Type::IB | Type::PIB | Type::RB => return self.fmt_binary(f),
-            Type::RBHex => return self.rbhex(f, -f64::MAX),
-            _ => (),
-        }
-
-        if self.trim_spaces {
-            return write!(f, ".");
-        }
-
-        let w = self.format.w() as isize;
-        let d = self.format.d() as isize;
-        let dot_position = match self.format.type_ {
-            Type::N => w - 1,
-            Type::Pct => w - d - 2,
-            Type::E => w - d - 5,
-            _ => w - d - 1,
-        };
-        let dot_position = dot_position.max(0) as u16;
-
-        for i in 0..self.format.w {
-            if i == dot_position {
-                write!(f, ".")?;
-            } else {
-                write!(f, " ")?;
-            }
-        }
-        Ok(())
-    }
-
-    fn overflow(&self, f: &mut Formatter<'_>) -> FmtResult {
-        if self.trim_spaces {
-            write!(f, "*")?;
-        } else {
-            for _ in 0..self.format.w {
-                write!(f, "*")?;
-            }
-        }
-        Ok(())
-    }
-
-    fn decimal(
-        &self,
-        f: &mut Formatter<'_>,
-        rounder: &Rounder,
-        style: &NumberStyle,
-        require_affixes: bool,
-    ) -> Result<bool, FmtError> {
-        for decimals in (0..=self.format.d).rev() {
-            // Make sure there's room for the number's magnitude, plus the
-            // negative suffix, plus (if negative) the negative prefix.
-            let RounderWidth {
-                mut width,
-                integer_digits,
-                negative,
-            } = rounder.width(decimals as usize);
-            width += style.neg_suffix.width;
-            if negative {
-                width += style.neg_prefix.width;
-            }
-            if width > self.format.w() {
-                continue;
-            }
-
-            // If there's room for the prefix and suffix, allocate
-            // space.  If the affixes are required, but there's no
-            // space, give up.
-            let add_affixes = allocate_space(style.affix_width(), self.format.w(), &mut width);
-            if !add_affixes && require_affixes {
-                continue;
-            }
-
-            // Check whether we should include grouping characters.  We need
-            // room for a complete set or we don't insert any at all.  We don't
-            // include grouping characters if decimal places were requested but
-            // they were all dropped.
-            let grouping = style.grouping.filter(|_| {
-                integer_digits > 3
-                    && (self.format.d == 0 || decimals > 0)
-                    && allocate_space((integer_digits - 1) / 3, self.format.w(), &mut width)
-            });
-
-            // Assemble number.
-            let magnitude = rounder.format(decimals as usize);
-            let mut output = SmallString::<[u8; 40]>::new();
-            if !self.trim_spaces {
-                for _ in width..self.format.w() {
-                    output.push(' ');
-                }
-            }
-            if negative {
-                output.push_str(&style.neg_prefix.s);
-            }
-            if add_affixes {
-                output.push_str(&style.prefix.s);
-            }
-            if let Some(grouping) = grouping {
-                for (i, digit) in magnitude[..integer_digits].bytes().enumerate() {
-                    if i > 0 && (integer_digits - i) % 3 == 0 {
-                        output.push(grouping.into());
-                    }
-                    output.push(digit as char);
-                }
-            } else {
-                output.push_str(&magnitude[..integer_digits]);
-            }
-            if decimals > 0 {
-                output.push(style.decimal.into());
-                let s = &magnitude[integer_digits + 1..];
-                output.push_str(&s[..decimals as usize]);
-            }
-            if add_affixes {
-                output.push_str(&style.suffix.s);
-            }
-            if negative {
-                output.push_str(&style.neg_suffix.s);
-            } else {
-                for _ in 0..style.neg_suffix.width {
-                    output.push(' ');
-                }
-            }
-
-            debug_assert!(self.trim_spaces || output.len() >= self.format.w());
-            debug_assert!(output.len() <= self.format.w() + style.extra_bytes);
-            f.write_str(&output)?;
-            return Ok(true);
-        }
-        Ok(false)
-    }
-
-    fn scientific(
-        &self,
-        f: &mut Formatter<'_>,
-        number: f64,
-        style: &NumberStyle,
-        require_affixes: bool,
-    ) -> Result<bool, FmtError> {
-        // Allocate minimum required space.
-        let mut width = 6 + style.neg_suffix.width;
-        if number < 0.0 {
-            width += style.neg_prefix.width;
-        }
-        if width > self.format.w() {
-            return Ok(false);
-        }
-
-        // Check for room for prefix and suffix.
-        let add_affixes = allocate_space(style.affix_width(), self.format.w(), &mut width);
-        if require_affixes && !add_affixes {
-            return Ok(false);
-        }
-
-        // Figure out number of characters we can use for the fraction, if any.
-        // (If that turns out to be `1`, then we'll output a decimal point
-        // without any digits following.)
-        let mut fraction_width = min(self.format.d as usize + 1, self.format.w() - width).min(16);
-        if self.format.type_ != Type::E && fraction_width == 1 {
-            fraction_width = 0;
-        }
-        width += fraction_width;
-
-        let mut output = SmallString::<[u8; 40]>::new();
-        if !self.trim_spaces {
-            for _ in width..self.format.w() {
-                output.push(' ');
-            }
-        }
-        if number < 0.0 {
-            output.push_str(&style.neg_prefix.s);
-        }
-        if add_affixes {
-            output.push_str(&style.prefix.s);
-        }
-        write!(
-            &mut output,
-            "{:.*E}",
-            fraction_width.saturating_sub(1),
-            number.abs()
-        )
-        .unwrap();
-        if fraction_width == 1 {
-            // Insert `.` before the `E`, to get a value like "1.E+000".
-            output.insert(output.find('E').unwrap(), '.');
-        }
-
-        // Rust always uses `.` as the decimal point. Translate to `,` if
-        // necessary.
-        if style.decimal == Decimal::Comma {
-            fix_decimal_point(&mut output);
-        }
-
-        // Make exponent have exactly three digits, plus sign.
-        let e = output.as_bytes().iter().position(|c| *c == b'E').unwrap();
-        let exponent: isize = output[e + 1..].parse().unwrap();
-        if exponent.abs() > 999 {
-            return Ok(false);
-        }
-        output.truncate(e + 1);
-        write!(&mut output, "{exponent:+04}").unwrap();
-
-        // Add suffixes.
-        if add_affixes {
-            output.push_str(&style.suffix.s);
-        }
-        if number.is_sign_negative() {
-            output.push_str(&style.neg_suffix.s);
-        } else {
-            for _ in 0..style.neg_suffix.width {
-                output.push(' ');
-            }
-        }
-
-        println!(
-            "{} for {number} width={width} fraction_width={fraction_width}: {output:?}",
-            self.format
-        );
-        debug_assert!(self.trim_spaces || output.len() >= self.format.w());
-        debug_assert!(output.len() <= self.format.w() + style.extra_bytes);
-        f.write_str(&output)?;
-        Ok(true)
-    }
-
-    fn n(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
-        if number < 0.0 {
-            return self.missing(f);
-        }
-
-        let legacy = LegacyFormat::new(number, self.format.d());
-        let w = self.format.w();
-        let len = legacy.len();
-        if len > w {
-            self.overflow(f)
-        } else {
-            write!(f, "{}{legacy}", Zeros(w.saturating_sub(len)))
-        }
-    }
-
-    fn z(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
-        let legacy = LegacyFormat::new(number, self.format.d());
-        let w = self.format.w();
-        let len = legacy.len();
-        if len > w {
-            self.overflow(f)
-        } else {
-            let mut s = legacy.to_small_string::<40>();
-            if number < 0.0 {
-                if let Some(last) = s.pop() {
-                    let last = last.to_digit(10).unwrap();
-                    s.push(b"}JKLMNOPQR"[last as usize] as char);
-                }
-            }
-            write!(f, "{}{s}", Zeros(w.saturating_sub(len)))
-        }
-    }
-
-    fn pibhex(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
-        if number < 0.0 {
-            self.overflow(f)
-        } else {
-            let number = number.round();
-            if number >= power256(self.format.w / 2) {
-                self.overflow(f)
-            } else {
-                let binary = integer_to_binary(number as u64, self.format.w / 2);
-                output_hex(f, &binary)
-            }
-        }
-    }
-
-    fn rbhex(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
-        let rb = self.rb(Some(number), self.format.w() / 2);
-        output_hex(f, &rb)
-    }
-
-    fn date(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
-        const MINUTE: f64 = 60.0;
-        const HOUR: f64 = 60.0 * 60.0;
-        const DAY: f64 = 60.0 * 60.0 * 24.0;
-
-        let (date, mut time) = match self.format.type_.category() {
-            Category::Date => {
-                if number < 0.0 {
-                    return self.missing(f);
-                }
-                let Some(date) = calendar_offset_to_gregorian(number / DAY) else {
-                    return self.missing(f);
-                };
-                (date, number % DAY)
-            }
-            Category::Time => (NaiveDate::MIN, number),
-            _ => unreachable!(),
-        };
-
-        let mut output = SmallString::<[u8; 40]>::new();
-        for TemplateItem { c, n } in DateTemplate::for_format(self.format).unwrap() {
-            match c {
-                'd' if n < 3 => write!(&mut output, "{:02}", date.day()).unwrap(),
-                'd' => write!(&mut output, "{:03}", day_of_year(date).unwrap_or(1)).unwrap(),
-                'm' if n < 3 => write!(&mut output, "{:02}", date.month()).unwrap(),
-                'm' => write!(&mut output, "{}", short_month_name(date.month()).unwrap()).unwrap(),
-                'y' if n >= 4 => {
-                    let year = date.year();
-                    if year <= 9999 {
-                        write!(&mut output, "{year:04}").unwrap();
-                    } else if self.format.type_ == Type::DateTime
-                        || self.format.type_ == Type::YmdHms
-                    {
-                        write!(&mut output, "****").unwrap();
-                    } else {
-                        return self.overflow(f);
-                    }
-                }
-                'y' => {
-                    let epoch = self.settings.epoch.0;
-                    let offset = date.year() - epoch;
-                    if !(0..=99).contains(&offset) {
-                        return self.overflow(f);
-                    }
-                    write!(&mut output, "{:02}", date.year().abs() % 100).unwrap();
-                }
-                'q' => write!(&mut output, "{}", date.month0() / 3 + 1).unwrap(),
-                'w' => write!(
-                    &mut output,
-                    "{:2}",
-                    (day_of_year(date).unwrap_or(1) - 1) / 7 + 1
-                )
-                .unwrap(),
-                'D' => {
-                    if time < 0.0 {
-                        output.push('-');
-                    }
-                    time = time.abs();
-                    write!(&mut output, "{:1$.0}", (time / DAY).floor(), n).unwrap();
-                    time %= DAY;
-                }
-                'H' => {
-                    if time < 0.0 {
-                        output.push('-');
-                    }
-                    time = time.abs();
-                    write!(&mut output, "{:01$.0}", (time / HOUR).floor(), n).unwrap();
-                    time %= HOUR;
-                }
-                'M' => {
-                    if time < 0.0 {
-                        output.push('-');
-                    }
-                    time = time.abs();
-                    write!(&mut output, "{:02.0}", (time / MINUTE).floor()).unwrap();
-                    time %= MINUTE;
-
-                    let excess_width = self.format.w() as isize - output.len() as isize;
-                    if excess_width < 0 || (self.format.type_ == Type::MTime && excess_width < 3) {
-                        return self.overflow(f);
-                    }
-                    if excess_width == 3
-                        || excess_width == 4
-                        || (excess_width >= 5 && self.format.d == 0)
-                    {
-                        write!(&mut output, ":{:02.0}", time.floor()).unwrap();
-                    } else if excess_width >= 5 {
-                        let d = min(self.format.d(), excess_width as usize - 4);
-                        let w = d + 3;
-                        write!(&mut output, ":{time:0w$.d$}").unwrap();
-                        if self.settings.decimal == Decimal::Comma {
-                            fix_decimal_point(&mut output);
-                        }
-                    }
-                    break;
-                }
-                c if n == 1 => output.push(c),
-                _ => unreachable!(),
-            }
-        }
-        if !self.trim_spaces {
-            write!(f, "{:>1$}", &output, self.format.w())
-        } else {
-            f.write_str(&output)
-        }
-    }
-
-    fn month(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
-        if let Some(month) = month_name(number as u32) {
-            if !self.trim_spaces {
-                write!(f, "{month:.*}", self.format.w())
-            } else {
-                f.write_str(month)
-            }
-        } else {
-            self.missing(f)
-        }
-    }
-
-    /// Writes this object to `w`. Writes binary formats ([Type::P],
-    /// [Type::PIB], and so on) as binary values, and writes other output
-    /// formats in the given `encoding`.
-    ///
-    /// If `dv` is a [DisplayDatum], the difference between `write!(f, "{}",
-    /// dv)` and `dv.write(f, encoding)` is:
-    ///
-    /// * `write!` always outputs UTF-8. Binary formats are encoded as the
-    ///   Unicode characters corresponding to their bytes.
-    ///
-    /// * `dv.write` outputs the desired `encoding`. Binary formats are not
-    ///   encoded in `encoding` (and thus they might be invalid for the
-    ///   encoding).
-    pub fn write<W>(&self, mut w: W, encoding: &'static Encoding) -> Result<(), IoError>
-    where
-        W: IoWrite,
-    {
-        match self.to_binary() {
-            Some(binary) => w.write_all(&binary),
-            None if encoding == UTF_8 => {
-                write!(&mut w, "{self}")
-            }
-            None => w.write_all(&encoding.encode(&self.to_small_string::<64>()).0),
-        }
-    }
-
-    fn to_binary(&self) -> Option<SmallVec<[u8; 16]>> {
-        let number = self.datum.as_number()?;
-        match self.format.type_() {
-            Type::P => Some(self.p(number)),
-            Type::PK => Some(self.pk(number)),
-            Type::IB => Some(self.ib(number)),
-            Type::PIB => Some(self.pib(number)),
-            Type::RB => Some(self.rb(number, self.format.w())),
-            _ => None,
-        }
-    }
-
-    fn bcd(&self, number: Option<f64>, digits: usize) -> (bool, SmallVec<[u8; 16]>) {
-        let legacy = LegacyFormat::new(number.unwrap_or_default(), self.format.d());
-        let len = legacy.len();
-
-        let mut output = SmallVec::new();
-        if len > digits {
-            output.resize(digits.div_ceil(2), 0);
-            (false, output)
-        } else {
-            let mut decimal = SmallString::<[u8; 16]>::new();
-            write!(
-                &mut decimal,
-                "{}{legacy}",
-                Zeros(digits.saturating_sub(len))
-            )
-            .unwrap();
-
-            let mut src = decimal.bytes();
-            for _ in 0..digits / 2 {
-                let d0 = src.next().unwrap() - b'0';
-                let d1 = src.next().unwrap() - b'0';
-                output.push((d0 << 4) + d1);
-            }
-            if digits % 2 != 0 {
-                let d = src.next().unwrap() - b'0';
-                output.push(d << 4);
-            }
-            (true, output)
-        }
-    }
-
-    fn p(&self, number: Option<f64>) -> SmallVec<[u8; 16]> {
-        let (valid, mut output) = self.bcd(number, self.format.w() * 2 - 1);
-        if valid && number.is_some_and(|number| number < 0.0) {
-            *output.last_mut().unwrap() |= 0xd;
-        } else {
-            *output.last_mut().unwrap() |= 0xf;
-        }
-        output
-    }
-
-    fn pk(&self, number: Option<f64>) -> SmallVec<[u8; 16]> {
-        let number = match number {
-            Some(number) if number < 0.0 => None,
-            other => other,
-        };
-        let (_valid, output) = self.bcd(number, self.format.w() * 2);
-        output
-    }
-
-    fn ib(&self, number: Option<f64>) -> SmallVec<[u8; 16]> {
-        let number = number.map_or(0.0, |number| (number * power10(self.format.d())).round());
-        let number = if number >= power256(self.format.w) / 2.0 - 1.0
-            || number < -power256(self.format.w) / 2.0
-        {
-            0.0
-        } else {
-            number
-        };
-        let integer = number.abs() as u64;
-        let integer = if number < 0.0 {
-            (-(integer as i64)) as u64
-        } else {
-            integer
-        };
-        endian_to_smallvec(self.endian.output, integer, self.format.w())
-    }
-
-    fn pib(&self, number: Option<f64>) -> SmallVec<[u8; 16]> {
-        let number = number.map_or(0.0, |number| (number * power10(self.format.d())).round());
-        let number = if number >= power256(self.format.w) || number < 0.0 {
-            0.0
-        } else {
-            number
-        };
-        let integer = number.abs() as u64;
-        endian_to_smallvec(self.endian.output, integer, self.format.w())
-    }
-
-    fn rb(&self, number: Option<f64>, w: usize) -> SmallVec<[u8; 16]> {
-        let number = number.unwrap_or(-f64::MAX);
-        let bytes: [u8; 8] = self.endian.output.to_bytes(number);
-        let mut vec = SmallVec::new();
-        vec.extend_from_slice(&bytes);
-        vec.resize(w, 0);
-        vec
-    }
-}
-
-struct LegacyFormat {
-    s: SmallVec<[u8; 40]>,
-    trailing_zeros: usize,
-}
-
-impl LegacyFormat {
-    fn new(number: f64, d: usize) -> Self {
-        let mut s = SmallVec::<[u8; 40]>::new();
-        write!(&mut s, "{:E}", number.abs()).unwrap();
-        debug_assert!(s.is_ascii());
-
-        // Parse exponent.
-        //
-        // Add 1 because of the transformation we will do just below, and `d` so
-        // that we just need to round to the nearest integer.
-        let e_index = s.iter().position(|c| *c == b'E').unwrap();
-        let mut exponent = unsafe { from_utf8_unchecked(&s[e_index + 1..]) }
-            .parse::<i32>()
-            .unwrap()
-            + 1
-            + d as i32;
-
-        // Transform `1.234E56` into `1234`.
-        if e_index == 1 {
-            // No decimals, e.g. `1E4` or `0E0`.
-            s.truncate(1)
-        } else {
-            s.remove(1);
-            s.truncate(e_index - 1);
-        };
-        debug_assert!(s.iter().all(|c| c.is_ascii_digit()));
-
-        if exponent >= 0 && exponent < s.len() as i32 {
-            // The first `exponent` digits are before the decimal point.  We
-            // need to round off there.
-            let exp = exponent as usize;
-
-            fn round_up(digits: &mut [u8], position: usize) -> bool {
-                for index in (0..position).rev() {
-                    match digits[index] {
-                        b'0'..=b'8' => {
-                            digits[index] += 1;
-                            return true;
-                        }
-                        b'9' => {
-                            digits[index] = b'0';
-                        }
-                        _ => unreachable!(),
-                    }
-                }
-                false
-            }
-
-            if s[exp] >= b'5' && !round_up(&mut s, exp) {
-                s.clear();
-                s.push(b'1');
-                exponent += 1;
-            }
-        }
-
-        let exponent = exponent.max(0) as usize;
-        s.truncate(exponent);
-        s.resize(exponent, b'0');
-        let trailing_zeros = exponent.saturating_sub(s.len());
-        Self { s, trailing_zeros }
-    }
-    fn s(&self) -> &str {
-        unsafe { from_utf8_unchecked(&self.s) }
-    }
-    fn len(&self) -> usize {
-        self.s.len() + self.trailing_zeros
-    }
-}
-
-impl Display for LegacyFormat {
-    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
-        write!(f, "{}{}", self.s(), Zeros(self.trailing_zeros))
-    }
-}
-
-struct Zeros(usize);
-
-impl Display for Zeros {
-    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
-        let mut n = self.0;
-        while n > 0 {
-            static ZEROS: &str = "0000000000000000000000000000000000000000";
-            let chunk = n.min(ZEROS.len());
-            f.write_str(&ZEROS[..chunk])?;
-            n -= chunk;
-        }
-        Ok(())
-    }
-}
-
-fn integer_to_binary(number: u64, width: u16) -> SmallVec<[u8; 8]> {
-    let bytes = (number << ((8 - width) * 8)).to_be_bytes();
-    SmallVec::from_slice(&bytes[..width as usize])
-}
-
-fn output_hex(f: &mut Formatter<'_>, bytes: &[u8]) -> FmtResult {
-    for byte in bytes {
-        write!(f, "{byte:02X}")?;
-    }
-    Ok(())
-}
-
-fn allocate_space(want: usize, capacity: usize, used: &mut usize) -> bool {
-    if *used + want <= capacity {
-        *used += want;
-        true
-    } else {
-        false
-    }
-}
-
-/// A representation of a number that can be quickly rounded to any desired
-/// number of decimal places (up to a specified maximum).
-#[derive(Debug)]
-struct Rounder {
-    /// Magnitude of number with excess precision.
-    ///
-    /// This is formatted from the absolute value of the number, so the sign
-    /// is tracked separately in `negative`.
-    string: SmallString<[u8; 40]>,
-
-    /// Number of digits before decimal point.
-    ///
-    /// This is the count of ASCII digits at the start of `string`.
-    integer_digits: usize,
-
-    /// Number of `9`s or `.`s at start of string.
-    leading_nines: usize,
-
-    /// Number of `0`s or `.`s at start of string.
-    leading_zeros: usize,
-
-    /// Is the number negative?
-    negative: bool,
-}
-
-impl Rounder {
-    /// Prepares `number` for rounding to any number of decimal places from 0
-    /// up to `max_decimals`, which must be in `0..=16`.
-    ///
-    /// The magnitude of `number` must be less than `1e41`.  If
-    /// `style.leading_zero` is false, a leading `0` is dropped from the
-    /// stored digits.
-    fn new(style: &NumberStyle, number: f64, max_decimals: u8) -> Self {
-        debug_assert!(number.abs() < 1e41);
-        debug_assert!((0..=16).contains(&max_decimals));
-
-        let mut string = SmallString::new();
-        if max_decimals == 0 {
-            // Fast path.  No rounding needed.
-            //
-            // We append `.00` to the integer representation because
-            // [Self::should_round_up] assumes that fractional digits are
-            // present.
-            write!(&mut string, "{:.0}.00", number.round().abs()).unwrap()
-        } else {
-            // Slow path.
-            //
-            // This is more difficult than it really should be because we have
-            // to make sure that numbers that are exactly halfway between two
-            // representations are always rounded away from zero.  This is not
-            // what format! normally does (usually it rounds to even), so we
-            // have to fake it as best we can, by formatting with extra
-            // precision and then doing the rounding ourselves.
-            //
-            // We take up to two rounds to format numbers.  In the first round,
-            // we obtain 2 digits of precision beyond those requested by the
-            // user.  If those digits are exactly "50", then in a second round
-            // we format with as many digits as are significant in a "double".
-            //
-            // It might be better to directly implement our own floating-point
-            // formatting routine instead of relying on the standard library's
-            // implementation.  But the classic Steele and White paper on
-            // printing floating-point numbers does not hint how to do what we
-            // want, and it's not obvious how to change their algorithms to do
-            // so.  It would also be a lot of work.
-            write!(
-                &mut string,
-                "{:.*}",
-                max_decimals as usize + 2,
-                number.abs()
-            )
-            .unwrap();
-            if string.ends_with("50") {
-                let (_sig, binary_exponent) = frexp(number);
-                let decimal_exponent = binary_exponent * 3 / 10;
-                let format_decimals = (f64::DIGITS as i32 + 1) - decimal_exponent;
-                if format_decimals > max_decimals as i32 + 2 {
-                    string.clear();
-                    write!(&mut string, "{:.*}", format_decimals as usize, number.abs()).unwrap();
-                }
-            }
-        };
-
-        if !style.leading_zero && string.starts_with("0") {
-            string.remove(0);
-        }
-        let leading_zeros = string
-            .bytes()
-            .take_while(|c| *c == b'0' || *c == b'.')
-            .count();
-        let leading_nines = string
-            .bytes()
-            .take_while(|c| *c == b'9' || *c == b'.')
-            .count();
-        let integer_digits = string.bytes().take_while(u8::is_ascii_digit).count();
-        let negative = number.is_sign_negative();
-        Self {
-            string,
-            integer_digits,
-            leading_nines,
-            leading_zeros,
-            negative,
-        }
-    }
-
-    /// Returns a [RounderWidth] for formatting the magnitude to `decimals`
-    /// decimal places. `decimals` must be in `0..=16`.
-    fn width(&self, decimals: usize) -> RounderWidth {
-        // Calculate base measures.
-        let mut width = self.integer_digits;
-        if decimals > 0 {
-            width += decimals + 1;
-        }
-        let mut integer_digits = self.integer_digits;
-        let mut negative = self.negative;
-
-        // Rounding can cause adjustments.
-        if self.should_round_up(decimals) {
-            // Rounding up leading `9s` adds a new digit (a `1`).
-            if self.leading_nines >= width {
-                width += 1;
-                integer_digits += 1;
-            }
-        } else {
-            // Rounding down.
-            if self.leading_zeros >= width {
-                // All digits that remain after rounding are zeros.  Therefore
-                // we drop the negative sign.
-                negative = false;
-                if self.integer_digits == 0 && decimals == 0 {
-                    // No digits at all are left.  We need to display
-                    // at least a single digit (a zero).
-                    debug_assert_eq!(width, 0);
-                    width += 1;
-                    integer_digits = 1;
-                }
-            }
-        }
-        RounderWidth {
-            width,
-            integer_digits,
-            negative,
-        }
-    }
-
-    /// Returns true if the number should be rounded up when chopped off at
-    /// `decimals` decimal places, false if it should be rounded down.
-    fn should_round_up(&self, decimals: usize) -> bool {
-        // The digit just past the last one kept: skip the integer digits, the
-        // decimal point, and `decimals` fractional digits.
-        let digit = self.string.as_bytes()[self.integer_digits + decimals + 1];
-        debug_assert!(digit.is_ascii_digit());
-        digit >= b'5'
-    }
-
-    /// Formats the number, rounding to `decimals` decimal places.  Exactly as
-    /// many characters as indicated by [Self::width(decimals)] are written.
-    fn format(&self, decimals: usize) -> SmallString<[u8; 40]> {
-        let mut output = SmallString::new();
-        let mut base_width = self.integer_digits;
-        if decimals > 0 {
-            base_width += decimals + 1;
-        }
-
-        if self.should_round_up(decimals) {
-            if self.leading_nines < base_width {
-                // Rounding up.  This is the common case where rounding up
-                // doesn't add an extra digit.
-                output.push_str(&self.string[..base_width]);
-
-                // SAFETY: This loop only changes ASCII characters to other
-                // ASCII characters.
-                unsafe {
-                    for c in output.as_bytes_mut().iter_mut().rev() {
-                        match *c {
-                            b'9' => *c = b'0',
-                            b'0'..=b'8' => {
-                                *c += 1;
-                                break;
-                            }
-                            b'.' => (),
-                            _ => unreachable!(),
-                        }
-                    }
-                }
-            } else {
-                // Rounding up leading 9s causes the result to be a 1 followed
-                // by a number of 0s, plus a decimal point.
-                output.push('1');
-                for _ in 0..self.integer_digits {
-                    output.push('0');
-                }
-                if decimals > 0 {
-                    output.push('.');
-                    for _ in 0..decimals {
-                        output.push('0');
-                    }
-                }
-                debug_assert_eq!(output.len(), base_width + 1);
-            }
-        } else {
-            // Rounding down.
-            if self.integer_digits != 0 || decimals != 0 {
-                // Common case: just copy the digits that are kept.  The
-                // excess-precision digits past `base_width` must not be
-                // copied, or the output would be longer than
-                // [Self::width] reports.
-                output.push_str(&self.string[..base_width]);
-            } else {
-                // No digits remain.  The output is just a zero.
-                output.push('0');
-            }
-        }
-        output
-    }
-}
-
-/// Measurements of a [Rounder]'s number when rounded to a particular number
-/// of decimal places, as returned by [Rounder::width].
-struct RounderWidth {
-    /// Number of characters required to format the number to a specified number
-    /// of decimal places.  This includes integer digits and a decimal point and
-    /// fractional digits, if any, but it does not include any negative prefix
-    /// or suffix or other affixes.
-    width: usize,
-
-    /// Number of digits before the decimal point, between 0 and 40.
-    integer_digits: usize,
-
-    /// True if the number is negative and its rounded representation would
-    /// include at least one nonzero digit.
-    negative: bool,
-}
-
-/// Returns `10^x`.
-///
-/// Exponents up to 40 are looked up in a table of literals; larger ones are
-/// computed with [f64::powi].
-fn power10(x: usize) -> f64 {
-    const POWERS: [f64; 41] = [
-        1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16,
-        1e17, 1e18, 1e19, 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, 1e30, 1e31,
-        1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, 1e40,
-    ];
-    match POWERS.get(x) {
-        Some(&power) => power,
-        None => 10.0_f64.powi(x as i32),
-    }
-}
-
-/// Returns `256^x`.
-///
-/// Exponents up to 8 are looked up in a table of literals; larger ones are
-/// computed with [f64::powi].
-fn power256(x: u16) -> f64 {
-    const POWERS: [f64; 9] = [
-        1.0,
-        256.0,
-        65536.0,
-        16777216.0,
-        4294967296.0,
-        1099511627776.0,
-        281474976710656.0,
-        72057594037927936.0,
-        18446744073709551616.0,
-    ];
-    match POWERS.get(usize::from(x)) {
-        Some(&power) => power,
-        None => 256.0_f64.powi(i32::from(x)),
-    }
-}
-
-/// Replaces the first `.` in `s`, if there is one, by `,`.
-fn fix_decimal_point<A>(s: &mut SmallString<A>)
-where
-    A: Array<Item = u8>,
-{
-    // SAFETY: The only byte that can be modified is an ASCII `.`, and it is
-    // overwritten with an ASCII `,`, so `s` remains valid UTF-8.
-    let bytes = unsafe { s.as_bytes_mut() };
-    if let Some(index) = bytes.iter().position(|&c| c == b'.') {
-        bytes[index] = b',';
-    }
-}
-
-/// Returns the `n` least-significant bytes of `value`, in the byte order
-/// given by `endian`.
-///
-/// `n` must be in `0..=8`.  When `n` is 0, the result is empty.
-pub fn endian_to_smallvec<const N: usize>(
-    endian: Endian,
-    value: u64,
-    n: usize,
-) -> SmallVec<[u8; N]> {
-    debug_assert!(n <= 8);
-    // Slice the big-endian bytes instead of shifting: shifting left by
-    // `8 * (8 - n)` bits is a shift by 64 when `n` is 0, which is an overflow
-    // that panics in debug builds.
-    let bytes = value.to_be_bytes();
-    let mut vec = SmallVec::from_slice(&bytes[bytes.len() - n..]);
-    if endian == Endian::Little {
-        vec.reverse();
-    }
-    vec
-}
diff --git a/rust/pspp/src/format/mod.rs b/rust/pspp/src/format/mod.rs

deleted file mode 100644 (file)

index 43ba519..0000000
--- a/rust/pspp/src/format/mod.rs
+++ /dev/null
@@ -1,1390 +0,0 @@
-// PSPP - a program for statistical analysis.
-// Copyright (C) 2025 Free Software Foundation, Inc.
-//
-// This program is free software: you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free Software
-// Foundation, either version 3 of the License, or (at your option) any later
-// version.
-//
-// This program is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
-// details.
-//
-// You should have received a copy of the GNU General Public License along with
-// this program.  If not, see <http://www.gnu.org/licenses/>.
-
-use std::{
-    fmt::{Debug, Display, Formatter, Result as FmtResult, Write},
-    ops::{Not, RangeInclusive},
-    str::{Chars, FromStr},
-    sync::LazyLock,
-};
-
-use chrono::{Datelike, Local};
-use enum_iterator::{all, Sequence};
-use enum_map::{Enum, EnumMap};
-use serde::{Deserialize, Serialize};
-use thiserror::Error as ThisError;
-use unicode_width::UnicodeWidthStr;
-
-use crate::{
-    data::{ByteString, Datum},
-    sys::raw,
-    util::ToSmallString,
-    variable::{VarType, VarWidth},
-};
-
-mod display;
-mod parse;
-pub use display::{DisplayDatum, DisplayPlain, DisplayPlainF64};
-
-/// Errors in format types and format specifications.
-#[derive(Clone, ThisError, Debug, PartialEq, Eq)]
-pub enum Error {
-    /// A numeric format code does not correspond to any [Type].
-    #[error("Unknown format type {value}.")]
-    UnknownFormat { value: u16 },
-
-    /// The width is odd, but the format type requires an even width.
-    #[error("Output format {0} specifies width {}, but {} requires an even width.", .0.w, .0.type_)]
-    OddWidthNotAllowed(UncheckedFormat),
-
-    /// The width is outside the range that the format type allows.
-    #[error("Output format {0} specifies width {}, but {} requires a width between {} and {}.", .0.w, .0.type_, .0.type_.min_width(), .0.type_.max_width())]
-    BadWidth(UncheckedFormat),
-
-    /// Decimal places were specified for a format type that never allows
-    /// them.
-    #[error("Output format {0} specifies decimal places, but {} format does not allow any decimals.", .0.type_)]
-    DecimalsNotAllowedForFormat(UncheckedFormat),
-
-    /// Decimal places were specified, but the format type does not allow any
-    /// at the specified width.
-    #[error("Output format {0} specifies {} decimal places, but with a width of {}, {} does not allow any decimal places.", .0.d, .0.w, .0.type_)]
-    DecimalsNotAllowedForWidth(UncheckedFormat),
-
-    /// More decimal places were specified than the format type allows at the
-    /// specified width, which is `max_d`.
-    #[error("Output format {spec} specifies {} decimal places but, with a width of {}, {} allows at most {max_d} decimal places.", .spec.d, .spec.w, .spec.type_)]
-    TooManyDecimalsForWidth {
-        spec: UncheckedFormat,
-        max_d: Decimals,
-    },
-
-    /// A numeric format type was used with a string variable.
-    #[error("String variable is not compatible with numeric format {0}.")]
-    UnnamedVariableNotCompatibleWithNumericFormat(Type),
-
-    /// A string format type was used with a numeric variable.
-    #[error("Numeric variable is not compatible with string format {0}.")]
-    UnnamedVariableNotCompatibleWithStringFormat(Type),
-
-    /// The width of `bad_spec` does not match the width of the named string
-    /// variable; `good_spec` is the suggested replacement.
-    #[error("String variable {variable} with width {width} is not compatible with format {bad_spec}.  Use format {good_spec} instead.")]
-    NamedStringVariableBadSpecWidth {
-        variable: String,
-        width: Width,
-        bad_spec: Format,
-        good_spec: Format,
-    },
-
-    /// The width of `bad_spec` does not match the width of a string variable;
-    /// `good_spec` is the suggested replacement.
-    #[error("String variable with width {width} is not compatible with format {bad_spec}.  Use format {good_spec} instead.")]
-    UnnamedStringVariableBadSpecWidth {
-        width: Width,
-        bad_spec: Format,
-        good_spec: Format,
-    },
-}
-
-/// A group of related format [Type]s.
-///
-/// Use `Category::from` or [Type::category] to obtain the category of a
-/// format type.
-#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
-pub enum Category {
-    // Numeric formats.
-    Basic,
-    Custom,
-    Legacy,
-    Binary,
-    Hex,
-    Date,
-    Time,
-    DateComponent,
-
-    // String formats.
-    String,
-}
-
-/// Maps each format type to the category that it belongs to.
-impl From<Type> for Category {
-    fn from(source: Type) -> Self {
-        match source {
-            // String formats.
-            Type::A | Type::AHex => Self::String,
-
-            // Numeric formats.
-            Type::CC(_) => Self::Custom,
-            Type::N | Type::Z => Self::Legacy,
-            Type::PIBHex | Type::RBHex => Self::Hex,
-            Type::WkDay | Type::Month => Self::DateComponent,
-            Type::MTime | Type::Time | Type::DTime => Self::Time,
-            Type::P | Type::PK | Type::IB | Type::PIB | Type::RB => Self::Binary,
-            Type::F | Type::Comma | Type::Dot | Type::Dollar | Type::Pct | Type::E => Self::Basic,
-            Type::Date
-            | Type::ADate
-            | Type::EDate
-            | Type::JDate
-            | Type::SDate
-            | Type::QYr
-            | Type::MoYr
-            | Type::WkYr
-            | Type::DateTime
-            | Type::YmdHms => Self::Date,
-        }
-    }
-}
-
-/// Identifies one of the five custom currency formats, which appear in
-/// [Type] as `Type::CC`.
-#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq, Hash, Sequence, Serialize)]
-pub enum CC {
-    A,
-    B,
-    C,
-    D,
-    E,
-}
-
-impl CC {
-    /// Returns the single uppercase letter that names this custom currency
-    /// format.
-    pub fn as_string(&self) -> &'static str {
-        // Indexed by discriminant, so this must stay in declaration order.
-        const LETTERS: [&str; 5] = ["A", "B", "C", "D", "E"];
-        LETTERS[*self as usize]
-    }
-}
-
-/// Displays as the letter returned by [CC::as_string].
-impl Display for CC {
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        f.write_str(self.as_string())
-    }
-}
-
-/// A format type, that is, the kind of format without any width or decimals.
-///
-/// [Type::as_str] gives the name of each type and `Category::from` gives the
-/// group that it belongs to.
-#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, Sequence, Serialize)]
-pub enum Type {
-    // Basic numeric formats.
-    F,
-    Comma,
-    Dot,
-    Dollar,
-    Pct,
-    E,
-
-    // Custom currency formats.
-    CC(CC),
-
-    // Legacy numeric formats.
-    N,
-    Z,
-
-    // Binary and hexadecimal formats.
-    P,
-    PK,
-    IB,
-    PIB,
-    PIBHex,
-    RB,
-    RBHex,
-
-    // Time and date formats.
-    Date,
-    ADate,
-    EDate,
-    JDate,
-    SDate,
-    QYr,
-    MoYr,
-    WkYr,
-    DateTime,
-    YmdHms,
-    MTime,
-    Time,
-    DTime,
-
-    // Date component formats.
-    WkDay,
-    Month,
-
-    // String formats.
-    A,
-    AHex,
-}
-
-/// The width of a format.
-pub type Width = u16;
-/// Signed counterpart of [Width], used for arithmetic on widths whose
-/// intermediate results can be negative.
-pub type SignedWidth = i16;
-
-/// The number of decimal places in a format.
-pub type Decimals = u8;
-
-impl Type {
-    /// Returns the greatest width allowed for this format type.
-    pub fn max_width(self) -> Width {
-        match self {
-            Self::A => 32767,
-            Self::AHex => 32767 * 2,
-            Self::IB | Self::PIB | Self::RB => 8,
-            Self::P | Self::PK | Self::PIBHex | Self::RBHex => 16,
-            _ => 40,
-        }
-    }
-
-    /// Returns the least width allowed for this format type.
-    pub fn min_width(self) -> Width {
-        match self {
-            Self::F
-            | Self::Comma
-            | Self::Dot
-            | Self::N
-            | Self::Z
-            | Self::P
-            | Self::PK
-            | Self::IB
-            | Self::PIB
-            | Self::A => 1,
-            Self::Dollar
-            | Self::Pct
-            | Self::CC(_)
-            | Self::PIBHex
-            | Self::RB
-            | Self::WkDay
-            | Self::AHex => 2,
-            Self::Month => 3,
-            Self::RBHex => 4,
-            Self::JDate | Self::MTime | Self::Time => 5,
-            Self::E | Self::QYr | Self::MoYr => 6,
-            Self::ADate | Self::EDate | Self::SDate | Self::WkYr | Self::DTime => 8,
-            Self::Date => 9,
-            Self::YmdHms => 16,
-            Self::DateTime => 17,
-        }
-    }
-
-    /// Returns the range of widths allowed for this format type.
-    pub fn width_range(self) -> RangeInclusive<Width> {
-        let (narrowest, widest) = (self.min_width(), self.max_width());
-        narrowest..=widest
-    }
-
-    /// Returns the greatest number of decimal places allowed for this format
-    /// type at the given `width`.
-    ///
-    /// `width` is clamped to `1..=40` first and the result is never more than
-    /// 16.
-    pub fn max_decimals(self, width: Width) -> Decimals {
-        let w = width.clamp(1, 40) as SignedWidth;
-        let limit = match self {
-            Self::PIBHex
-            | Self::Date
-            | Self::ADate
-            | Self::EDate
-            | Self::JDate
-            | Self::SDate
-            | Self::QYr
-            | Self::MoYr
-            | Self::WkYr
-            | Self::WkDay
-            | Self::Month
-            | Self::A
-            | Self::AHex => 0,
-            Self::RB | Self::RBHex => 16,
-            Self::N | Self::Z => w,
-            Self::F | Self::Comma | Self::Dot | Self::CC(_) => w - 1,
-            Self::Dollar | Self::Pct => w - 2,
-            Self::E => w - 7,
-            Self::P => w * 2 - 1,
-            Self::PK => w * 2,
-            Self::IB | Self::PIB => max_digits_for_bytes(w as usize) as SignedWidth,
-            Self::DateTime => w - 21,
-            Self::YmdHms => w - 20,
-            Self::MTime => w - 6,
-            Self::Time => w - 9,
-            Self::DTime => w - 12,
-        };
-        limit.clamp(0, 16) as Decimals
-    }
-
-    /// Returns true if this format type allows decimal places at any width.
-    pub fn takes_decimals(self) -> bool {
-        self.max_decimals(Width::MAX) != 0
-    }
-
-    /// Returns the category that this format type belongs to.
-    pub fn category(self) -> Category {
-        Category::from(self)
-    }
-
-    /// Returns the number that widths for this format type must be a multiple
-    /// of: 2 for `AHex` and the [Category::Hex] formats, otherwise 1.
-    pub fn width_step(self) -> Width {
-        let hexadecimal = self == Self::AHex || self.category() == Category::Hex;
-        if hexadecimal {
-            2
-        } else {
-            1
-        }
-    }
-
-    /// Returns `width` forced into the range of widths allowed for this
-    /// format type and, if the width step is 2, rounded down to an even
-    /// number.
-    pub fn clamp_width(self, width: Width) -> Width {
-        let clamped = width.clamp(self.min_width(), self.max_width());
-        match self.width_step() {
-            2 => clamped / 2 * 2,
-            _ => clamped,
-        }
-    }
-
-    /// Returns the type of variable that this format type applies to: string
-    /// for `A` and `AHex`, numeric for everything else.
-    pub fn var_type(self) -> VarType {
-        if matches!(self, Self::A | Self::AHex) {
-            VarType::String
-        } else {
-            VarType::Numeric
-        }
-    }
-
-    /// Checks whether this format is valid for a variable with the given
-    /// `var_type`.
-    pub fn check_type_compatibility(self, var_type: VarType) -> Result<(), Error> {
-        match (self.var_type(), var_type) {
-            (VarType::String, VarType::Numeric) => {
-                Err(Error::UnnamedVariableNotCompatibleWithStringFormat(self))
-            }
-            (VarType::Numeric, VarType::String) => {
-                Err(Error::UnnamedVariableNotCompatibleWithNumericFormat(self))
-            }
-            _ => Ok(()),
-        }
-    }
-
-    /// Returns the uppercase name of this format type.
-    pub fn as_str(&self) -> &'static str {
-        match self {
-            Self::F => "F",
-            Self::Comma => "COMMA",
-            Self::Dot => "DOT",
-            Self::Dollar => "DOLLAR",
-            Self::Pct => "PCT",
-            Self::E => "E",
-            Self::CC(CC::A) => "CCA",
-            Self::CC(CC::B) => "CCB",
-            Self::CC(CC::C) => "CCC",
-            Self::CC(CC::D) => "CCD",
-            Self::CC(CC::E) => "CCE",
-            Self::N => "N",
-            Self::Z => "Z",
-            Self::P => "P",
-            Self::PK => "PK",
-            Self::IB => "IB",
-            Self::PIB => "PIB",
-            Self::PIBHex => "PIBHEX",
-            Self::RB => "RB",
-            Self::RBHex => "RBHEX",
-            Self::Date => "DATE",
-            Self::ADate => "ADATE",
-            Self::EDate => "EDATE",
-            Self::JDate => "JDATE",
-            Self::SDate => "SDATE",
-            Self::QYr => "QYR",
-            Self::MoYr => "MOYR",
-            Self::WkYr => "WKYR",
-            Self::DateTime => "DATETIME",
-            Self::YmdHms => "YMDHMS",
-            Self::MTime => "MTIME",
-            Self::Time => "TIME",
-            Self::DTime => "DTIME",
-            Self::WkDay => "WKDAY",
-            Self::Month => "MONTH",
-            Self::A => "A",
-            Self::AHex => "AHEX",
-        }
-    }
-
-    /// Returns the default value for a variable of this format's type: the
-    /// system-missing value for numeric formats, an empty string for string
-    /// formats.
-    pub fn default_value(&self) -> Datum<ByteString> {
-        match self.var_type() {
-            VarType::String => Datum::String(ByteString::default()),
-            VarType::Numeric => Datum::sysmis(),
-        }
-    }
-}
-
-/// Displays as the name returned by [Type::as_str].
-impl Display for Type {
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        f.write_str(self.as_str())
-    }
-}
-
-/// Parses the name of a format type, ignoring ASCII case.
-impl FromStr for Type {
-    type Err = ();
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        all::<Type>()
-            .find(|type_| type_.as_str().eq_ignore_ascii_case(s))
-            .ok_or(())
-    }
-}
-
-/// Looks up `bytes` in a fixed table, returning 20 for 8 or more bytes.
-///
-/// NOTE(review): presumably the result is the greatest number of decimal
-/// digits that fit in a binary integer of `bytes` bytes; confirm.
-fn max_digits_for_bytes(bytes: usize) -> usize {
-    const DIGITS: [usize; 8] = [0, 3, 5, 8, 10, 13, 15, 17];
-    DIGITS.get(bytes).copied().unwrap_or(20)
-}
-
-/// A format specification whose type is an unchecked name.
-///
-/// Parsing a string into an [AbstractFormat] checks only the syntax: the
-/// name is not checked against the known format types, and the width and
-/// decimals are not checked against what any type allows.
-#[derive(Debug, PartialEq, Eq, Hash)]
-pub struct AbstractFormat {
-    /// Name of the format type, as written.
-    pub name: String,
-    /// Width.
-    w: Width,
-    /// Number of decimal places, 0 if none were written.
-    d: Decimals,
-}
-
-/// Splits `s` into its longest prefix made up entirely of characters that
-/// satisfy `predicate`, and the remainder.
-fn split<F>(s: &str, predicate: F) -> (&str, &str)
-where
-    F: Fn(&char) -> bool,
-{
-    let end = s.find(|c: char| !predicate(&c)).unwrap_or(s.len());
-    s.split_at(end)
-}
-
-/// Parses a format specification of the form `NAMEw` or `NAMEw.d`, where
-/// `NAME` is one or more ASCII letters and `w` and `d` are decimal numbers.
-impl FromStr for AbstractFormat {
-    type Err = ();
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        let (name, rest) = split(s, char::is_ascii_alphabetic);
-        if name.is_empty() {
-            return Err(());
-        }
-
-        let (w, rest) = split(rest, char::is_ascii_digit);
-        let w: Width = w.parse().map_err(|_| ())?;
-
-        // The decimals are optional, but if there is a `.` then digits must
-        // follow it.
-        let (d, rest) = match rest.strip_prefix('.') {
-            Some(fraction) => {
-                let (d, rest) = split(fraction, char::is_ascii_digit);
-                (d.parse::<Decimals>().map_err(|_| ())?, rest)
-            }
-            None => (0, rest),
-        };
-
-        if rest.is_empty() {
-            Ok(Self {
-                name: name.into(),
-                w,
-                d,
-            })
-        } else {
-            Err(())
-        }
-    }
-}
-
-/// Resolves the name of an [AbstractFormat] into a [Type], failing if the
-/// name is not that of a known format type.
-impl TryFrom<AbstractFormat> for UncheckedFormat {
-    type Error = ();
-
-    fn try_from(value: AbstractFormat) -> Result<Self, Self::Error> {
-        let AbstractFormat { name, w, d } = value;
-        let type_: Type = name.parse()?;
-        Ok(UncheckedFormat::new(type_, w, d))
-    }
-}
-
-/// A format specification: a format type together with a width and a number
-/// of decimal places.
-///
-/// The fields are private.  Use [Format::new] or `Format::try_from` to
-/// obtain a validated [Format] from its parts.
-#[derive(Copy, Clone, PartialEq, Eq, Hash)]
-pub struct Format {
-    /// Format type.
-    type_: Type,
-    /// Width.
-    w: Width,
-    /// Number of decimal places.
-    d: Decimals,
-}
-
-/// Serializes as the string form of the format.
-impl Serialize for Format {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        let text = self.to_small_string::<16>();
-        text.serialize(serializer)
-    }
-}
-
-impl Format {
-    /// `F40.0`.
-    pub const F40: Format = Format {
-        type_: Type::F,
-        w: 40,
-        d: 0,
-    };
-
-    /// `F40.1`.
-    pub const F40_1: Format = Format {
-        type_: Type::F,
-        w: 40,
-        d: 1,
-    };
-
-    /// `F40.2`.
-    pub const F40_2: Format = Format {
-        type_: Type::F,
-        w: 40,
-        d: 2,
-    };
-
-    /// `F40.3`.
-    pub const F40_3: Format = Format {
-        type_: Type::F,
-        w: 40,
-        d: 3,
-    };
-
-    /// `PCT40.1`.
-    pub const PCT40_1: Format = Format {
-        type_: Type::Pct,
-        w: 40,
-        d: 1,
-    };
-
-    /// `F8.2`.
-    pub const F8_2: Format = Format {
-        type_: Type::F,
-        w: 8,
-        d: 2,
-    };
-
-    /// `DATETIME40.0`.
-    pub const DATETIME40_0: Format = Format {
-        type_: Type::DateTime,
-        w: 40,
-        d: 0,
-    };
-
-    /// Returns the format type.
-    pub fn type_(self) -> Type {
-        self.type_
-    }
-    /// Returns the width.
-    pub fn w(self) -> usize {
-        self.w as usize
-    }
-    /// Returns the number of decimal places.
-    pub fn d(self) -> usize {
-        self.d as usize
-    }
-
-    /// Returns a format with the given type, width, and decimals, or `None`
-    /// if that combination is not valid.
-    pub fn new(type_: Type, w: Width, d: Decimals) -> Option<Self> {
-        UncheckedFormat { type_, w, d }.try_into().ok()
-    }
-
-    /// Returns the default format for a variable with the given width:
-    /// `F8.2` for a numeric variable, or `A` with the variable's width for a
-    /// string variable.
-    pub fn default_for_width(var_width: VarWidth) -> Self {
-        match var_width {
-            VarWidth::Numeric => Self::F8_2,
-            VarWidth::String(w) => Format {
-                type_: Type::A,
-                w,
-                d: 0,
-            },
-        }
-    }
-
-    /// Returns a format based on `source`, with its width and decimals
-    /// adjusted as necessary to make them valid for its type.
-    ///
-    /// The width is forced into the range that the type allows and, for
-    /// types that require an even width, rounded down to an even number.
-    /// Then, if the type can accommodate the requested decimals at some
-    /// width, the width is increased until it does; otherwise the decimals
-    /// are reduced to what the width allows.
-    pub fn fixed_from(source: &UncheckedFormat) -> Self {
-        let UncheckedFormat {
-            type_: format,
-            w,
-            d,
-        } = *source;
-        // `clamp_width` enforces the width step as well as the range.
-        // Without it, an odd width for a hexadecimal format would yield a
-        // format that `Format::try_from` rejects.
-        let mut w = format.clamp_width(w);
-        if d <= format.max_decimals(Width::MAX) {
-            while d > format.max_decimals(w) {
-                w += 1;
-                assert!(w <= 40);
-            }
-        }
-        let d = d.clamp(0, format.max_decimals(w));
-        Self {
-            type_: format,
-            w,
-            d,
-        }
-    }
-
-    /// Returns the width of variable that this format applies to.  For an
-    /// `AHex` format, this is half the format's width.
-    pub fn var_width(self) -> VarWidth {
-        match self.type_ {
-            Type::A => VarWidth::String(self.w),
-            Type::AHex => VarWidth::String(self.w / 2),
-            _ => VarWidth::Numeric,
-        }
-    }
-
-    /// Returns the type of variable that this format applies to.
-    pub fn var_type(self) -> VarType {
-        self.type_.var_type()
-    }
-
-    /// Checks whether this format specification is valid for a variable with
-    /// width `var_width`.
-    pub fn check_width_compatibility(self, var_width: VarWidth) -> Result<Self, Error> {
-        // Verify that the format is right for the variable's type.
-        self.type_.check_type_compatibility(var_width.into())?;
-
-        if let VarWidth::String(w) = var_width {
-            if var_width != self.var_width() {
-                let bad_spec = self;
-                // Suggest the same format type with the width corrected.
-                let good_spec = if self.type_ == Type::A {
-                    Format { w, ..self }
-                } else {
-                    Format { w: w * 2, ..self }
-                };
-                return Err(Error::UnnamedStringVariableBadSpecWidth {
-                    width: w,
-                    bad_spec,
-                    good_spec,
-                });
-            }
-        }
-
-        Ok(self)
-    }
-
-    /// Returns the default value for a variable with this format: the
-    /// system-missing value for a numeric format, or a string of spaces of
-    /// the variable's width for a string format.
-    pub fn default_value(&self) -> Datum<ByteString> {
-        match self.var_width() {
-            VarWidth::Numeric => Datum::sysmis(),
-            VarWidth::String(width) => Datum::String(ByteString::spaces(width as usize)),
-        }
-    }
-
-    /// Adjusts this format to suit a variable of the given `width`.
-    ///
-    /// A string format keeps its type and takes on the new string width.  If
-    /// the variable type changes, this becomes the default format for
-    /// `width`.
-    pub fn resize(&mut self, width: VarWidth) {
-        match (self.var_width(), width) {
-            (VarWidth::Numeric, VarWidth::Numeric) => {}
-            (VarWidth::String(_), VarWidth::String(new_width)) => {
-                self.w = if self.type_ == Type::AHex {
-                    new_width * 2
-                } else {
-                    new_width
-                };
-            }
-            _ => *self = Self::default_for_width(width),
-        }
-    }
-
-    /// For a string format, changes the width to match the result of
-    /// `VarWidth::codepage_to_unicode` on the corresponding variable width.
-    /// Has no effect on a numeric format.
-    pub fn codepage_to_unicode(&mut self) {
-        let mut width = self.var_width();
-        width.codepage_to_unicode();
-        if let Some(width) = width.as_string_width() {
-            if self.type_ == Type::AHex {
-                self.w = width as u16 * 2;
-            } else {
-                self.w = width as u16;
-            }
-        }
-    }
-}
-
-/// Debug output is the same as the [Display] output.
-impl Debug for Format {
-    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
-        Display::fmt(self, f)
-    }
-}
-
-/// Displays as the type name followed by the width.  The decimals follow,
-/// after a `.`, if the type takes decimals or the decimals are nonzero.
-impl Display for Format {
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        let Self { type_, w, d } = *self;
-        if type_.takes_decimals() || d > 0 {
-            write!(f, "{type_}{w}.{d}")
-        } else {
-            write!(f, "{type_}{w}")
-        }
-    }
-}
-
-/// Validates an [UncheckedFormat], checking its width and decimals against
-/// what its type allows.
-impl TryFrom<UncheckedFormat> for Format {
-    type Error = Error;
-
-    fn try_from(source: UncheckedFormat) -> Result<Self, Self::Error> {
-        let UncheckedFormat {
-            type_: format,
-            w,
-            d,
-        } = source;
-        let max_d = format.max_decimals(w);
-        if w % format.width_step() != 0 {
-            Err(Error::OddWidthNotAllowed(source))
-        } else if !format.width_range().contains(&w) {
-            Err(Error::BadWidth(source))
-        } else if d > max_d {
-            // Choose the most specific explanation: the type never takes
-            // decimals, or it takes fewer than requested at this width, or it
-            // takes none at this width.
-            if !format.takes_decimals() {
-                Err(Error::DecimalsNotAllowedForFormat(source))
-            } else if max_d > 0 {
-                Err(Error::TooManyDecimalsForWidth {
-                    spec: source,
-                    max_d,
-                })
-            } else {
-                Err(Error::DecimalsNotAllowedForWidth(source))
-            }
-        } else {
-            Ok(Format {
-                type_: format,
-                w,
-                d,
-            })
-        }
-    }
-}
-
-/// Converts a format type into its numeric code.
-///
-/// This is the inverse of `Type::try_from`.  Codes 13, 14, 18, and 19 are
-/// not assigned to any type.
-impl From<Type> for u16 {
-    fn from(source: Type) -> Self {
-        match source {
-            Type::A => 1,
-            Type::AHex => 2,
-            Type::Comma => 3,
-            Type::Dollar => 4,
-            Type::F => 5,
-            Type::IB => 6,
-            Type::PIBHex => 7,
-            Type::P => 8,
-            Type::PIB => 9,
-            Type::PK => 10,
-            Type::RB => 11,
-            Type::RBHex => 12,
-            Type::Z => 15,
-            Type::N => 16,
-            Type::E => 17,
-            Type::Date => 20,
-            Type::Time => 21,
-            Type::DateTime => 22,
-            Type::ADate => 23,
-            Type::JDate => 24,
-            Type::DTime => 25,
-            Type::WkDay => 26,
-            Type::Month => 27,
-            Type::MoYr => 28,
-            Type::QYr => 29,
-            Type::WkYr => 30,
-            Type::Pct => 31,
-            Type::Dot => 32,
-            Type::CC(CC::A) => 33,
-            Type::CC(CC::B) => 34,
-            Type::CC(CC::C) => 35,
-            Type::CC(CC::D) => 36,
-            Type::CC(CC::E) => 37,
-            Type::EDate => 38,
-            Type::SDate => 39,
-            Type::MTime => 40,
-            Type::YmdHms => 41,
-        }
-    }
-}
-
-/// Converts a numeric code into the format type that it designates, failing
-/// with [Error::UnknownFormat] if the code is not assigned to any type.
-impl TryFrom<u16> for Type {
-    type Error = Error;
-
-    fn try_from(source: u16) -> Result<Self, Self::Error> {
-        let type_ = match source {
-            1 => Self::A,
-            2 => Self::AHex,
-            3 => Self::Comma,
-            4 => Self::Dollar,
-            5 => Self::F,
-            6 => Self::IB,
-            7 => Self::PIBHex,
-            8 => Self::P,
-            9 => Self::PIB,
-            10 => Self::PK,
-            11 => Self::RB,
-            12 => Self::RBHex,
-            15 => Self::Z,
-            16 => Self::N,
-            17 => Self::E,
-            20 => Self::Date,
-            21 => Self::Time,
-            22 => Self::DateTime,
-            23 => Self::ADate,
-            24 => Self::JDate,
-            25 => Self::DTime,
-            26 => Self::WkDay,
-            27 => Self::Month,
-            28 => Self::MoYr,
-            29 => Self::QYr,
-            30 => Self::WkYr,
-            31 => Self::Pct,
-            32 => Self::Dot,
-            33 => Self::CC(CC::A),
-            34 => Self::CC(CC::B),
-            35 => Self::CC(CC::C),
-            36 => Self::CC(CC::D),
-            37 => Self::CC(CC::E),
-            38 => Self::EDate,
-            39 => Self::SDate,
-            40 => Self::MTime,
-            41 => Self::YmdHms,
-            _ => return Err(Error::UnknownFormat { value: source }),
-        };
-        Ok(type_)
-    }
-}
-
-#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
-pub struct UncheckedFormat {
-    pub type_: Type,
-
-    pub w: Width,
-
-    pub d: Decimals,
-}
-
-impl UncheckedFormat {
-    pub fn new(type_: Type, w: Width, d: Decimals) -> Self {
-        Self { type_, w, d }
-    }
-    pub fn fix(&self) -> Format {
-        Format::fixed_from(self)
-    }
-}
-
-impl TryFrom<raw::records::RawFormat> for UncheckedFormat {
-    type Error = Error;
-
-    fn try_from(raw: raw::records::RawFormat) -> Result<Self, Self::Error> {
-        let raw = raw.0;
-        let raw_format = (raw >> 16) as u16;
-        let format = raw_format.try_into()?;
-        let w = ((raw >> 8) & 0xff) as Width;
-        let d = (raw & 0xff) as Decimals;
-        Ok(Self {
-            type_: format,
-            w,
-            d,
-        })
-    }
-}
-
-impl Display for UncheckedFormat {
-    fn fmt(&self, f: &mut Formatter) -> FmtResult {
-        write!(f, "{}{}", self.type_, self.w)?;
-        if self.type_.takes_decimals() || self.d > 0 {
-            write!(f, ".{}", self.d)?;
-        }
-        Ok(())
-    }
-}
-
-#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Enum, Serialize, Deserialize)]
-#[serde(rename_all = "snake_case")]
-pub enum Decimal {
-    #[default]
-    Dot,
-    Comma,
-}
-
-impl Decimal {
-    pub fn as_str(&self) -> &'static str {
-        match self {
-            Decimal::Dot => ".",
-            Decimal::Comma => ",",
-        }
-    }
-}
-
-impl From<Decimal> for char {
-    fn from(value: Decimal) -> Self {
-        u8::from(value).into()
-    }
-}
-
-impl From<Decimal> for u8 {
-    fn from(value: Decimal) -> Self {
-        match value {
-            Decimal::Dot => b'.',
-            Decimal::Comma => b',',
-        }
-    }
-}
-
-impl TryFrom<char> for Decimal {
-    type Error = ();
-
-    fn try_from(c: char) -> Result<Self, Self::Error> {
-        match c {
-            '.' => Ok(Self::Dot),
-            ',' => Ok(Self::Comma),
-            _ => Err(()),
-        }
-    }
-}
-
-impl Not for Decimal {
-    type Output = Self;
-
-    fn not(self) -> Self::Output {
-        match self {
-            Self::Dot => Self::Comma,
-            Self::Comma => Self::Dot,
-        }
-    }
-}
-
-#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)]
-pub struct Epoch(pub i32);
-
-impl Epoch {
-    /// Applies the epoch to `year`:
-    ///
-    /// - If `year` is 2 digits (between 0 and 99, inclusive), returns it
-    ///   converted it to the correct year considering the epoch.
-    ///
-    /// - Otherwise, returns `year` unchanged.
-    pub fn apply(&self, year: i32) -> i32 {
-        match year {
-            0..=99 => {
-                let century = self.0 / 100 * 100;
-                let offset = self.0 - century;
-                if year >= offset {
-                    year + century
-                } else {
-                    year + century + 100
-                }
-            }
-            other => other,
-        }
-    }
-}
-
-impl Default for Epoch {
-    fn default() -> Self {
-        static DEFAULT: LazyLock<Epoch> = LazyLock::new(|| Epoch(Local::now().year() - 69));
-        *DEFAULT
-    }
-}
-
-impl Display for Epoch {
-    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
-        write!(f, "{}", self.0)
-    }
-}
-
-#[derive(Clone, Debug, Default, Serialize)]
-pub struct Settings {
-    pub epoch: Epoch,
-
-    /// Either `'.'` or `','`.
-    pub decimal: Decimal,
-
-    /// Format `F`, `E`, `COMMA`, and `DOT` with leading zero (e.g. `0.5`
-    /// instead of `.5`)?
-    pub leading_zero: bool,
-
-    /// Custom currency styles.
-    pub ccs: EnumMap<CC, Option<Box<NumberStyle>>>,
-}
-
-#[derive(Copy, Clone, Enum)]
-struct StyleParams {
-    decimal: Decimal,
-    leading_zero: bool,
-}
-impl From<&Settings> for StyleParams {
-    fn from(value: &Settings) -> Self {
-        Self {
-            decimal: value.decimal,
-            leading_zero: value.leading_zero,
-        }
-    }
-}
-
-struct StyleSet(EnumMap<StyleParams, NumberStyle>);
-
-impl StyleSet {
-    fn new(f: impl Fn(StyleParams) -> NumberStyle) -> Self {
-        Self(EnumMap::from_fn(f))
-    }
-    fn get(&self, settings: &Settings) -> &NumberStyle {
-        &self.0[settings.into()]
-    }
-}
-
-impl Settings {
-    pub fn with_cc(mut self, cc: CC, style: NumberStyle) -> Self {
-        self.ccs[cc] = Some(Box::new(style));
-        self
-    }
-    pub fn with_leading_zero(self, leading_zero: bool) -> Self {
-        Self {
-            leading_zero,
-            ..self
-        }
-    }
-    pub fn with_epoch(self, epoch: Epoch) -> Self {
-        Self { epoch, ..self }
-    }
-    pub fn number_style(&self, type_: Type) -> &NumberStyle {
-        static DEFAULT: LazyLock<NumberStyle> =
-            LazyLock::new(|| NumberStyle::new("", "", Decimal::Dot, None, false));
-
-        match type_ {
-            Type::F | Type::E => {
-                static F: LazyLock<StyleSet> = LazyLock::new(|| {
-                    StyleSet::new(|p| NumberStyle::new("", "", p.decimal, None, p.leading_zero))
-                });
-                F.get(self)
-            }
-            Type::Comma => {
-                static COMMA: LazyLock<StyleSet> = LazyLock::new(|| {
-                    StyleSet::new(|p| {
-                        NumberStyle::new("", "", p.decimal, Some(!p.decimal), p.leading_zero)
-                    })
-                });
-                COMMA.get(self)
-            }
-            Type::Dot => {
-                static DOT: LazyLock<StyleSet> = LazyLock::new(|| {
-                    StyleSet::new(|p| {
-                        NumberStyle::new("", "", !p.decimal, Some(p.decimal), p.leading_zero)
-                    })
-                });
-                DOT.get(self)
-            }
-            Type::Dollar => {
-                static DOLLAR: LazyLock<StyleSet> = LazyLock::new(|| {
-                    StyleSet::new(|p| NumberStyle::new("$", "", p.decimal, Some(!p.decimal), false))
-                });
-                DOLLAR.get(self)
-            }
-            Type::Pct => {
-                static PCT: LazyLock<StyleSet> = LazyLock::new(|| {
-                    StyleSet::new(|p| NumberStyle::new("", "%", p.decimal, None, false))
-                });
-                PCT.get(self)
-            }
-            Type::CC(cc) => self.ccs[cc].as_deref().unwrap_or(&DEFAULT),
-            Type::N
-            | Type::Z
-            | Type::P
-            | Type::PK
-            | Type::IB
-            | Type::PIB
-            | Type::PIBHex
-            | Type::RB
-            | Type::RBHex
-            | Type::Date
-            | Type::ADate
-            | Type::EDate
-            | Type::JDate
-            | Type::SDate
-            | Type::QYr
-            | Type::MoYr
-            | Type::WkYr
-            | Type::DateTime
-            | Type::YmdHms
-            | Type::MTime
-            | Type::Time
-            | Type::DTime
-            | Type::WkDay
-            | Type::Month
-            | Type::A
-            | Type::AHex => &DEFAULT,
-        }
-    }
-}
-
-/// A numeric output style.  This can express numeric formats in
-/// [Category::Basic] and [Category::Custom].
-#[derive(Clone, Debug, Serialize)]
-pub struct NumberStyle {
-    pub neg_prefix: Affix,
-    pub prefix: Affix,
-    pub suffix: Affix,
-    pub neg_suffix: Affix,
-
-    /// Decimal point.
-    pub decimal: Decimal,
-
-    /// Grouping character.
-    pub grouping: Option<Decimal>,
-
-    /// Format as `.5` or `0.5`?
-    pub leading_zero: bool,
-
-    /// An `Affix` may require more bytes than its display width; for example,
-    /// U+00A5 (¥) is 2 bytes in UTF-8 but occupies only one display column.
-    /// This member is the sum of the number of bytes required by all of the
-    /// `Affix` members in this struct, minus their display widths.  Thus, it
-    /// can be used to size memory allocations: for example, the formatted
-    /// result of `CCA20.5` requires no more than `(20 + extra_bytes)` bytes in
-    /// UTF-8.
-    #[serde(skip)]
-    pub extra_bytes: usize,
-}
-
-impl Display for NumberStyle {
-    /// Display this number style in the format used for custom currency.
-    ///
-    /// This format can only accurately represent number styles that include a
-    /// grouping character.  If this number style doesn't, it will pretend that
-    /// the grouping character is the opposite of the decimal point character.
-    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
-        let grouping = char::from(!self.decimal);
-        write!(
-            f,
-            "{}{}{}{}{}{}{}",
-            self.neg_prefix.display(grouping),
-            grouping,
-            self.prefix.display(grouping),
-            grouping,
-            self.suffix.display(grouping),
-            grouping,
-            self.neg_suffix.display(grouping),
-        )
-    }
-}
-
-impl NumberStyle {
-    fn new(
-        prefix: &str,
-        suffix: &str,
-        decimal: Decimal,
-        grouping: Option<Decimal>,
-        leading_zero: bool,
-    ) -> Self {
-        // These assertions ensure that zero is correct for `extra_bytes`.
-        debug_assert!(prefix.is_ascii());
-        debug_assert!(suffix.is_ascii());
-
-        Self {
-            neg_prefix: Affix::new("-"),
-            prefix: Affix::new(prefix),
-            suffix: Affix::new(suffix),
-            neg_suffix: Affix::new(""),
-            decimal,
-            grouping,
-            leading_zero,
-            extra_bytes: 0,
-        }
-    }
-
-    fn affix_width(&self) -> usize {
-        self.prefix.width + self.suffix.width
-    }
-}
-
-#[derive(Clone, Debug, Serialize)]
-pub struct Affix {
-    /// String contents of affix.
-    pub s: String,
-
-    #[serde(skip)]
-    /// Display width in columns (see [unicode_width])
-    pub width: usize,
-}
-
-impl Affix {
-    fn new(s: impl Into<String>) -> Self {
-        let s = s.into();
-        Self {
-            width: s.width(),
-            s,
-        }
-    }
-
-    fn extra_bytes(&self) -> usize {
-        self.s.len().checked_sub(self.width).unwrap()
-    }
-
-    fn display(&self, escape: char) -> DisplayAffix<'_> {
-        DisplayAffix {
-            affix: self.s.as_str(),
-            escape,
-        }
-    }
-}
-
-pub struct DisplayAffix<'a> {
-    affix: &'a str,
-    escape: char,
-}
-
-impl Display for DisplayAffix<'_> {
-    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
-        for c in self.affix.chars() {
-            if c == self.escape {
-                f.write_char('\'')?;
-            }
-            f.write_char(c)?;
-        }
-        Ok(())
-    }
-}
-
-impl FromStr for NumberStyle {
-    type Err = ();
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        fn find_separator(s: &str) -> Option<char> {
-            // Count commas and periods.  There must be exactly three of one or
-            // the other, except that an apostrophe escapes a following comma or
-            // period.
-            let mut n_commas = 0;
-            let mut n_periods = 0;
-            let s = s.as_bytes();
-            for i in 0..s.len() {
-                if i > 0 && s[i - 1] == b'\'' {
-                } else if s[i] == b',' {
-                    n_commas += 1;
-                } else if s[i] == b'.' {
-                    n_periods += 1;
-                }
-            }
-
-            if n_commas == 3 && n_periods != 3 {
-                Some(',')
-            } else if n_periods == 3 && n_commas != 3 {
-                Some('.')
-            } else {
-                None
-            }
-        }
-
-        fn take_cc_token(iter: &mut Chars<'_>, grouping: char) -> Affix {
-            let mut s = String::new();
-            let mut quote = false;
-            for c in iter {
-                if c == '\'' && !quote {
-                    quote = true;
-                } else if c == grouping && !quote {
-                    break;
-                } else {
-                    s.push(c);
-                    quote = false;
-                }
-            }
-            Affix::new(s)
-        }
-
-        let Some(grouping) = find_separator(s) else {
-            return Err(());
-        };
-        let mut iter = s.chars();
-        let neg_prefix = take_cc_token(&mut iter, grouping);
-        let prefix = take_cc_token(&mut iter, grouping);
-        let suffix = take_cc_token(&mut iter, grouping);
-        let neg_suffix = take_cc_token(&mut iter, grouping);
-        let grouping: Decimal = grouping.try_into().unwrap();
-        let decimal = !grouping;
-        let extra_bytes = neg_prefix.extra_bytes()
-            + prefix.extra_bytes()
-            + suffix.extra_bytes()
-            + neg_suffix.extra_bytes();
-        Ok(Self {
-            neg_prefix,
-            prefix,
-            suffix,
-            neg_suffix,
-            decimal,
-            grouping: Some(grouping),
-            leading_zero: false,
-            extra_bytes,
-        })
-    }
-}
-
-/// An item within a [DateTemplate].
-pub struct TemplateItem {
-    /// Character in the template.
-    pub c: char,
-
-    /// Number of repetitions of the character.
-    pub n: usize,
-}
-
-/// A template for date and time formats.
-#[derive(Clone)]
-pub struct DateTemplate(&'static str);
-
-impl DateTemplate {
-    /// Returns a [DateTemplate] used for date and time input and output in a
-    /// field of the given `type_` and `width`.
-    ///
-    /// `width` only affects whether a 2-digit year or a 4-digit year is used,
-    /// that is, whether the returned string contains `yy` or `yyyy`, and
-    /// whether seconds are included, that is, whether the returned string
-    /// contains `:SS`.  A caller that doesn't care whether the returned string
-    /// contains `yy` or `yyyy` or `:SS` can just specify 0 to omit them.
-    pub fn new(type_: Type, width: usize) -> Option<Self> {
-        let (short, long) = match type_ {
-            Type::F
-            | Type::Comma
-            | Type::Dot
-            | Type::Dollar
-            | Type::Pct
-            | Type::E
-            | Type::CC(_)
-            | Type::N
-            | Type::Z
-            | Type::P
-            | Type::PK
-            | Type::IB
-            | Type::PIB
-            | Type::PIBHex
-            | Type::RB
-            | Type::RBHex
-            | Type::WkDay
-            | Type::Month
-            | Type::A
-            | Type::AHex => return None,
-            Type::Date => ("dd-mmm-yy", "dd-mmm-yyyy"),
-            Type::ADate => ("mm/dd/yy", "mm/dd/yyyy"),
-            Type::EDate => ("dd.mm.yy", "dd.mm.yyyy"),
-            Type::JDate => ("yyddd", "yyyyddd"),
-            Type::SDate => ("yy/mm/dd", "yyyy/mm/dd"),
-            Type::QYr => ("q Q yy", "q Q yyyy"),
-            Type::MoYr => ("mmm yy", "mmm yyyy"),
-            Type::WkYr => ("ww WK yy", "ww WK yyyy"),
-            Type::DateTime => ("dd-mmm-yyyy HH:MM", "dd-mmm-yyyy HH:MM:SS"),
-            Type::YmdHms => ("yyyy-mm-dd HH:MM", "yyyy-mm-dd HH:MM:SS"),
-            Type::MTime => ("MM", "MM:SS"),
-            Type::Time => ("HH:MM", "HH:MM:SS"),
-            Type::DTime => ("D HH:MM", "D HH:MM:SS"),
-        };
-        if width >= long.len() {
-            Some(DateTemplate(long))
-        } else {
-            Some(DateTemplate(short))
-        }
-    }
-
-    pub fn for_format(format: Format) -> Option<Self> {
-        Self::new(format.type_(), format.w())
-    }
-
-    #[allow(clippy::len_without_is_empty)]
-    pub fn len(&self) -> usize {
-        self.0.len()
-    }
-}
-
-impl Iterator for DateTemplate {
-    type Item = TemplateItem;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        let mut iter = self.0.chars();
-        let c = iter.next()?;
-        self.0 = iter.as_str();
-        let mut n = 1;
-        while iter.next() == Some(c) {
-            self.0 = iter.as_str();
-            n += 1;
-        }
-        Some(TemplateItem { c, n })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::format::{Format, Type, Width};
-
-    #[test]
-    fn codepage_to_unicode() {
-        fn check_format(input: Format, expected_width: Width) {
-            let mut output = input;
-            output.codepage_to_unicode();
-            let expected = Format::new(input.type_, expected_width, input.d).unwrap();
-            assert_eq!(output, expected);
-        }
-        check_format(Format::new(Type::A, 1, 0).unwrap(), 3);
-        check_format(Format::new(Type::A, 2, 0).unwrap(), 6);
-        check_format(Format::new(Type::A, 3, 0).unwrap(), 9);
-        check_format(Format::new(Type::A, 1000, 0).unwrap(), 3000);
-        check_format(Format::new(Type::A, 20000, 0).unwrap(), 32767);
-
-        check_format(Format::new(Type::AHex, 2, 0).unwrap(), 6);
-        check_format(Format::new(Type::AHex, 4, 0).unwrap(), 12);
-        check_format(Format::new(Type::AHex, 6, 0).unwrap(), 18);
-        check_format(Format::new(Type::AHex, 2000, 0).unwrap(), 6000);
-        check_format(Format::new(Type::AHex, 20000, 0).unwrap(), 60000);
-        check_format(Format::new(Type::AHex, 30000, 0).unwrap(), 65534);
-
-        check_format(Format::new(Type::F, 40, 0).unwrap(), 40);
-    }
-}
diff --git a/rust/pspp/src/lex.rs b/rust/pspp/src/lex.rs

new file mode 100644 (file)

index 0000000..f92407b
--- /dev/null
+++ b/rust/pspp/src/lex.rs
@@ -0,0 +1,40 @@
+// PSPP - a program for statistical analysis.
+// Copyright (C) 2025 Free Software Foundation, Inc.
+//
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free Software
+// Foundation, either version 3 of the License, or (at your option) any later
+// version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+// details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Lexical analysis for PSPP syntax.
+//!
+//! PSPP divides traditional "lexical analysis" or "tokenization" into three
+//! phases:
+//!
+//! 1. A low level called "segmentation", implemented in the [segment] module.
+//!    This labels syntax strings with [Segment](segment::Segment)s.
+//!
+//! 2. A middle level called "scanning", implemented in the [scan] module.
+//!    This transforms and merges segments to form [Token]s.
+//!
+//! 3. A high level called "lexing", implemented in the [lexer] module.  Lexing
+//!    brings together multiple source files and invokes macro expansion on the
+//!    tokens output by the scanner.
+
+// Warn about missing docs, but not for items declared with `#[cfg(test)]`.
+#![cfg_attr(not(test), warn(missing_docs))]
+
+pub mod command_name;
+pub mod lexer;
+pub mod scan;
+pub mod segment;
+mod token;
+pub use token::{Punct, Token};
diff --git a/rust/pspp/src/lex/mod.rs b/rust/pspp/src/lex/mod.rs

deleted file mode 100644 (file)

index f92407b..0000000
--- a/rust/pspp/src/lex/mod.rs
+++ /dev/null
@@ -1,40 +0,0 @@
-// PSPP - a program for statistical analysis.
-// Copyright (C) 2025 Free Software Foundation, Inc.
-//
-// This program is free software: you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free Software
-// Foundation, either version 3 of the License, or (at your option) any later
-// version.
-//
-// This program is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
-// details.
-//
-// You should have received a copy of the GNU General Public License along with
-// this program.  If not, see <http://www.gnu.org/licenses/>.
-
-//! Lexical analysis for PSPP syntax.
-//!
-//! PSPP divides traditional "lexical analysis" or "tokenization" into three
-//! phases:
-//!
-//! 1. A low level called "segmentation", implemented in the [segment] module.
-//!    This labels syntax strings with [Segment](segment::Segment)s.
-//!
-//! 2. A middle level called "scanning", implemented in the [scan] module.
-//!    This transforms and merges segments to form [Token]s.
-//!
-//! 3. A high level called "lexing", implemented in the [lexer] module.  Lexing
-//!    brings together multiple source files and invokes macro expansion on the
-//!    tokens output by the scanner.
-
-// Warn about missing docs, but not for items declared with `#[cfg(test)]`.
-#![cfg_attr(not(test), warn(missing_docs))]
-
-pub mod command_name;
-pub mod lexer;
-pub mod scan;
-pub mod segment;
-mod token;
-pub use token::{Punct, Token};
diff --git a/rust/pspp/src/lex/scan.rs b/rust/pspp/src/lex/scan.rs

new file mode 100644 (file)

index 0000000..fcb1bc3
--- /dev/null
+++ b/rust/pspp/src/lex/scan.rs
@@ -0,0 +1,482 @@
+// PSPP - a program for statistical analysis.
+// Copyright (C) 2025 Free Software Foundation, Inc.
+//
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free Software
+// Foundation, either version 3 of the License, or (at your option) any later
+// version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+// details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Mid-level lexical analysis.
+//!
+//! This module implements mid-level lexical analysis using the segments
+//! output by the lower-level [segmentation phase](super::segment).
+//!
+//! Scanning accepts as input a stream of segments, which are UTF-8 strings
+//! labeled with a [segment type](super::segment::Segment).  It outputs a stream
+//! of [Token]s used by the PSPP parser or an error.
+
+use crate::identifier::{Identifier, ReservedWord};
+
+use super::{
+    segment::{Segment, Segmenter, Syntax},
+    token::{Punct, Token},
+};
+use std::collections::VecDeque;
+use thiserror::Error as ThisError;
+
+/// Error returned by [merge_tokens].
+#[derive(ThisError, Clone, Debug, PartialEq, Eq)]
+pub enum ScanError {
+    /// Unterminated string constant.
+    #[error("Unterminated string constant.")]
+    ExpectedQuote,
+
+    /// Missing exponent.
+    #[error("Missing exponent following `{0}`")]
+    ExpectedExponent(String),
+
+    /// Odd length hex string.
+    #[error("String of hex digits has {0} characters, which is not a multiple of 2.")]
+    OddLengthHexString(usize),
+
+    /// Invalid hex digit.
+    #[error("Invalid hex digit {0:?}.")]
+    BadHexDigit(char),
+
+    /// Incomplete UTF-8 sequence.
+    #[error("Incomplete UTF-8 sequence `{substring}` starting {offset} digits into hex string.")]
+    IncompleteUtf8 {
+        /// Incomplete sequence.
+        substring: String,
+        /// Offset of start of sequence.
+        offset: usize,
+    },
+
+    /// Bad UTF-8 sequence.
+    #[error("Invalid UTF-8 sequence `{substring}` starting {offset} digits into hex string.")]
+    BadUtf8 {
+        /// Invalid sequence.
+        substring: String,
+        /// Offset of start of sequence.
+        offset: usize,
+    },
+
+    /// Invalid length Unicode string.
+    #[error("Unicode string contains {0} bytes, which is not in the valid range of 1 to 8 bytes.")]
+    BadLengthUnicodeString(usize),
+
+    /// Invalid code point.
+    #[error("U+{0:04X} is not a valid Unicode code point.")]
+    BadCodePoint(u32),
+
+    /// Expected hexadecimal Unicode code point
+    #[error("Expected hexadecimal Unicode code point.")]
+    ExpectedCodePoint,
+
+    /// `DO REPEAT` nested too deeply.
+    #[error("`DO REPEAT` nested too deeply.")]
+    DoRepeatOverflow,
+
+    /// Unexpected character.
+    #[error("Unexpected character {0:?} in input.")]
+    UnexpectedChar(char),
+}
+
+/// The action returned by [merge_tokens].
+#[derive(Clone, Debug)]
+pub enum MergeAction {
+    /// Copy one token literally from input to output.
+    Copy,
+
+    /// Expand `n` tokens from the input into `token` in the output.
+    Expand {
+        /// Number of tokens to expand.
+        n: usize,
+
+        /// Replacement token.
+        token: Token,
+    },
+}
+
+/// Used by [merge_tokens] to indicate that more input is needed.
+#[derive(Copy, Clone, Debug)]
+pub struct Incomplete;
+
+impl Segment {
+    /// Tries to transform this segment, which was obtained for `s`, into a
+    /// token.  Returns one of:
+    ///
+    /// - `None`: This segment doesn't correspond to any token (because it is a
+    ///   comment, white space, etc.) and can be dropped in tokenization.
+    ///
+    /// - `Some(Ok(token))`: This segment corresponds to the given token.
+    ///
+    /// - `Some(Err(error))`: The segment contains an error, which the caller
+    ///   should report.
+    ///
+    /// The raw token (or error) that this function returns should ordinarily be
+    /// merged with adjacent tokens with [merge_tokens] or some higher-level
+    /// construct.
+    pub fn to_token(self, s: &str) -> Option<Result<Token, ScanError>> {
+        match self {
+            Segment::Number => Some(Ok(Token::Number(s.parse().unwrap()))),
+            Segment::QuotedString => {
+                // Trim quote mark from front and back.
+                let mut chars = s.chars();
+                let quote = chars.next().unwrap();
+                let s = chars.as_str().strip_suffix(quote).unwrap();
+
+                // Replace doubled quotes by single ones.
+                let (single_quote, double_quote) = match quote {
+                    '\'' => ("'", "''"),
+                    '"' => ("\"", "\"\""),
+                    _ => unreachable!(),
+                };
+                Some(Ok(Token::String(s.replace(double_quote, single_quote))))
+            }
+            Segment::HexString => {
+                // Strip `X"` prefix and `"` suffix (or variations).
+                let s = &s[2..s.len() - 1];
+                for c in s.chars() {
+                    if !c.is_ascii_hexdigit() {
+                        return Some(Err(ScanError::BadHexDigit(c)));
+                    }
+                }
+                if s.len() % 2 != 0 {
+                    return Some(Err(ScanError::OddLengthHexString(s.len())));
+                }
+                let bytes = s
+                    .as_bytes()
+                    .chunks_exact(2)
+                    .map(|pair| {
+                        let hi = char::from(pair[0]).to_digit(16).unwrap() as u8;
+                        let lo = char::from(pair[1]).to_digit(16).unwrap() as u8;
+                        hi * 16 + lo
+                    })
+                    .collect::<Vec<_>>();
+                match String::from_utf8(bytes) {
+                    Ok(string) => Some(Ok(Token::String(string))),
+                    Err(error) => {
+                        let details = error.utf8_error();
+                        let offset = details.valid_up_to() * 2;
+                        let end = details
+                            .error_len()
+                            .map(|len| offset + len * 2)
+                            .unwrap_or(s.len());
+                        let substring = String::from(&s[offset..end]);
+                        Some(Err(if details.error_len().is_some() {
+                            ScanError::BadUtf8 { substring, offset }
+                        } else {
+                            ScanError::IncompleteUtf8 { substring, offset }
+                        }))
+                    }
+                }
+            }
+            Segment::UnicodeString => {
+                // Strip `U"` prefix and `"` suffix (or variations).
+                let s = &s[2..s.len() - 1];
+                if !(1..=8).contains(&s.len()) {
+                    return Some(Err(ScanError::BadLengthUnicodeString(s.len())));
+                }
+                let Ok(code_point) = u32::from_str_radix(s, 16) else {
+                    return Some(Err(ScanError::ExpectedCodePoint));
+                };
+                let Some(c) = char::from_u32(code_point) else {
+                    return Some(Err(ScanError::BadCodePoint(code_point)));
+                };
+                Some(Ok(Token::String(String::from(c))))
+            }
+
+            Segment::UnquotedString
+            | Segment::DoRepeatCommand
+            | Segment::InlineData
+            | Segment::Document
+            | Segment::MacroBody
+            | Segment::MacroName => Some(Ok(Token::String(String::from(s)))),
+
+            Segment::Identifier => {
+                if let Ok(reserved_word) = ReservedWord::try_from(s) {
+                    match reserved_word {
+                        ReservedWord::And => Some(Ok(Token::Punct(Punct::And))),
+                        ReservedWord::Or => Some(Ok(Token::Punct(Punct::Or))),
+                        ReservedWord::Not => Some(Ok(Token::Punct(Punct::Not))),
+                        ReservedWord::Eq => Some(Ok(Token::Punct(Punct::Eq))),
+                        ReservedWord::Ge => Some(Ok(Token::Punct(Punct::Ge))),
+                        ReservedWord::Gt => Some(Ok(Token::Punct(Punct::Gt))),
+                        ReservedWord::Le => Some(Ok(Token::Punct(Punct::Le))),
+                        ReservedWord::Lt => Some(Ok(Token::Punct(Punct::Lt))),
+                        ReservedWord::Ne => Some(Ok(Token::Punct(Punct::Ne))),
+                        ReservedWord::All => Some(Ok(Token::Punct(Punct::All))),
+                        ReservedWord::By => Some(Ok(Token::Punct(Punct::By))),
+                        ReservedWord::To => Some(Ok(Token::Punct(Punct::To))),
+                        ReservedWord::With => Some(Ok(Token::Punct(Punct::With))),
+                    }
+                } else {
+                    Some(Ok(Token::Id(Identifier::new(s).unwrap())))
+                }
+            }
+            Segment::Punct => match s {
+                "(" => Some(Ok(Token::Punct(Punct::LParen))),
+                ")" => Some(Ok(Token::Punct(Punct::RParen))),
+                "[" => Some(Ok(Token::Punct(Punct::LSquare))),
+                "]" => Some(Ok(Token::Punct(Punct::RSquare))),
+                "{" => Some(Ok(Token::Punct(Punct::LCurly))),
+                "}" => Some(Ok(Token::Punct(Punct::RCurly))),
+                "," => Some(Ok(Token::Punct(Punct::Comma))),
+                "=" => Some(Ok(Token::Punct(Punct::Equals))),
+                "-" => Some(Ok(Token::Punct(Punct::Dash))),
+                "&" => Some(Ok(Token::Punct(Punct::And))),
+                "|" => Some(Ok(Token::Punct(Punct::Or))),
+                "+" => Some(Ok(Token::Punct(Punct::Plus))),
+                "/" => Some(Ok(Token::Punct(Punct::Slash))),
+                "*" => Some(Ok(Token::Punct(Punct::Asterisk))),
+                "<" => Some(Ok(Token::Punct(Punct::Lt))),
+                ">" => Some(Ok(Token::Punct(Punct::Gt))),
+                "~" => Some(Ok(Token::Punct(Punct::Not))),
+                ":" => Some(Ok(Token::Punct(Punct::Colon))),
+                ";" => Some(Ok(Token::Punct(Punct::Semicolon))),
+                "**" => Some(Ok(Token::Punct(Punct::Exp))),
+                "<=" => Some(Ok(Token::Punct(Punct::Le))),
+                "<>" => Some(Ok(Token::Punct(Punct::Ne))),
+                "~=" => Some(Ok(Token::Punct(Punct::Ne))),
+                ">=" => Some(Ok(Token::Punct(Punct::Ge))),
+                "!" => Some(Ok(Token::Punct(Punct::Bang))),
+                "%" => Some(Ok(Token::Punct(Punct::Percent))),
+                "?" => Some(Ok(Token::Punct(Punct::Question))),
+                "`" => Some(Ok(Token::Punct(Punct::Backtick))),
+                "_" => Some(Ok(Token::Punct(Punct::Underscore))),
+                "." => Some(Ok(Token::Punct(Punct::Dot))),
+                "!*" => Some(Ok(Token::Punct(Punct::BangAsterisk))),
+                _ => unreachable!("bad punctuator {s:?}"),
+            },
+            Segment::Shbang
+            | Segment::Spaces
+            | Segment::Comment
+            | Segment::Newline
+            | Segment::CommentCommand => None,
+            Segment::DoRepeatOverflow => Some(Err(ScanError::DoRepeatOverflow)),
+            Segment::StartDocument => Some(Ok(Token::Id(Identifier::new("DOCUMENT").unwrap()))),
+            Segment::StartCommand | Segment::SeparateCommands | Segment::EndCommand => {
+                Some(Ok(Token::End))
+            }
+            Segment::ExpectedQuote => Some(Err(ScanError::ExpectedQuote)),
+            Segment::ExpectedExponent => Some(Err(ScanError::ExpectedExponent(String::from(s)))),
+            Segment::UnexpectedChar => {
+                Some(Err(ScanError::UnexpectedChar(s.chars().next().unwrap())))
+            }
+        }
+    }
+}
+
+/// Attempts to merge a sequence of tokens together into a single token.
+///
+/// The tokens are taken from the beginning of `input`, which given
+/// 0-based token index returns:
+///
+/// * `Ok(Some(token))`: The token with the given index.
+///
+/// * `Ok(None)`: End of input.
+///
+/// * `Err(Incomplete)`: The given token isn't available yet (it may or may not
+///   exist).
+///
+/// This function returns one of:
+///
+/// * `Ok(Some(MergeAction))`: How to transform one or more input tokens into an
+///   output token.
+///
+/// * `Ok(None)`: End of input.  (Only returned if `input(0)` is `Ok(None)`.)
+///
+/// * `Err(Incomplete)`: More input tokens are needed.  Call again with longer
+///   `input`.  ([Token::End] or [Token::Punct(Punct::EndCmd)] is
+///   always sufficient as extra input.)
+///
+/// This performs two different kinds of token merging:
+///
+/// - String concatenation, where syntax like `"a" + "b"` is converted into a
+///   single string token.  This is definitely needed because the parser relies
+///   on it.
+///
+/// - Negative number merging, where syntax like `-5` is converted from a pair
+///   of tokens (a dash and a positive number) into a single token (a negative
+///   number).  This might not be needed anymore because the segmenter
+///   directly treats a dash followed by a number, with optional intervening
+///   white space, as a negative number.  It's only needed if we want
+///   intervening comments to be allowed or for part of the negative number
+///   token to be produced by macro expansion.
+pub fn merge_tokens<'a, F>(input: F) -> Result<Option<MergeAction>, Incomplete>
+where
+    F: Fn(usize) -> Result<Option<&'a Token>, Incomplete>,
+{
+    let Some(token) = input(0)? else {
+        return Ok(None);
+    };
+    match token {
+        Token::Punct(Punct::Dash) => match input(1)? {
+            Some(Token::Number(number)) if number.is_sign_positive() => {
+                let number = *number;
+                Ok(Some(MergeAction::Expand {
+                    n: 2,
+                    token: Token::Number(-number),
+                }))
+            }
+            _ => Ok(Some(MergeAction::Copy)),
+        },
+        Token::String(_) => {
+            let mut i = 0;
+            while matches!(input(i * 2 + 1)?, Some(Token::Punct(Punct::Plus)))
+                && matches!(input(i * 2 + 2)?, Some(Token::String(_)))
+            {
+                i += 1;
+            }
+            if i == 0 {
+                Ok(Some(MergeAction::Copy))
+            } else {
+                let mut output = String::new();
+                for i in 0..=i {
+                    let Token::String(s) = input(i * 2).unwrap().unwrap() else {
+                        unreachable!()
+                    };
+                    output.push_str(s);
+                }
+                Ok(Some(MergeAction::Expand {
+                    n: i * 2 + 1,
+                    token: Token::String(output),
+                }))
+            }
+        }
+        _ => Ok(Some(MergeAction::Copy)),
+    }
+}
+
+/// Too-simple lexical analyzer for strings.
+///
+/// Given a string, [StringSegmenter] provides iteration over raw tokens.
+/// Unlike [StringScanner], [StringSegmenter] does not merge tokens using
+/// [merge_tokens].  Usually merging is desirable, so [StringScanner] should be
+/// preferred.
+///
+/// This is used as part of macro expansion.
+pub struct StringSegmenter<'a> {
+    input: &'a str,
+    segmenter: Segmenter,
+}
+
+impl<'a> StringSegmenter<'a> {
+    /// Creates a new [StringSegmenter] for `input` using syntax variant `mode`.
+    /// See [Segmenter::new] for an explanation of `is_snippet`.
+    pub fn new(input: &'a str, mode: Syntax, is_snippet: bool) -> Self {
+        Self {
+            input,
+            segmenter: Segmenter::new(mode, is_snippet),
+        }
+    }
+}
+
+impl<'a> Iterator for StringSegmenter<'a> {
+    type Item = (&'a str, Result<Token, ScanError>);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            let (seg_len, seg_type) = self.segmenter.push(self.input, true).unwrap()?;
+            let (s, rest) = self.input.split_at(seg_len);
+            self.input = rest;
+
+            if let Some(token) = seg_type.to_token(s) {
+                return Some((s, token));
+            }
+        }
+    }
+}
+
+/// Simple lexical analyzer for strings.
+///
+/// Given a string, [StringScanner] provides iteration over tokens.
+pub struct StringScanner<'a> {
+    input: &'a str,
+    eof: bool,
+    segmenter: Segmenter,
+    tokens: VecDeque<Token>,
+}
+
+impl<'a> StringScanner<'a> {
+    /// Creates a new [StringScanner] for `input` using syntax variant `mode`.
+    /// See [Segmenter::new] for an explanation of `is_snippet`.
+    pub fn new(input: &'a str, mode: Syntax, is_snippet: bool) -> Self {
+        Self {
+            input,
+            eof: false,
+            segmenter: Segmenter::new(mode, is_snippet),
+            tokens: VecDeque::with_capacity(1),
+        }
+    }
+
+    fn merge(&mut self, eof: bool) -> Result<Option<Result<Token, ScanError>>, Incomplete> {
+        match merge_tokens(|index| {
+            if let Some(token) = self.tokens.get(index) {
+                Ok(Some(token))
+            } else if eof {
+                Ok(None)
+            } else {
+                Err(Incomplete)
+            }
+        })? {
+            Some(MergeAction::Copy) => Ok(Some(Ok(self.tokens.pop_front().unwrap()))),
+            Some(MergeAction::Expand { n, token }) => {
+                self.tokens.drain(..n);
+                Ok(Some(Ok(token)))
+            }
+            None => Ok(None),
+        }
+    }
+
+    /// Transforms this [StringScanner] into an iterator that includes only the
+    /// [Token]s, omitting [ScanError]s.
+    pub fn unwrapped(self) -> impl Iterator<Item = Token> + use<'a> {
+        self.map(|scan_token| scan_token.ok().unwrap())
+    }
+}
+
+impl Iterator for StringScanner<'_> {
+    type Item = Result<Token, ScanError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            if let Ok(Some(token)) = self.merge(self.eof) {
+                return Some(token);
+            }
+
+            let Some((seg_len, seg_type)) = self.segmenter.push(self.input, true).unwrap() else {
+                self.eof = true;
+                return self.merge(true).unwrap();
+            };
+            let (s, rest) = self.input.split_at(seg_len);
+
+            match seg_type.to_token(s) {
+                Some(Err(error)) => {
+                    if let Ok(Some(token)) = self.merge(true) {
+                        return Some(token);
+                    }
+                    self.input = rest;
+                    return Some(Err(error));
+                }
+                Some(Ok(token)) => {
+                    self.tokens.push_back(token);
+                }
+                None => (),
+            }
+            self.input = rest;
+        }
+    }
+}
+
+#[cfg(test)]
+mod test;
diff --git a/rust/pspp/src/lex/scan/mod.rs b/rust/pspp/src/lex/scan/mod.rs

deleted file mode 100644 (file)

index fcb1bc3..0000000
--- a/rust/pspp/src/lex/scan/mod.rs
+++ /dev/null
@@ -1,482 +0,0 @@
-// PSPP - a program for statistical analysis.
-// Copyright (C) 2025 Free Software Foundation, Inc.
-//
-// This program is free software: you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free Software
-// Foundation, either version 3 of the License, or (at your option) any later
-// version.
-//
-// This program is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
-// details.
-//
-// You should have received a copy of the GNU General Public License along with
-// this program.  If not, see <http://www.gnu.org/licenses/>.
-
-//! Mid-level lexical analysis.
-//!
-//! This module implements mid-level lexical analysis using the segments
-//! output by the lower-level [segmentation phase](super::segment).
-//!
-//! Scanning accepts as input a stream of segments, which are UTF-8 strings
-//! labeled with a [segment type](super::segment::Segment).  It outputs a stream
-//! of [Token]s used by the PSPP parser or an error.
-
-use crate::identifier::{Identifier, ReservedWord};
-
-use super::{
-    segment::{Segment, Segmenter, Syntax},
-    token::{Punct, Token},
-};
-use std::collections::VecDeque;
-use thiserror::Error as ThisError;
-
-/// Error returned by [merge_tokens].
-#[derive(ThisError, Clone, Debug, PartialEq, Eq)]
-pub enum ScanError {
-    /// Unterminated string constant.
-    #[error("Unterminated string constant.")]
-    ExpectedQuote,
-
-    /// Missing exponent.
-    #[error("Missing exponent following `{0}`")]
-    ExpectedExponent(String),
-
-    /// Odd length hex string.
-    #[error("String of hex digits has {0} characters, which is not a multiple of 2.")]
-    OddLengthHexString(usize),
-
-    /// Invalid hex digit.
-    #[error("Invalid hex digit {0:?}.")]
-    BadHexDigit(char),
-
-    /// Incomplete UTF-8 sequence.
-    #[error("Incomplete UTF-8 sequence `{substring}` starting {offset} digits into hex string.")]
-    IncompleteUtf8 {
-        /// Incomplete sequence.
-        substring: String,
-        /// Offset of start of sequence.
-        offset: usize,
-    },
-
-    /// Bad UTF-8 sequence.
-    #[error("Invalid UTF-8 sequence `{substring}` starting {offset} digits into hex string.")]
-    BadUtf8 {
-        /// Invalid sequence.
-        substring: String,
-        /// Offset of start of sequence.
-        offset: usize,
-    },
-
-    /// Invalid length Unicode string.
-    #[error("Unicode string contains {0} bytes, which is not in the valid range of 1 to 8 bytes.")]
-    BadLengthUnicodeString(usize),
-
-    /// Invalid code point.
-    #[error("U+{0:04X} is not a valid Unicode code point.")]
-    BadCodePoint(u32),
-
-    /// Expected hexadecimal Unicode code point
-    #[error("Expected hexadecimal Unicode code point.")]
-    ExpectedCodePoint,
-
-    /// `DO REPEAT` nested too deeply.
-    #[error("`DO REPEAT` nested too deeply.")]
-    DoRepeatOverflow,
-
-    /// Unexpected character.
-    #[error("Unexpected character {0:?} in input.")]
-    UnexpectedChar(char),
-}
-
-/// The action returned by [merge_tokens].
-#[derive(Clone, Debug)]
-pub enum MergeAction {
-    /// Copy one token literally from input to output.
-    Copy,
-
-    /// Expand `n` tokens from the input into `token` in the output.
-    Expand {
-        /// Number of tokens to expand.
-        n: usize,
-
-        /// Replacement token.
-        token: Token,
-    },
-}
-
-/// Used by [merge_tokens] to indicate that more input is needed.
-#[derive(Copy, Clone, Debug)]
-pub struct Incomplete;
-
-impl Segment {
-    /// Tries to transform this segment, which was obtained for `s`, into a
-    /// token.  Returns one of:
-    ///
-    /// - `None`: This segment doesn't correspond to any token (because it is a
-    ///   comment, white space, etc.) and can be dropped in tokenization.
-    ///
-    /// - `Some(Ok(token))`: This segment corresponds to the given token.
-    ///
-    /// - `Some(Err(error))`: The segment contains an error, which the caller
-    ///   should report.
-    ///
-    /// The raw token (or error) that this function returns should ordinarily be
-    /// merged with adjacent tokens with [merge_tokens] or some higher-level
-    /// construct.
-    pub fn to_token(self, s: &str) -> Option<Result<Token, ScanError>> {
-        match self {
-            Segment::Number => Some(Ok(Token::Number(s.parse().unwrap()))),
-            Segment::QuotedString => {
-                // Trim quote mark from front and back.
-                let mut chars = s.chars();
-                let quote = chars.next().unwrap();
-                let s = chars.as_str().strip_suffix(quote).unwrap();
-
-                // Replace doubled quotes by single ones.
-                let (single_quote, double_quote) = match quote {
-                    '\'' => ("'", "''"),
-                    '"' => ("\"", "\"\""),
-                    _ => unreachable!(),
-                };
-                Some(Ok(Token::String(s.replace(double_quote, single_quote))))
-            }
-            Segment::HexString => {
-                // Strip `X"` prefix and `"` suffix (or variations).
-                let s = &s[2..s.len() - 1];
-                for c in s.chars() {
-                    if !c.is_ascii_hexdigit() {
-                        return Some(Err(ScanError::BadHexDigit(c)));
-                    }
-                }
-                if s.len() % 2 != 0 {
-                    return Some(Err(ScanError::OddLengthHexString(s.len())));
-                }
-                let bytes = s
-                    .as_bytes()
-                    .chunks_exact(2)
-                    .map(|pair| {
-                        let hi = char::from(pair[0]).to_digit(16).unwrap() as u8;
-                        let lo = char::from(pair[1]).to_digit(16).unwrap() as u8;
-                        hi * 16 + lo
-                    })
-                    .collect::<Vec<_>>();
-                match String::from_utf8(bytes) {
-                    Ok(string) => Some(Ok(Token::String(string))),
-                    Err(error) => {
-                        let details = error.utf8_error();
-                        let offset = details.valid_up_to() * 2;
-                        let end = details
-                            .error_len()
-                            .map(|len| offset + len * 2)
-                            .unwrap_or(s.len());
-                        let substring = String::from(&s[offset..end]);
-                        Some(Err(if details.error_len().is_some() {
-                            ScanError::BadUtf8 { substring, offset }
-                        } else {
-                            ScanError::IncompleteUtf8 { substring, offset }
-                        }))
-                    }
-                }
-            }
-            Segment::UnicodeString => {
-                // Strip `U"` prefix and `"` suffix (or variations).
-                let s = &s[2..s.len() - 1];
-                if !(1..=8).contains(&s.len()) {
-                    return Some(Err(ScanError::BadLengthUnicodeString(s.len())));
-                }
-                let Ok(code_point) = u32::from_str_radix(s, 16) else {
-                    return Some(Err(ScanError::ExpectedCodePoint));
-                };
-                let Some(c) = char::from_u32(code_point) else {
-                    return Some(Err(ScanError::BadCodePoint(code_point)));
-                };
-                Some(Ok(Token::String(String::from(c))))
-            }
-
-            Segment::UnquotedString
-            | Segment::DoRepeatCommand
-            | Segment::InlineData
-            | Segment::Document
-            | Segment::MacroBody
-            | Segment::MacroName => Some(Ok(Token::String(String::from(s)))),
-
-            Segment::Identifier => {
-                if let Ok(reserved_word) = ReservedWord::try_from(s) {
-                    match reserved_word {
-                        ReservedWord::And => Some(Ok(Token::Punct(Punct::And))),
-                        ReservedWord::Or => Some(Ok(Token::Punct(Punct::Or))),
-                        ReservedWord::Not => Some(Ok(Token::Punct(Punct::Not))),
-                        ReservedWord::Eq => Some(Ok(Token::Punct(Punct::Eq))),
-                        ReservedWord::Ge => Some(Ok(Token::Punct(Punct::Ge))),
-                        ReservedWord::Gt => Some(Ok(Token::Punct(Punct::Gt))),
-                        ReservedWord::Le => Some(Ok(Token::Punct(Punct::Le))),
-                        ReservedWord::Lt => Some(Ok(Token::Punct(Punct::Lt))),
-                        ReservedWord::Ne => Some(Ok(Token::Punct(Punct::Ne))),
-                        ReservedWord::All => Some(Ok(Token::Punct(Punct::All))),
-                        ReservedWord::By => Some(Ok(Token::Punct(Punct::By))),
-                        ReservedWord::To => Some(Ok(Token::Punct(Punct::To))),
-                        ReservedWord::With => Some(Ok(Token::Punct(Punct::With))),
-                    }
-                } else {
-                    Some(Ok(Token::Id(Identifier::new(s).unwrap())))
-                }
-            }
-            Segment::Punct => match s {
-                "(" => Some(Ok(Token::Punct(Punct::LParen))),
-                ")" => Some(Ok(Token::Punct(Punct::RParen))),
-                "[" => Some(Ok(Token::Punct(Punct::LSquare))),
-                "]" => Some(Ok(Token::Punct(Punct::RSquare))),
-                "{" => Some(Ok(Token::Punct(Punct::LCurly))),
-                "}" => Some(Ok(Token::Punct(Punct::RCurly))),
-                "," => Some(Ok(Token::Punct(Punct::Comma))),
-                "=" => Some(Ok(Token::Punct(Punct::Equals))),
-                "-" => Some(Ok(Token::Punct(Punct::Dash))),
-                "&" => Some(Ok(Token::Punct(Punct::And))),
-                "|" => Some(Ok(Token::Punct(Punct::Or))),
-                "+" => Some(Ok(Token::Punct(Punct::Plus))),
-                "/" => Some(Ok(Token::Punct(Punct::Slash))),
-                "*" => Some(Ok(Token::Punct(Punct::Asterisk))),
-                "<" => Some(Ok(Token::Punct(Punct::Lt))),
-                ">" => Some(Ok(Token::Punct(Punct::Gt))),
-                "~" => Some(Ok(Token::Punct(Punct::Not))),
-                ":" => Some(Ok(Token::Punct(Punct::Colon))),
-                ";" => Some(Ok(Token::Punct(Punct::Semicolon))),
-                "**" => Some(Ok(Token::Punct(Punct::Exp))),
-                "<=" => Some(Ok(Token::Punct(Punct::Le))),
-                "<>" => Some(Ok(Token::Punct(Punct::Ne))),
-                "~=" => Some(Ok(Token::Punct(Punct::Ne))),
-                ">=" => Some(Ok(Token::Punct(Punct::Ge))),
-                "!" => Some(Ok(Token::Punct(Punct::Bang))),
-                "%" => Some(Ok(Token::Punct(Punct::Percent))),
-                "?" => Some(Ok(Token::Punct(Punct::Question))),
-                "`" => Some(Ok(Token::Punct(Punct::Backtick))),
-                "_" => Some(Ok(Token::Punct(Punct::Underscore))),
-                "." => Some(Ok(Token::Punct(Punct::Dot))),
-                "!*" => Some(Ok(Token::Punct(Punct::BangAsterisk))),
-                _ => unreachable!("bad punctuator {s:?}"),
-            },
-            Segment::Shbang
-            | Segment::Spaces
-            | Segment::Comment
-            | Segment::Newline
-            | Segment::CommentCommand => None,
-            Segment::DoRepeatOverflow => Some(Err(ScanError::DoRepeatOverflow)),
-            Segment::StartDocument => Some(Ok(Token::Id(Identifier::new("DOCUMENT").unwrap()))),
-            Segment::StartCommand | Segment::SeparateCommands | Segment::EndCommand => {
-                Some(Ok(Token::End))
-            }
-            Segment::ExpectedQuote => Some(Err(ScanError::ExpectedQuote)),
-            Segment::ExpectedExponent => Some(Err(ScanError::ExpectedExponent(String::from(s)))),
-            Segment::UnexpectedChar => {
-                Some(Err(ScanError::UnexpectedChar(s.chars().next().unwrap())))
-            }
-        }
-    }
-}
-
-/// Attempts to merge a sequence of tokens together into a single token.
-///
-/// The tokens are taken from the beginning of `input`, which given
-/// 0-based token index returns:
-///
-/// * `Ok(Some(token))`: The token with the given index.
-///
-/// * `Ok(None)`: End of input.
-///
-/// * `Err(Incomplete)`: The given token isn't available yet (it may or may not
-///   exist).
-///
-/// This function returns one of:
-///
-/// * `Ok(Some(MergeAction))`: How to transform one or more input tokens into an
-///   output token.
-///
-/// * `Ok(None)`: End of input.  (Only returned if `input(0)` is `Ok(None)`.)
-///
-/// * `Err(Incomplete)`: More input tokens are needed.  Call again with longer
-///   `input`.  ([Token::End] or [Token::Punct(Punct::EndCmd)] is
-///   always sufficient as extra input.)
-///
-/// This performs two different kinds of token merging:
-///
-/// - String concatenation, where syntax like `"a" + "b"` is converted into a
-///   single string token.  This is definitely needed because the parser relies
-///   on it.
-///
-/// - Negative number merging, where syntax like `-5` is converted from a pair
-///   of tokens (a dash and a positive number) into a single token (a negative
-///   number).  This might not be needed anymore because the segmenter
-///   directly treats a dash followed by a number, with optional intervening
-///   white space, as a negative number.  It's only needed if we want
-///   intervening comments to be allowed or for part of the negative number
-///   token to be produced by macro expansion.
-pub fn merge_tokens<'a, F>(input: F) -> Result<Option<MergeAction>, Incomplete>
-where
-    F: Fn(usize) -> Result<Option<&'a Token>, Incomplete>,
-{
-    let Some(token) = input(0)? else {
-        return Ok(None);
-    };
-    match token {
-        Token::Punct(Punct::Dash) => match input(1)? {
-            Some(Token::Number(number)) if number.is_sign_positive() => {
-                let number = *number;
-                Ok(Some(MergeAction::Expand {
-                    n: 2,
-                    token: Token::Number(-number),
-                }))
-            }
-            _ => Ok(Some(MergeAction::Copy)),
-        },
-        Token::String(_) => {
-            let mut i = 0;
-            while matches!(input(i * 2 + 1)?, Some(Token::Punct(Punct::Plus)))
-                && matches!(input(i * 2 + 2)?, Some(Token::String(_)))
-            {
-                i += 1;
-            }
-            if i == 0 {
-                Ok(Some(MergeAction::Copy))
-            } else {
-                let mut output = String::new();
-                for i in 0..=i {
-                    let Token::String(s) = input(i * 2).unwrap().unwrap() else {
-                        unreachable!()
-                    };
-                    output.push_str(s);
-                }
-                Ok(Some(MergeAction::Expand {
-                    n: i * 2 + 1,
-                    token: Token::String(output),
-                }))
-            }
-        }
-        _ => Ok(Some(MergeAction::Copy)),
-    }
-}
-
-/// Too-simple lexical analyzer for strings.
-///
-/// Given a string, [StringSegmenter] provides iteration over raw tokens.
-/// Unlike [StringScanner], [StringSegmenter] does not merge tokens using
-/// [merge_tokens].  Usually merging is desirable, so [StringScanner] should be
-/// preferred.
-///
-/// This is used as part of macro expansion.
-pub struct StringSegmenter<'a> {
-    input: &'a str,
-    segmenter: Segmenter,
-}
-
-impl<'a> StringSegmenter<'a> {
-    /// Creates a new [StringSegmenter] for `input` using syntax variant `mode`.
-    /// See [Segmenter::new] for an explanation of `is_snippet`.
-    pub fn new(input: &'a str, mode: Syntax, is_snippet: bool) -> Self {
-        Self {
-            input,
-            segmenter: Segmenter::new(mode, is_snippet),
-        }
-    }
-}
-
-impl<'a> Iterator for StringSegmenter<'a> {
-    type Item = (&'a str, Result<Token, ScanError>);
-
-    fn next(&mut self) -> Option<Self::Item> {
-        loop {
-            let (seg_len, seg_type) = self.segmenter.push(self.input, true).unwrap()?;
-            let (s, rest) = self.input.split_at(seg_len);
-            self.input = rest;
-
-            if let Some(token) = seg_type.to_token(s) {
-                return Some((s, token));
-            }
-        }
-    }
-}
-
-/// Simple lexical analyzer for strings.
-///
-/// Given a string, [StringScanner] provides iteration over tokens.
-pub struct StringScanner<'a> {
-    input: &'a str,
-    eof: bool,
-    segmenter: Segmenter,
-    tokens: VecDeque<Token>,
-}
-
-impl<'a> StringScanner<'a> {
-    /// Creates a new [StringScanner] for `input` using syntax variant `mode`.
-    /// See [Segmenter::new] for an explanation of `is_snippet`.
-    pub fn new(input: &'a str, mode: Syntax, is_snippet: bool) -> Self {
-        Self {
-            input,
-            eof: false,
-            segmenter: Segmenter::new(mode, is_snippet),
-            tokens: VecDeque::with_capacity(1),
-        }
-    }
-
-    fn merge(&mut self, eof: bool) -> Result<Option<Result<Token, ScanError>>, Incomplete> {
-        match merge_tokens(|index| {
-            if let Some(token) = self.tokens.get(index) {
-                Ok(Some(token))
-            } else if eof {
-                Ok(None)
-            } else {
-                Err(Incomplete)
-            }
-        })? {
-            Some(MergeAction::Copy) => Ok(Some(Ok(self.tokens.pop_front().unwrap()))),
-            Some(MergeAction::Expand { n, token }) => {
-                self.tokens.drain(..n);
-                Ok(Some(Ok(token)))
-            }
-            None => Ok(None),
-        }
-    }
-
-    /// Transforms this [StringScanner] into an iterator that includes only the
-    /// [Token]s, omitting [ScanError]s.
-    pub fn unwrapped(self) -> impl Iterator<Item = Token> + use<'a> {
-        self.map(|scan_token| scan_token.ok().unwrap())
-    }
-}
-
-impl Iterator for StringScanner<'_> {
-    type Item = Result<Token, ScanError>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        loop {
-            if let Ok(Some(token)) = self.merge(self.eof) {
-                return Some(token);
-            }
-
-            let Some((seg_len, seg_type)) = self.segmenter.push(self.input, true).unwrap() else {
-                self.eof = true;
-                return self.merge(true).unwrap();
-            };
-            let (s, rest) = self.input.split_at(seg_len);
-
-            match seg_type.to_token(s) {
-                Some(Err(error)) => {
-                    if let Ok(Some(token)) = self.merge(true) {
-                        return Some(token);
-                    }
-                    self.input = rest;
-                    return Some(Err(error));
-                }
-                Some(Ok(token)) => {
-                    self.tokens.push_back(token);
-                }
-                None => (),
-            }
-            self.input = rest;
-        }
-    }
-}
-
-#[cfg(test)]
-mod test;
diff --git a/rust/pspp/src/lex/segment.rs b/rust/pspp/src/lex/segment.rs

new file mode 100644 (file)

index 0000000..5a56869
--- /dev/null
+++ b/rust/pspp/src/lex/segment.rs
@@ -0,0 +1,1442 @@
+// PSPP - a program for statistical analysis.
+// Copyright (C) 2025 Free Software Foundation, Inc.
+//
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free Software
+// Foundation, either version 3 of the License, or (at your option) any later
+// version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+// details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Low-level lexical analysis.
+//!
+//! PSPP divides traditional "lexical analysis" or "tokenization" into [three
+//! phases](super).  This module implements the low-level segmentation phase.
+//!
+//! Segmentation accepts a stream of UTF-8 bytes as input.  It outputs a label
+//! (a segment type) for each byte or contiguous sequence of bytes in the input.
+//! It also, in a few corner cases, outputs zero-width segments that label the
+//! boundary between a pair of bytes in the input.
+//!
+//! Some segment types correspond directly to tokens; for example,
+//! [Segment::Identifier] becomes [Token::Id] later in lexical analysis.  Other
+//! segments contribute to tokens but do not correspond directly; for example,
+//! multiple quoted strings [Segment::QuotedString] separated by
+//! [Segment::Spaces] and "+" punctuators [Segment::Punct] may be combined to
+//! form a single string token [Token::String].  Still other segments are
+//! ignored (e.g. [Segment::Spaces]) or trigger special behavior such as error
+//! messages later in tokenization (e.g. [Segment::ExpectedQuote]).
+//!
+//! [Token::Id]: crate::lex::token::Token::Id
+//! [Token::String]: crate::lex::token::Token::String
+
+use std::cmp::Ordering;
+
+use crate::{
+    identifier::{id_match, id_match_n, IdentifierChar},
+    prompt::PromptStyle,
+};
+use bitflags::bitflags;
+
+use super::command_name::{command_match, COMMAND_NAMES};
+
+/// Syntax variant.
+///
+/// PSPP syntax is written in one of two syntax variants, which are broadly
+/// defined as follows:
+///
+/// - In interactive syntax, commands end with a period at the end of the line
+///   or with a blank line.
+///
+/// - In batch syntax, the second and subsequent lines of a command are indented
+///   from the left margin.
+///
+/// The segmenter can also try to automatically detect the kind of syntax in
+/// use, using a heuristic that is usually correct.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
+pub enum Syntax {
+    /// Try to interpret input correctly regardless of whether it is written
+    /// for interactive or batch syntax.
+    ///
+    /// This is `Syntax::default()`.
+    #[default]
+    Auto,
+
+    /// Interactive syntax.
+    Interactive,
+
+    /// Batch syntax.
+    Batch,
+}
+
+/// The type of a segment.
+///
+/// A [Segment] is a label for a string slice and is normally paired with one.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub enum Segment {
+    /// A number.
+    Number,
+
+    /// A quoted string (`'...'` or `"..."`).
+    QuotedString,
+
+    /// A hexadecimal string (`X'...'` or `X"..."`).
+    HexString,
+
+    /// A Unicode string (`U'...'` or `U"..."`).
+    UnicodeString,
+
+    /// An unquoted string.
+    ///
+    /// Unquoted strings appear only in a few special-case constructs, such as
+    /// the `FILE LABEL` command.
+    UnquotedString,
+
+    /// An identifier.
+    Identifier,
+
+    /// A punctuator or operator.
+    Punct,
+
+    /// `#!` at the beginning of a syntax file only.
+    Shbang,
+
+    /// Spaces.
+    Spaces,
+
+    /// A comment (`/* ... */`).
+    Comment,
+
+    /// New-line.
+    Newline,
+
+    /// A comment command (`* ...` or `COMMENT ...`).
+    CommentCommand,
+
+    /// In a `DO REPEAT` command, one of the lines to be repeated.
+    DoRepeatCommand,
+
+    /// Indicates `DO REPEAT` nested more deeply than supported.
+    DoRepeatOverflow,
+
+    /// A line of inline data inside `BEGIN DATA`...`END DATA`.
+    InlineData,
+
+    /// In `!DEFINE`, an identifier for the macro being defined.
+    ///
+    /// Distinguished from [Identifier](Self::Identifier) because a `MacroName`
+    /// must never be macro-expanded.
+    MacroName,
+
+    /// Contents of `!DEFINE`...`!ENDDEFINE`.
+    MacroBody,
+
+    /// Represents the `DOCUMENT` beginning a `DOCUMENT` command.
+    ///
+    /// This token is not associated with any text: the actual `DOCUMENT`
+    /// keyword is part of the following [Document](Self::Document) segment.
+    /// This is because documents include the `DOCUMENT` keyword.
+    StartDocument,
+
+    /// One of the lines of documents in a `DOCUMENT` command.
+    ///
+    /// The first line of a document includes the `DOCUMENT` keyword itself.
+    Document,
+
+    /// A command separator.
+    ///
+    /// This segment is usually for `+`, `-`, or `.` at the beginning of a line.
+    StartCommand,
+
+    /// A command separator.
+    ///
+    /// This segment is usually for a blank line.  It also appears at the end of
+    /// a file.
+    SeparateCommands,
+
+    /// A command separator.
+    ///
+    /// This segment is for `.` at the end of a line.
+    EndCommand,
+
+    /// Missing quote at the end of a line.
+    ///
+    /// This segment contains a partial quoted string.  It starts with a quote
+    /// mark (`"` or `'`, possibly preceded by `X` or `U`) but goes to the end
+    /// of the line without the matching end quote mark.
+    ExpectedQuote,
+
+    /// Missing exponent in number.
+    ///
+    /// This segment contains a number that ends with `E` or `E+` or `E-`
+    /// without a following exponent.
+    ExpectedExponent,
+
+    /// Unexpected character.
+    ///
+    /// The segment is a single character that isn't valid in syntax.
+    UnexpectedChar,
+}
+
+bitflags! {
+    // Flags that refine the segmenter's major `State`.
+    #[derive(Copy, Clone, Debug)]
+    struct Substate: u8 {
+        // Set once a new-line has been segmented: in `State::General` the
+        // next input is then handled by `parse_start_of_line`.
+        const START_OF_LINE = 1;
+        // Set after a command separator is emitted and cleared when a token
+        // segment is emitted.  While set, `*` and keywords such as `COMMENT`
+        // or `DOCUMENT` are recognized as the start of special commands.
+        const START_OF_COMMAND = 2;
+    }
+}
+
+/// Used by [Segmenter] to indicate that more input is needed.
+///
+/// This is the error type of [Segmenter::push]: it is returned when the
+/// segment type cannot be determined from the input supplied so far and the
+/// caller has not declared end of input.
+#[derive(Copy, Clone, Debug)]
+pub struct Incomplete;
+
+/// Labels syntax input with [Segment]s.
+#[derive(Copy, Clone)]
+pub struct Segmenter {
+    // Major parsing state, plus the start-of-line and start-of-command flags.
+    state: (State, Substate),
+    // Nesting counter, initialized to 0 by `new`.
+    // NOTE(review): presumably the `DO REPEAT` nesting depth -- confirm
+    // against the code that updates it.
+    nest: u8,
+    // The syntax variant passed to `new`.
+    syntax: Syntax,
+}
+
+impl Segmenter {
+    /// Creates a segmenter for input written in the given `syntax`.
+    ///
+    /// When `is_snippet` is false, the input is treated as a whole syntax
+    /// file: segmentation begins by looking for a `#!` line, and a `-` or `+`
+    /// at the very beginning is a separator between commands, exactly as it
+    /// is at the beginning of any other line.
+    ///
+    /// When `is_snippet` is true, the input is treated as an isolated piece
+    /// of syntax that starts in the middle of a line: a leading `+` is an
+    /// operator token, and a leading `-` is an operator token or, when a
+    /// number follows, part of that number.
+    pub fn new(syntax: Syntax, is_snippet: bool) -> Self {
+        let initial_state = if is_snippet {
+            State::General
+        } else {
+            State::Shbang
+        };
+        Self {
+            state: (initial_state, Substate::empty()),
+            nest: 0,
+            syntax,
+        }
+    }
+
+    /// Returns the [Syntax] variant passed in to [new](Self::new).
+    ///
+    /// The value is stored at construction and returned unchanged.
+    pub fn syntax(&self) -> Syntax {
+        self.syntax
+    }
+
+    /// Returns true if the [Substate::START_OF_LINE] flag is set.
+    fn start_of_line(&self) -> bool {
+        let (_, substate) = self.state;
+        substate.contains(Substate::START_OF_LINE)
+    }
+
+    /// Returns true if the [Substate::START_OF_COMMAND] flag is set.
+    fn start_of_command(&self) -> bool {
+        let (_, substate) = self.state;
+        substate.contains(Substate::START_OF_COMMAND)
+    }
+
+    /// Returns the style of command prompt to display to an interactive user
+    /// for input in the current state.
+    ///
+    /// The return value is most accurate with [Syntax::Interactive] syntax
+    /// and at the beginning of a line (that is, if [Segmenter::push] consumed
+    /// as much as possible of the input up to a new-line).
+    pub fn prompt(&self) -> PromptStyle {
+        // Several states prompt differently depending on whether a command
+        // has been started yet.
+        let first_or_later = if self.start_of_command() {
+            PromptStyle::First
+        } else {
+            PromptStyle::Later
+        };
+
+        match self.state.0 {
+            State::General
+            | State::DoRepeat1
+            | State::DoRepeat2
+            | State::Define1
+            | State::Define2
+            | State::Define3 => first_or_later,
+
+            State::Shbang
+            | State::Document3
+            | State::FileLabel2
+            | State::FileLabel3
+            | State::BeginData1 => PromptStyle::First,
+
+            State::FileLabel1 | State::BeginData2 => PromptStyle::Later,
+
+            State::Comment1 | State::Comment2 => PromptStyle::Comment,
+            State::Document1 | State::Document2 => PromptStyle::Document,
+            State::DoRepeat3 | State::DoRepeat4 => PromptStyle::DoRepeat,
+            State::Define4 | State::Define5 | State::Define6 => PromptStyle::Define,
+            State::BeginData3 | State::BeginData4 => PromptStyle::Data,
+        }
+    }
+
+    /// Labels a prefix of `input`, returning the unconsumed remainder of
+    /// `input` together with the segment type.
+    ///
+    /// Empty input yields `Ok(None)` when `eof` is true and `Err(Incomplete)`
+    /// otherwise.  Nonempty input is dispatched to the parsing method for the
+    /// current [State].
+    fn push_rest<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        if input.is_empty() {
+            return if eof { Ok(None) } else { Err(Incomplete) };
+        }
+
+        match self.state.0 {
+            State::Shbang => self.parse_shbang(input, eof),
+            State::General if self.start_of_line() => self.parse_start_of_line(input, eof),
+            State::General => self.parse_mid_line(input, eof),
+            State::Comment1 => self.parse_comment_1(input, eof),
+            State::Comment2 => self.parse_comment_2(input, eof),
+            State::Document1 => self.parse_document_1(input, eof),
+            State::Document2 => self.parse_document_2(input, eof),
+            State::Document3 => self.parse_document_3(input, eof),
+            State::FileLabel1 => self.parse_file_label_1(input, eof),
+            State::FileLabel2 => self.parse_file_label_2(input, eof),
+            State::FileLabel3 => self.parse_file_label_3(input, eof),
+            State::DoRepeat1 => self.parse_do_repeat_1(input, eof),
+            State::DoRepeat2 => self.parse_do_repeat_2(input, eof),
+            State::DoRepeat3 => self.parse_do_repeat_3(input, eof),
+            State::DoRepeat4 => self.parse_do_repeat_4(input),
+            State::Define1 | State::Define2 => self.parse_define_1_2(input, eof),
+            State::Define3 => self.parse_define_3(input, eof),
+            State::Define4 | State::Define5 => self.parse_define_4_5(input, eof),
+            State::Define6 => self.parse_define_6(input, eof),
+            State::BeginData1 => self.parse_begin_data_1(input, eof),
+            State::BeginData2 => self.parse_begin_data_2(input, eof),
+            State::BeginData3 => self.parse_begin_data_3(input, eof),
+            State::BeginData4 => self.parse_begin_data_4(input, eof),
+        }
+    }
+
+    /// Attempts to label a prefix of the remaining input with a segment type.
+    /// The caller supplies a prefix of the remaining input as `input`.  If
+    /// `eof` is true, then `input` is the entire remainder of the input; if
+    /// `eof` is false, then further input is potentially available.
+    ///
+    /// The input may contain `\n` or `\r\n` line ends in any combination.
+    ///
+    /// If a segment can be labeled, returns `Ok(Some((n, type)))`, where `n`
+    /// is the number of bytes in the segment at the beginning of `input` (a
+    /// number in `0..=input.len()`) and `type` is the type of that segment.
+    /// The next call should not include those bytes in `input`, because the
+    /// segmenter has (figuratively) consumed them.
+    ///
+    /// Returns `Ok(None)` when there is nothing left to segment, for example
+    /// when `input` is empty and `eof` is true.
+    ///
+    /// Segments can have zero length, including segment types
+    /// [Segment::SeparateCommands], [Segment::StartDocument],
+    /// [Segment::InlineData], and [Segment::Spaces].
+    ///
+    /// Failure occurs only if the segment type of the bytes in `input` cannot
+    /// yet be determined.  In this case, this function returns
+    /// `Err(Incomplete)`.  If more input is available, the caller should obtain
+    /// some more, then call again with a longer `input`.  If this is still not
+    /// enough, the process might need to repeat again and again.  If input is
+    /// exhausted, then the caller may call again setting `eof` to true.  This
+    /// function will never return `Err(Incomplete)` when `eof` is true.
+    ///
+    /// The caller must not, in a sequence of calls, supply contradictory input.
+    /// That is, bytes provided as part of `input` in one call, but not
+    /// consumed, must not be provided with *different* values on subsequent
+    /// calls.  This is because the function must often make decisions based on
+    /// looking ahead beyond the bytes that it consumes.
+    pub fn push(&mut self, input: &str, eof: bool) -> Result<Option<(usize, Segment)>, Incomplete> {
+        let Some((rest, seg_type)) = self.push_rest(input, eof)? else {
+            return Ok(None);
+        };
+        // `rest` is a suffix of `input`, so the difference in lengths is the
+        // number of bytes in the segment.
+        let consumed = input.len() - rest.len();
+        Ok(Some((consumed, seg_type)))
+    }
+}
+
+/// Major state of a [Segmenter].
+///
+/// `Segmenter::push_rest` dispatches on this value: each variant selects the
+/// `parse_*` method that handles the next piece of input.  The numbered
+/// variants are successive steps within one construct (comment command,
+/// `DOCUMENT`, `FILE LABEL`, `DO REPEAT`, `DEFINE`, `BEGIN DATA`).
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+enum State {
+    Shbang,
+    General,
+    Comment1,
+    Comment2,
+    Document1,
+    Document2,
+    Document3,
+    FileLabel1,
+    FileLabel2,
+    FileLabel3,
+    DoRepeat1,
+    DoRepeat2,
+    DoRepeat3,
+    DoRepeat4,
+    Define1,
+    Define2,
+    Define3,
+    Define4,
+    Define5,
+    Define6,
+    BeginData1,
+    BeginData2,
+    BeginData3,
+    BeginData4,
+}
+
+/// Splits the first character off of `input`.
+///
+/// Returns that character (or `None` if `input` is empty) together with the
+/// rest of `input`.  Empty input is reported as `None` only when `eof` is
+/// true; when `eof` is false it produces `Err(Incomplete)` instead, because
+/// more characters might still arrive.
+fn take(input: &str, eof: bool) -> Result<(Option<char>, &str), Incomplete> {
+    let mut chars = input.chars();
+    let head = chars.next();
+    if head.is_none() && !eof {
+        return Err(Incomplete);
+    }
+    Ok((head, chars.as_str()))
+}
+
+/// Skips the body of a `/* ... */` comment.
+///
+/// Scans `input` for the closing `*/` and returns the text following it.  If
+/// a line end or the end of the input comes first, returns the text starting
+/// at that point instead.
+fn skip_comment(mut input: &str, eof: bool) -> Result<&str, Incomplete> {
+    while let (Some(c), after) = take(input, eof)? {
+        if matches!(c, '\n' | '\r') && is_end_of_line(input, eof)? {
+            break;
+        }
+        if c == '*' {
+            if let (Some('/'), after_slash) = take(after, eof)? {
+                return Ok(after_slash);
+            }
+        }
+        input = after;
+    }
+    Ok(input)
+}
+
+/// Skips the leading characters of `input` for which `f` returns true and
+/// returns what is left.
+///
+/// If nothing is left and `eof` is false, returns `Err(Incomplete)`, since
+/// further input might extend the run of matching characters.
+fn skip_matching<F>(f: F, input: &str, eof: bool) -> Result<&str, Incomplete>
+where
+    F: Fn(char) -> bool,
+{
+    match input.trim_start_matches(f) {
+        "" if !eof => Err(Incomplete),
+        remainder => Ok(remainder),
+    }
+}
+
+/// Tests the first character of `input` against `f`.
+///
+/// Returns `Ok(Some(rest))`, with `rest` being the input after that
+/// character, if `f` accepts it, and `Ok(None)` if `f` rejects it or the
+/// input has ended.
+fn match_char<F>(f: F, input: &str, eof: bool) -> Result<Option<&str>, Incomplete>
+where
+    F: Fn(char) -> bool,
+{
+    match take(input, eof)? {
+        (Some(c), rest) if f(c) => Ok(Some(rest)),
+        _ => Ok(None),
+    }
+}
+
+/// Skips white space at the beginning of `input`, stopping at a line end,
+/// and returns the rest of the input.
+fn skip_spaces(mut input: &str, eof: bool) -> Result<&str, Incomplete> {
+    while let (Some(c), after) = take(input, eof)? {
+        // A line end is white space too, but it must not be skipped.
+        let at_line_end = matches!(c, '\r' | '\n') && is_end_of_line(input, eof)?;
+        if at_line_end || !c.is_whitespace() {
+            break;
+        }
+        input = after;
+    }
+    Ok(input)
+}
+
+/// Skips ASCII digits at the beginning of `input` and returns the rest.
+fn skip_digits(input: &str, eof: bool) -> Result<&str, Incomplete> {
+    let is_digit = |c: char| c.is_ascii_digit();
+    skip_matching(is_digit, input, eof)
+}
+
+/// Skips white space and `/* ... */` comments at the beginning of `input`
+/// and returns the rest of the input.
+///
+/// Skipping stops at a line end, at the end of the input, or at the first
+/// character that is neither white space nor the start of a comment.  In
+/// particular, a `/` that is not followed by `*` is not skipped: the
+/// returned string starts with that `/`.
+fn skip_spaces_and_comments(mut input: &str, eof: bool) -> Result<&str, Incomplete> {
+    loop {
+        let (Some(c), rest) = take(input, eof)? else {
+            return Ok(input);
+        };
+        match c {
+            '/' => {
+                let (c, rest2) = take(rest, eof)?;
+                match c {
+                    Some('*') => input = skip_comment(rest2, eof)?,
+                    // A lone `/` is an ordinary token, so stop in front of it
+                    // rather than consuming it.
+                    Some(_) | None => return Ok(input),
+                }
+            }
+            '\r' | '\n' if is_end_of_line(input, eof)? => return Ok(input),
+            c if c.is_whitespace() => input = rest,
+            _ => return Ok(input),
+        };
+    }
+}
+
+/// Returns true if `input` begins with the start of a string: a quote mark,
+/// or `X` or `U` (in either case) immediately followed by a quote mark.
+///
+/// A line end at the beginning of `input` also counts as the start of a
+/// string.
+fn is_start_of_string(input: &str, eof: bool) -> Result<bool, Incomplete> {
+    let (head, tail) = take(input, eof)?;
+    Ok(match head {
+        Some('\'' | '"') => true,
+        Some('x' | 'X' | 'u' | 'U') => matches!(take(tail, eof)?.0, Some('\'' | '"')),
+        Some('\n' | '\r') => is_end_of_line(input, eof)?,
+        Some(_) | None => false,
+    })
+}
+
+/// Returns true if `input` is positioned at the end of a line: it is empty
+/// (at end of input) or begins with `\n` or `\r\n`.
+fn is_end_of_line(input: &str, eof: bool) -> Result<bool, Incomplete> {
+    match take(input, eof)? {
+        (None, _) | (Some('\n'), _) => Ok(true),
+        // A carriage return only ends a line as part of `\r\n`.
+        (Some('\r'), after_cr) => Ok(take(after_cr, eof)?.0 == Some('\n')),
+        (Some(_), _) => Ok(false),
+    }
+}
+
+/// Returns true if the end of the line is reached after skipping whatever
+/// `skip_spaces_and_comments` skips at the beginning of `input`.
+fn at_end_of_line(input: &str, eof: bool) -> Result<bool, Incomplete> {
+    let remainder = skip_spaces_and_comments(input, eof)?;
+    is_end_of_line(remainder, eof)
+}
+
+/// Returns the first character of `s`.
+///
+/// # Panics
+///
+/// Panics if `s` is empty.
+fn first(s: &str) -> char {
+    let mut chars = s.chars();
+    chars.next().unwrap()
+}
+/// Returns the run of `COMMAND_NAMES` entries whose first character equals
+/// the first character of `target`, converted to ASCII upper case.
+///
+/// Returns an empty slice if `target` is empty.
+// NOTE(review): the two `partition_point` calls presumably rely on
+// `COMMAND_NAMES` being sorted by first character -- confirm at its definition.
+fn get_command_name_candidates(target: &str) -> &[&'static str] {
+    let Some(initial) = target.chars().next() else {
+        return &[];
+    };
+    let initial = initial.to_ascii_uppercase();
+    let start = COMMAND_NAMES.partition_point(|name| first(name) < initial);
+    let end = COMMAND_NAMES.partition_point(|name| first(name) <= initial);
+    &COMMAND_NAMES[start..end]
+}
+
+/// Returns true if `input` begins with something that matches a command
+/// name with no words missing.
+///
+/// The candidate name is the longest prefix of `input` made up of white
+/// space other than `\n`, identifier characters other than `.`, and `-`.
+/// If that prefix extends to the end of `input` and `eof` is false, more
+/// input is needed and the result is `Err(Incomplete)`.
+fn detect_command_name(input: &str, eof: bool) -> Result<bool, Incomplete> {
+    let is_name_char = |c: char| {
+        (c.is_whitespace() && c != '\n') || (c.may_continue_id() && c != '.') || c == '-'
+    };
+    let name_len = input
+        .find(|c: char| !is_name_char(c))
+        .unwrap_or(input.len());
+    if !eof && name_len == input.len() {
+        return Err(Incomplete);
+    }
+
+    let command_name =
+        input[..name_len].trim_end_matches(|c: char| c.is_whitespace() || c == '.');
+    let found = get_command_name_candidates(command_name)
+        .iter()
+        .any(|command| {
+            command_match(command, command_name).is_some_and(|m| m.missing_words <= 0)
+        });
+    Ok(found)
+}
+
+impl Segmenter {
+    /// Handles the very beginning of a syntax file.
+    ///
+    /// If the input starts with `#!`, the line it introduces becomes a
+    /// [Segment::Shbang] segment.  Otherwise the segmenter switches to
+    /// [State::General] at the start of a line and of a command, and the
+    /// input is segmented again from there.
+    fn parse_shbang<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let after_shbang = match take(input, eof)? {
+            (Some('#'), after_hash) => match take(after_hash, eof)? {
+                (Some('!'), after_bang) => Some(after_bang),
+                _ => None,
+            },
+            _ => None,
+        };
+
+        match after_shbang {
+            Some(line) => {
+                let rest = self.parse_full_line(line, eof)?;
+                self.state = (State::General, Substate::START_OF_COMMAND);
+                Ok(Some((rest, Segment::Shbang)))
+            }
+            None => {
+                self.state = (
+                    State::General,
+                    Substate::START_OF_COMMAND | Substate::START_OF_LINE,
+                );
+                self.push_rest(input, eof)
+            }
+        }
+    }
+    /// Decides whether a line that starts at the left margin with `input`
+    /// should be treated as beginning a new command.
+    ///
+    /// Batch syntax always answers yes and interactive syntax always answers
+    /// no; with [Syntax::Auto] the answer depends on whether `input` begins
+    /// with a command name.
+    fn at_command_start(&self, input: &str, eof: bool) -> Result<bool, Incomplete> {
+        match self.syntax {
+            Syntax::Batch => Ok(true),
+            Syntax::Interactive => Ok(false),
+            Syntax::Auto => detect_command_name(input, eof),
+        }
+    }
+    /// Segments input at the start of a line in [State::General].
+    ///
+    /// Emits a command separator if the line begins with `+`, `-`, or `.`, if
+    /// it is blank, or if (depending on the syntax variant) it begins a new
+    /// command.  Otherwise the input is handed to [Self::parse_mid_line].
+    fn parse_start_of_line<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        debug_assert_eq!(self.state.0, State::General);
+        debug_assert!(self.start_of_line());
+        debug_assert!(!input.is_empty());
+
+        // `input` is nonempty, so `take` can neither fail nor yield `None`.
+        let (Some(c), rest) = take(input, eof).unwrap() else {
+            unreachable!()
+        };
+        match c {
+            '+' if is_start_of_string(skip_spaces_and_comments(rest, eof)?, eof)? => {
+                // This `+` is punctuation that may separate pieces of a string.
+                self.state = (State::General, Substate::empty());
+                return Ok(Some((rest, Segment::Punct)));
+            }
+            '+' | '-' | '.' => {
+                // The single character itself is the command separator.
+                self.state = (State::General, Substate::START_OF_COMMAND);
+                return Ok(Some((rest, Segment::StartCommand)));
+            }
+            _ if c.is_whitespace() => {
+                if at_end_of_line(input, eof)? {
+                    // Blank line: a zero-length separator (nothing is
+                    // consumed, since `input` itself is returned).
+                    self.state = (State::General, Substate::START_OF_COMMAND);
+                    return Ok(Some((input, Segment::SeparateCommands)));
+                }
+            }
+            _ => {
+                // Only emit a separator if one has not just been emitted.
+                if self.at_command_start(input, eof)?
+                    && !self.state.1.contains(Substate::START_OF_COMMAND)
+                {
+                    // Zero-length separator in front of the new command.
+                    self.state = (State::General, Substate::START_OF_COMMAND);
+                    return Ok(Some((input, Segment::StartCommand)));
+                }
+            }
+        }
+        // Replaces the whole substate, which clears START_OF_LINE.
+        // NOTE(review): this also sets START_OF_COMMAND on lines that continue
+        // a command -- confirm that this is intended.
+        self.state.1 = Substate::START_OF_COMMAND;
+        self.parse_mid_line(input, eof)
+    }
+    /// Segments input in [State::General] when not at the start of a line.
+    ///
+    /// Dispatches on the first character of `input` to recognize new-lines,
+    /// comments, punctuation, numbers, strings, identifiers, and white space.
+    /// `input` must be nonempty.
+    fn parse_mid_line<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        debug_assert!(self.state.0 == State::General);
+        debug_assert!(!self.state.1.contains(Substate::START_OF_LINE));
+        let (Some(c), rest) = take(input, eof)? else {
+            unreachable!()
+        };
+        match c {
+            '\r' | '\n' if is_end_of_line(input, eof)? => {
+                // The next segment starts a line.
+                self.state.1 |= Substate::START_OF_LINE;
+                Ok(Some((
+                    self.parse_newline(input, eof).unwrap().unwrap(),
+                    Segment::Newline,
+                )))
+            }
+            '/' => {
+                // `/*` opens a comment; a lone `/` is punctuation.
+                if let (Some('*'), rest) = take(rest, eof)? {
+                    let rest = skip_comment(rest, eof)?;
+                    Ok(Some((rest, Segment::Comment)))
+                } else {
+                    self.state.1 = Substate::empty();
+                    Ok(Some((rest, Segment::Punct)))
+                }
+            }
+            '-' => {
+                // `-` starts a number if, after optional spaces, a digit or
+                // `.` plus a digit follows; otherwise it is punctuation.
+                // NOTE(review): `parse_number` receives `rest`, which still
+                // includes any spaces that were skipped for the lookahead, so
+                // for input like `- 5` it appears to consume no digits --
+                // confirm the intended handling of spaces after `-`.
+                let (c, rest2) = take(skip_spaces(rest, eof)?, eof)?;
+                match c {
+                    Some(c) if c.is_ascii_digit() => {
+                        return self.parse_number(rest, eof);
+                    }
+                    Some('.') => {
+                        if let (Some(c), _rest) = take(rest2, eof)? {
+                            if c.is_ascii_digit() {
+                                return self.parse_number(rest, eof);
+                            }
+                        }
+                    }
+                    None | Some(_) => (),
+                }
+                self.state.1 = Substate::empty();
+                Ok(Some((rest, Segment::Punct)))
+            }
+            '(' | ')' | '[' | ']' | '{' | '}' | ',' | '=' | ';' | ':' | '&' | '|' | '+' => {
+                self.state.1 = Substate::empty();
+                Ok(Some((rest, Segment::Punct)))
+            }
+            '*' => {
+                // At the start of a command, `*` begins a comment command;
+                // elsewhere it is `*` or `**`.
+                if self.state.1.contains(Substate::START_OF_COMMAND) {
+                    self.state = (State::Comment1, Substate::empty());
+                    self.parse_comment_1(input, eof)
+                } else {
+                    self.parse_digraph(&['*'], rest, eof)
+                }
+            }
+            '<' => self.parse_digraph(&['=', '>'], rest, eof),
+            '>' => self.parse_digraph(&['='], rest, eof),
+            '~' => self.parse_digraph(&['='], rest, eof),
+            '.' if at_end_of_line(rest, eof)? => {
+                // A period that ends the line ends the command.
+                self.state.1 = Substate::START_OF_COMMAND;
+                Ok(Some((rest, Segment::EndCommand)))
+            }
+            '.' => match take(rest, eof)? {
+                // `.` followed by a digit starts a number such as `.5`.
+                (Some(c), _) if c.is_ascii_digit() => self.parse_number(input, eof),
+                _ => Ok(Some((rest, Segment::Punct))),
+            },
+            '0'..='9' => self.parse_number(input, eof),
+            // `U'...'` and `X'...'` are strings; otherwise the letter starts
+            // an identifier.
+            'u' | 'U' => self.maybe_parse_string(Segment::UnicodeString, (input, rest), eof),
+            'x' | 'X' => self.maybe_parse_string(Segment::HexString, (input, rest), eof),
+            '\'' | '"' => self.parse_string(Segment::QuotedString, c, rest, eof),
+            '!' => {
+                // `!*` is a punctuator, `!` followed by anything else is
+                // handled as an identifier, and `!` at end of input is
+                // punctuation by itself.
+                let (c, rest2) = take(rest, eof)?;
+                match c {
+                    Some('*') => Ok(Some((rest2, Segment::Punct))),
+                    Some(_) => self.parse_id(input, eof),
+                    None => Ok(Some((rest, Segment::Punct))),
+                }
+            }
+            c if c.is_whitespace() => Ok(Some((skip_spaces(rest, eof)?, Segment::Spaces))),
+            c if c.may_start_id() => self.parse_id(input, eof),
+            // Any remaining character in `#`..=`~`, except `\` and `^`, is
+            // punctuation.
+            '#'..='~' if c != '\\' && c != '^' => {
+                self.state.1 = Substate::empty();
+                Ok(Some((rest, Segment::Punct)))
+            }
+            _ => {
+                self.state.1 = Substate::empty();
+                Ok(Some((rest, Segment::UnexpectedChar)))
+            }
+        }
+    }
+    /// Segments the body of a string whose opening `quote` mark has already
+    /// been consumed; `input` starts just after it.
+    ///
+    /// A doubled quote mark inside the string stands for a literal quote
+    /// mark and does not end the string.  On success the segment has type
+    /// `segment`.  If the line or the input ends before the closing quote
+    /// mark, the segment has type [Segment::ExpectedQuote] instead.
+    fn parse_string<'a>(
+        &mut self,
+        segment: Segment,
+        quote: char,
+        mut input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        loop {
+            let (Some(c), after) = take(input, eof)? else {
+                break;
+            };
+            if c == quote {
+                let (next, after_pair) = take(after, eof)?;
+                if next != Some(quote) {
+                    // Closing quote mark.
+                    self.state.1 = Substate::empty();
+                    return Ok(Some((after, segment)));
+                }
+                // Doubled quote mark: skip both and keep going.
+                input = after_pair;
+            } else if matches!(c, '\r' | '\n') && is_end_of_line(input, eof)? {
+                break;
+            } else {
+                input = after;
+            }
+        }
+        self.state.1 = Substate::empty();
+        Ok(Some((input, Segment::ExpectedQuote)))
+    }
+    /// Segments input that starts with a letter that may prefix a string
+    /// (`X` or `U`).
+    ///
+    /// `input.0` is the input starting at the letter and `input.1` is the
+    /// input following it.  If a quote mark follows the letter, the result
+    /// is a string of type `segment`; otherwise the letter begins an
+    /// identifier.
+    fn maybe_parse_string<'a>(
+        &mut self,
+        segment: Segment,
+        input: (&'a str, &'a str),
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let (from_letter, after_letter) = input;
+        if let (Some(quote @ ('\'' | '"')), body) = take(after_letter, eof)? {
+            self.parse_string(segment, quote, body, eof)
+        } else {
+            self.parse_id(from_letter, eof)
+        }
+    }
+    /// Looks ahead in `input` for the next identifier.
+    ///
+    /// Uses a separate snippet-mode segmenter to step over spaces, comments,
+    /// and new-lines.  If the first other segment is an identifier, returns
+    /// its text and the input following it.  If it is anything else, returns
+    /// an empty string and the input following that segment.  If the input
+    /// runs out first, returns the remaining input twice.
+    fn next_id_in_command<'a>(
+        &self,
+        mut input: &'a str,
+        eof: bool,
+    ) -> Result<(&'a str, &'a str), Incomplete> {
+        let mut lookahead = Segmenter::new(self.syntax, true);
+        while let Some((seg_len, seg_type)) = lookahead.push(input, eof)? {
+            let (text, remainder) = input.split_at(seg_len);
+            match seg_type {
+                Segment::Identifier => return Ok((text, remainder)),
+
+                Segment::Shbang | Segment::Spaces | Segment::Comment | Segment::Newline => {
+                    input = remainder;
+                }
+
+                Segment::Number
+                | Segment::QuotedString
+                | Segment::HexString
+                | Segment::UnicodeString
+                | Segment::UnquotedString
+                | Segment::Punct
+                | Segment::CommentCommand
+                | Segment::DoRepeatCommand
+                | Segment::DoRepeatOverflow
+                | Segment::InlineData
+                | Segment::MacroName
+                | Segment::MacroBody
+                | Segment::StartDocument
+                | Segment::Document
+                | Segment::StartCommand
+                | Segment::SeparateCommands
+                | Segment::EndCommand
+                | Segment::ExpectedQuote
+                | Segment::ExpectedExponent
+                | Segment::UnexpectedChar => return Ok(("", remainder)),
+            }
+        }
+        Ok((input, input))
+    }
+    /// Segments an identifier at the beginning of `input`.
+    ///
+    /// The first character is accepted unconditionally (the caller chose to
+    /// treat it as the start of an identifier).  At the start of a command,
+    /// the identifier is also checked against the keywords that switch the
+    /// segmenter into a special state: `COMMENT`, `DOCUMENT`, `DEFINE`,
+    /// `FILE LABEL`, `DO REPEAT`, and `BEGIN DATA`.
+    fn parse_id<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let (Some(_), mut end) = take(input, eof).unwrap() else {
+            unreachable!()
+        };
+        // Extend over every following character that may continue an
+        // identifier.
+        while let (Some(c), rest) = take(end, eof)? {
+            if !c.may_continue_id() {
+                break;
+            };
+            end = rest;
+        }
+        let identifier = &input[..input.len() - end.len()];
+        // A trailing `.` that is followed only by the end of the line is left
+        // out of the identifier.
+        let identifier = match identifier.strip_suffix('.') {
+            Some(without_dot) if at_end_of_line(end, eof)? => without_dot,
+            _ => identifier,
+        };
+        let rest = &input[identifier.len()..];
+
+        if self.state.1.contains(Substate::START_OF_COMMAND) {
+            if id_match_n("COMMENT", identifier, 4) {
+                // The comment command's text includes the keyword, so it is
+                // re-parsed from `input`.
+                self.state = (State::Comment1, Substate::empty());
+                return self.parse_comment_1(input, eof);
+            } else if id_match("DOCUMENT", identifier) {
+                // Zero-length segment: `input` is returned unconsumed.
+                self.state = (State::Document1, Substate::empty());
+                return Ok(Some((input, Segment::StartDocument)));
+            } else if id_match_n("DEFINE", identifier, 6) {
+                // Falls through to emit the keyword as an ordinary identifier.
+                self.state = (State::Define1, Substate::empty());
+            } else if id_match("FILE", identifier) {
+                if id_match("LABEL", self.next_id_in_command(rest, eof)?.0) {
+                    self.state = (State::FileLabel1, Substate::empty());
+                    return Ok(Some((rest, Segment::Identifier)));
+                }
+            } else if id_match("DO", identifier) {
+                if id_match("REPEAT", self.next_id_in_command(rest, eof)?.0) {
+                    self.state = (State::DoRepeat1, Substate::empty());
+                    return Ok(Some((rest, Segment::Identifier)));
+                }
+            } else if id_match("BEGIN", identifier) {
+                let (next_id, rest2) = self.next_id_in_command(rest, eof)?;
+                if id_match("DATA", next_id) {
+                    // `BEGIN DATA` only counts if nothing but an optional `.`
+                    // (with spaces and comments around it) follows on the
+                    // line.
+                    let rest2 = skip_spaces_and_comments(rest2, eof)?;
+                    let rest2 = if let Some(s) = rest2.strip_prefix('.') {
+                        skip_spaces_and_comments(s, eof)?
+                    } else {
+                        rest2
+                    };
+                    if is_end_of_line(rest2, eof)? {
+                        // The next state depends on whether a new-line
+                        // occurs within the `BEGIN DATA` text itself.
+                        let s = &input[..input.len() - rest2.len()];
+                        self.state = (
+                            if s.contains('\n') {
+                                State::BeginData1
+                            } else {
+                                State::BeginData2
+                            },
+                            Substate::empty(),
+                        );
+                        return Ok(Some((rest, Segment::Identifier)));
+                    }
+                }
+            }
+        }
+
+        self.state.1 = Substate::empty();
+        Ok(Some((
+            rest,
+            // A bare `!` is punctuation rather than an identifier.
+            if identifier != "!" {
+                Segment::Identifier
+            } else {
+                Segment::Punct
+            },
+        )))
+    }
+    /// Segments a punctuator that may be one or two characters long.
+    ///
+    /// The first character has already been consumed and `input` follows it.
+    /// If the next character is one of `seconds`, it is included in the
+    /// punctuator; otherwise the punctuator is the first character alone.
+    fn parse_digraph<'a>(
+        &mut self,
+        seconds: &[char],
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let (next, after_next) = take(input, eof)?;
+        self.state.1 = Substate::empty();
+        let rest = if next.is_some_and(|c| seconds.contains(&c)) {
+            after_next
+        } else {
+            input
+        };
+        Ok(Some((rest, Segment::Punct)))
+    }
+    /// Segments a number beginning at `input`.
+    ///
+    /// Consumes digits, then an optional `.` with further digits, then an
+    /// optional exponent (`e` or `E`, an optional sign, and digits).  Yields
+    /// [`Segment::Number`], or [`Segment::ExpectedExponent`] if the exponent
+    /// marker (and sign, if any) is not followed by at least one digit.
+    fn parse_number<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let mut input = skip_digits(input, eof)?;
+        if let Some(rest) = match_char(|c| c == '.', input, eof)? {
+            let rest2 = skip_digits(rest, eof)?;
+            // Consume the `.` if fraction digits follow it, or if
+            // `at_end_of_line` does not hold after it.  A bare `.` for which
+            // `at_end_of_line` holds is left out of the number.
+            if rest2.len() < rest.len() || !at_end_of_line(rest2, eof)? {
+                input = rest2;
+            }
+        };
+        if let Some(rest) = match_char(|c| c == 'e' || c == 'E', input, eof)? {
+            let rest = match_char(|c| c == '+' || c == '-', rest, eof)?.unwrap_or(rest);
+            let rest2 = skip_digits(rest, eof)?;
+            if rest2.len() == rest.len() {
+                // No digits after the exponent marker: report everything up to
+                // and including the marker and sign as a malformed number.
+                self.state.1 = Substate::empty();
+                return Ok(Some((rest, Segment::ExpectedExponent)));
+            }
+            input = rest2;
+        }
+        self.state.1 = Substate::empty();
+        Ok(Some((input, Segment::Number)))
+    }
+    /// Segments one line of a comment command, up to but not including the
+    /// line end.
+    ///
+    /// What is emitted, and the next state, depend on what the line held:
+    ///
+    ///   - Only whitespace: [`Segment::SeparateCommands`], back to general
+    ///     parsing at the start of a command.
+    ///
+    ///   - A `.` as its last non-whitespace character:
+    ///     [`Segment::CommentCommand`] ending just before that `.`, back to
+    ///     general parsing.
+    ///
+    ///   - Anything else: [`Segment::CommentCommand`] for the whole line, then
+    ///     `State::Comment2` to handle the newline.
+    ///
+    /// At end of input, the remaining text is reported as
+    /// [`Segment::SeparateCommands`].
+    fn parse_comment_1<'a>(
+        &mut self,
+        mut input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        enum CommentState<'a> {
+            // Only whitespace seen so far.
+            Blank,
+            // The most recent non-whitespace character was not `.`.
+            NotBlank,
+            // The most recent non-whitespace character was `.`; holds the
+            // input starting at that `.`.
+            Period(&'a str),
+        }
+        let mut state = CommentState::Blank;
+        loop {
+            let (Some(c), rest) = take(input, eof)? else {
+                // End of file.
+                self.state = (State::General, Substate::START_OF_COMMAND);
+                return Ok(Some((input, Segment::SeparateCommands)));
+            };
+            match c {
+                '.' => state = CommentState::Period(input),
+                '\n' | '\r' if is_end_of_line(input, eof)? => {
+                    match state {
+                        CommentState::Blank => {
+                            // Blank line ends comment command.
+                            self.state = (State::General, Substate::START_OF_COMMAND);
+                            return Ok(Some((input, Segment::SeparateCommands)));
+                        }
+                        CommentState::Period(period) => {
+                            // '.' at end of line ends comment command.
+                            self.state = (State::General, Substate::empty());
+                            return Ok(Some((period, Segment::CommentCommand)));
+                        }
+                        CommentState::NotBlank => {
+                            // Comment continues onto next line.
+                            self.state = (State::Comment2, Substate::empty());
+                            return Ok(Some((input, Segment::CommentCommand)));
+                        }
+                    }
+                }
+                // Whitespace (including a `\r` that does not end the line)
+                // leaves the state unchanged.
+                c if c.is_whitespace() => (),
+                _ => state = CommentState::NotBlank,
+            }
+            input = rest;
+        }
+    }
+    /// Segments the newline that follows a line of a comment command, then
+    /// peeks at the next line to decide whether the comment continues.
+    ///
+    /// The next line starts a new command if its first character is `+`, `-`,
+    /// or `.`, or if it is some other non-whitespace character and
+    /// `at_command_start` says so.  Otherwise (whitespace or no more input)
+    /// the comment continues in `State::Comment1`.
+    fn parse_comment_2<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let next_line = self.parse_newline(input, eof)?.unwrap();
+
+        let starts_command = match take(next_line, eof)?.0 {
+            None => false,
+            Some('+' | '-' | '.') => true,
+            Some(c) if c.is_whitespace() => false,
+            Some(_) => self.at_command_start(next_line, eof)?,
+        };
+        self.state = if starts_command {
+            (
+                State::General,
+                Substate::START_OF_LINE | Substate::START_OF_COMMAND,
+            )
+        } else {
+            (State::Comment1, Substate::empty())
+        };
+        Ok(Some((next_line, Segment::Newline)))
+    }
+    /// Segments one line of a `DOCUMENT` command, up to but not including the
+    /// line end, as [`Segment::Document`].
+    ///
+    /// If the line's last non-whitespace character is `.`, or the input ends,
+    /// the next state is `State::Document3`; otherwise it is
+    /// `State::Document2`.
+    fn parse_document_1<'a>(
+        &mut self,
+        mut input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        // True while the most recent non-whitespace character is `.`.
+        let mut end_cmd = false;
+        loop {
+            let (Some(c), rest) = take(input, eof)? else {
+                // End of input also ends the command.
+                self.state = (State::Document3, Substate::empty());
+                return Ok(Some((input, Segment::Document)));
+            };
+            match c {
+                '.' => end_cmd = true,
+                '\n' | '\r' if is_end_of_line(input, eof)? => {
+                    self.state.0 = if end_cmd {
+                        State::Document3
+                    } else {
+                        State::Document2
+                    };
+                    return Ok(Some((input, Segment::Document)));
+                }
+                c if !c.is_whitespace() => end_cmd = false,
+                _ => (),
+            }
+            input = rest;
+        }
+    }
+    /// Segments the newline between two lines of a `DOCUMENT` command and
+    /// returns to `State::Document1` for the next line.
+    ///
+    /// Panics if `input` does not start with a newline.
+    fn parse_document_2<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let next_line = self.parse_newline(input, eof)?.unwrap();
+        self.state = (State::Document1, Substate::empty());
+        let segment = (next_line, Segment::Newline);
+        Ok(Some(segment))
+    }
+    /// Emits the zero-width [`Segment::EndCommand`] that terminates a
+    /// `DOCUMENT` command, consuming no input, and resumes general parsing at
+    /// the start of a line and of a command.
+    fn parse_document_3<'a>(
+        &mut self,
+        input: &'a str,
+        _eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let flags = Substate::START_OF_COMMAND | Substate::START_OF_LINE;
+        self.state = (State::General, flags);
+        Ok(Some((input, Segment::EndCommand)))
+    }
+    /// Returns true if, after skipping spaces and comments, `input` begins
+    /// with a single or double quote or with a new-line character.
+    fn quoted_file_label(input: &str, eof: bool) -> Result<bool, Incomplete> {
+        let trimmed = skip_spaces_and_comments(input, eof)?;
+        let (first, _) = take(trimmed, eof)?;
+        Ok(matches!(first, Some('\'' | '"' | '\n')))
+    }
+    /// Segments the part of a `FILE LABEL` command that precedes the label
+    /// text.
+    ///
+    /// One segment is obtained from a copy of this segmenter that runs in
+    /// `State::General`.  When that segment is an identifier (expected to be
+    /// `LABEL`), what follows it selects the next state: a quote or new-line
+    /// (see `quoted_file_label`) means ordinary general parsing can continue,
+    /// and anything else means the label is unquoted and is read via
+    /// `State::FileLabel2`.
+    fn parse_file_label_1<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        // Work on a copy so that this segmenter's own state changes only once
+        // we know which way to go.
+        let mut sub = Segmenter {
+            state: (State::General, self.state.1),
+            ..*self
+        };
+        let (rest, segment) = sub.push_rest(input, eof)?.unwrap();
+        if segment == Segment::Identifier {
+            let id = &input[..input.len() - rest.len()];
+            debug_assert!(id_match("LABEL", id), "{id} should be LABEL");
+            if Self::quoted_file_label(rest, eof)? {
+                // Adopt the copy's state wholesale.
+                *self = sub;
+            } else {
+                self.state.0 = State::FileLabel2;
+            }
+        } else {
+            // Not the identifier yet: stay in this state, keeping only the
+            // copy's substate.
+            self.state.1 = sub.state.1;
+        }
+        Ok(Some((rest, segment)))
+    }
+    /// Segments the spaces that precede an unquoted file label as
+    /// [`Segment::Spaces`], then moves to `State::FileLabel3` to read the label
+    /// itself.
+    fn parse_file_label_2<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let after_spaces = skip_spaces(input, eof)?;
+        self.state = (State::FileLabel3, Substate::empty());
+        Ok(Some((after_spaces, Segment::Spaces)))
+    }
+    /// Segments an unquoted file label as [`Segment::UnquotedString`].
+    ///
+    /// The label runs to the end of the line, except that if the line's last
+    /// non-whitespace character is `.`, the label stops just before that `.`.
+    /// Afterward, general parsing resumes.
+    fn parse_file_label_3<'a>(
+        &mut self,
+        mut input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        // Input starting at a `.` that is, so far, the last non-whitespace
+        // character on the line.
+        let mut end_cmd = None;
+        loop {
+            let (c, rest) = take(input, eof)?;
+            match c {
+                None | Some('\n') | Some('\r') if is_end_of_line(input, eof)? => {
+                    self.state = (State::General, Substate::empty());
+                    return Ok(Some((end_cmd.unwrap_or(input), Segment::UnquotedString)));
+                }
+                None => unreachable!(),
+                Some('.') => end_cmd = Some(input),
+                Some(c) if !c.is_whitespace() => end_cmd = None,
+                Some(_) => (),
+            }
+            input = rest;
+        }
+    }
+    /// Obtains one segment from `input` as if this segmenter were in
+    /// `State::General`, without disturbing `self.state.0` or `self.nest`.
+    ///
+    /// A temporary segmenter with the same syntax and substate does the work.
+    /// On success, its resulting substate is copied back into `self`.
+    fn subparse<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let mut general = Segmenter {
+            syntax: self.syntax,
+            state: (State::General, self.state.1),
+            nest: 0,
+        };
+        let outcome = general.push_rest(input, eof)?;
+        let (_, substate) = general.state;
+        self.state.1 = substate;
+        Ok(outcome)
+    }
+    /// Segments the head of a `DO REPEAT` command, that is, the syntax that
+    /// defines the stand-in variables, which comes before the body (the lines
+    /// of syntax to be repeated).
+    fn parse_do_repeat_1<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?.unwrap();
+        match segment {
+            Segment::SeparateCommands => {
+                // A blank line separates the head from the body.
+                self.state.0 = State::DoRepeat2;
+            }
+            Segment::EndCommand | Segment::StartCommand => {
+                // The head is over, so the body starts here.
+                self.state.0 = State::DoRepeat3;
+                self.nest = 1;
+            }
+            _ => (),
+        }
+        Ok(Some((rest, segment)))
+    }
+    /// Segments the blank line that separates the head of a `DO REPEAT`
+    /// command from its body.  Once its newline has been seen, the body
+    /// begins, at nesting level 1.
+    fn parse_do_repeat_2<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?.unwrap();
+        if matches!(segment, Segment::Newline) {
+            // The body starts on the next line.
+            self.nest = 1;
+            self.state.0 = State::DoRepeat3;
+        }
+        Ok(Some((rest, segment)))
+    }
+    /// If `input` begins with a line end (`\n` or `\r\n`), returns the input
+    /// that follows it; otherwise returns `None`.  A `\r` that is not followed
+    /// by `\n` does not count as a line end.
+    fn parse_newline<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<&'a str>, Incomplete> {
+        let (first, rest) = take(input, eof)?;
+        Ok(match first {
+            Some('\n') => Some(rest),
+            Some('\r') => match take(rest, eof)? {
+                (Some('\n'), after) => Some(after),
+                _ => None,
+            },
+            _ => None,
+        })
+    }
+
+    /// Skips to the end of the current line and returns the input starting
+    /// there, that is, at the line end itself (which is not consumed) or at
+    /// the end of the input.
+    ///
+    /// Returns `Err(Incomplete)` if more input is needed to find the end of
+    /// the line.
+    fn parse_full_line<'a>(
+        &mut self,
+        mut input: &'a str,
+        eof: bool,
+    ) -> Result<&'a str, Incomplete> {
+        loop {
+            if is_end_of_line(input, eof)? {
+                return Ok(input);
+            }
+            // Propagate `Incomplete` rather than panicking, as every other
+            // call to `take` in this segmenter does.
+            input = take(input, eof)?.1;
+        }
+    }
+    /// Classifies a line in the body of `DO REPEAT`: returns 1 if it begins
+    /// with `DO REPEAT`, -1 if it begins with `END REPEAT`, and 0 otherwise.
+    /// A single leading `-` or `+` is ignored.
+    fn check_repeat_command(&mut self, input: &str, eof: bool) -> Result<isize, Incomplete> {
+        let line = input.strip_prefix(['-', '+']).unwrap_or(input);
+        let (first, after_first) = self.next_id_in_command(line, eof)?;
+        let direction = if id_match("DO", first) {
+            1
+        } else if id_match("END", first) {
+            -1
+        } else {
+            return Ok(0);
+        };
+        // Only look at the second identifier when the first one qualified.
+        let (second, _) = self.next_id_in_command(after_first, eof)?;
+        if id_match("REPEAT", second) {
+            Ok(direction)
+        } else {
+            Ok(0)
+        }
+    }
+    /// We are in the body of `DO REPEAT`, segmenting the lines of syntax that
+    /// are to be repeated.  Report each line of syntax as a single
+    /// [`Segment::DoRepeatCommand`].
+    ///
+    /// `DO REPEAT` can be nested, so we look for `DO REPEAT...END REPEAT`
+    /// blocks inside the lines we're segmenting.  `self.nest` counts the
+    /// nesting level, starting at 1.
+    fn parse_do_repeat_3<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        // Line ends between body lines are reported separately.
+        if let Some(rest) = self.parse_newline(input, eof)? {
+            return Ok(Some((rest, Segment::Newline)));
+        }
+        let rest = self.parse_full_line(input, eof)?;
+        match self.check_repeat_command(input, eof)?.cmp(&0) {
+            Ordering::Greater => {
+                // Nested `DO REPEAT`.
+                if let Some(nest) = self.nest.checked_add(1) {
+                    self.nest = nest;
+                } else {
+                    // The nesting counter would overflow.  Leave it unchanged
+                    // and have the next call report the overflow.
+                    self.state.0 = State::DoRepeat4;
+                }
+            }
+            Ordering::Less => {
+                // `END REPEAT`.
+                self.nest -= 1;
+                if self.nest == 0 {
+                    // Nesting level dropped to 0, so we've finished reading the `DO
+                    // REPEAT` body.
+                    self.state = (
+                        State::General,
+                        Substate::START_OF_COMMAND | Substate::START_OF_LINE,
+                    );
+                    // Segment the final `END REPEAT` line as ordinary syntax.
+                    return self.push_rest(input, eof);
+                }
+            }
+            Ordering::Equal => (),
+        }
+        Ok(Some((rest, Segment::DoRepeatCommand)))
+    }
+    /// Emits a zero-width [`Segment::DoRepeatOverflow`], consuming no input,
+    /// to report `DO REPEAT` nested more deeply than supported, then goes
+    /// back to segmenting the body in `State::DoRepeat3`.
+    fn parse_do_repeat_4<'a>(
+        &mut self,
+        input: &'a str,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let overflow = (input, Segment::DoRepeatOverflow);
+        self.state.0 = State::DoRepeat3;
+        Ok(Some(overflow))
+    }
+    /// We are segmenting a `DEFINE` command, which consists of:
+    ///
+    ///   - The `DEFINE` keyword.
+    ///
+    ///   - An identifier.  We transform this into `Segment::MacroName` instead
+    ///     of `Segment::Identifier` because this identifier must never be
+    ///     macro-expanded.
+    ///
+    ///   - Anything but `(`.
+    ///
+    ///   - `(` followed by a sequence of tokens possibly including balanced
+    ///     parentheses up to a final `)`.
+    ///
+    ///   - A sequence of any number of lines, one string per line, ending with
+    ///     `!ENDDEFINE`.  The first line is usually blank (that is, a newline
+    ///     follows the `(`).  The last line usually just has `!ENDDEFINE.` on
+    ///     it, but it can start with other tokens.  The whole
+    ///     DEFINE...!ENDDEFINE can be on a single line, even.
+    ///
+    /// This function handles the part up to and including the first `(`: in
+    /// `State::Define1` the macro name has not been seen yet, and in
+    /// `State::Define2` it has.
+    fn parse_define_1_2<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?.unwrap();
+        match segment {
+            Segment::Identifier if self.state.0 == State::Define1 => {
+                // The first identifier is the macro's name.
+                self.state.0 = State::Define2;
+                return Ok(Some((rest, Segment::MacroName)));
+            }
+            Segment::SeparateCommands | Segment::EndCommand | Segment::StartCommand => {
+                // The DEFINE command is malformed because we reached its end
+                // without ever hitting a `(` token.  Transition back to general
+                // parsing.
+                self.state.0 = State::General;
+            }
+            Segment::Punct if input.starts_with('(') => {
+                // Start counting parentheses.
+                self.state.0 = State::Define3;
+                self.nest = 1;
+            }
+            _ => (),
+        }
+        Ok(Some((rest, segment)))
+    }
+    /// We are segmenting the parenthesized part of a `DEFINE` command, after
+    /// its first `(`.  `self.nest` counts the parentheses currently open,
+    /// starting at 1; when it drops to 0, we move on to the macro body in
+    /// `State::Define4`.
+    fn parse_define_3<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?.unwrap();
+        match segment {
+            Segment::SeparateCommands | Segment::EndCommand | Segment::StartCommand => {
+                // The DEFINE command is malformed because we reached its end
+                // before closing the set of parentheses.  Transition back to
+                // general parsing.
+                self.state.0 = State::General;
+            }
+            Segment::Punct if input.starts_with('(') => {
+                // Saturate instead of overflowing the counter on pathologically
+                // deep nesting, which would otherwise panic in builds with
+                // overflow checks and wrap around to 0 in builds without.
+                self.nest = self.nest.saturating_add(1);
+            }
+            Segment::Punct if input.starts_with(')') => {
+                self.nest -= 1;
+                if self.nest == 0 {
+                    // That was the final `)`.  The macro body follows.
+                    self.state = (State::Define4, Substate::empty());
+                }
+            }
+            _ => (),
+        }
+        Ok(Some((rest, segment)))
+    }
+    /// Searches `input` for `!ENDDEFINE` (compared without regard to ASCII
+    /// case), skipping over spaces, comments, and quoted strings.  Returns the
+    /// part of `input` that begins at `!ENDDEFINE`, or `None` if it does not
+    /// occur or if a quoted string is not terminated within `input`.
+    fn find_enddefine(mut input: &str) -> Option<&str> {
+        loop {
+            // `input` is treated as complete (`eof` is true), so these calls
+            // presumably cannot report `Incomplete` -- TODO confirm.
+            input = skip_spaces_and_comments(input, true).unwrap();
+            let (Some(c), rest) = take(input, true).unwrap() else {
+                return None;
+            };
+            match c {
+                '!' if strip_prefix_ignore_ascii_case(input, "!ENDDEFINE").is_some() => {
+                    return Some(input)
+                }
+                '\'' | '"' => {
+                    // Skip to just past the matching quote.
+                    let index = rest.find(c)?;
+                    input = &rest[index + 1..];
+                }
+                _ => input = rest,
+            }
+        }
+    }
+
+    /// We are in the body of a macro definition, looking for additional lines
+    /// of the body or `!ENDDEFINE`.
+    ///
+    /// In `State::Define4`, we're parsing the first line of the macro body (the
+    /// same line as the closing parenthesis in the argument definition).  In
+    /// `State::Define5`, we're on a later line.
+    ///
+    /// Each call handles the text up to the end of the current line.  The
+    /// newline itself is handled in `State::Define6`.
+    fn parse_define_4_5<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let rest = self.parse_full_line(input, eof)?;
+        // The current line, without its line end.
+        let line = &input[..input.len() - rest.len()];
+        if let Some(end) = Self::find_enddefine(line) {
+            // Macro ends at the !ENDDEFINE on this line.
+            self.state = (State::General, Substate::empty());
+            // `prefix` is what precedes `!ENDDEFINE` on the line; `rest` starts
+            // at `!ENDDEFINE`.
+            let (prefix, rest) = input.split_at(line.len() - end.len());
+            if prefix.is_empty() {
+                // Line starts with `!ENDDEFINE`.
+                self.push_rest(input, eof)
+            } else if prefix.trim_start().is_empty() {
+                // Line starts with spaces followed by `!ENDDEFINE`.
+                Ok(Some((rest, Segment::Spaces)))
+            } else {
+                // Line starts with some content followed by `!ENDDEFINE`.
+                Ok(Some((rest, Segment::MacroBody)))
+            }
+        } else {
+            // No `!ENDDEFINE`.  We have a full line of macro body.
+            //
+            // If the first line of the macro body is blank, we just report it
+            // as spaces, or not at all if there are no spaces, because it's not
+            // significant.
+            //
+            // However, if it's a later line, we need to report it because blank
+            // lines can have significance.
+            let segment = if self.state.0 == State::Define4 && line.trim_start().is_empty() {
+                if line.is_empty() {
+                    // Nothing to report, so go straight on to the newline.
+                    return self.parse_define_6(input, eof);
+                }
+                Segment::Spaces
+            } else {
+                Segment::MacroBody
+            };
+            self.state.0 = State::Define6;
+            Ok(Some((rest, segment)))
+        }
+    }
+    /// Segments the newline at the end of a line of macro body, then continues
+    /// with the next line of the body in `State::Define5`.
+    ///
+    /// Panics if `input` does not start with a newline.
+    fn parse_define_6<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let next_line = self.parse_newline(input, eof)?.unwrap();
+        self.state.0 = State::Define5;
+        let segment = (next_line, Segment::Newline);
+        Ok(Some(segment))
+    }
+    /// Segments `BEGIN DATA` syntax with the general rules until the first
+    /// newline, after which `State::BeginData2` takes over.
+    fn parse_begin_data_1<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?.unwrap();
+        if let Segment::Newline = segment {
+            self.state.0 = State::BeginData2;
+        }
+        Ok(Some((rest, segment)))
+    }
+    /// Segments `BEGIN DATA` syntax with the general rules until the next
+    /// newline, after which the lines of inline data begin
+    /// (`State::BeginData3`).
+    fn parse_begin_data_2<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let (rest, segment) = self.subparse(input, eof)?.unwrap();
+        if let Segment::Newline = segment {
+            self.state.0 = State::BeginData3;
+        }
+        Ok(Some((rest, segment)))
+    }
+    /// Returns true if `line` is an `END DATA` line: `END` and `DATA`
+    /// (compared without regard to ASCII case) separated by exactly one
+    /// whitespace character, followed by nothing but whitespace and at most
+    /// one `.`.
+    fn is_end_data(line: &str) -> bool {
+        let Some(after_end) = strip_prefix_ignore_ascii_case(line, "END") else {
+            return false;
+        };
+        let after_space = match take(after_end, true).unwrap() {
+            (Some(c), rest) if c.is_whitespace() => rest,
+            _ => return false,
+        };
+        let Some(tail) = strip_prefix_ignore_ascii_case(after_space, "DATA") else {
+            return false;
+        };
+
+        // The tail may hold whitespace and a single command terminator.
+        let mut periods = 0;
+        tail.chars().all(|c| {
+            if c == '.' {
+                periods += 1;
+                periods <= 1
+            } else {
+                c.is_whitespace()
+            }
+        })
+    }
+    /// Segments one line between `BEGIN DATA` and `END DATA`.
+    ///
+    /// A line for which `is_end_data` holds ends the data: general parsing
+    /// resumes at the start of that line.  Any other line is reported as
+    /// [`Segment::InlineData`], and its newline is then handled in
+    /// `State::BeginData4`.
+    fn parse_begin_data_3<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let rest = self.parse_full_line(input, eof)?;
+        let line_len = input.len() - rest.len();
+        if !Self::is_end_data(&input[..line_len]) {
+            self.state.0 = State::BeginData4;
+            return Ok(Some((rest, Segment::InlineData)));
+        }
+
+        self.state = (
+            State::General,
+            Substate::START_OF_COMMAND | Substate::START_OF_LINE,
+        );
+        self.push_rest(input, eof)
+    }
+    /// Segments the newline at the end of a line of inline data, then
+    /// continues with the next line in `State::BeginData3`.
+    ///
+    /// Panics if `input` does not start with a newline.
+    fn parse_begin_data_4<'a>(
+        &mut self,
+        input: &'a str,
+        eof: bool,
+    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
+        let next_line = self.parse_newline(input, eof)?.unwrap();
+        self.state.0 = State::BeginData3;
+        let segment = (next_line, Segment::Newline);
+        Ok(Some(segment))
+    }
+}
+
+/// If `line` starts with `pattern`, compared without regard to ASCII case,
+/// returns the rest of `line` after that prefix; otherwise returns `None`.
+///
+/// Also returns `None` if `line` is shorter than `pattern` or if
+/// `pattern.len()` does not fall on a character boundary in `line`.
+fn strip_prefix_ignore_ascii_case<'a>(line: &'a str, pattern: &str) -> Option<&'a str> {
+    let split = pattern.len();
+    let head = line.get(..split)?;
+    if head.eq_ignore_ascii_case(pattern) {
+        Some(&line[split..])
+    } else {
+        None
+    }
+}
+
+#[cfg(test)]
+mod test;
diff --git a/rust/pspp/src/lex/segment/mod.rs b/rust/pspp/src/lex/segment/mod.rs

deleted file mode 100644 (file)

index 5a56869..0000000
--- a/rust/pspp/src/lex/segment/mod.rs
+++ /dev/null
@@ -1,1442 +0,0 @@
-// PSPP - a program for statistical analysis.
-// Copyright (C) 2025 Free Software Foundation, Inc.
-//
-// This program is free software: you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free Software
-// Foundation, either version 3 of the License, or (at your option) any later
-// version.
-//
-// This program is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
-// details.
-//
-// You should have received a copy of the GNU General Public License along with
-// this program.  If not, see <http://www.gnu.org/licenses/>.
-
-//! Low-level lexical analysis.
-//!
-//! PSPP divides traditional "lexical analysis" or "tokenization" into [three
-//! phases](super).  This module implements the low-level segmentation phase.
-//!
-//! Segmentation accepts a stream of UTF-8 bytes as input.  It outputs a label
-//! (a segment type) for each byte or contiguous sequence of bytes in the input.
-//! It also, in a few corner cases, outputs zero-width segments that label the
-//! boundary between a pair of bytes in the input.
-//!
-//! Some segment types correspond directly to tokens; for example,
-//! [Segment::Identifier] becomes [Token::Id] later in lexical analysis.  Other
-//! segments contribute to tokens but do not correspond directly; for example,
-//! multiple quoted string [Segment::QuotedString] separated by
-//! [Segment::Spaces] and "+" punctuators [Segment::Punct] may be combined to
-//! form a single string token [Token::String].  Still other segments are
-//! ignored (e.g. [Segment::Spaces]) or trigger special behavior such as error
-//! messages later in tokenization (e.g. [Segment::ExpectedQuote]).
-//!
-//! [Token::Id]: crate::lex::token::Token::Id
-//! [Token::String]: crate::lex::token::Token::String
-
-use std::cmp::Ordering;
-
-use crate::{
-    identifier::{id_match, id_match_n, IdentifierChar},
-    prompt::PromptStyle,
-};
-use bitflags::bitflags;
-
-use super::command_name::{command_match, COMMAND_NAMES};
-
-/// Syntax variant.
-///
-/// PSPP syntax is written in one of two syntax variant which are broadly
-/// defined as follows:
-///
-/// - In interactive syntax, commands end with a period at the end of the line
-///   or with a blank line.
-///
-/// - In batch syntax, the second and subsequent lines of a command are indented
-///   from the left margin.
-///
-/// The segmenter can also try to automatically detect the kind of syntax in
-/// use, using a heuristic that is usually correct.
-#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
-pub enum Syntax {
-    /// Try to interpret input correctly regardless of whether it is written
-    /// for interactive or batch syntax.
-    ///
-    /// This is `Syntax::default()`.
-    #[default]
-    Auto,
-
-    /// Interactive syntax.
-    Interactive,
-
-    /// Batch syntax.
-    Batch,
-}
-
-/// The type of a segment.
-///
-/// A [Segment] is a label for a string slice and is normally paired with one.
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-pub enum Segment {
-    /// A number.
-    Number,
-
-    /// A quoted string (`'...'` or `"..."`)..
-    QuotedString,
-
-    /// A hexadecimal string (`X'...'` or `X"..."`).
-    HexString,
-
-    /// A Unicode string (`U'...'` or `U"..."`).
-    UnicodeString,
-
-    /// An unquoted string.
-    ///
-    /// Unquoted strings appear only in a few special-case constructs, such as
-    /// the `FILE LABEL` command.
-    UnquotedString,
-
-    /// An identifier.
-    Identifier,
-
-    /// A punctuator or operator.
-    Punct,
-
-    /// `#!` at the beginning of a syntax file only.
-    Shbang,
-
-    /// Spaces.
-    Spaces,
-
-    /// A comment (`/* ... */`).
-    Comment,
-
-    /// New-line.
-    Newline,
-
-    /// A comment command (`* ...` or `COMMENT ...`).
-    CommentCommand,
-
-    /// In a `DO REPEAT` command, one of the lines to be repeated.
-    DoRepeatCommand,
-
-    /// Indicates `DO REPEAT` nested more deeply than supported.
-    DoRepeatOverflow,
-
-    /// A line of inline data inside `BEGIN DATA`...`END DATA`.
-    InlineData,
-
-    /// In `!DEFINE`, an identifier for the macro being defined.
-    ///
-    /// Distinguished from [Identifier](Self::Identifier) because a `MacroName`
-    /// must never be macro-expanded.
-    MacroName,
-
-    /// Contents of `!DEFINE`...`!ENDDEFINE`.
-    MacroBody,
-
-    /// Represents the `DOCUMENT` beginning a `DOCUMENT` command.
-    ///
-    /// This token is not associated with any text: the actual `DOCUMENT`
-    /// keyword is part of the following [Document](Self::Document) segment.
-    /// This is because documents include the `DOCUMENT` keyword.
-    StartDocument,
-
-    /// One of the lines of documents in a `DOCUMENT` command.
-    ///
-    /// The first line of a document includes the `DOCUMENT` keyword itself.
-    Document,
-
-    /// A command separator.
-    ///
-    /// This segment is usually for `+`, `-`, or `.` at the beginning of a line.
-    StartCommand,
-
-    /// A command separator.
-    ///
-    /// This segment is usually for a blank line.  It also appears at the end of
-    /// a file.
-    SeparateCommands,
-
-    /// A command separator.
-    ///
-    /// This segment is for `.` at the end of a line.
-    EndCommand,
-
-    /// Missing quote at the end of a line.
-    ///
-    /// This segment contains a partial quoted string.  It starts with a quote
-    /// mark (`"` or `'`, possibly preceded by `X` or `U`) but goes to the end
-    /// of the line without the matching end quote mark.
-    ExpectedQuote,
-
-    /// Missing exponent in number.
-    ///
-    /// This segment contains a number that ends with `E` or `E+` or `E-`
-    /// without a following exponent.
-    ExpectedExponent,
-
-    /// Unexpected character.
-    ///
-    /// The segment is a single character that isn't valid in syntax.
-    UnexpectedChar,
-}
-
-bitflags! {
-    #[derive(Copy, Clone, Debug)]
-    struct Substate: u8 {
-        const START_OF_LINE = 1;
-        const START_OF_COMMAND = 2;
-    }
-}
-
-/// Used by [Segmenter] to indicate that more input is needed.
-#[derive(Copy, Clone, Debug)]
-pub struct Incomplete;
-
-/// Labels syntax input with [Segment]s.
-#[derive(Copy, Clone)]
-pub struct Segmenter {
-    state: (State, Substate),
-    nest: u8,
-    syntax: Syntax,
-}
-
-impl Segmenter {
-    /// Returns a segmenter with the given `syntax`.
-    ///
-    /// If `is_snippet` is false, then the segmenter will parse as if it's being
-    /// given a whole file.  This means, for example, that it will interpret `-`
-    /// or `+` at the beginning of the syntax as a separator between commands
-    /// (since `-` or `+` at the beginning of a line has this meaning).
-    ///
-    /// If `is_snippet` is true, then the segmenter will parse as if it's being
-    /// given an isolated piece of syntax.  This means that, for example, that
-    /// it will interpret `-` or `+` at the beginning of the syntax as an
-    /// operator token or (if followed by a digit) as part of a number.
-    pub fn new(syntax: Syntax, is_snippet: bool) -> Self {
-        Self {
-            state: if is_snippet {
-                (State::General, Substate::empty())
-            } else {
-                (State::Shbang, Substate::empty())
-            },
-            syntax,
-            nest: 0,
-        }
-    }
-
-    /// Returns the [Syntax] variant passed in to [new](Self::new).
-    pub fn syntax(&self) -> Syntax {
-        self.syntax
-    }
-
-    fn start_of_line(&self) -> bool {
-        self.state.1.contains(Substate::START_OF_LINE)
-    }
-
-    fn start_of_command(&self) -> bool {
-        self.state.1.contains(Substate::START_OF_COMMAND)
-    }
-
-    /// Returns the style of command prompt to display to an interactive user
-    /// for input in the current state..  The return value is most accurate in
-    /// with [Syntax::Interactive] syntax and at the beginning of a line (that
-    /// is, if [Segmenter::push] consumed as much as possible of the input up to
-    /// a new-line).
-    pub fn prompt(&self) -> PromptStyle {
-        match self.state.0 {
-            State::Shbang => PromptStyle::First,
-            State::General => {
-                if self.start_of_command() {
-                    PromptStyle::First
-                } else {
-                    PromptStyle::Later
-                }
-            }
-            State::Comment1 | State::Comment2 => PromptStyle::Comment,
-            State::Document1 | State::Document2 => PromptStyle::Document,
-            State::Document3 => PromptStyle::First,
-            State::FileLabel1 => PromptStyle::Later,
-            State::FileLabel2 | State::FileLabel3 => PromptStyle::First,
-            State::DoRepeat1 | State::DoRepeat2 => {
-                if self.start_of_command() {
-                    PromptStyle::First
-                } else {
-                    PromptStyle::Later
-                }
-            }
-            State::DoRepeat3 => PromptStyle::DoRepeat,
-            State::DoRepeat4 => PromptStyle::DoRepeat,
-            State::Define1 | State::Define2 | State::Define3 => {
-                if self.start_of_command() {
-                    PromptStyle::First
-                } else {
-                    PromptStyle::Later
-                }
-            }
-            State::Define4 | State::Define5 | State::Define6 => PromptStyle::Define,
-            State::BeginData1 => PromptStyle::First,
-            State::BeginData2 => PromptStyle::Later,
-            State::BeginData3 | State::BeginData4 => PromptStyle::Data,
-        }
-    }
-
-    fn push_rest<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        if input.is_empty() {
-            if eof {
-                return Ok(None);
-            } else {
-                return Err(Incomplete);
-            };
-        }
-
-        match self.state.0 {
-            State::Shbang => self.parse_shbang(input, eof),
-            State::General => {
-                if self.start_of_line() {
-                    self.parse_start_of_line(input, eof)
-                } else {
-                    self.parse_mid_line(input, eof)
-                }
-            }
-            State::Comment1 => self.parse_comment_1(input, eof),
-            State::Comment2 => self.parse_comment_2(input, eof),
-            State::Document1 => self.parse_document_1(input, eof),
-            State::Document2 => self.parse_document_2(input, eof),
-            State::Document3 => self.parse_document_3(input, eof),
-            State::FileLabel1 => self.parse_file_label_1(input, eof),
-            State::FileLabel2 => self.parse_file_label_2(input, eof),
-            State::FileLabel3 => self.parse_file_label_3(input, eof),
-            State::DoRepeat1 => self.parse_do_repeat_1(input, eof),
-            State::DoRepeat2 => self.parse_do_repeat_2(input, eof),
-            State::DoRepeat3 => self.parse_do_repeat_3(input, eof),
-            State::DoRepeat4 => self.parse_do_repeat_4(input),
-            State::Define1 => self.parse_define_1_2(input, eof),
-            State::Define2 => self.parse_define_1_2(input, eof),
-            State::Define3 => self.parse_define_3(input, eof),
-            State::Define4 => self.parse_define_4_5(input, eof),
-            State::Define5 => self.parse_define_4_5(input, eof),
-            State::Define6 => self.parse_define_6(input, eof),
-            State::BeginData1 => self.parse_begin_data_1(input, eof),
-            State::BeginData2 => self.parse_begin_data_2(input, eof),
-            State::BeginData3 => self.parse_begin_data_3(input, eof),
-            State::BeginData4 => self.parse_begin_data_4(input, eof),
-        }
-    }
-
-    /// Attempts to label a prefix of the remaining input with a segment type.
-    /// The caller supplies a prefix of the remaining input as `input`.  If
-    /// `eof` is true, then `input` is the entire (remainder) of the input; if
-    /// `eof` is false, then further input is potentially available.
-    ///
-    /// The input may contain `\n` or `\r\n` line ends in any combination.
-    ///
-    /// If successful, returns `Ok((n, type))`, where `n` is the number of bytes
-    /// in the segment at the beginning of `input` (a number in
-    /// `0..=input.len()`) and the type of that segment.  The next call should
-    /// not include those bytes in `input`, because the segmenter has
-    /// (figuratively) consumed them.
-    ///
-    /// Segments can have zero length, including segment types
-    /// [Segment::SeparateCommands], [Segment::StartDocument],
-    /// [Segment::InlineData], and [Segment::Spaces].
-    ///
-    /// Failure occurs only if the segment type of the bytes in `input` cannot
-    /// yet be determined.  In this case, this function returns
-    /// `Err(Incomplete)`.  If more input is available, the caller should obtain
-    /// some more, then call again with a longer `input`.  If this is still not
-    /// enough, the process might need to repeat again and again.  If input is
-    /// exhausted, then the caller may call again setting `eof` to true.  This
-    /// function will never return `Err(Incomplete)` when `eof` is true.
-    ///
-    /// The caller must not, in a sequence of calls, supply contradictory input.
-    /// That is, bytes provided as part of `input` in one call, but not
-    /// consumed, must not be provided with *different* values on subsequent
-    /// calls.  This is because the function must often make decisions based on
-    /// looking ahead beyond the bytes that it consumes.
-    pub fn push(&mut self, input: &str, eof: bool) -> Result<Option<(usize, Segment)>, Incomplete> {
-        Ok(self
-            .push_rest(input, eof)?
-            .map(|(rest, seg_type)| (input.len() - rest.len(), seg_type)))
-    }
-}
-
-#[derive(Copy, Clone, Debug, PartialEq, Eq)]
-enum State {
-    Shbang,
-    General,
-    Comment1,
-    Comment2,
-    Document1,
-    Document2,
-    Document3,
-    FileLabel1,
-    FileLabel2,
-    FileLabel3,
-    DoRepeat1,
-    DoRepeat2,
-    DoRepeat3,
-    DoRepeat4,
-    Define1,
-    Define2,
-    Define3,
-    Define4,
-    Define5,
-    Define6,
-    BeginData1,
-    BeginData2,
-    BeginData3,
-    BeginData4,
-}
-
-fn take(input: &str, eof: bool) -> Result<(Option<char>, &str), Incomplete> {
-    let mut iter = input.chars();
-    match iter.next() {
-        None if !eof => Err(Incomplete),
-        c => Ok((c, iter.as_str())),
-    }
-}
-
-fn skip_comment(mut input: &str, eof: bool) -> Result<&str, Incomplete> {
-    loop {
-        let (Some(c), rest) = take(input, eof)? else {
-            return Ok(input);
-        };
-        match c {
-            '\n' | '\r' if is_end_of_line(input, eof)? => return Ok(input),
-            '*' => {
-                if let (Some('/'), rest) = take(rest, eof)? {
-                    return Ok(rest);
-                }
-            }
-            _ => (),
-        };
-        input = rest;
-    }
-}
-
-fn skip_matching<F>(f: F, input: &str, eof: bool) -> Result<&str, Incomplete>
-where
-    F: Fn(char) -> bool,
-{
-    let input = input.trim_start_matches(f);
-    if input.is_empty() && !eof {
-        Err(Incomplete)
-    } else {
-        Ok(input)
-    }
-}
-
-fn match_char<F>(f: F, input: &str, eof: bool) -> Result<Option<&str>, Incomplete>
-where
-    F: Fn(char) -> bool,
-{
-    if let (Some(c), rest) = take(input, eof)? {
-        if f(c) {
-            return Ok(Some(rest));
-        }
-    }
-    Ok(None)
-}
-
-fn skip_spaces(mut input: &str, eof: bool) -> Result<&str, Incomplete> {
-    loop {
-        let (Some(c), rest) = take(input, eof)? else {
-            return Ok(input);
-        };
-        match c {
-            '\r' | '\n' if is_end_of_line(input, eof)? => return Ok(input),
-            c if c.is_whitespace() => (),
-            _ => return Ok(input),
-        }
-        input = rest;
-    }
-}
-
-fn skip_digits(input: &str, eof: bool) -> Result<&str, Incomplete> {
-    skip_matching(|c| c.is_ascii_digit(), input, eof)
-}
-
-fn skip_spaces_and_comments(mut input: &str, eof: bool) -> Result<&str, Incomplete> {
-    loop {
-        let (Some(c), rest) = take(input, eof)? else {
-            return Ok(input);
-        };
-        match c {
-            '/' => {
-                let (c, rest2) = take(rest, eof)?;
-                match c {
-                    Some('*') => input = skip_comment(rest2, eof)?,
-                    Some(_) | None => return Ok(rest),
-                }
-            }
-            '\r' | '\n' if is_end_of_line(input, eof)? => return Ok(input),
-            c if c.is_whitespace() => input = rest,
-            _ => return Ok(input),
-        };
-    }
-}
-
-fn is_start_of_string(input: &str, eof: bool) -> Result<bool, Incomplete> {
-    let (Some(c), rest) = take(input, eof)? else {
-        return Ok(false);
-    };
-    match c {
-        'x' | 'X' | 'u' | 'U' => {
-            let (c, _rest) = take(rest, eof)?;
-            Ok(c == Some('\'') || c == Some('"'))
-        }
-        '\'' | '"' => Ok(true),
-        '\n' | '\r' if is_end_of_line(input, eof)? => Ok(true),
-        _ => Ok(false),
-    }
-}
-
-fn is_end_of_line(input: &str, eof: bool) -> Result<bool, Incomplete> {
-    let (Some(c), rest) = take(input, eof)? else {
-        return Ok(true);
-    };
-    Ok(match c {
-        '\n' => true,
-        '\r' => take(rest, eof)?.0 == Some('\n'),
-        _ => false,
-    })
-}
-
-fn at_end_of_line(input: &str, eof: bool) -> Result<bool, Incomplete> {
-    is_end_of_line(skip_spaces_and_comments(input, eof)?, eof)
-}
-
-fn first(s: &str) -> char {
-    s.chars().next().unwrap()
-}
-fn get_command_name_candidates(target: &str) -> &[&'static str] {
-    if target.is_empty() {
-        return &[];
-    }
-    let target_first = first(target).to_ascii_uppercase();
-    let low = COMMAND_NAMES.partition_point(|s| first(s) < target_first);
-    let high = COMMAND_NAMES.partition_point(|s| first(s) <= target_first);
-    &COMMAND_NAMES[low..high]
-}
-
-fn detect_command_name(input: &str, eof: bool) -> Result<bool, Incomplete> {
-    let command_name = input
-        .split(|c: char| {
-            !((c.is_whitespace() && c != '\n') || (c.may_continue_id() && c != '.') || c == '-')
-        })
-        .next()
-        .unwrap();
-    if !eof && command_name.len() == input.len() {
-        return Err(Incomplete);
-    }
-    let command_name = command_name.trim_end_matches(|c: char| c.is_whitespace() || c == '.');
-    for command in get_command_name_candidates(command_name) {
-        if let Some(m) = command_match(command, command_name) {
-            if m.missing_words <= 0 {
-                return Ok(true);
-            }
-        }
-    }
-    Ok(false)
-}
-
-impl Segmenter {
-    fn parse_shbang<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        if let (Some('#'), rest) = take(input, eof)? {
-            if let (Some('!'), rest) = take(rest, eof)? {
-                let rest = self.parse_full_line(rest, eof)?;
-                self.state = (State::General, Substate::START_OF_COMMAND);
-                return Ok(Some((rest, Segment::Shbang)));
-            }
-        }
-
-        self.state = (
-            State::General,
-            Substate::START_OF_COMMAND | Substate::START_OF_LINE,
-        );
-        self.push_rest(input, eof)
-    }
-    fn at_command_start(&self, input: &str, eof: bool) -> Result<bool, Incomplete> {
-        match self.syntax {
-            Syntax::Auto => detect_command_name(input, eof),
-            Syntax::Interactive => Ok(false),
-            Syntax::Batch => Ok(true),
-        }
-    }
-    fn parse_start_of_line<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        debug_assert_eq!(self.state.0, State::General);
-        debug_assert!(self.start_of_line());
-        debug_assert!(!input.is_empty());
-
-        let (Some(c), rest) = take(input, eof).unwrap() else {
-            unreachable!()
-        };
-        match c {
-            '+' if is_start_of_string(skip_spaces_and_comments(rest, eof)?, eof)? => {
-                // This  `+` is punctuation that may separate pieces of a string.
-                self.state = (State::General, Substate::empty());
-                return Ok(Some((rest, Segment::Punct)));
-            }
-            '+' | '-' | '.' => {
-                self.state = (State::General, Substate::START_OF_COMMAND);
-                return Ok(Some((rest, Segment::StartCommand)));
-            }
-            _ if c.is_whitespace() => {
-                if at_end_of_line(input, eof)? {
-                    self.state = (State::General, Substate::START_OF_COMMAND);
-                    return Ok(Some((input, Segment::SeparateCommands)));
-                }
-            }
-            _ => {
-                if self.at_command_start(input, eof)?
-                    && !self.state.1.contains(Substate::START_OF_COMMAND)
-                {
-                    self.state = (State::General, Substate::START_OF_COMMAND);
-                    return Ok(Some((input, Segment::StartCommand)));
-                }
-            }
-        }
-        self.state.1 = Substate::START_OF_COMMAND;
-        self.parse_mid_line(input, eof)
-    }
-    fn parse_mid_line<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        debug_assert!(self.state.0 == State::General);
-        debug_assert!(!self.state.1.contains(Substate::START_OF_LINE));
-        let (Some(c), rest) = take(input, eof)? else {
-            unreachable!()
-        };
-        match c {
-            '\r' | '\n' if is_end_of_line(input, eof)? => {
-                self.state.1 |= Substate::START_OF_LINE;
-                Ok(Some((
-                    self.parse_newline(input, eof).unwrap().unwrap(),
-                    Segment::Newline,
-                )))
-            }
-            '/' => {
-                if let (Some('*'), rest) = take(rest, eof)? {
-                    let rest = skip_comment(rest, eof)?;
-                    Ok(Some((rest, Segment::Comment)))
-                } else {
-                    self.state.1 = Substate::empty();
-                    Ok(Some((rest, Segment::Punct)))
-                }
-            }
-            '-' => {
-                let (c, rest2) = take(skip_spaces(rest, eof)?, eof)?;
-                match c {
-                    Some(c) if c.is_ascii_digit() => {
-                        return self.parse_number(rest, eof);
-                    }
-                    Some('.') => {
-                        if let (Some(c), _rest) = take(rest2, eof)? {
-                            if c.is_ascii_digit() {
-                                return self.parse_number(rest, eof);
-                            }
-                        }
-                    }
-                    None | Some(_) => (),
-                }
-                self.state.1 = Substate::empty();
-                Ok(Some((rest, Segment::Punct)))
-            }
-            '(' | ')' | '[' | ']' | '{' | '}' | ',' | '=' | ';' | ':' | '&' | '|' | '+' => {
-                self.state.1 = Substate::empty();
-                Ok(Some((rest, Segment::Punct)))
-            }
-            '*' => {
-                if self.state.1.contains(Substate::START_OF_COMMAND) {
-                    self.state = (State::Comment1, Substate::empty());
-                    self.parse_comment_1(input, eof)
-                } else {
-                    self.parse_digraph(&['*'], rest, eof)
-                }
-            }
-            '<' => self.parse_digraph(&['=', '>'], rest, eof),
-            '>' => self.parse_digraph(&['='], rest, eof),
-            '~' => self.parse_digraph(&['='], rest, eof),
-            '.' if at_end_of_line(rest, eof)? => {
-                self.state.1 = Substate::START_OF_COMMAND;
-                Ok(Some((rest, Segment::EndCommand)))
-            }
-            '.' => match take(rest, eof)? {
-                (Some(c), _) if c.is_ascii_digit() => self.parse_number(input, eof),
-                _ => Ok(Some((rest, Segment::Punct))),
-            },
-            '0'..='9' => self.parse_number(input, eof),
-            'u' | 'U' => self.maybe_parse_string(Segment::UnicodeString, (input, rest), eof),
-            'x' | 'X' => self.maybe_parse_string(Segment::HexString, (input, rest), eof),
-            '\'' | '"' => self.parse_string(Segment::QuotedString, c, rest, eof),
-            '!' => {
-                let (c, rest2) = take(rest, eof)?;
-                match c {
-                    Some('*') => Ok(Some((rest2, Segment::Punct))),
-                    Some(_) => self.parse_id(input, eof),
-                    None => Ok(Some((rest, Segment::Punct))),
-                }
-            }
-            c if c.is_whitespace() => Ok(Some((skip_spaces(rest, eof)?, Segment::Spaces))),
-            c if c.may_start_id() => self.parse_id(input, eof),
-            '#'..='~' if c != '\\' && c != '^' => {
-                self.state.1 = Substate::empty();
-                Ok(Some((rest, Segment::Punct)))
-            }
-            _ => {
-                self.state.1 = Substate::empty();
-                Ok(Some((rest, Segment::UnexpectedChar)))
-            }
-        }
-    }
-    fn parse_string<'a>(
-        &mut self,
-        segment: Segment,
-        quote: char,
-        mut input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        while let (Some(c), rest) = take(input, eof)? {
-            match c {
-                _ if c == quote => {
-                    let (c, rest2) = take(rest, eof)?;
-                    if c != Some(quote) {
-                        self.state.1 = Substate::empty();
-                        return Ok(Some((rest, segment)));
-                    }
-                    input = rest2;
-                }
-                '\r' | '\n' if is_end_of_line(input, eof)? => break,
-                _ => input = rest,
-            }
-        }
-        self.state.1 = Substate::empty();
-        Ok(Some((input, Segment::ExpectedQuote)))
-    }
-    fn maybe_parse_string<'a>(
-        &mut self,
-        segment: Segment,
-        input: (&'a str, &'a str),
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        match take(input.1, eof)? {
-            (Some(c), rest) if c == '\'' || c == '"' => self.parse_string(segment, c, rest, eof),
-            _ => self.parse_id(input.0, eof),
-        }
-    }
-    fn next_id_in_command<'a>(
-        &self,
-        mut input: &'a str,
-        eof: bool,
-    ) -> Result<(&'a str, &'a str), Incomplete> {
-        let mut sub = Segmenter::new(self.syntax, true);
-        loop {
-            let Some((seg_len, seg_type)) = sub.push(input, eof)? else {
-                return Ok((input, input));
-            };
-            let (segment, rest) = input.split_at(seg_len);
-            match seg_type {
-                Segment::Shbang | Segment::Spaces | Segment::Comment | Segment::Newline => (),
-
-                Segment::Identifier => return Ok((segment, rest)),
-
-                Segment::Number
-                | Segment::QuotedString
-                | Segment::HexString
-                | Segment::UnicodeString
-                | Segment::UnquotedString
-                | Segment::Punct
-                | Segment::CommentCommand
-                | Segment::DoRepeatCommand
-                | Segment::DoRepeatOverflow
-                | Segment::InlineData
-                | Segment::MacroName
-                | Segment::MacroBody
-                | Segment::StartDocument
-                | Segment::Document
-                | Segment::StartCommand
-                | Segment::SeparateCommands
-                | Segment::EndCommand
-                | Segment::ExpectedQuote
-                | Segment::ExpectedExponent
-                | Segment::UnexpectedChar => return Ok(("", rest)),
-            }
-            input = rest;
-        }
-    }
-    fn parse_id<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let (Some(_), mut end) = take(input, eof).unwrap() else {
-            unreachable!()
-        };
-        while let (Some(c), rest) = take(end, eof)? {
-            if !c.may_continue_id() {
-                break;
-            };
-            end = rest;
-        }
-        let identifier = &input[..input.len() - end.len()];
-        let identifier = match identifier.strip_suffix('.') {
-            Some(without_dot) if at_end_of_line(end, eof)? => without_dot,
-            _ => identifier,
-        };
-        let rest = &input[identifier.len()..];
-
-        if self.state.1.contains(Substate::START_OF_COMMAND) {
-            if id_match_n("COMMENT", identifier, 4) {
-                self.state = (State::Comment1, Substate::empty());
-                return self.parse_comment_1(input, eof);
-            } else if id_match("DOCUMENT", identifier) {
-                self.state = (State::Document1, Substate::empty());
-                return Ok(Some((input, Segment::StartDocument)));
-            } else if id_match_n("DEFINE", identifier, 6) {
-                self.state = (State::Define1, Substate::empty());
-            } else if id_match("FILE", identifier) {
-                if id_match("LABEL", self.next_id_in_command(rest, eof)?.0) {
-                    self.state = (State::FileLabel1, Substate::empty());
-                    return Ok(Some((rest, Segment::Identifier)));
-                }
-            } else if id_match("DO", identifier) {
-                if id_match("REPEAT", self.next_id_in_command(rest, eof)?.0) {
-                    self.state = (State::DoRepeat1, Substate::empty());
-                    return Ok(Some((rest, Segment::Identifier)));
-                }
-            } else if id_match("BEGIN", identifier) {
-                let (next_id, rest2) = self.next_id_in_command(rest, eof)?;
-                if id_match("DATA", next_id) {
-                    let rest2 = skip_spaces_and_comments(rest2, eof)?;
-                    let rest2 = if let Some(s) = rest2.strip_prefix('.') {
-                        skip_spaces_and_comments(s, eof)?
-                    } else {
-                        rest2
-                    };
-                    if is_end_of_line(rest2, eof)? {
-                        let s = &input[..input.len() - rest2.len()];
-                        self.state = (
-                            if s.contains('\n') {
-                                State::BeginData1
-                            } else {
-                                State::BeginData2
-                            },
-                            Substate::empty(),
-                        );
-                        return Ok(Some((rest, Segment::Identifier)));
-                    }
-                }
-            }
-        }
-
-        self.state.1 = Substate::empty();
-        Ok(Some((
-            rest,
-            if identifier != "!" {
-                Segment::Identifier
-            } else {
-                Segment::Punct
-            },
-        )))
-    }
-    fn parse_digraph<'a>(
-        &mut self,
-        seconds: &[char],
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let (c, rest) = take(input, eof)?;
-        self.state.1 = Substate::empty();
-        Ok(Some((
-            match c {
-                Some(c) if seconds.contains(&c) => rest,
-                _ => input,
-            },
-            Segment::Punct,
-        )))
-    }
-    fn parse_number<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let mut input = skip_digits(input, eof)?;
-        if let Some(rest) = match_char(|c| c == '.', input, eof)? {
-            let rest2 = skip_digits(rest, eof)?;
-            if rest2.len() < rest.len() || !at_end_of_line(rest2, eof)? {
-                input = rest2;
-            }
-        };
-        if let Some(rest) = match_char(|c| c == 'e' || c == 'E', input, eof)? {
-            let rest = match_char(|c| c == '+' || c == '-', rest, eof)?.unwrap_or(rest);
-            let rest2 = skip_digits(rest, eof)?;
-            if rest2.len() == rest.len() {
-                self.state.1 = Substate::empty();
-                return Ok(Some((rest, Segment::ExpectedExponent)));
-            }
-            input = rest2;
-        }
-        self.state.1 = Substate::empty();
-        Ok(Some((input, Segment::Number)))
-    }
-    fn parse_comment_1<'a>(
-        &mut self,
-        mut input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        enum CommentState<'a> {
-            Blank,
-            NotBlank,
-            Period(&'a str),
-        }
-        let mut state = CommentState::Blank;
-        loop {
-            let (Some(c), rest) = take(input, eof)? else {
-                // End of file.
-                self.state = (State::General, Substate::START_OF_COMMAND);
-                return Ok(Some((input, Segment::SeparateCommands)));
-            };
-            match c {
-                '.' => state = CommentState::Period(input),
-                '\n' | '\r' if is_end_of_line(input, eof)? => {
-                    match state {
-                        CommentState::Blank => {
-                            // Blank line ends comment command.
-                            self.state = (State::General, Substate::START_OF_COMMAND);
-                            return Ok(Some((input, Segment::SeparateCommands)));
-                        }
-                        CommentState::Period(period) => {
-                            // '.' at end of line ends comment command.
-                            self.state = (State::General, Substate::empty());
-                            return Ok(Some((period, Segment::CommentCommand)));
-                        }
-                        CommentState::NotBlank => {
-                            // Comment continues onto next line.
-                            self.state = (State::Comment2, Substate::empty());
-                            return Ok(Some((input, Segment::CommentCommand)));
-                        }
-                    }
-                }
-                c if c.is_whitespace() => (),
-                _ => state = CommentState::NotBlank,
-            }
-            input = rest;
-        }
-    }
-    fn parse_comment_2<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let rest = self.parse_newline(input, eof)?.unwrap();
-
-        let new_command = match take(rest, eof)?.0 {
-            Some('+') | Some('-') | Some('.') => true,
-            Some(c) if !c.is_whitespace() => self.at_command_start(rest, eof)?,
-            None | Some(_) => false,
-        };
-        if new_command {
-            self.state = (
-                State::General,
-                Substate::START_OF_LINE | Substate::START_OF_COMMAND,
-            );
-        } else {
-            self.state = (State::Comment1, Substate::empty());
-        }
-        Ok(Some((rest, Segment::Newline)))
-    }
-    fn parse_document_1<'a>(
-        &mut self,
-        mut input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let mut end_cmd = false;
-        loop {
-            let (Some(c), rest) = take(input, eof)? else {
-                self.state = (State::Document3, Substate::empty());
-                return Ok(Some((input, Segment::Document)));
-            };
-            match c {
-                '.' => end_cmd = true,
-                '\n' | '\r' if is_end_of_line(input, eof)? => {
-                    self.state.0 = if end_cmd {
-                        State::Document3
-                    } else {
-                        State::Document2
-                    };
-                    return Ok(Some((input, Segment::Document)));
-                }
-                c if !c.is_whitespace() => end_cmd = false,
-                _ => (),
-            }
-            input = rest;
-        }
-    }
-    fn parse_document_2<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let rest = self.parse_newline(input, eof)?.unwrap();
-        self.state = (State::Document1, Substate::empty());
-        Ok(Some((rest, Segment::Newline)))
-    }
-    fn parse_document_3<'a>(
-        &mut self,
-        input: &'a str,
-        _eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        self.state = (
-            State::General,
-            Substate::START_OF_COMMAND | Substate::START_OF_LINE,
-        );
-        Ok(Some((input, Segment::EndCommand)))
-    }
-    fn quoted_file_label(input: &str, eof: bool) -> Result<bool, Incomplete> {
-        let input = skip_spaces_and_comments(input, eof)?;
-        match take(input, eof)?.0 {
-            Some('\'') | Some('"') | Some('\n') => Ok(true),
-            _ => Ok(false),
-        }
-    }
-    fn parse_file_label_1<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let mut sub = Segmenter {
-            state: (State::General, self.state.1),
-            ..*self
-        };
-        let (rest, segment) = sub.push_rest(input, eof)?.unwrap();
-        if segment == Segment::Identifier {
-            let id = &input[..input.len() - rest.len()];
-            debug_assert!(id_match("LABEL", id), "{id} should be LABEL");
-            if Self::quoted_file_label(rest, eof)? {
-                *self = sub;
-            } else {
-                self.state.0 = State::FileLabel2;
-            }
-        } else {
-            self.state.1 = sub.state.1;
-        }
-        Ok(Some((rest, segment)))
-    }
-    fn parse_file_label_2<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let input = skip_spaces(input, eof)?;
-        self.state = (State::FileLabel3, Substate::empty());
-        Ok(Some((input, Segment::Spaces)))
-    }
-    fn parse_file_label_3<'a>(
-        &mut self,
-        mut input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let mut end_cmd = None;
-        loop {
-            let (c, rest) = take(input, eof)?;
-            match c {
-                None | Some('\n') | Some('\r') if is_end_of_line(input, eof)? => {
-                    self.state = (State::General, Substate::empty());
-                    return Ok(Some((end_cmd.unwrap_or(input), Segment::UnquotedString)));
-                }
-                None => unreachable!(),
-                Some('.') => end_cmd = Some(input),
-                Some(c) if !c.is_whitespace() => end_cmd = None,
-                Some(_) => (),
-            }
-            input = rest;
-        }
-    }
-    fn subparse<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let mut sub = Segmenter {
-            syntax: self.syntax,
-            state: (State::General, self.state.1),
-            nest: 0,
-        };
-        let result = sub.push_rest(input, eof)?;
-        self.state.1 = sub.state.1;
-        Ok(result)
-    }
-    /// We are segmenting a `DO REPEAT` command, currently reading the syntax
-    /// that defines the stand-in variables (the head) before the lines of
-    /// syntax to be repeated (the body).
-    fn parse_do_repeat_1<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let (rest, segment) = self.subparse(input, eof)?.unwrap();
-        if segment == Segment::SeparateCommands {
-            // We reached a blank line that separates the head from the body.
-            self.state.0 = State::DoRepeat2;
-        } else if segment == Segment::EndCommand || segment == Segment::StartCommand {
-            // We reached the body.
-            self.state.0 = State::DoRepeat3;
-            self.nest = 1;
-        }
-        Ok(Some((rest, segment)))
-    }
-    /// We are segmenting a `DO REPEAT` command, currently reading a blank line
-    /// that separates the head from the body.
-    fn parse_do_repeat_2<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let (rest, segment) = self.subparse(input, eof)?.unwrap();
-        if segment == Segment::Newline {
-            // We reached the body.
-            self.state.0 = State::DoRepeat3;
-            self.nest = 1;
-        }
-        Ok(Some((rest, segment)))
-    }
-    fn parse_newline<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<&'a str>, Incomplete> {
-        let (Some(c), rest) = take(input, eof)? else {
-            return Ok(None);
-        };
-        match c {
-            '\n' => Ok(Some(rest)),
-            '\r' => {
-                if let (Some('\n'), rest) = take(rest, eof)? {
-                    Ok(Some(rest))
-                } else {
-                    Ok(None)
-                }
-            }
-            _ => Ok(None),
-        }
-    }
-
-    fn parse_full_line<'a>(
-        &mut self,
-        mut input: &'a str,
-        eof: bool,
-    ) -> Result<&'a str, Incomplete> {
-        loop {
-            if is_end_of_line(input, eof)? {
-                return Ok(input);
-            }
-            input = take(input, eof).unwrap().1;
-        }
-    }
-    fn check_repeat_command(&mut self, input: &str, eof: bool) -> Result<isize, Incomplete> {
-        let input = input.strip_prefix(['-', '+']).unwrap_or(input);
-        let (id1, input) = self.next_id_in_command(input, eof)?;
-        if id_match("DO", id1) && id_match("REPEAT", self.next_id_in_command(input, eof)?.0) {
-            Ok(1)
-        } else if id_match("END", id1) && id_match("REPEAT", self.next_id_in_command(input, eof)?.0)
-        {
-            Ok(-1)
-        } else {
-            Ok(0)
-        }
-    }
-    /// We are in the body of `DO REPEAT`, segmenting the lines of syntax that
-    /// are to be repeated.  Report each line of syntax as a single
-    /// [`Type::DoRepeatCommand`].
-    ///
-    /// `DO REPEAT` can be nested, so we look for `DO REPEAT...END REPEAT`
-    /// blocks inside the lines we're segmenting.  `self.nest` counts the
-    /// nesting level, starting at 1.
-    fn parse_do_repeat_3<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        if let Some(rest) = self.parse_newline(input, eof)? {
-            return Ok(Some((rest, Segment::Newline)));
-        }
-        let rest = self.parse_full_line(input, eof)?;
-        match self.check_repeat_command(input, eof)?.cmp(&0) {
-            Ordering::Greater => {
-                if let Some(nest) = self.nest.checked_add(1) {
-                    self.nest = nest;
-                } else {
-                    self.state.0 = State::DoRepeat4;
-                }
-            }
-            Ordering::Less => {
-                self.nest -= 1;
-                if self.nest == 0 {
-                    // Nesting level dropped to 0, so we've finished reading the `DO
-                    // REPEAT` body.
-                    self.state = (
-                        State::General,
-                        Substate::START_OF_COMMAND | Substate::START_OF_LINE,
-                    );
-                    return self.push_rest(input, eof);
-                }
-            }
-            Ordering::Equal => (),
-        }
-        Ok(Some((rest, Segment::DoRepeatCommand)))
-    }
-    fn parse_do_repeat_4<'a>(
-        &mut self,
-        input: &'a str,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        self.state.0 = State::DoRepeat3;
-        Ok(Some((input, Segment::DoRepeatOverflow)))
-    }
-    /// We are segmenting a `DEFINE` command, which consists of:
-    ///
-    ///   - The `DEFINE` keyword.
-    ///
-    ///   - An identifier.  We transform this into `Type::MacroName` instead of
-    ///     `Type::Identifier` because this identifier must never  be macro-expanded.
-    ///
-    ///   - Anything but `(`.
-    ///
-    ///   - `(` followed by a sequence of tokens possibly including balanced
-    ///     parentheses up to a final `)`.
-    ///
-    ///   - A sequence of any number of lines, one string per line, ending with
-    ///     `!ENDDEFINE`.  The first line is usually blank (that is, a newline
-    ///     follows the `(`).  The last line usually just has `!ENDDEFINE.` on
-    ///     it, but it can start with other tokens.  The whole
-    ///     DEFINE...!ENDDEFINE can be on a single line, even.
-    fn parse_define_1_2<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let (rest, segment) = self.subparse(input, eof)?.unwrap();
-        match segment {
-            Segment::Identifier if self.state.0 == State::Define1 => {
-                self.state.0 = State::Define2;
-                return Ok(Some((rest, Segment::MacroName)));
-            }
-            Segment::SeparateCommands | Segment::EndCommand | Segment::StartCommand => {
-                // The DEFINE command is malformed because we reached its end
-                // without ever hitting a `(` token.  Transition back to general
-                // parsing.
-                self.state.0 = State::General;
-            }
-            Segment::Punct if input.starts_with('(') => {
-                self.state.0 = State::Define3;
-                self.nest = 1;
-            }
-            _ => (),
-        }
-        Ok(Some((rest, segment)))
-    }
-    fn parse_define_3<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let (rest, segment) = self.subparse(input, eof)?.unwrap();
-        match segment {
-            Segment::SeparateCommands | Segment::EndCommand | Segment::StartCommand => {
-                // The DEFINE command is malformed because we reached its end
-                // without ever hitting a `(` token.  Transition back to general
-                // parsing.
-                self.state.0 = State::General;
-            }
-            Segment::Punct if input.starts_with('(') => {
-                self.nest += 1;
-            }
-            Segment::Punct if input.starts_with(')') => {
-                self.nest -= 1;
-                if self.nest == 0 {
-                    self.state = (State::Define4, Substate::empty());
-                }
-            }
-            _ => (),
-        }
-        Ok(Some((rest, segment)))
-    }
-    fn find_enddefine(mut input: &str) -> Option<&str> {
-        loop {
-            input = skip_spaces_and_comments(input, true).unwrap();
-            let (Some(c), rest) = take(input, true).unwrap() else {
-                return None;
-            };
-            match c {
-                '!' if strip_prefix_ignore_ascii_case(input, "!ENDDEFINE").is_some() => {
-                    return Some(input)
-                }
-                '\'' | '"' => {
-                    let index = rest.find(c)?;
-                    input = &rest[index + 1..];
-                }
-                _ => input = rest,
-            }
-        }
-    }
-
-    /// We are in the body of a macro definition, looking for additional lines
-    /// of the body or `!ENDDEFINE`.
-    ///
-    /// In `State::Define4`, we're parsing the first line of the macro body (the
-    /// same line as the closing parenthesis in the argument definition).  In
-    /// `State::Define5`, we're on a later line.
-    fn parse_define_4_5<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let rest = self.parse_full_line(input, eof)?;
-        let line = &input[..input.len() - rest.len()];
-        if let Some(end) = Self::find_enddefine(line) {
-            // Macro ends at the !ENDDEFINE on this line.
-            self.state = (State::General, Substate::empty());
-            let (prefix, rest) = input.split_at(line.len() - end.len());
-            if prefix.is_empty() {
-                // Line starts with `!ENDDEFINE`.
-                self.push_rest(input, eof)
-            } else if prefix.trim_start().is_empty() {
-                // Line starts with spaces followed by `!ENDDEFINE`.
-                Ok(Some((rest, Segment::Spaces)))
-            } else {
-                // Line starts with some content followed by `!ENDDEFINE`.
-                Ok(Some((rest, Segment::MacroBody)))
-            }
-        } else {
-            // No `!ENDDEFINE`.  We have a full line of macro body.
-            //
-            // If the first line of the macro body is blank, we just report it
-            // as spaces, or not at all if there are no spaces, because it's not
-            // significant.
-            //
-            // However, if it's a later line, we need to report it because blank
-            // lines can have significance.
-            let segment = if self.state.0 == State::Define4 && line.trim_start().is_empty() {
-                if line.is_empty() {
-                    return self.parse_define_6(input, eof);
-                }
-                Segment::Spaces
-            } else {
-                Segment::MacroBody
-            };
-            self.state.0 = State::Define6;
-            Ok(Some((rest, segment)))
-        }
-    }
-    fn parse_define_6<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let rest = self.parse_newline(input, eof)?.unwrap();
-        self.state.0 = State::Define5;
-        Ok(Some((rest, Segment::Newline)))
-    }
-    fn parse_begin_data_1<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let (rest, segment) = self.subparse(input, eof)?.unwrap();
-        if segment == Segment::Newline {
-            self.state.0 = State::BeginData2;
-        }
-        Ok(Some((rest, segment)))
-    }
-    fn parse_begin_data_2<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let (rest, segment) = self.subparse(input, eof)?.unwrap();
-        if segment == Segment::Newline {
-            self.state.0 = State::BeginData3;
-        }
-        Ok(Some((rest, segment)))
-    }
-    fn is_end_data(line: &str) -> bool {
-        let Some(rest) = strip_prefix_ignore_ascii_case(line, "END") else {
-            return false;
-        };
-        let (Some(c), rest) = take(rest, true).unwrap() else {
-            return false;
-        };
-        if !c.is_whitespace() {
-            return false;
-        };
-        let Some(rest) = strip_prefix_ignore_ascii_case(rest, "DATA") else {
-            return false;
-        };
-
-        let mut endcmd = false;
-        for c in rest.chars() {
-            match c {
-                '.' if endcmd => return false,
-                '.' => endcmd = true,
-                c if c.is_whitespace() => (),
-                _ => return false,
-            }
-        }
-        true
-    }
-    fn parse_begin_data_3<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let rest = self.parse_full_line(input, eof)?;
-        let line = &input[..input.len() - rest.len()];
-        if Self::is_end_data(line) {
-            self.state = (
-                State::General,
-                Substate::START_OF_COMMAND | Substate::START_OF_LINE,
-            );
-            self.push_rest(input, eof)
-        } else {
-            self.state.0 = State::BeginData4;
-            Ok(Some((rest, Segment::InlineData)))
-        }
-    }
-    fn parse_begin_data_4<'a>(
-        &mut self,
-        input: &'a str,
-        eof: bool,
-    ) -> Result<Option<(&'a str, Segment)>, Incomplete> {
-        let rest = self.parse_newline(input, eof)?.unwrap();
-        self.state.0 = State::BeginData3;
-        Ok(Some((rest, Segment::Newline)))
-    }
-}
-
-fn strip_prefix_ignore_ascii_case<'a>(line: &'a str, pattern: &str) -> Option<&'a str> {
-    line.get(..pattern.len()).and_then(|prefix| {
-        prefix
-            .eq_ignore_ascii_case(pattern)
-            .then(|| &line[pattern.len()..])
-    })
-}
-
-#[cfg(test)]
-mod test;
diff --git a/rust/pspp/src/output.rs b/rust/pspp/src/output.rs

new file mode 100644 (file)

index 0000000..c1e061e
--- /dev/null
+++ b/rust/pspp/src/output.rs
@@ -0,0 +1,317 @@
+// PSPP - a program for statistical analysis.
+// Copyright (C) 2025 Free Software Foundation, Inc.
+//
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free Software
+// Foundation, either version 3 of the License, or (at your option) any later
+// version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+// details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program.  If not, see <http://www.gnu.org/licenses/>.
+
+#![allow(dead_code)]
+use std::{
+    borrow::Cow,
+    sync::{Arc, OnceLock},
+};
+
+use enum_map::EnumMap;
+use pivot::PivotTable;
+use serde::Serialize;
+
+use crate::{
+    message::Diagnostic,
+    output::pivot::{Axis3, BorderStyle, Dimension, Group, Look},
+};
+
+use self::pivot::Value;
+
+pub mod cairo;
+pub mod csv;
+pub mod driver;
+pub mod html;
+pub mod json;
+pub mod page;
+pub mod pivot;
+pub mod render;
+pub mod spv;
+pub mod table;
+pub mod text;
+pub mod text_line;
+
+/// A single output item.
+///
+/// An item pairs its content (`details`) with the metadata used to present
+/// it: an optional explicit label, the name of the command that produced it,
+/// and a visibility flag.
+#[derive(Serialize)]
+pub struct Item {
+    /// The localized label for the item that appears in the outline pane in the
+    /// output viewer and in PDF outlines.  This is `None` if no label has been
+    /// explicitly set, in which case [`Item::label`] falls back to the label
+    /// derived from `details`.
+    label: Option<String>,
+
+    /// A locale-invariant identifier for the command that produced the output,
+    /// which may be `None` if unknown or if a command did not produce this
+    /// output.  [`Item::new`] initializes this from
+    /// [`Details::command_name`].
+    command_name: Option<String>,
+
+    /// For a group item, this is true if the group's subtree should
+    /// be expanded in an outline view, false otherwise.
+    ///
+    /// For other kinds of output items, this is true to show the item's
+    /// content, false to hide it.  The item's label is always shown in an
+    /// outline view.
+    show: bool,
+
+    /// Item details.
+    details: Details,
+}
+
+impl Item {
+    /// Wraps `details` in a new item.
+    ///
+    /// The new item has no explicit label, is marked as shown, and takes its
+    /// command name from [`Details::command_name`].
+    pub fn new(details: impl Into<Details>) -> Self {
+        let details: Details = details.into();
+        let command_name = details.command_name().cloned();
+        Self {
+            label: None,
+            command_name,
+            show: true,
+            details,
+        }
+    }
+
+    /// Returns the label for this item: the explicitly set label if there is
+    /// one, otherwise the label derived from the item's details.
+    pub fn label(&self) -> Cow<'static, str> {
+        if let Some(label) = &self.label {
+            return Cow::Owned(label.clone());
+        }
+        self.details.label()
+    }
+}
+
+/// Anything convertible into [`Details`] converts directly into an [`Item`]
+/// by way of [`Item::new`].
+impl<T: Into<Details>> From<T> for Item {
+    fn from(details: T) -> Self {
+        Item::new(details)
+    }
+}
+
+/// The content of an output [`Item`].
+#[derive(Serialize)]
+pub enum Details {
+    /// A chart.  This variant currently has no payload.
+    Chart,
+    /// An image.  This variant currently has no payload.
+    Image,
+    /// A group of child items.
+    Group(Vec<Arc<Item>>),
+    /// A diagnostic message.
+    Message(Box<Diagnostic>),
+    /// A page break.
+    PageBreak,
+    /// A pivot table.
+    Table(Box<PivotTable>),
+    /// A piece of text.
+    Text(Box<Text>),
+}
+
+impl Details {
+    /// Returns the child items if this is a [`Details::Group`], otherwise
+    /// `None`.
+    pub fn as_group(&self) -> Option<&[Arc<Item>]> {
+        match self {
+            Self::Group(children) => Some(children.as_slice()),
+            _ => None,
+        }
+    }
+
+    /// Returns the name of the command that produced this content, if it is
+    /// recorded.
+    ///
+    /// Only tables carry a command name (the pivot table's `command_c`); every
+    /// other kind of content yields `None`.
+    pub fn command_name(&self) -> Option<&String> {
+        match self {
+            Details::Chart
+            | Details::Image
+            | Details::Group(_)
+            | Details::Message(_)
+            | Details::PageBreak
+            | Details::Text(_) => None,
+            Details::Table(pivot_table) => pivot_table.command_c.as_ref(),
+        }
+    }
+
+    /// Returns the default label for this content, used by [`Item::label`]
+    /// when no explicit label has been set.
+    pub fn label(&self) -> Cow<'static, str> {
+        match self {
+            // Charts and images have no payload to derive a label from, so
+            // they get a fixed label instead of panicking.
+            Details::Chart => Cow::from("Chart"),
+            Details::Image => Cow::from("Image"),
+            Details::Group(_) => Cow::from("Group"),
+            Details::Message(diagnostic) => Cow::from(diagnostic.severity.as_title_str()),
+            Details::PageBreak => Cow::from("Page Break"),
+            Details::Table(pivot_table) => Cow::from(pivot_table.label()),
+            Details::Text(text) => Cow::from(text.type_.as_str()),
+        }
+    }
+
+    /// Returns true if this is a [`Details::PageBreak`].
+    pub fn is_page_break(&self) -> bool {
+        matches!(self, Self::PageBreak)
+    }
+}
+
+/// Collecting an iterator of values convertible into `Arc<Item>` produces a
+/// [`Details::Group`] that holds them in iteration order.
+impl<A> FromIterator<A> for Details
+where
+    A: Into<Arc<Item>>,
+{
+    fn from_iter<T>(iter: T) -> Self
+    where
+        T: IntoIterator<Item = A>,
+    {
+        let children: Vec<Arc<Item>> = iter.into_iter().map(Into::into).collect();
+        Self::Group(children)
+    }
+}
+
+/// Boxes a diagnostic and wraps it as [`Details::Message`].
+impl From<Diagnostic> for Details {
+    fn from(diagnostic: Diagnostic) -> Self {
+        Details::Message(Box::new(diagnostic))
+    }
+}
+
+/// Wraps an already boxed diagnostic as [`Details::Message`].
+impl From<Box<Diagnostic>> for Details {
+    fn from(diagnostic: Box<Diagnostic>) -> Self {
+        Details::Message(diagnostic)
+    }
+}
+
+/// Boxes a pivot table and wraps it as [`Details::Table`].
+impl From<PivotTable> for Details {
+    fn from(table: PivotTable) -> Self {
+        Details::Table(Box::new(table))
+    }
+}
+
+/// Wraps an already boxed pivot table as [`Details::Table`].
+impl From<Box<PivotTable>> for Details {
+    fn from(table: Box<PivotTable>) -> Self {
+        Details::Table(table)
+    }
+}
+
+/// Boxes a text item and wraps it as [`Details::Text`].
+impl From<Text> for Details {
+    fn from(text: Text) -> Self {
+        Details::Text(Box::new(text))
+    }
+}
+
+/// Wraps an already boxed text item as [`Details::Text`].
+impl From<Box<Text>> for Details {
+    fn from(text: Box<Text>) -> Self {
+        Details::Text(text)
+    }
+}
+
+/// A piece of text output: the text itself plus the kind of text it is.
+#[derive(Clone, Debug, Serialize)]
+pub struct Text {
+    /// What kind of text this is.
+    type_: TextType,
+
+    /// The text itself.
+    content: Value,
+}
+
+impl Text {
+    /// Creates a text item of type [`TextType::Log`] whose content is
+    /// `value`.
+    pub fn new_log(value: impl Into<Value>) -> Self {
+        let content = value.into();
+        Self {
+            type_: TextType::Log,
+            content,
+        }
+    }
+}
+
+/// Returns the look applied to pivot tables built from [`Text`] items.
+///
+/// It is the default look with every border set to `BorderStyle::none()` and
+/// the cell margins of every area set to zero.  The look is built on first
+/// use and then shared; each call returns another handle to the same `Arc`.
+fn text_item_table_look() -> Arc<Look> {
+    static LOOK: OnceLock<Arc<Look>> = OnceLock::new();
+    let shared = LOOK.get_or_init(|| {
+        let mut look = Look::default().with_borders(EnumMap::from_fn(|_| BorderStyle::none()));
+        for area in look.areas.values_mut() {
+            area.cell_style.margins = EnumMap::from_fn(|_| [0, 0]);
+        }
+        Arc::new(look)
+    });
+    Arc::clone(shared)
+}
+
+/// Converts a text item into a pivot table.
+///
+/// The table has a single dimension on the Y axis, named "Text", with one
+/// category and all labels hidden; the text's content is stored at index
+/// `[0]`.  The table uses [`text_item_table_look`] and the subtype "Text".
+impl From<Text> for PivotTable {
+    fn from(text: Text) -> Self {
+        let group = Group::new(Value::new_text("Text")).with(Value::new_user_text("null"));
+        let dimension = Dimension::new(group).with_all_labels_hidden();
+        let table = PivotTable::new([(Axis3::Y, dimension)]).with_look(text_item_table_look());
+        table
+            .with_data([(&[0], text.content)])
+            .with_subtype(Value::new_user_text("Text"))
+    }
+}
+
+/// Converts a diagnostic into a log text item holding the diagnostic's string
+/// form (as produced by `to_string`).
+impl From<&Diagnostic> for Text {
+    fn from(diagnostic: &Diagnostic) -> Self {
+        Text::new_log(diagnostic.to_string())
+    }
+}
+
+/// The kind of text held by a [`Text`] item.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum TextType {
+    /// `TITLE` and `SUBTITLE` commands.
+    PageTitle,
+
+    /// Title.
+    Title,
+
+    /// Syntax printback logging.
+    Syntax,
+
+    /// Other logging.
+    Log,
+}
+
+impl TextType {
+    /// Returns the title-case name of this text type, which
+    /// [`Details::label`] uses as the default label of a text item.
+    ///
+    /// [`TextType::Syntax`] and [`TextType::Log`] share the name `"Log"`.
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            Self::PageTitle => "Page Title",
+            Self::Title => "Title",
+            Self::Syntax | Self::Log => "Log",
+        }
+    }
+
+    /// Returns the lowercase, hyphenated name of this text type.
+    ///
+    /// [`TextType::Syntax`] and [`TextType::Log`] share the name `"log"`.
+    // NOTE(review): presumably this is the spelling used in XML output, going
+    // by the method name -- confirm against the callers.
+    pub fn as_xml_str(&self) -> &'static str {
+        match *self {
+            Self::Syntax | Self::Log => "log",
+            Self::Title => "title",
+            Self::PageTitle => "page-title",
+        }
+    }
+}
+
+/// A cursor that walks a tree of [`Item`]s depth-first, visiting each group
+/// before its children.
+pub struct ItemCursor {
+    /// The item the cursor currently points at, or `None` once the walk has
+    /// moved past the last item.
+    cur: Option<Arc<Item>>,
+    /// The groups the cursor has descended into, outermost first, each paired
+    /// with the index of the next child of that group to visit.
+    stack: Vec<(Arc<Item>, usize)>,
+}
+
+impl ItemCursor {
+    /// Creates a cursor positioned at `start`, the root of the tree to walk.
+    pub fn new(start: Arc<Item>) -> Self {
+        Self {
+            stack: Vec::new(),
+            cur: Some(start),
+        }
+    }
+
+    /// Returns the item the cursor points at, or `None` once the walk has
+    /// moved past the last item.
+    pub fn cur(&self) -> Option<&Arc<Item>> {
+        self.cur.as_ref()
+    }
+
+    /// Advances the cursor to the next item in depth-first order.
+    ///
+    /// A non-empty group is followed by its first child.  Any other item is
+    /// followed by the next unvisited child of the innermost enclosing group
+    /// that still has one.  If there is none, the cursor becomes empty, and
+    /// further calls do nothing.
+    pub fn next(&mut self) {
+        let Some(cur) = self.cur.take() else {
+            return;
+        };
+
+        // Descend into a non-empty group, remembering that its child 1 comes
+        // next once the subtree under child 0 is exhausted.
+        if let Details::Group(children) = &cur.details {
+            if let Some(first) = children.first() {
+                self.cur = Some(Arc::clone(first));
+                self.stack.push((cur, 1));
+                return;
+            }
+        }
+
+        // Leaf or empty group: unwind to the nearest enclosing group that
+        // still has an unvisited child.
+        while let Some((parent, index)) = self.stack.pop() {
+            let next_sibling = parent.details.as_group().unwrap().get(index).cloned();
+            if let Some(sibling) = next_sibling {
+                self.cur = Some(sibling);
+                self.stack.push((parent, index + 1));
+                return;
+            }
+        }
+    }
+}
diff --git a/rust/pspp/src/output/cairo.rs b/rust/pspp/src/output/cairo.rs

new file mode 100644 (file)

index 0000000..0d6782f
--- /dev/null
+++ b/rust/pspp/src/output/cairo.rs
@@ -0,0 +1,52 @@
+// PSPP - a program for statistical analysis.
+// Copyright (C) 2025 Free Software Foundation, Inc.
+//
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free Software
+// Foundation, either version 3 of the License, or (at your option) any later
+// version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+// details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program.  If not, see <http://www.gnu.org/licenses/>.
+
+use pango::SCALE;
+
+use crate::output::pivot::HorzAlign;
+
+mod driver;
+pub mod fsm;
+pub mod pager;
+
+pub use driver::{CairoConfig, CairoDriver};
+
+/// Conversion from 1/96" units ("pixels") to Cairo/Pango units.
+///
+/// Each pixel is scaled by the constant factor `SCALE * 72 / 96`, computed in
+/// integer arithmetic.
+fn px_to_xr(x: usize) -> usize {
+    // The factor is computed first so that `x` is multiplied only once.  An
+    // extra `* 3 ... / 3` pair would cancel exactly and only bring overflow
+    // closer.
+    x * (SCALE as usize * 72 / 96)
+}
+
+/// Divides a length in Cairo/Pango units by `SCALE`, yielding a
+/// floating-point value.
+// NOTE(review): presumably the result is in points, going by the function
+// name -- confirm.
+fn xr_to_pt(x: usize) -> f64 {
+    let units = x as f64;
+    units / SCALE as f64
+}
+
+/// Maps a pivot-table horizontal alignment to the corresponding Pango
+/// alignment.  Decimal alignment is mapped to right alignment.
+fn horz_align_to_pango(horz_align: HorzAlign) -> pango::Alignment {
+    use pango::Alignment;
+
+    match horz_align {
+        HorzAlign::Left => Alignment::Left,
+        HorzAlign::Center => Alignment::Center,
+        HorzAlign::Decimal { .. } | HorzAlign::Right => Alignment::Right,
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use crate::output::cairo::{CairoConfig, CairoDriver};
+
+    /// Smoke test: constructing a driver from a configuration for `test.pdf`
+    /// must succeed.
+    // NOTE(review): presumably this creates `test.pdf` in the current working
+    // directory -- confirm, and consider a temporary path if so.
+    #[test]
+    fn create() {
+        CairoDriver::new(&CairoConfig::new("test.pdf")).unwrap();
+    }
+}
diff --git a/rust/pspp/src/output/cairo/mod.rs b/rust/pspp/src/output/cairo/mod.rs

deleted file mode 100644 (file)

index 0d6782f..0000000
--- a/rust/pspp/src/output/cairo/mod.rs
+++ /dev/null
@@ -1,52 +0,0 @@
-// PSPP - a program for statistical analysis.
-// Copyright (C) 2025 Free Software Foundation, Inc.
-//
-// This program is free software: you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free Software
-// Foundation, either version 3 of the License, or (at your option) any later
-// version.
-//
-// This program is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
-// details.
-//
-// You should have received a copy of the GNU General Public License along with
-// this program.  If not, see <http://www.gnu.org/licenses/>.
-
-use pango::SCALE;
-
-use crate::output::pivot::HorzAlign;
-
-mod driver;
-pub mod fsm;
-pub mod pager;
-
-pub use driver::{CairoConfig, CairoDriver};
-
-/// Conversion from 1/96" units ("pixels") to Cairo/Pango units.
-fn px_to_xr(x: usize) -> usize {
-    x * 3 * (SCALE as usize * 72 / 96) / 3
-}
-
-fn xr_to_pt(x: usize) -> f64 {
-    x as f64 / SCALE as f64
-}
-
-fn horz_align_to_pango(horz_align: HorzAlign) -> pango::Alignment {
-    match horz_align {
-        HorzAlign::Right | HorzAlign::Decimal { .. } => pango::Alignment::Right,
-        HorzAlign::Left => pango::Alignment::Left,
-        HorzAlign::Center => pango::Alignment::Center,
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use crate::output::cairo::{CairoConfig, CairoDriver};
-
-    #[test]
-    fn create() {
-        CairoDriver::new(&CairoConfig::new("test.pdf")).unwrap();
-    }
-}
diff --git a/rust/pspp/src/output/mod.rs b/rust/pspp/src/output/mod.rs

deleted file mode 100644 (file)

index c1e061e..0000000
--- a/rust/pspp/src/output/mod.rs
+++ /dev/null
@@ -1,317 +0,0 @@
-// PSPP - a program for statistical analysis.
-// Copyright (C) 2025 Free Software Foundation, Inc.
-//
-// This program is free software: you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free Software
-// Foundation, either version 3 of the License, or (at your option) any later
-// version.
-//
-// This program is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
-// details.
-//
-// You should have received a copy of the GNU General Public License along with
-// this program.  If not, see <http://www.gnu.org/licenses/>.
-
-#![allow(dead_code)]
-use std::{
-    borrow::Cow,
-    sync::{Arc, OnceLock},
-};
-
-use enum_map::EnumMap;
-use pivot::PivotTable;
-use serde::Serialize;
-
-use crate::{
-    message::Diagnostic,
-    output::pivot::{Axis3, BorderStyle, Dimension, Group, Look},
-};
-
-use self::pivot::Value;
-
-pub mod cairo;
-pub mod csv;
-pub mod driver;
-pub mod html;
-pub mod json;
-pub mod page;
-pub mod pivot;
-pub mod render;
-pub mod spv;
-pub mod table;
-pub mod text;
-pub mod text_line;
-
-/// A single output item.
-#[derive(Serialize)]
-pub struct Item {
-    /// The localized label for the item that appears in the outline pane in the
-    /// output viewer and in PDF outlines.  This is `None` if no label has been
-    /// explicitly set.
-    label: Option<String>,
-
-    /// A locale-invariant identifier for the command that produced the output,
-    /// which may be `None` if unknown or if a command did not produce this
-    /// output.
-    command_name: Option<String>,
-
-    /// For a group item, this is true if the group's subtree should
-    /// be expanded in an outline view, false otherwise.
-    ///
-    /// For other kinds of output items, this is true to show the item's
-    /// content, false to hide it.  The item's label is always shown in an
-    /// outline view.
-    show: bool,
-
-    /// Item details.
-    details: Details,
-}
-
-impl Item {
-    pub fn new(details: impl Into<Details>) -> Self {
-        let details = details.into();
-        Self {
-            label: None,
-            command_name: details.command_name().cloned(),
-            show: true,
-            details,
-        }
-    }
-
-    pub fn label(&self) -> Cow<'static, str> {
-        match &self.label {
-            Some(label) => Cow::from(label.clone()),
-            None => self.details.label(),
-        }
-    }
-}
-
-impl<T> From<T> for Item
-where
-    T: Into<Details>,
-{
-    fn from(value: T) -> Self {
-        Self::new(value)
-    }
-}
-
-#[derive(Serialize)]
-pub enum Details {
-    Chart,
-    Image,
-    Group(Vec<Arc<Item>>),
-    Message(Box<Diagnostic>),
-    PageBreak,
-    Table(Box<PivotTable>),
-    Text(Box<Text>),
-}
-
-impl Details {
-    pub fn as_group(&self) -> Option<&[Arc<Item>]> {
-        match self {
-            Self::Group(children) => Some(children.as_slice()),
-            _ => None,
-        }
-    }
-
-    pub fn command_name(&self) -> Option<&String> {
-        match self {
-            Details::Chart
-            | Details::Image
-            | Details::Group(_)
-            | Details::Message(_)
-            | Details::PageBreak
-            | Details::Text(_) => None,
-            Details::Table(pivot_table) => pivot_table.command_c.as_ref(),
-        }
-    }
-
-    pub fn label(&self) -> Cow<'static, str> {
-        match self {
-            Details::Chart => todo!(),
-            Details::Image => todo!(),
-            Details::Group(_) => Cow::from("Group"),
-            Details::Message(diagnostic) => Cow::from(diagnostic.severity.as_title_str()),
-            Details::PageBreak => Cow::from("Page Break"),
-            Details::Table(pivot_table) => Cow::from(pivot_table.label()),
-            Details::Text(text) => Cow::from(text.type_.as_str()),
-        }
-    }
-
-    pub fn is_page_break(&self) -> bool {
-        matches!(self, Self::PageBreak)
-    }
-}
-
-impl<A> FromIterator<A> for Details
-where
-    A: Into<Arc<Item>>,
-{
-    fn from_iter<T>(iter: T) -> Self
-    where
-        T: IntoIterator<Item = A>,
-    {
-        Self::Group(iter.into_iter().map(|value| value.into()).collect())
-    }
-}
-
-impl From<Diagnostic> for Details {
-    fn from(value: Diagnostic) -> Self {
-        Self::Message(Box::new(value))
-    }
-}
-
-impl From<Box<Diagnostic>> for Details {
-    fn from(value: Box<Diagnostic>) -> Self {
-        Self::Message(value)
-    }
-}
-
-impl From<PivotTable> for Details {
-    fn from(value: PivotTable) -> Self {
-        Self::Table(Box::new(value))
-    }
-}
-
-impl From<Box<PivotTable>> for Details {
-    fn from(value: Box<PivotTable>) -> Self {
-        Self::Table(value)
-    }
-}
-
-impl From<Text> for Details {
-    fn from(value: Text) -> Self {
-        Self::Text(Box::new(value))
-    }
-}
-
-impl From<Box<Text>> for Details {
-    fn from(value: Box<Text>) -> Self {
-        Self::Text(value)
-    }
-}
-
-#[derive(Clone, Debug, Serialize)]
-pub struct Text {
-    type_: TextType,
-
-    content: Value,
-}
-
-impl Text {
-    pub fn new_log(value: impl Into<Value>) -> Self {
-        Self {
-            type_: TextType::Log,
-            content: value.into(),
-        }
-    }
-}
-
-fn text_item_table_look() -> Arc<Look> {
-    static LOOK: OnceLock<Arc<Look>> = OnceLock::new();
-    LOOK.get_or_init(|| {
-        Arc::new({
-            let mut look = Look::default().with_borders(EnumMap::from_fn(|_| BorderStyle::none()));
-            for style in look.areas.values_mut() {
-                style.cell_style.margins = EnumMap::from_fn(|_| [0, 0]);
-            }
-            look
-        })
-    })
-    .clone()
-}
-
-impl From<Text> for PivotTable {
-    fn from(value: Text) -> Self {
-        let dimension =
-            Dimension::new(Group::new(Value::new_text("Text")).with(Value::new_user_text("null")))
-                .with_all_labels_hidden();
-        PivotTable::new([(Axis3::Y, dimension)])
-            .with_look(text_item_table_look())
-            .with_data([(&[0], value.content)])
-            .with_subtype(Value::new_user_text("Text"))
-    }
-}
-
-impl From<&Diagnostic> for Text {
-    fn from(value: &Diagnostic) -> Self {
-        Self::new_log(value.to_string())
-    }
-}
-
-#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize)]
-#[serde(rename_all = "snake_case")]
-pub enum TextType {
-    /// `TITLE` and `SUBTITLE` commands.
-    PageTitle,
-
-    /// Title,
-    Title,
-
-    /// Syntax printback logging.
-    Syntax,
-
-    /// Other logging.
-    Log,
-}
-
-impl TextType {
-    pub fn as_str(&self) -> &'static str {
-        match self {
-            TextType::PageTitle => "Page Title",
-            TextType::Title => "Title",
-            TextType::Syntax => "Log",
-            TextType::Log => "Log",
-        }
-    }
-
-    pub fn as_xml_str(&self) -> &'static str {
-        match self {
-            TextType::PageTitle => "page-title",
-            TextType::Title => "title",
-            TextType::Syntax | TextType::Log => "log",
-        }
-    }
-}
-
-pub struct ItemCursor {
-    cur: Option<Arc<Item>>,
-    stack: Vec<(Arc<Item>, usize)>,
-}
-
-impl ItemCursor {
-    pub fn new(start: Arc<Item>) -> Self {
-        Self {
-            cur: Some(start),
-            stack: Vec::new(),
-        }
-    }
-
-    pub fn cur(&self) -> Option<&Arc<Item>> {
-        self.cur.as_ref()
-    }
-
-    pub fn next(&mut self) {
-        let Some(cur) = self.cur.take() else {
-            return;
-        };
-        match cur.details {
-            Details::Group(ref children) if !children.is_empty() => {
-                self.cur = Some(children[0].clone());
-                self.stack.push((cur, 1));
-            }
-            _ => {
-                while let Some((item, index)) = self.stack.pop() {
-                    let children = item.details.as_group().unwrap();
-                    if index < children.len() {
-                        self.cur = Some(children[index].clone());
-                        self.stack.push((item, index + 1));
-                        return;
-                    }
-                }
-            }
-        }
-    }
-}
diff --git a/rust/pspp/src/output/pivot.rs b/rust/pspp/src/output/pivot.rs

new file mode 100644 (file)

index 0000000..92133e2
--- /dev/null
+++ b/rust/pspp/src/output/pivot.rs
@@ -0,0 +1,2859 @@
+// PSPP - a program for statistical analysis.
+// Copyright (C) 2025 Free Software Foundation, Inc.
+//
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free Software
+// Foundation, either version 3 of the License, or (at your option) any later
+// version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+// details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Pivot tables.
+//!
+//! Pivot tables are PSPP's primary form of output.  They are analogous to the
+//! pivot tables you might be familiar with from spreadsheets and databases.
+//! See <https://en.wikipedia.org/wiki/Pivot_table> for a brief introduction to
+//! the overall concept of a pivot table.
+//!
+//! In PSPP, the most important internal pieces of a pivot table are:
+//!
+//! - Title.  Every pivot table has a title that is displayed above it.  It also
+//!   has an optional caption (displayed below it) and corner text (displayed in
+//!   the upper left corner).
+//!
+//! - Dimensions.  A dimension consists of zero or more categories.  A category
+//!   has a label, such as "df" or "Asymp. Sig." or 123 or a variable name.  The
+//!   categories are the leaves of a tree whose non-leaf nodes form groups of
+//!   categories.  The tree always has a root group whose label is the name of
+//!   the dimension.
+//!
+//! - Axes.  A table has three axes: column, row, and layer.  Each dimension is
+//!   assigned to an axis, and each axis has zero or more dimensions.  When an
+//!   axis has more than one dimension, they are ordered from innermost to
+//!   outermost.
+//!
+//! - Data.  A table's data consists of zero or more cells.  Each cell maps from
+//!   a category for each dimension to a value, which is commonly a number but
+//!   could also be a variable name or an arbitrary text string.
+
+use std::{
+    collections::HashMap,
+    fmt::{Debug, Display, Write},
+    io::Read,
+    iter::{once, repeat, repeat_n, FusedIterator},
+    ops::{Index, IndexMut, Not, Range, RangeInclusive},
+    str::{from_utf8, FromStr, Utf8Error},
+    sync::{Arc, OnceLock},
+};
+
+use binrw::Error as BinError;
+use chrono::NaiveDateTime;
+pub use color::ParseError as ParseColorError;
+use color::{palette::css::TRANSPARENT, AlphaColor, Rgba8, Srgb};
+use enum_iterator::Sequence;
+use enum_map::{enum_map, Enum, EnumMap};
+use look_xml::TableProperties;
+use quick_xml::{de::from_str, DeError};
+use serde::{
+    de::Visitor,
+    ser::{SerializeMap, SerializeStruct},
+    Deserialize, Serialize, Serializer,
+};
+use smallstr::SmallString;
+use smallvec::SmallVec;
+use thiserror::Error as ThisError;
+use tlo::parse_tlo;
+
+use crate::{
+    data::{ByteString, Datum, EncodedString, RawString},
+    format::{Decimal, Format, Settings as FormatSettings, Type, UncheckedFormat},
+    settings::{Settings, Show},
+    util::ToSmallString,
+    variable::{VarType, Variable},
+};
+
+pub mod output;
+
+mod look_xml;
+#[cfg(test)]
+pub mod test;
+mod tlo;
+
+/// Areas of a pivot table for styling purposes.
+#[derive(Copy, Clone, Debug, Default, Enum, PartialEq, Eq)]
+pub enum Area {
+    /// The table's title.
+    Title,
+
+    /// The table's caption.
+    Caption,
+
+    /// Footnotes.
+    Footer,
+
+    /// Top-left corner.
+    Corner,
+
+    /// Labels for columns ([Axis2::X]) and rows ([Axis2::Y]).
+    Labels(Axis2),
+
+    /// Data cells.  This is the default area.
+    #[default]
+    Data,
+
+    /// Layer indication.
+    Layers,
+}
+
+impl Display for Area {
+    /// Writes the area's lowercase name; label areas are written as
+    /// `labels(AXIS)`.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let name = match self {
+            // The only variant with a payload needs real formatting.
+            Self::Labels(axis2) => return write!(f, "labels({axis2})"),
+            Self::Title => "title",
+            Self::Caption => "caption",
+            Self::Footer => "footer",
+            Self::Corner => "corner",
+            Self::Data => "data",
+            Self::Layers => "layers",
+        };
+        f.write_str(name)
+    }
+}
+
+impl Serialize for Area {
+    /// Serializes the area as the string produced by its [Display]
+    /// implementation.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        let text = self.to_small_string::<16>();
+        serializer.serialize_str(&text)
+    }
+}
+
+impl Area {
+    /// Returns the default cell style (alignment and margins) for this area.
+    fn default_cell_style(self) -> CellStyle {
+        use HorzAlign::*;
+        use VertAlign::*;
+        // Each row is (horizontal alignment, vertical alignment,
+        // [left, right] margins, [top, bottom] margins).  A horizontal
+        // alignment of `None` is what `CellStyle` documents as "mixed".
+        let (horz_align, vert_align, hmargins, vmargins) = match self {
+            Area::Title => (Some(Center), Middle, [8, 11], [1, 8]),
+            Area::Caption => (Some(Left), Top, [8, 11], [1, 1]),
+            Area::Footer => (Some(Left), Top, [11, 8], [2, 3]),
+            Area::Corner => (Some(Left), Bottom, [8, 11], [1, 1]),
+            Area::Labels(Axis2::X) => (Some(Center), Top, [8, 11], [1, 3]),
+            Area::Labels(Axis2::Y) => (Some(Left), Top, [8, 11], [1, 3]),
+            Area::Data => (None, Top, [8, 11], [1, 1]),
+            Area::Layers => (Some(Left), Bottom, [8, 11], [1, 3]),
+        };
+        CellStyle {
+            horz_align,
+            vert_align,
+            margins: enum_map! { Axis2::X => hmargins, Axis2::Y => vmargins },
+        }
+    }
+
+    /// Returns the default font style for this area: 9-unit "Sans Serif",
+    /// black on white, with bold enabled only for [Area::Title].
+    fn default_font_style(self) -> FontStyle {
+        FontStyle {
+            bold: self == Area::Title,
+            italic: false,
+            underline: false,
+            markup: false,
+            font: String::from("Sans Serif"),
+            fg: [Color::BLACK; 2],
+            bg: [Color::WHITE; 2],
+            size: 9,
+        }
+    }
+
+    /// Returns the default combined cell and font style for this area.
+    fn default_area_style(self) -> AreaStyle {
+        AreaStyle {
+            cell_style: self.default_cell_style(),
+            font_style: self.default_font_style(),
+        }
+    }
+}
+
+/// Table borders for styling purposes.
+#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq)]
+pub enum Border {
+    /// The title border.
+    Title,
+
+    /// One side of the outer frame.
+    OuterFrame(BoxBorder),
+
+    /// One side of the inner frame.
+    InnerFrame(BoxBorder),
+
+    /// A dimension border, identified by heading region and direction.
+    Dimension(RowColBorder),
+
+    /// A category border, identified by heading region and direction.
+    Category(RowColBorder),
+
+    /// The `data(left)` border (presumably the left edge of the data area).
+    DataLeft,
+
+    /// The `data(top)` border (presumably the top edge of the data area).
+    DataTop,
+}
+
+impl Border {
+    /// Returns the stroke used for this border in the default [Look].
+    ///
+    /// Inner-frame and data borders are thick.  Dimension borders in the
+    /// column headings or in the [Axis2::X] direction, and category borders
+    /// in the column headings, are solid.  Every other border has no stroke.
+    pub fn default_stroke(self) -> Stroke {
+        match self {
+            Self::InnerFrame(_) | Self::DataLeft | Self::DataTop => Stroke::Thick,
+            Self::Dimension(
+                RowColBorder(HeadingRegion::Columns, _) | RowColBorder(_, Axis2::X),
+            )
+            | Self::Category(RowColBorder(HeadingRegion::Columns, _)) => Stroke::Solid,
+            _ => Stroke::None,
+        }
+    }
+    /// Returns the default style for this border: the stroke from
+    /// [Border::default_stroke], in black.
+    pub fn default_border_style(self) -> BorderStyle {
+        BorderStyle {
+            stroke: self.default_stroke(),
+            color: Color::BLACK,
+        }
+    }
+
+    /// Returns the border to fall back to for this one.
+    ///
+    /// A dimension border falls back to the category border with the same
+    /// region and direction; every other border is its own fallback.
+    fn fallback(self) -> Self {
+        match self {
+            Self::Title
+            | Self::OuterFrame(_)
+            | Self::InnerFrame(_)
+            | Self::DataLeft
+            | Self::DataTop
+            | Self::Category(_) => self,
+            Self::Dimension(row_col_border) => Self::Category(row_col_border),
+        }
+    }
+}
+
+impl Display for Border {
+    /// Writes the border's name, with any payload in parentheses, e.g.
+    /// `outer_frame(left)` or `data(top)`.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::Title => write!(f, "title"),
+            Self::DataLeft => write!(f, "data(left)"),
+            Self::DataTop => write!(f, "data(top)"),
+            Self::OuterFrame(box_border) => write!(f, "outer_frame({box_border})"),
+            Self::InnerFrame(box_border) => write!(f, "inner_frame({box_border})"),
+            Self::Dimension(row_col_border) => write!(f, "dimension({row_col_border})"),
+            Self::Category(row_col_border) => write!(f, "category({row_col_border})"),
+        }
+    }
+}
+
+impl Serialize for Border {
+    /// Serializes the border as the string produced by its [Display]
+    /// implementation.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        let text = self.to_small_string::<32>();
+        serializer.serialize_str(&text)
+    }
+}
+
+/// The borders on a box.
+#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum BoxBorder {
+    /// Left side.
+    Left,
+    /// Top side.
+    Top,
+    /// Right side.
+    Right,
+    /// Bottom side.
+    Bottom,
+}
+
+impl BoxBorder {
+    /// Returns the lowercase name of this side of the box.
+    fn as_str(&self) -> &'static str {
+        match self {
+            Self::Left => "left",
+            Self::Right => "right",
+            Self::Top => "top",
+            Self::Bottom => "bottom",
+        }
+    }
+}
+
+impl Display for BoxBorder {
+    /// Writes the lowercase name returned by `as_str`.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let name = self.as_str();
+        f.write_str(name)
+    }
+}
+
+/// Borders between rows and columns.
+///
+/// Displayed as `REGION:AXIS`, where the two parts are the fields in order.
+#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub struct RowColBorder(
+    /// Row or column headings.
+    pub HeadingRegion,
+    /// Horizontal ([Axis2::X]) or vertical ([Axis2::Y]) borders.
+    pub Axis2,
+);
+
+impl Display for RowColBorder {
+    /// Writes the heading region and the axis, separated by a colon.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let Self(region, axis) = self;
+        write!(f, "{}:{}", region, axis)
+    }
+}
+
+/// Sizing for rows or columns of a rendered table.
+///
+/// The comments below talk about columns and their widths but they apply
+/// equally to rows and their heights.
+///
+/// The derived default has no widths, breaks, or keeps.
+#[derive(Default, Clone, Debug, Serialize)]
+pub struct Sizing {
+    /// Specific column widths, in 1/96" units.
+    widths: Vec<i32>,
+
+    /// Specific page breaks: 0-based columns after which a page break must
+    /// occur, e.g. a value of 1 requests a break after the second column.
+    breaks: Vec<usize>,
+
+    /// Keeps: columns to keep together on a page if possible.
+    keeps: Vec<Range<usize>>,
+}
+
+/// One of the three axes of a pivot table.
+#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq, Sequence, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum Axis3 {
+    /// Counterpart of [Axis2::X].
+    X,
+    /// Counterpart of [Axis2::Y].
+    Y,
+    /// The axis with no [Axis2] counterpart (presumably the layer axis).
+    Z,
+}
+
+impl Axis3 {
+    /// Swaps [Axis3::X] and [Axis3::Y].  Returns `None` for [Axis3::Z], which
+    /// has no transposed counterpart.
+    fn transpose(&self) -> Option<Self> {
+        let swapped = match self {
+            Self::Z => return None,
+            Self::X => Self::Y,
+            Self::Y => Self::X,
+        };
+        Some(swapped)
+    }
+}
+
+impl From<Axis2> for Axis3 {
+    /// Maps each two-dimensional axis to the three-dimensional axis of the
+    /// same name.
+    fn from(axis: Axis2) -> Self {
+        match axis {
+            Axis2::X => Axis3::X,
+            Axis2::Y => Axis3::Y,
+        }
+    }
+}
+
+/// An axis within a pivot table.
+///
+/// The derived default is an axis with no dimensions.
+#[derive(Clone, Debug, Default, Serialize)]
+pub struct Axis {
+    /// The dimensions assigned to this axis, as indexes (presumably into the
+    /// table's list of dimensions; confirm against the users of this field).
+    ///
+    /// `dimensions[0]` is the innermost dimension.
+    pub dimensions: Vec<usize>,
+}
+
+/// Iterator over every combination of per-dimension indexes along an axis.
+///
+/// Each item is one index per dimension.  The first index varies fastest.
+pub struct AxisIterator {
+    /// The combination that the next call to `next` will yield.
+    indexes: SmallVec<[usize; 4]>,
+
+    /// For each position in `indexes`, the exclusive upper bound on its value.
+    lengths: SmallVec<[usize; 4]>,
+
+    /// Set once every combination has been yielded (or if there are none).
+    done: bool,
+}
+
+// Once `done` is set it is never cleared, so `next` keeps returning `None`.
+impl FusedIterator for AxisIterator {}
+impl Iterator for AxisIterator {
+    type Item = SmallVec<[usize; 4]>;
+
+    /// Yields the current combination of indexes and then advances to the
+    /// next one, odometer-style.
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.done {
+            None
+        } else {
+            let retval = self.indexes.clone();
+            // Increment the first index; on overflow, reset it to 0 and carry
+            // into the next one.
+            for (index, len) in self.indexes.iter_mut().zip(self.lengths.iter().copied()) {
+                *index += 1;
+                if *index < len {
+                    return Some(retval);
+                };
+                *index = 0;
+            }
+            // Every index wrapped around (or there are no indexes at all), so
+            // `retval` is the last combination.
+            self.done = true;
+            Some(retval)
+        }
+    }
+}
+
+impl PivotTable {
+    /// Replaces the table's look with `look` and returns the table.
+    pub fn with_look(mut self, look: Arc<Look>) -> Self {
+        self.look = look;
+        self
+    }
+    /// Inserts a numeric value, or a missing one if `number` is `None`, at
+    /// `data_indexes`.
+    ///
+    /// `class` selects the number format.  [Class::Other] uses the global
+    /// default format and is the only class for which `honor_small` is set.
+    ///
+    /// NOTE(review): `data_indexes` is passed straight to `insert`;
+    /// presumably it holds one category index per dimension.
+    pub fn insert_number(&mut self, data_indexes: &[usize], number: Option<f64>, class: Class) {
+        let format = match class {
+            Class::Other => Settings::global().default_format,
+            Class::Integer => Format::F40,
+            Class::Correlations => Format::F40_3,
+            Class::Significance => Format::F40_3,
+            Class::Percent => Format::PCT40_1,
+            Class::Residual => Format::F40_2,
+            Class::Count => Format::F40, // XXX
+        };
+        let value = Value::new(ValueInner::Number(NumberValue {
+            show: None,
+            format,
+            honor_small: class == Class::Other,
+            value: number,
+            variable: None,
+            value_label: None,
+        }));
+        self.insert(data_indexes, value);
+    }
+
+    /// Replaces the table's footnotes with `footnotes` and returns the table.
+    ///
+    /// In debug builds, asserts that the table had no footnotes beforehand.
+    pub fn with_footnotes(mut self, footnotes: Footnotes) -> Self {
+        debug_assert!(self.footnotes.is_empty());
+        self.footnotes = footnotes;
+        self
+    }
+    /// Returns an iterator over every combination of indexes for the
+    /// dimensions on `axis`.  The iterator is empty if the axis's extent is 0.
+    fn axis_values(&self, axis: Axis3) -> AxisIterator {
+        AxisIterator {
+            indexes: repeat_n(0, self.axes[axis].dimensions.len()).collect(),
+            lengths: self.axis_dimensions(axis).map(|d| d.len()).collect(),
+            done: self.axis_extent(axis) == 0,
+        }
+    }
+
+    /// Returns the product of the lengths of the dimensions on `axis`.  This
+    /// is 1 for an axis with no dimensions.
+    fn axis_extent(&self, axis: Axis3) -> usize {
+        self.axis_dimensions(axis).map(|d| d.len()).product()
+    }
+}
+
+/// Dimensions.
+///
+/// A [Dimension] identifies the categories associated with a single dimension
+/// within a multidimensional pivot table.
+///
+/// A dimension contains a collection of categories, which are the leaves in a
+/// tree of groups.
+///
+/// (A dimension or a group can contain zero categories, but this is unusual.
+/// If a dimension contains no categories, then its table cannot contain any
+/// data.)
+#[derive(Clone, Debug, Serialize)]
+pub struct Dimension {
+    /// Hierarchy of categories within the dimension.  The groups and categories
+    /// are sorted in the order that should be used for display.  This might be
+    /// different from the original order produced for output if the user
+    /// adjusted it.
+    ///
+    /// The root must always be a group, although it is allowed to have no
+    /// subcategories.
+    pub root: Group,
+
+    /// Ordering of leaves for presentation.
+    ///
+    /// This is a permutation of `0..n` where `n` is the number of leaves.  It
+    /// maps from an index in presentation order to an index in data order.
+    pub presentation_order: Vec<usize>,
+
+    /// Whether to hide all of this dimension's labels when the table is
+    /// displayed.
+    pub hide_all_labels: bool,
+}
+
+/// A list of borrowed groups, stored inline for up to 4 entries.
+pub type GroupVec<'a> = SmallVec<[&'a Group; 4]>;
+/// The location of a leaf within a tree of groups, as produced by
+/// [Group::leaf_path].
+pub struct Path<'a> {
+    /// The groups that enclose the leaf, outermost first.
+    groups: GroupVec<'a>,
+    /// The leaf itself.
+    leaf: &'a Leaf,
+}
+
+impl Dimension {
+    /// Creates a dimension with `root` as its category tree.  The leaves are
+    /// presented in data order and labels are not hidden.
+    pub fn new(root: Group) -> Self {
+        let n_leaves = root.len();
+        Self {
+            root,
+            presentation_order: (0..n_leaves).collect(),
+            hide_all_labels: false,
+        }
+    }
+
+    /// Returns true if this dimension has no (leaf) categories.
+    pub fn is_empty(&self) -> bool {
+        self.root.is_empty()
+    }
+
+    /// Returns the number of (leaf) categories in this dimension.
+    pub fn len(&self) -> usize {
+        self.root.len()
+    }
+
+    /// Returns the leaf at position `index` in the category tree, or `None` if
+    /// `index` is out of range.
+    pub fn nth_leaf(&self, index: usize) -> Option<&Leaf> {
+        self.root.nth_leaf(index)
+    }
+
+    /// Returns the leaf at position `index` along with the groups that enclose
+    /// it, or `None` if `index` is out of range.
+    pub fn leaf_path(&self, index: usize) -> Option<Path<'_>> {
+        let enclosing = GroupVec::new();
+        self.root.leaf_path(index, enclosing)
+    }
+
+    /// Returns this dimension with `hide_all_labels` set.
+    pub fn with_all_labels_hidden(mut self) -> Self {
+        self.hide_all_labels = true;
+        self
+    }
+}
+
+/// A group of categories within a [Dimension].
+#[derive(Clone, Debug, Serialize)]
+pub struct Group {
+    /// Total number of leaves beneath this group, at any depth.  Maintained by
+    /// [Group::push] and not serialized.
+    #[serde(skip)]
+    len: usize,
+
+    /// The group's name.
+    pub name: Box<Value>,
+
+    /// The child categories.
+    ///
+    /// A group usually has multiple children, but it is allowed to have
+    /// only one or even (pathologically) none.
+    pub children: Vec<Category>,
+
+    /// Whether to show the group's label.
+    pub show_label: bool,
+}
+
+impl Group {
+    /// Creates an empty group named `name`, with its label hidden.
+    pub fn new(name: impl Into<Value>) -> Self {
+        Self::with_capacity(name, 0)
+    }
+
+    /// Creates an empty group named `name`, with its label hidden and with
+    /// room reserved for `capacity` children.
+    pub fn with_capacity(name: impl Into<Value>, capacity: usize) -> Self {
+        Self {
+            len: 0,
+            name: Box::new(name.into()),
+            children: Vec::with_capacity(capacity),
+            show_label: false,
+        }
+    }
+
+    /// Appends `child` to this group.
+    ///
+    /// If `child` is itself a group, its label is forced to be shown,
+    /// overriding whatever `show_label` it had.
+    pub fn push(&mut self, child: impl Into<Category>) {
+        let mut child = child.into();
+        if let Category::Group(group) = &mut child {
+            group.show_label = true;
+        }
+        // Keep the cached leaf count in sync.
+        self.len += child.len();
+        self.children.push(child);
+    }
+
+    /// Builder-style version of [Group::push].
+    pub fn with(mut self, child: impl Into<Category>) -> Self {
+        self.push(child);
+        self
+    }
+
+    /// Builder-style version of [Extend::extend]: appends every item of
+    /// `children` as by [Group::push].
+    pub fn with_multiple<C>(mut self, children: impl IntoIterator<Item = C>) -> Self
+    where
+        C: Into<Category>,
+    {
+        self.extend(children);
+        self
+    }
+
+    /// Returns this group with its label shown.
+    pub fn with_label_shown(self) -> Self {
+        self.with_show_label(true)
+    }
+
+    /// Returns this group with `show_label` set to the given value.
+    pub fn with_show_label(mut self, show_label: bool) -> Self {
+        self.show_label = show_label;
+        self
+    }
+
+    /// Returns the leaf at position `index` among all the leaves beneath this
+    /// group, counted in child order, or `None` if `index` is out of range.
+    pub fn nth_leaf(&self, mut index: usize) -> Option<&Leaf> {
+        for child in &self.children {
+            let len = child.len();
+            if index < len {
+                return child.nth_leaf(index);
+            }
+            // Skip past this child's leaves.
+            index -= len;
+        }
+        None
+    }
+
+    /// Like [Group::nth_leaf], but also reports the groups that enclose the
+    /// leaf.  `groups` holds the groups already passed through on the way
+    /// down; this group is appended to it before descending further.
+    pub fn leaf_path<'a>(&'a self, mut index: usize, mut groups: GroupVec<'a>) -> Option<Path<'a>> {
+        for child in &self.children {
+            let len = child.len();
+            if index < len {
+                groups.push(self);
+                return child.leaf_path(index, groups);
+            }
+            index -= len;
+        }
+        None
+    }
+
+    /// Returns the number of leaves beneath this group, at any depth.
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Returns true if there are no leaves beneath this group.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Returns the group's name.
+    pub fn name(&self) -> &Value {
+        &self.name
+    }
+}
+
+impl<C> Extend<C> for Group
+where
+    C: Into<Category>,
+{
+    /// Appends each item of `children` as by [Group::push], reserving space
+    /// up front from the iterator's lower size bound.
+    fn extend<T: IntoIterator<Item = C>>(&mut self, children: T) {
+        let iter = children.into_iter();
+        let (lower_bound, _) = iter.size_hint();
+        self.children.reserve(lower_bound);
+        iter.for_each(|child| self.push(child));
+    }
+}
+
+/// A collection of shared footnotes.  [Footnotes::push] gives each footnote
+/// its position in the collection as its index.
+#[derive(Clone, Debug, Default, Serialize)]
+pub struct Footnotes(pub Vec<Arc<Footnote>>);
+
+impl Footnotes {
+    /// Creates an empty collection of footnotes.
+    pub fn new() -> Self {
+        Self(Vec::new())
+    }
+
+    /// Appends `footnote`, assigning it the next index, and returns a shared
+    /// handle to the stored footnote.
+    pub fn push(&mut self, footnote: Footnote) -> Arc<Footnote> {
+        let index = self.0.len();
+        let shared = Arc::new(footnote.with_index(index));
+        self.0.push(Arc::clone(&shared));
+        shared
+    }
+
+    /// Returns true if there are no footnotes.
+    pub fn is_empty(&self) -> bool {
+        let Self(footnotes) = self;
+        footnotes.is_empty()
+    }
+}
+
+/// A leaf category, which consists of just a name.
+#[derive(Clone, Debug)]
+pub struct Leaf {
+    /// The category's name.
+    name: Box<Value>,
+}
+
+impl Leaf {
+    /// Creates a leaf category named `name`.
+    pub fn new(name: Value) -> Self {
+        let name = Box::new(name);
+        Self { name }
+    }
+
+    /// Returns the leaf's name.
+    pub fn name(&self) -> &Value {
+        self.name.as_ref()
+    }
+}
+
+impl Serialize for Leaf {
+    /// Serializes the leaf as its name alone, without a wrapping struct.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        let name = &self.name;
+        name.serialize(serializer)
+    }
+}
+
+/// Pivot result classes.
+///
+/// These are used to mark [Leaf] categories as having particular types of data,
+/// to set their numeric formats.
+///
+/// `PivotTable::insert_number` maps each class to a format, as noted on each
+/// variant.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum Class {
+    /// Anything else: formatted with the global default format.
+    Other,
+    /// Formatted with `Format::F40`.
+    Integer,
+    /// Formatted with `Format::F40_3`.
+    Correlations,
+    /// Formatted with `Format::F40_3`.
+    Significance,
+    /// Formatted with `Format::PCT40_1`.
+    Percent,
+    /// Formatted with `Format::F40_2`.
+    Residual,
+    /// Formatted with `Format::F40`.
+    Count,
+}
+
+/// A category within a dimension: either a leaf (a category proper) or a
+/// group of further categories.
+#[derive(Clone, Debug, Serialize)]
+pub enum Category {
+    /// A group, which contains zero or more child categories.
+    Group(Group),
+    /// A leaf, which counts as exactly one category.
+    Leaf(Leaf),
+}
+
+impl Category {
+    /// Returns the name of the group or leaf.
+    pub fn name(&self) -> &Value {
+        match self {
+            Self::Group(group) => group.name(),
+            Self::Leaf(leaf) => leaf.name(),
+        }
+    }
+
+    /// Returns true if this category contains no leaves, which is only
+    /// possible for a group.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Returns the number of leaves in this category: 1 for a leaf, or the
+    /// group's leaf count for a group.
+    pub fn len(&self) -> usize {
+        match self {
+            Self::Leaf(_) => 1,
+            Self::Group(group) => group.len(),
+        }
+    }
+
+    /// Returns the leaf at position `index` within this category, or `None`
+    /// if `index` is out of range.  A leaf contains only itself, at index 0.
+    pub fn nth_leaf(&self, index: usize) -> Option<&Leaf> {
+        match self {
+            Self::Leaf(leaf) => (index == 0).then_some(leaf),
+            Self::Group(group) => group.nth_leaf(index),
+        }
+    }
+
+    /// Like [Category::nth_leaf], but also returns the enclosing groups.
+    /// `groups` holds the groups already passed through on the way down.
+    pub fn leaf_path<'a>(&'a self, index: usize, groups: GroupVec<'a>) -> Option<Path<'a>> {
+        match self {
+            Self::Leaf(leaf) => (index == 0).then(|| Path { groups, leaf }),
+            Self::Group(group) => group.leaf_path(index, groups),
+        }
+    }
+
+    /// Returns whether to show this category's label.  This is always true
+    /// for a leaf; a group decides for itself.
+    pub fn show_label(&self) -> bool {
+        match self {
+            Self::Leaf(_) => true,
+            Self::Group(group) => group.show_label,
+        }
+    }
+}
+
+impl From<Group> for Category {
+    /// Wraps `group` as a group category.
+    fn from(group: Group) -> Self {
+        Category::Group(group)
+    }
+}
+
+impl From<Leaf> for Category {
+    /// Wraps `leaf` as a leaf category.
+    fn from(leaf: Leaf) -> Self {
+        Category::Leaf(leaf)
+    }
+}
+
+impl From<Value> for Category {
+    /// Creates a leaf category named `name`.
+    fn from(name: Value) -> Self {
+        Self::Leaf(Leaf::new(name))
+    }
+}
+
+impl From<&Variable> for Category {
+    /// Creates a leaf category whose name is the value built from `variable`.
+    fn from(variable: &Variable) -> Self {
+        let name = Value::new_variable(variable);
+        name.into()
+    }
+}
+
+impl From<&str> for Category {
+    /// Creates a leaf category whose name is the text `name`.
+    fn from(name: &str) -> Self {
+        let leaf = Leaf::new(Value::new_text(name));
+        Self::Leaf(leaf)
+    }
+}
+
+impl From<String> for Category {
+    /// Creates a leaf category whose name is the text `name`.
+    fn from(name: String) -> Self {
+        let leaf = Leaf::new(Value::new_text(name));
+        Self::Leaf(leaf)
+    }
+}
+
+impl From<&String> for Category {
+    /// Creates a leaf category whose name is the text `name`.
+    fn from(name: &String) -> Self {
+        let leaf = Leaf::new(Value::new_text(name));
+        Self::Leaf(leaf)
+    }
+}
+
+/// Styling for a pivot table.
+///
+/// The division between this and the style information in [PivotTable] seems
+/// fairly arbitrary.  The ultimate reason for the division is simply because
+/// that's how SPSS documentation and file formats do it.
+#[derive(Clone, Debug, Serialize)]
+pub struct Look {
+    /// Name of the look, if it has one.
+    pub name: Option<String>,
+
+    /// Whether to hide rows or columns whose cells are all empty.
+    pub hide_empty: bool,
+
+    /// Where to put group labels for rows.
+    pub row_label_position: LabelPosition,
+
+    /// Ranges of column widths in the two heading regions, in 1/96" units.
+    pub heading_widths: EnumMap<HeadingRegion, RangeInclusive<usize>>,
+
+    /// Kind of markers to use for footnotes.
+    pub footnote_marker_type: FootnoteMarkerType,
+
+    /// Where to put the footnote markers.
+    pub footnote_marker_position: FootnoteMarkerPosition,
+
+    /// Styles for areas of the pivot table.
+    pub areas: EnumMap<Area, AreaStyle>,
+
+    /// Styles for borders in the pivot table.
+    pub borders: EnumMap<Border, BorderStyle>,
+
+    // NOTE(review): the remaining fields are printing and pagination options
+    // whose exact semantics are not visible in this part of the file; the
+    // descriptions below follow the field names and should be confirmed.
+    /// Presumably, whether to print every layer rather than just one.
+    pub print_all_layers: bool,
+
+    /// Presumably, whether to start a new page for each layer.
+    pub paginate_layers: bool,
+
+    /// Per-axis flag; presumably, whether to shrink the table to fit the page
+    /// along that axis.
+    pub shrink_to_fit: EnumMap<Axis2, bool>,
+
+    /// Presumably, whether to show continuation text at the top of a table
+    /// that continues from a previous page.
+    pub top_continuation: bool,
+
+    /// Presumably, whether to show continuation text at the bottom of a table
+    /// that continues onto a later page.
+    pub bottom_continuation: bool,
+
+    /// Presumably, the continuation text itself, if any.
+    pub continuation: Option<String>,
+
+    /// Presumably, the minimum number of lines to keep together across a
+    /// page break.
+    pub n_orphan_lines: usize,
+}
+
+impl Look {
+    /// Returns this look with `hide_empty` set to `omit_empty`.
+    pub fn with_omit_empty(self, omit_empty: bool) -> Self {
+        Self {
+            hide_empty: omit_empty,
+            ..self
+        }
+    }
+    /// Returns this look with the given position for row labels.
+    pub fn with_row_label_position(self, row_label_position: LabelPosition) -> Self {
+        Self {
+            row_label_position,
+            ..self
+        }
+    }
+    /// Returns this look with all of its border styles replaced by `borders`.
+    pub fn with_borders(self, borders: EnumMap<Border, BorderStyle>) -> Self {
+        Self { borders, ..self }
+    }
+}
+
+impl Default for Look {
+    /// Returns the built-in look: unnamed, hiding empty rows and columns, with
+    /// the default area and border styles and with every printing option off.
+    fn default() -> Self {
+        Self {
+            name: None,
+            hide_empty: true,
+            row_label_position: LabelPosition::default(),
+            // Heading width ranges are in 1/96" units.
+            heading_widths: EnumMap::from_fn(|region| match region {
+                HeadingRegion::Rows => 36..=72,
+                HeadingRegion::Columns => 36..=120,
+            }),
+            footnote_marker_type: FootnoteMarkerType::default(),
+            footnote_marker_position: FootnoteMarkerPosition::default(),
+            areas: EnumMap::from_fn(Area::default_area_style),
+            borders: EnumMap::from_fn(Border::default_border_style),
+            print_all_layers: false,
+            paginate_layers: false,
+            shrink_to_fit: EnumMap::from_fn(|_| false),
+            top_continuation: false,
+            bottom_continuation: false,
+            continuation: None,
+            n_orphan_lines: 0,
+        }
+    }
+}
+
+/// An error encountered while reading or parsing a [Look].
+///
+/// Every variant is transparent: it displays as the underlying error.
+#[derive(ThisError, Debug)]
+pub enum ParseLookError {
+    /// The XML could not be deserialized.
+    #[error(transparent)]
+    XmlError(#[from] DeError),
+
+    /// The data was not valid UTF-8.
+    #[error(transparent)]
+    Utf8Error(#[from] Utf8Error),
+
+    /// A binary-format parse error.
+    #[error(transparent)]
+    BinError(#[from] BinError),
+
+    /// An I/O error while reading the data.
+    #[error(transparent)]
+    IoError(#[from] std::io::Error),
+}
+
+impl Look {
+    /// Returns a shared handle to the default look, which is created on first
+    /// use and reused afterward.
+    pub fn shared_default() -> Arc<Look> {
+        static LOOK: OnceLock<Arc<Look>> = OnceLock::new();
+        let look = LOOK.get_or_init(|| Arc::new(Look::default()));
+        Arc::clone(look)
+    }
+
+    /// Parses a look from its XML representation.
+    pub fn from_xml(xml: &str) -> Result<Self, ParseLookError> {
+        let properties: TableProperties = from_str(xml)?;
+        Ok(properties.into())
+    }
+
+    /// Parses a look from its binary representation.
+    pub fn from_binary(tlo: &[u8]) -> Result<Self, ParseLookError> {
+        Ok(parse_tlo(tlo)?)
+    }
+
+    /// Parses a look from `data` in either format.
+    ///
+    /// Data that starts with the bytes `ff ff 00 00` is treated as binary.
+    /// Anything else must be UTF-8 and is parsed as XML.
+    pub fn from_data(data: &[u8]) -> Result<Self, ParseLookError> {
+        if data.starts_with(b"\xff\xff\0\0") {
+            return Self::from_binary(data);
+        }
+        let xml = from_utf8(data)?;
+        Self::from_xml(xml)
+    }
+
+    /// Reads all of `reader` into memory and parses it as by
+    /// [Look::from_data].
+    pub fn from_reader<R>(mut reader: R) -> Result<Self, ParseLookError>
+    where
+        R: Read,
+    {
+        let mut contents = Vec::new();
+        reader.read_to_end(&mut contents)?;
+        Self::from_data(&contents)
+    }
+}
+
+/// Position for group labels.
+///
+/// The default is [LabelPosition::Corner].
+#[derive(Copy, Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
+pub enum LabelPosition {
+    /// Hierarchically enclosing the categories.
+    ///
+    /// For column labels, group labels appear above the categories.  For row
+    /// labels, group labels appear to the left of the categories.
+    ///
+    /// ```text
+    /// ┌────┬──────────────┐   ┌─────────┬──────────┐
+    /// │    │    nested    │   │         │ columns  │
+    /// │    ├────┬────┬────┤   ├──────┬──┼──────────┤
+    /// │    │ a1 │ a2 │ a3 │   │      │a1│...data...│
+    /// ├────┼────┼────┼────┤   │nested│a2│...data...│
+    /// │    │data│data│data│   │      │a3│...data...│
+    /// │    │ .  │ .  │ .  │   └──────┴──┴──────────┘
+    /// │rows│ .  │ .  │ .  │
+    /// │    │ .  │ .  │ .  │
+    /// └────┴────┴────┴────┘
+    /// ```
+    #[serde(rename = "nested")]
+    Nested,
+
+    /// In the corner (row labels only).
+    ///
+    /// ```text
+    /// ┌──────┬──────────┐
+    /// │corner│ columns  │
+    /// ├──────┼──────────┤
+    /// │    a1│...data...│
+    /// │    a2│...data...│
+    /// │    a3│...data...│
+    /// └──────┴──────────┘
+    /// ```
+    #[default]
+    #[serde(rename = "inCorner")]
+    Corner,
+}
+
+/// The heading region of a rendered pivot table:
+///
+/// ```text
+/// ┌──────────────────┬─────────────────────────────────────────────────┐
+/// │                  │                  column headings                │
+/// │                  ├─────────────────────────────────────────────────┤
+/// │      corner      │                                                 │
+/// │       and        │                                                 │
+/// │   row headings   │                      data                       │
+/// │                  │                                                 │
+/// │                  │                                                 │
+/// └──────────────────┴─────────────────────────────────────────────────┘
+/// ```
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Enum, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum HeadingRegion {
+    /// The row headings, to the left of the data.
+    Rows,
+    /// The column headings, above the data.
+    Columns,
+}
+
+impl HeadingRegion {
+    /// Returns the lowercase name of this heading region.
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Self::Columns => "columns",
+            Self::Rows => "rows",
+        }
+    }
+}
+
+impl Display for HeadingRegion {
+    /// Writes the lowercase name returned by [HeadingRegion::as_str].
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(self.as_str())
+    }
+}
+
+impl From<Axis2> for HeadingRegion {
+    /// Maps [Axis2::X] to the column headings and [Axis2::Y] to the row
+    /// headings.
+    fn from(axis: Axis2) -> Self {
+        match axis {
+            Axis2::Y => Self::Rows,
+            Axis2::X => Self::Columns,
+        }
+    }
+}
+
+/// The style of one [Area] of a pivot table.
+#[derive(Clone, Debug, Serialize)]
+pub struct AreaStyle {
+    /// Alignment and margins.
+    pub cell_style: CellStyle,
+    /// Typeface, emphasis, colors, and size.
+    pub font_style: FontStyle,
+}
+
+/// Alignment and margins for a cell.
+#[derive(Clone, Debug, Serialize)]
+pub struct CellStyle {
+    /// Horizontal alignment.
+    ///
+    /// `None` means "mixed" alignment: align strings to the left, numbers to
+    /// the right.
+    pub horz_align: Option<HorzAlign>,
+
+    /// Vertical alignment.
+    pub vert_align: VertAlign,
+
+    /// Margins in 1/96" units.
+    ///
+    /// `margins[Axis2::X][0]` is the left margin.
+    /// `margins[Axis2::X][1]` is the right margin.
+    /// `margins[Axis2::Y][0]` is the top margin.
+    /// `margins[Axis2::Y][1]` is the bottom margin.
+    pub margins: EnumMap<Axis2, [i32; 2]>,
+}
+
+/// Horizontal alignment of a cell's content.
+#[derive(Copy, Clone, Debug, PartialEq, Deserialize, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum HorzAlign {
+    /// Right aligned.
+    Right,
+
+    /// Left aligned.
+    Left,
+
+    /// Centered.
+    Center,
+
+    /// Align the decimal point at the specified position.
+    Decimal {
+        /// Decimal offset from the right side of the cell, in 1/96" units.
+        offset: f64,
+
+        /// Decimal character.
+        decimal: Decimal,
+    },
+}
+
+impl HorzAlign {
+    /// Resolves "mixed" alignment for a value of type `var_type`: numbers are
+    /// right-aligned and strings are left-aligned.
+    pub fn for_mixed(var_type: VarType) -> Self {
+        match var_type {
+            VarType::String => HorzAlign::Left,
+            VarType::Numeric => HorzAlign::Right,
+        }
+    }
+}
+
+/// Vertical alignment of a cell's content.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum VertAlign {
+    /// Top alignment.
+    Top,
+
+    /// Centered.
+    Middle,
+
+    /// Bottom alignment.
+    Bottom,
+}
+
+/// Typeface, emphasis, colors, and size for text in an area or cell.
+#[derive(Clone, Debug, Serialize)]
+pub struct FontStyle {
+    /// Whether the text is bold.
+    pub bold: bool,
+    /// Whether the text is italic.
+    pub italic: bool,
+    /// Whether the text is underlined.
+    pub underline: bool,
+    /// Presumably, whether the text contains markup to be interpreted rather
+    /// than shown literally (confirm against the renderers).
+    pub markup: bool,
+    /// Name of the typeface, e.g. "Sans Serif".
+    pub font: String,
+
+    /// `fg[0]` is the usual foreground color.
+    ///
+    /// `fg[1]` is used only in [Area::Data] for odd-numbered rows.
+    pub fg: [Color; 2],
+
+    /// `bg[0]` is the usual background color.
+    ///
+    /// `bg[1]` is used only in [Area::Data] for odd-numbered rows.
+    pub bg: [Color; 2],
+
+    /// In 1/72" units.
+    pub size: i32,
+}
+
+/// A color with 8 bits each of red, green, blue, and alpha.
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub struct Color {
+    /// Opacity: 255 is fully opaque and 0 is fully transparent.
+    pub alpha: u8,
+    /// Red component.
+    pub r: u8,
+    /// Green component.
+    pub g: u8,
+    /// Blue component.
+    pub b: u8,
+}
+
+impl Color {
+    /// Opaque black.
+    pub const BLACK: Color = Color::new(0, 0, 0);
+    /// Opaque white.
+    pub const WHITE: Color = Color::new(255, 255, 255);
+    /// Opaque red.
+    pub const RED: Color = Color::new(255, 0, 0);
+    /// Opaque blue.
+    pub const BLUE: Color = Color::new(0, 0, 255);
+    /// Fully transparent black.
+    pub const TRANSPARENT: Color = Color::BLACK.with_alpha(0);
+
+    /// Creates a fully opaque color from its red, green, and blue components.
+    pub const fn new(r: u8, g: u8, b: u8) -> Self {
+        Self {
+            r,
+            g,
+            b,
+            alpha: u8::MAX,
+        }
+    }
+
+    /// Returns this color with its alpha replaced by `alpha`.
+    pub const fn with_alpha(self, alpha: u8) -> Self {
+        Self {
+            alpha,
+            r: self.r,
+            g: self.g,
+            b: self.b,
+        }
+    }
+
+    /// Returns this color made fully opaque.
+    pub const fn without_alpha(self) -> Self {
+        self.with_alpha(u8::MAX)
+    }
+
+    /// Returns a wrapper that displays this color in CSS syntax.
+    pub fn display_css(&self) -> DisplayCss {
+        let color = *self;
+        DisplayCss(color)
+    }
+}
+
+impl Debug for Color {
+    /// Formats the color in the same CSS syntax as [Color::display_css],
+    /// rather than as a struct.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let css = self.display_css();
+        write!(f, "{}", css)
+    }
+}
+
+impl From<Rgba8> for Color {
+    /// Converts from the `color` crate's 8-bit RGBA type, component by
+    /// component.
+    fn from(rgba: Rgba8) -> Self {
+        let Rgba8 { r, g, b, a } = rgba;
+        Self { alpha: a, r, g, b }
+    }
+}
+
+impl FromStr for Color {
+    type Err = ParseColorError;
+
+    /// Parses a color from `s`, ignoring leading and trailing white space.
+    ///
+    /// Besides whatever syntax the `color` crate's parser accepts, this also
+    /// accepts a bare 6-digit hexadecimal color with no leading `#` (e.g.
+    /// `ff0000`) and, case-insensitively, the word `transparent`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the underlying parser's error if `s` is in none of these forms.
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        fn is_bare_hex(s: &str) -> bool {
+            s.len() == 6 && s.bytes().all(|b| b.is_ascii_hexdigit())
+        }
+
+        // Trim once so that every check and retry below sees the same text.
+        // Previously only the bare-hex test trimmed, so input such as
+        // " ff0000" passed the test and was then retried as "# ff0000".
+        let s = s.trim();
+        let color: AlphaColor<Srgb> = match s.parse() {
+            Err(ParseColorError::UnknownColorSyntax) if is_bare_hex(s) => {
+                format!("#{s}").parse()
+            }
+            Err(ParseColorError::UnknownColorSyntax)
+                if s.eq_ignore_ascii_case("transparent") =>
+            {
+                Ok(TRANSPARENT)
+            }
+            other => other,
+        }?;
+        Ok(color.to_rgba8().into())
+    }
+}
+
+impl Serialize for Color {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        serializer.serialize_str(&self.display_css().to_small_string::<32>())
+    }
+}
+
+impl<'de> Deserialize<'de> for Color {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        struct ColorVisitor;
+
+        impl<'de> Visitor<'de> for ColorVisitor {
+            type Value = Color;
+
+            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
+                formatter.write_str("\"#rrggbb\" or \"rrggbb\" or web color name")
+            }
+
+            fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
+            where
+                E: serde::de::Error,
+            {
+                v.parse().map_err(E::custom)
+            }
+        }
+
+        deserializer.deserialize_str(ColorVisitor)
+    }
+}
+
+pub struct DisplayCss(Color);
+
+impl Display for DisplayCss {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let Color { alpha, r, g, b } = self.0;
+        match alpha {
+            255 => write!(f, "#{r:02x}{g:02x}{b:02x}"),
+            _ => write!(f, "rgb({r}, {g}, {b}, {:.2})", alpha as f64 / 255.0),
+        }
+    }
+}
+
+/// The style of a border or rule: a [Stroke] plus a [Color].
+#[derive(Copy, Clone, Debug, Deserialize)]
+pub struct BorderStyle {
+    /// Kind of line.  Deserialized from `@borderStyleType`.
+    #[serde(rename = "@borderStyleType")]
+    pub stroke: Stroke,
+
+    /// Line color.  Deserialized from `@color`.
+    #[serde(rename = "@color")]
+    pub color: Color,
+}
+
+impl Serialize for BorderStyle {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        let mut s = serializer.serialize_struct("BorderStyle", 2)?;
+        s.serialize_field("stroke", &self.stroke)?;
+        s.serialize_field("color", &self.color)?;
+        s.end()
+    }
+}
+
+impl BorderStyle {
+    pub const fn none() -> Self {
+        Self {
+            stroke: Stroke::None,
+            color: Color::BLACK,
+        }
+    }
+
+    pub fn is_none(&self) -> bool {
+        self.stroke.is_none()
+    }
+
+    /// Returns a border style that "combines" the two arguments, that is, that
+    /// gives a reasonable choice for a rule for different reasons should have
+    /// both styles.
+    pub fn combine(self, other: BorderStyle) -> Self {
+        Self {
+            stroke: self.stroke.combine(other.stroke),
+            color: self.color,
+        }
+    }
+}
+
+/// The kind of line used to draw a border.
+///
+/// The order of the variants matters: [Stroke::combine] picks the greater of
+/// two strokes according to the derived [Ord], so a later variant wins over
+/// an earlier one.
+///
+/// Variant names are serialized and deserialized in camelCase.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Enum, Deserialize, Serialize)]
+#[serde(rename_all = "camelCase")]
+pub enum Stroke {
+    None,
+    Solid,
+    Dashed,
+    Thick,
+    Thin,
+    Double,
+}
+
+impl Stroke {
+    pub fn is_none(&self) -> bool {
+        self == &Self::None
+    }
+
+    /// Returns a stroke that "combines" the two arguments, that is, that gives
+    /// a reasonable stroke choice for a rule for different reasons should have
+    /// both styles.
+    pub fn combine(self, other: Stroke) -> Self {
+        self.max(other)
+    }
+}
+
+/// An axis of a 2-dimensional table.
+///
+/// Serialized and deserialized as `x` or `y`.  Use `!axis` to obtain the
+/// other axis.
+#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum Axis2 {
+    X,
+    Y,
+}
+
+impl Axis2 {
+    pub fn new_enum<T>(x: T, y: T) -> EnumMap<Axis2, T> {
+        EnumMap::from_array([x, y])
+    }
+
+    pub fn as_str(&self) -> &'static str {
+        match self {
+            Axis2::X => "x",
+            Axis2::Y => "y",
+        }
+    }
+}
+
+impl Display for Axis2 {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", self.as_str())
+    }
+}
+
+impl Not for Axis2 {
+    type Output = Self;
+
+    fn not(self) -> Self::Output {
+        match self {
+            Self::X => Self::Y,
+            Self::Y => Self::X,
+        }
+    }
+}
+
+/// A 2-dimensional `(x,y)` pair.
+///
+/// The pair is stored as one `usize` per [Axis2] and can be indexed directly
+/// with an [Axis2].
+#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Hash)]
+pub struct Coord2(pub EnumMap<Axis2, usize>);
+
+impl Coord2 {
+    pub fn new(x: usize, y: usize) -> Self {
+        use Axis2::*;
+        Self(enum_map! {
+            X => x,
+            Y => y
+        })
+    }
+
+    pub fn for_axis((a, az): (Axis2, usize), bz: usize) -> Self {
+        let mut coord = Self::default();
+        coord[a] = az;
+        coord[!a] = bz;
+        coord
+    }
+
+    pub fn from_fn<F>(f: F) -> Self
+    where
+        F: FnMut(Axis2) -> usize,
+    {
+        Self(EnumMap::from_fn(f))
+    }
+
+    pub fn x(&self) -> usize {
+        self.0[Axis2::X]
+    }
+
+    pub fn y(&self) -> usize {
+        self.0[Axis2::Y]
+    }
+
+    pub fn get(&self, axis: Axis2) -> usize {
+        self.0[axis]
+    }
+}
+
+impl From<EnumMap<Axis2, usize>> for Coord2 {
+    fn from(value: EnumMap<Axis2, usize>) -> Self {
+        Self(value)
+    }
+}
+
+impl Index<Axis2> for Coord2 {
+    type Output = usize;
+
+    fn index(&self, index: Axis2) -> &Self::Output {
+        &self.0[index]
+    }
+}
+
+impl IndexMut<Axis2> for Coord2 {
+    fn index_mut(&mut self, index: Axis2) -> &mut Self::Output {
+        &mut self.0[index]
+    }
+}
+
+/// A rectangle expressed as one `Range<usize>` per [Axis2].
+///
+/// It can be indexed with an [Axis2] to obtain the range along that axis.
+#[derive(Clone, Debug, Default)]
+pub struct Rect2(pub EnumMap<Axis2, Range<usize>>);
+
+impl Rect2 {
+    pub fn new(x_range: Range<usize>, y_range: Range<usize>) -> Self {
+        Self(enum_map! {
+            Axis2::X => x_range.clone(),
+            Axis2::Y => y_range.clone(),
+        })
+    }
+    pub fn for_cell(cell: Coord2) -> Self {
+        Self::new(cell.x()..cell.x() + 1, cell.y()..cell.y() + 1)
+    }
+    pub fn for_ranges((a, a_range): (Axis2, Range<usize>), b_range: Range<usize>) -> Self {
+        let b = !a;
+        let mut ranges = EnumMap::default();
+        ranges[a] = a_range;
+        ranges[b] = b_range;
+        Self(ranges)
+    }
+    pub fn top_left(&self) -> Coord2 {
+        use Axis2::*;
+        Coord2::new(self[X].start, self[Y].start)
+    }
+    pub fn from_fn<F>(f: F) -> Self
+    where
+        F: FnMut(Axis2) -> Range<usize>,
+    {
+        Self(EnumMap::from_fn(f))
+    }
+    pub fn translate(self, offset: Coord2) -> Rect2 {
+        Self::from_fn(|axis| self[axis].start + offset[axis]..self[axis].end + offset[axis])
+    }
+    pub fn is_empty(&self) -> bool {
+        self[Axis2::X].is_empty() || self[Axis2::Y].is_empty()
+    }
+}
+
+impl From<EnumMap<Axis2, Range<usize>>> for Rect2 {
+    fn from(value: EnumMap<Axis2, Range<usize>>) -> Self {
+        Self(value)
+    }
+}
+
+impl Index<Axis2> for Rect2 {
+    type Output = Range<usize>;
+
+    fn index(&self, index: Axis2) -> &Self::Output {
+        &self.0[index]
+    }
+}
+
+impl IndexMut<Axis2> for Rect2 {
+    fn index_mut(&mut self, index: Axis2) -> &mut Self::Output {
+        &mut self.0[index]
+    }
+}
+
+/// The kind of marker generated for a footnote that has no explicit marker
+/// of its own.
+///
+/// Variant names are serialized and deserialized in camelCase.
+#[derive(Copy, Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
+#[serde(rename_all = "camelCase")]
+pub enum FootnoteMarkerType {
+    /// a, b, c, ...
+    #[default]
+    Alphabetic,
+
+    /// 1, 2, 3, ...
+    Numeric,
+}
+
+/// Where footnote markers are placed relative to the text they annotate.
+///
+/// Variant names are serialized and deserialized in camelCase.
+#[derive(Copy, Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
+#[serde(rename_all = "camelCase")]
+pub enum FootnoteMarkerPosition {
+    /// Subscripts.
+    #[default]
+    Subscript,
+
+    /// Superscripts.
+    Superscript,
+}
+
+/// Options that affect how a value or footnote marker is displayed.
+///
+/// [PivotTable::value_options] fills these in from a pivot table.
+#[derive(Copy, Clone, Debug)]
+pub struct ValueOptions {
+    /// Copied from [PivotTable::show_values] by [PivotTable::value_options].
+    pub show_values: Option<Show>,
+
+    /// Copied from [PivotTable::show_variables] by
+    /// [PivotTable::value_options].
+    pub show_variables: Option<Show>,
+
+    /// Defaults to 0.0001.
+    // NOTE(review): presumably the threshold below which numbers count as
+    // "small" for formatting purposes -- confirm against the number
+    // formatting code.
+    pub small: f64,
+
+    /// Which kind of marker (alphabetic or numeric) to generate for footnotes
+    /// that lack an explicit marker.
+    pub footnote_marker_type: FootnoteMarkerType,
+}
+
+impl Default for ValueOptions {
+    fn default() -> Self {
+        Self {
+            show_values: None,
+            show_variables: None,
+            small: 0.0001,
+            footnote_marker_type: FootnoteMarkerType::default(),
+        }
+    }
+}
+
+/// Conversion into [ValueOptions].
+///
+/// This lets functions that need [ValueOptions] accept `()` (for defaults), a
+/// `&PivotTable`, or [ValueOptions] by value or by reference.
+pub trait IntoValueOptions {
+    /// Converts `self` into [ValueOptions].
+    fn into_value_options(self) -> ValueOptions;
+}
+
+impl IntoValueOptions for () {
+    fn into_value_options(self) -> ValueOptions {
+        ValueOptions::default()
+    }
+}
+
+impl IntoValueOptions for &PivotTable {
+    fn into_value_options(self) -> ValueOptions {
+        self.value_options()
+    }
+}
+
+impl IntoValueOptions for &ValueOptions {
+    fn into_value_options(self) -> ValueOptions {
+        *self
+    }
+}
+
+impl IntoValueOptions for ValueOptions {
+    /// Identity conversion: returns `self` unchanged.
+    fn into_value_options(self) -> ValueOptions {
+        self
+    }
+}
+
+/// A pivot table: a set of [Dimension]s assigned to [Axis3] axes, a map of
+/// cell [Value]s, and the settings that control how they are displayed.
+#[derive(Clone, Debug, Serialize)]
+pub struct PivotTable {
+    /// The table's look.  It is shared; [PivotTable::look_mut] obtains a
+    /// mutable reference through `Arc::make_mut`.
+    pub look: Arc<Look>,
+
+    pub rotate_inner_column_labels: bool,
+
+    pub rotate_outer_row_labels: bool,
+
+    pub show_grid_lines: bool,
+
+    /// Set to true by [PivotTable::with_title].
+    pub show_title: bool,
+
+    /// Set to true by [PivotTable::with_caption].
+    pub show_caption: bool,
+
+    /// Passed along in [PivotTable::value_options].
+    pub show_values: Option<Show>,
+
+    /// Passed along in [PivotTable::value_options].
+    pub show_variables: Option<Show>,
+
+    pub weight_format: Format,
+
+    /// Current layer indexes, with `axes[Axis3::Z].dimensions.len()` elements.
+    /// `current_layer[i]` is an offset into
+    /// `axes[Axis3::Z].dimensions[i].data_leaves[]`, except that a dimension
+    /// can have zero leaves, in which case `current_layer[i]` is zero and
+    /// there's no corresponding leaf.
+    pub current_layer: Vec<usize>,
+
+    /// Column and row sizing and page breaks.
+    pub sizing: EnumMap<Axis2, Option<Box<Sizing>>>,
+
+    /// Format settings.
+    pub settings: FormatSettings,
+
+    /// Numeric grouping character (usually `.` or `,`).
+    pub grouping: Option<char>,
+
+    /// Passed along in [PivotTable::value_options].
+    pub small: f64,
+
+    pub command_local: Option<String>,
+    pub command_c: Option<String>,
+    pub language: Option<String>,
+    pub locale: Option<String>,
+    pub dataset: Option<String>,
+    pub datafile: Option<String>,
+    pub date: Option<NaiveDateTime>,
+    pub footnotes: Footnotes,
+    pub title: Option<Box<Value>>,
+    pub subtype: Option<Box<Value>>,
+    pub corner_text: Option<Box<Value>>,
+    pub caption: Option<Box<Value>>,
+    pub notes: Option<String>,
+    /// All of the table's dimensions.  Each axis in `axes` refers to its
+    /// dimensions by index into this vector.
+    pub dimensions: Vec<Dimension>,
+    pub axes: EnumMap<Axis3, Axis>,
+    /// Cell values, keyed by the index computed from the per-dimension data
+    /// indexes (see [PivotTable::insert] and [PivotTable::get]).
+    pub cells: HashMap<usize, Value>,
+}
+
+impl PivotTable {
+    pub fn with_title(mut self, title: impl Into<Value>) -> Self {
+        self.title = Some(Box::new(title.into()));
+        self.show_title = true;
+        self
+    }
+
+    pub fn with_caption(mut self, caption: impl Into<Value>) -> Self {
+        self.caption = Some(Box::new(caption.into()));
+        self.show_caption = true;
+        self
+    }
+
+    pub fn with_corner_text(mut self, corner_text: impl Into<Value>) -> Self {
+        self.corner_text = Some(Box::new(corner_text.into()));
+        self
+    }
+
+    pub fn with_subtype(self, subtype: impl Into<Value>) -> Self {
+        Self {
+            subtype: Some(Box::new(subtype.into())),
+            ..self
+        }
+    }
+
+    pub fn with_show_title(mut self, show_title: bool) -> Self {
+        self.show_title = show_title;
+        self
+    }
+
+    pub fn with_show_caption(mut self, show_caption: bool) -> Self {
+        self.show_caption = show_caption;
+        self
+    }
+
+    pub fn with_layer(mut self, layer: &[usize]) -> Self {
+        debug_assert_eq!(layer.len(), self.current_layer.len());
+        if self.look.print_all_layers {
+            self.look_mut().print_all_layers = false;
+        }
+        self.current_layer.clear();
+        self.current_layer.extend_from_slice(layer);
+        self
+    }
+
+    pub fn with_all_layers(mut self) -> Self {
+        if !self.look.print_all_layers {
+            self.look_mut().print_all_layers = true;
+        }
+        self
+    }
+
+    pub fn look_mut(&mut self) -> &mut Look {
+        Arc::make_mut(&mut self.look)
+    }
+
+    pub fn with_show_empty(mut self) -> Self {
+        if self.look.hide_empty {
+            self.look_mut().hide_empty = false;
+        }
+        self
+    }
+
+    pub fn with_hide_empty(mut self) -> Self {
+        if !self.look.hide_empty {
+            self.look_mut().hide_empty = true;
+        }
+        self
+    }
+
+    pub fn label(&self) -> String {
+        match &self.title {
+            Some(title) => title.display(self).to_string(),
+            None => String::from("Table"),
+        }
+    }
+
+    pub fn title(&self) -> &Value {
+        match &self.title {
+            Some(title) => title,
+            None => {
+                static EMPTY: Value = Value::empty();
+                &EMPTY
+            }
+        }
+    }
+
+    pub fn subtype(&self) -> &Value {
+        match &self.subtype {
+            Some(subtype) => subtype,
+            None => {
+                static EMPTY: Value = Value::empty();
+                &EMPTY
+            }
+        }
+    }
+}
+
+impl Default for PivotTable {
+    /// Returns an empty pivot table: no dimensions, no cells, the shared
+    /// default look, and the title and caption set to be shown.
+    ///
+    /// The `XXX` comments mark fields whose defaults are placeholders.
+    fn default() -> Self {
+        Self {
+            look: Look::shared_default(),
+            rotate_inner_column_labels: false,
+            rotate_outer_row_labels: false,
+            show_grid_lines: false,
+            show_title: true,
+            show_caption: true,
+            show_values: None,
+            show_variables: None,
+            weight_format: Format::F40,
+            current_layer: Vec::new(),
+            sizing: EnumMap::default(),
+            settings: FormatSettings::default(), // XXX from settings
+            grouping: None,
+            small: 0.0001, // XXX from settings.
+            command_local: None,
+            command_c: None, // XXX from current command name.
+            language: None,
+            locale: None,
+            dataset: None,
+            datafile: None,
+            date: None,
+            footnotes: Footnotes::new(),
+            subtype: None,
+            title: None,
+            corner_text: None,
+            caption: None,
+            notes: None,
+            dimensions: Vec::new(),
+            axes: EnumMap::default(),
+            cells: HashMap::new(),
+        }
+    }
+}
+
+fn cell_index<I>(data_indexes: &[usize], dimensions: I) -> usize
+where
+    I: ExactSizeIterator<Item = usize>,
+{
+    debug_assert_eq!(data_indexes.len(), dimensions.len());
+    let mut index = 0;
+    for (dimension, data_index) in dimensions.zip(data_indexes.iter()) {
+        debug_assert!(*data_index < dimension);
+        index = dimension * index + data_index;
+    }
+    index
+}
+
+impl PivotTable {
+    pub fn new(axes_and_dimensions: impl IntoIterator<Item = (Axis3, Dimension)>) -> Self {
+        let mut dimensions = Vec::new();
+        let mut axes = EnumMap::<Axis3, Axis>::default();
+        for (axis, dimension) in axes_and_dimensions {
+            axes[axis].dimensions.push(dimensions.len());
+            dimensions.push(dimension);
+        }
+        Self {
+            look: Settings::global().look.clone(),
+            current_layer: repeat_n(0, axes[Axis3::Z].dimensions.len()).collect(),
+            axes,
+            dimensions,
+            ..Self::default()
+        }
+    }
+    fn cell_index(&self, data_indexes: &[usize]) -> usize {
+        cell_index(data_indexes, self.dimensions.iter().map(|d| d.len()))
+    }
+
+    pub fn insert(&mut self, data_indexes: &[usize], value: impl Into<Value>) {
+        self.cells
+            .insert(self.cell_index(data_indexes), value.into());
+    }
+
+    pub fn get(&self, data_indexes: &[usize]) -> Option<&Value> {
+        self.cells.get(&self.cell_index(data_indexes))
+    }
+
+    pub fn with_data<I>(mut self, iter: impl IntoIterator<Item = (I, Value)>) -> Self
+    where
+        I: AsRef<[usize]>,
+    {
+        self.extend(iter);
+        self
+    }
+
+    /// Converts per-axis presentation-order indexes in `presentation_indexes`,
+    /// into data indexes for each dimension.
+    fn convert_indexes_ptod(
+        &self,
+        presentation_indexes: EnumMap<Axis3, &[usize]>,
+    ) -> SmallVec<[usize; 4]> {
+        let mut data_indexes = SmallVec::from_elem(0, self.dimensions.len());
+        for (axis, presentation_indexes) in presentation_indexes {
+            for (&dim_index, &pindex) in self.axes[axis]
+                .dimensions
+                .iter()
+                .zip(presentation_indexes.iter())
+            {
+                data_indexes[dim_index] = self.dimensions[dim_index].presentation_order[pindex];
+            }
+        }
+        data_indexes
+    }
+
+    /// Returns an iterator for the layer axis:
+    ///
+    /// - If `print` is true and `self.look.print_all_layers`, then the iterator
+    ///   will visit all values of the layer axis.
+    ///
+    /// - Otherwise, the iterator will just visit `self.current_layer`.
+    pub fn layers(&self, print: bool) -> Box<dyn Iterator<Item = SmallVec<[usize; 4]>>> {
+        if print && self.look.print_all_layers {
+            Box::new(self.axis_values(Axis3::Z))
+        } else {
+            Box::new(once(SmallVec::from_slice(&self.current_layer)))
+        }
+    }
+
+    pub fn value_options(&self) -> ValueOptions {
+        ValueOptions {
+            show_values: self.show_values,
+            show_variables: self.show_variables,
+            small: self.small,
+            footnote_marker_type: self.look.footnote_marker_type,
+        }
+    }
+
+    pub fn transpose(&mut self) {
+        self.axes.swap(Axis3::X, Axis3::Y);
+    }
+
+    pub fn axis_dimensions(
+        &self,
+        axis: Axis3,
+    ) -> impl DoubleEndedIterator<Item = &Dimension> + ExactSizeIterator {
+        self.axes[axis]
+            .dimensions
+            .iter()
+            .copied()
+            .map(|index| &self.dimensions[index])
+    }
+
+    fn find_dimension(&self, dim_index: usize) -> Option<(Axis3, usize)> {
+        debug_assert!(dim_index < self.dimensions.len());
+        for axis in enum_iterator::all::<Axis3>() {
+            for (position, dimension) in self.axes[axis].dimensions.iter().copied().enumerate() {
+                if dimension == dim_index {
+                    return Some((axis, position));
+                }
+            }
+        }
+        None
+    }
+    pub fn move_dimension(&mut self, dim_index: usize, new_axis: Axis3, new_position: usize) {
+        let (old_axis, old_position) = self.find_dimension(dim_index).unwrap();
+        if old_axis == new_axis && old_position == new_position {
+            return;
+        }
+
+        // Update the current layer, if necessary.  If we're moving within the
+        // layer axis, preserve the current layer.
+        match (old_axis, new_axis) {
+            (Axis3::Z, Axis3::Z) => {
+                // Rearrange the layer axis.
+                if old_position < new_position {
+                    self.current_layer[old_position..=new_position].rotate_left(1);
+                } else {
+                    self.current_layer[new_position..=old_position].rotate_right(1);
+                }
+            }
+            (Axis3::Z, _) => {
+                // A layer is becoming a row or column.
+                self.current_layer.remove(old_position);
+            }
+            (_, Axis3::Z) => {
+                // A row or column is becoming a layer.
+                self.current_layer.insert(new_position, 0);
+            }
+            _ => (),
+        }
+
+        self.axes[old_axis].dimensions.remove(old_position);
+        self.axes[new_axis]
+            .dimensions
+            .insert(new_position, dim_index);
+    }
+}
+
+impl<I> Extend<(I, Value)> for PivotTable
+where
+    I: AsRef<[usize]>,
+{
+    fn extend<T: IntoIterator<Item = (I, Value)>>(&mut self, iter: T) {
+        for (data_indexes, value) in iter {
+            self.insert(data_indexes.as_ref(), value);
+        }
+    }
+}
+
+/// A footnote that can be attached to a [Value].
+#[derive(Clone, Debug, Serialize)]
+pub struct Footnote {
+    /// Zero-based index of the footnote.  It is not serialized.  When the
+    /// footnote has no explicit `marker`, the generated marker is derived
+    /// from `index + 1`.
+    #[serde(skip)]
+    index: usize,
+    /// The footnote's content.
+    pub content: Box<Value>,
+    /// Explicit marker, if any, used instead of a generated one.
+    pub marker: Option<Box<Value>>,
+    /// Footnotes with `show` set to false are left out of
+    /// [DisplayValue::footnotes].
+    pub show: bool,
+}
+
+impl Footnote {
+    pub fn new(content: impl Into<Value>) -> Self {
+        Self {
+            index: 0,
+            content: Box::new(content.into()),
+            marker: None,
+            show: true,
+        }
+    }
+    pub fn with_marker(mut self, marker: impl Into<Value>) -> Self {
+        self.marker = Some(Box::new(marker.into()));
+        self
+    }
+
+    pub fn with_show(mut self, show: bool) -> Self {
+        self.show = show;
+        self
+    }
+
+    pub fn with_index(mut self, index: usize) -> Self {
+        self.index = index;
+        self
+    }
+
+    pub fn display_marker(&self, options: impl IntoValueOptions) -> DisplayMarker<'_> {
+        DisplayMarker {
+            footnote: self,
+            options: options.into_value_options(),
+        }
+    }
+
+    pub fn display_content(&self, options: impl IntoValueOptions) -> DisplayValue<'_> {
+        self.content.display(options)
+    }
+
+    pub fn index(&self) -> usize {
+        self.index
+    }
+}
+
+/// Formats a [Footnote]'s marker.  Returned by [Footnote::display_marker].
+pub struct DisplayMarker<'a> {
+    /// The footnote whose marker is displayed.
+    footnote: &'a Footnote,
+    /// Options that select the generated marker type and that are used to
+    /// format an explicit marker.
+    options: ValueOptions,
+}
+
+impl Display for DisplayMarker<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        if let Some(marker) = &self.footnote.marker {
+            write!(f, "{}", marker.display(self.options).without_suffixes())
+        } else {
+            let i = self.footnote.index + 1;
+            match self.options.footnote_marker_type {
+                FootnoteMarkerType::Alphabetic => write!(f, "{}", Display26Adic::new_lowercase(i)),
+                FootnoteMarkerType::Numeric => write!(f, "{i}"),
+            }
+        }
+    }
+}
+
+/// Displays a number in 26adic notation.
+///
+/// Zero is displayed as the empty string, 1 through 26 as `a` through `z`, 27
+/// through 52 as `aa` through `az`, and so on.
+pub struct Display26Adic {
+    /// The number to display.
+    value: usize,
+    /// ASCII code of the first letter of the alphabet to use: `b'a'` for
+    /// lowercase or `b'A'` for uppercase.
+    base: u8,
+}
+
+impl Display26Adic {
+    /// Constructs a `Display26Adic` for `value`, with letters in lowercase.
+    pub fn new_lowercase(value: usize) -> Self {
+        Self { value, base: b'a' }
+    }
+
+    /// Constructs a `Display26Adic` for `value`, with letters in uppercase.
+    pub fn new_uppercase(value: usize) -> Self {
+        Self { value, base: b'A' }
+    }
+}
+
+impl Display for Display26Adic {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut output = SmallVec::<[u8; 16]>::new();
+        let mut number = self.value;
+        while number > 0 {
+            number -= 1;
+            let digit = (number % 26) as u8;
+            output.push(digit + self.base);
+            number /= 26;
+        }
+        output.reverse();
+        write!(f, "{}", from_utf8(&output).unwrap())
+    }
+}
+
+/// The content of a single pivot table cell.
+///
+/// A [Value] is also a pivot table's title, caption, footnote marker and
+/// contents, and so on.
+///
+/// A given [Value] is one of:
+///
+/// 1. A number resulting from a calculation.
+///
+///    A number has an associated display format (usually [F] or [Pct]).  This
+///    format can be set directly, but that is not usually the easiest way.
+///    Instead, it is usually true that all of the values in a single category
+///    should have the same format (e.g. all "Significance" values might use
+///    format `F40.3`), so PSPP makes it easy to set the default format for a
+///    category while creating the category.  See pivot_dimension_create() for
+///    more details.
+///
+///    [F]: crate::format::Type::F
+///    [Pct]: crate::format::Type::Pct
+///
+/// 2. A numeric or string value obtained from data ([ValueInner::Number] or
+///    [ValueInner::String]).  If such a value corresponds to a variable, then the
+///    variable's name can be attached to the pivot_value.  If the value has a
+///    value label, then that can also be attached.  When a label is present,
+///    the user can control whether to show the value or the label or both.
+///
+/// 3. A variable name ([ValueInner::Variable]).  The variable label, if any, can
+///    be attached too, and again the user can control whether to show the value
+///    or the label or both.
+///
+/// 4. A text string ([ValueInner::Text]).  The value stores the string in English
+///    and translated into the output language (localized).  Use
+///    pivot_value_new_text() or pivot_value_new_text_format() for those cases.
+///    In some cases, only an English or a localized version is available for
+///    one reason or another, although this is regrettable; in those cases, use
+///    pivot_value_new_user_text() or pivot_value_new_user_text_nocopy().
+///
+/// 5. A template. PSPP doesn't create these itself yet, but it can read and
+///    interpret those created by SPSS.
+#[derive(Clone, Default)]
+pub struct Value {
+    /// The value's content.
+    pub inner: ValueInner,
+    /// Optional styling; [Value::add_footnote] stores footnotes here.
+    pub styling: Option<Box<ValueStyle>>,
+}
+
+impl Serialize for Value {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        self.inner.serialize(serializer)
+    }
+}
+
+/// Wrapper for [Value] that uses [Value::serialize_bare] for serialization.
+///
+/// Serializing a `BareValue` therefore emits just the wrapped value's number,
+/// string, variable name, or localized text, as [Value::serialize_bare] does.
+#[derive(Serialize)]
+struct BareValue<'a>(#[serde(serialize_with = "Value::serialize_bare")] pub &'a Value);
+
+impl Value {
+    pub fn serialize_bare<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        match &self.inner {
+            ValueInner::Number(number_value) => number_value.serialize_bare(serializer),
+            ValueInner::String(string_value) => string_value.s.serialize(serializer),
+            ValueInner::Variable(variable_value) => variable_value.var_name.serialize(serializer),
+            ValueInner::Text(text_value) => text_value.localized.serialize(serializer),
+            ValueInner::Template(template_value) => template_value.localized.serialize(serializer),
+            ValueInner::Empty => serializer.serialize_none(),
+        }
+    }
+
+    fn new(inner: ValueInner) -> Self {
+        Self {
+            inner,
+            styling: None,
+        }
+    }
+    pub fn new_number_with_format(x: Option<f64>, format: Format) -> Self {
+        Self::new(ValueInner::Number(NumberValue {
+            show: None,
+            format,
+            honor_small: false,
+            value: x,
+            variable: None,
+            value_label: None,
+        }))
+    }
+    pub fn new_variable(variable: &Variable) -> Self {
+        Self::new(ValueInner::Variable(VariableValue {
+            show: None,
+            var_name: String::from(variable.name.as_str()),
+            variable_label: variable.label.clone(),
+        }))
+    }
+    pub fn new_datum<B>(value: &Datum<B>) -> Self
+    where
+        B: EncodedString,
+    {
+        match value {
+            Datum::Number(number) => Self::new_number(*number),
+            Datum::String(string) => Self::new_user_text(string.as_str()),
+        }
+    }
+    pub fn new_variable_value(variable: &Variable, value: &Datum<ByteString>) -> Self {
+        let var_name = Some(variable.name.as_str().into());
+        let value_label = variable.value_labels.get(value).map(String::from);
+        match value {
+            Datum::Number(number) => Self::new(ValueInner::Number(NumberValue {
+                show: None,
+                format: match variable.print_format.var_type() {
+                    VarType::Numeric => variable.print_format,
+                    VarType::String => {
+                        #[cfg(debug_assertions)]
+                        panic!("cannot create numeric pivot value with string format");
+
+                        #[cfg(not(debug_assertions))]
+                        Format::F8_2
+                    }
+                },
+                honor_small: false,
+                value: *number,
+                variable: var_name,
+                value_label,
+            })),
+            Datum::String(string) => Self::new(ValueInner::String(StringValue {
+                show: None,
+                hex: variable.print_format.type_() == Type::AHex,
+                s: string
+                    .as_ref()
+                    .with_encoding(variable.encoding())
+                    .into_string(),
+                var_name,
+                value_label,
+            })),
+        }
+    }
+    pub fn new_number(x: Option<f64>) -> Self {
+        Self::new_number_with_format(x, Format::F8_2)
+    }
+    pub fn new_integer(x: Option<f64>) -> Self {
+        Self::new_number_with_format(x, Format::F40)
+    }
+    pub fn new_text(s: impl Into<String>) -> Self {
+        Self::new_user_text(s)
+    }
+    pub fn new_user_text(s: impl Into<String>) -> Self {
+        let s: String = s.into();
+        if s.is_empty() {
+            Self::default()
+        } else {
+            Self::new(ValueInner::Text(TextValue {
+                user_provided: true,
+                localized: s.clone(),
+                c: None,
+                id: None,
+            }))
+        }
+    }
+    pub fn with_footnote(mut self, footnote: &Arc<Footnote>) -> Self {
+        self.add_footnote(footnote);
+        self
+    }
+    pub fn add_footnote(&mut self, footnote: &Arc<Footnote>) {
+        let footnotes = &mut self.styling.get_or_insert_default().footnotes;
+        footnotes.push(footnote.clone());
+        footnotes.sort_by_key(|f| f.index);
+    }
+    pub fn with_show_value_label(mut self, show: Option<Show>) -> Self {
+        let new_show = show;
+        match &mut self.inner {
+            ValueInner::Number(NumberValue { show, .. })
+            | ValueInner::String(StringValue { show, .. }) => {
+                *show = new_show;
+            }
+            _ => (),
+        }
+        self
+    }
+    pub fn with_show_variable_label(mut self, show: Option<Show>) -> Self {
+        if let ValueInner::Variable(variable_value) = &mut self.inner {
+            variable_value.show = show;
+        }
+        self
+    }
+    pub fn with_value_label(mut self, label: Option<String>) -> Self {
+        match &mut self.inner {
+            ValueInner::Number(NumberValue { value_label, .. })
+            | ValueInner::String(StringValue { value_label, .. }) => *value_label = label.clone(),
+            _ => (),
+        }
+        self
+    }
+    pub const fn empty() -> Self {
+        Value {
+            inner: ValueInner::Empty,
+            styling: None,
+        }
+    }
+    pub const fn is_empty(&self) -> bool {
+        self.inner.is_empty() && self.styling.is_none()
+    }
+}
+
+impl From<&str> for Value {
+    fn from(value: &str) -> Self {
+        Self::new_text(value)
+    }
+}
+
+impl From<String> for Value {
+    fn from(value: String) -> Self {
+        Self::new_text(value)
+    }
+}
+
+impl From<&Variable> for Value {
+    fn from(variable: &Variable) -> Self {
+        Self::new_variable(variable)
+    }
+}
+
+/// A borrowed view of a value's content, together with the subscripts,
+/// footnotes, and options that affect how it is displayed.
+pub struct DisplayValue<'a> {
+    /// The content to display.
+    inner: &'a ValueInner,
+    /// Taken from the font style's `markup` flag by
+    /// [DisplayValue::with_styling] and [DisplayValue::with_font_style].
+    markup: bool,
+    /// Subscripts attached to the value.
+    subscripts: &'a [String],
+    /// Footnotes attached to the value, including ones that are not shown.
+    footnotes: &'a [Arc<Footnote>],
+    /// Display options.
+    options: ValueOptions,
+    // NOTE(review): presumably whether the value itself is displayed, as
+    // opposed to only its label -- confirm where this is set.
+    show_value: bool,
+    /// When this is `Some`, [DisplayValue::var_type] reports a string even
+    /// for numeric content.
+    show_label: Option<&'a str>,
+}
+
+impl<'a> DisplayValue<'a> {
+    /// Returns the subscripts that will be written after the body.
+    pub fn subscripts(&self) -> impl Iterator<Item = &str> {
+        self.subscripts.iter().map(String::as_str)
+    }
+
+    /// Returns true if there is at least one subscript.
+    pub fn has_subscripts(&self) -> bool {
+        !self.subscripts.is_empty()
+    }
+
+    /// Returns the markers for the footnotes whose `show` flag is set.
+    pub fn footnotes(&self) -> impl Iterator<Item = DisplayMarker<'_>> {
+        self.footnotes
+            .iter()
+            .filter(|f| f.show)
+            .map(|f| f.display_marker(self.options))
+    }
+
+    /// Returns true if [Self::footnotes] would yield at least one marker.
+    pub fn has_footnotes(&self) -> bool {
+        self.footnotes().next().is_some()
+    }
+
+    /// Returns this display with its subscripts and footnotes removed.
+    pub fn without_suffixes(self) -> Self {
+        Self {
+            subscripts: &[],
+            footnotes: &[],
+            ..self
+        }
+    }
+
+    /// Returns this display split into `(body, suffixes)` where `suffixes` is
+    /// subscripts and footnotes and `body` is everything else.
+    pub fn split_suffixes(self) -> (Self, Self) {
+        let suffixes = Self {
+            inner: &ValueInner::Empty,
+            ..self
+        };
+        (self.without_suffixes(), suffixes)
+    }
+
+    /// Returns this display with subscripts and footnotes taken from
+    /// `styling`, and with the markup flag taken from its area style if it
+    /// has one.
+    pub fn with_styling(mut self, styling: &'a ValueStyle) -> Self {
+        if let Some(area_style) = &styling.style {
+            self.markup = area_style.font_style.markup;
+        }
+        self.subscripts = styling.subscripts.as_slice();
+        self.footnotes = styling.footnotes.as_slice();
+        self
+    }
+
+    /// Returns this display with the markup flag taken from `font_style`.
+    pub fn with_font_style(self, font_style: &FontStyle) -> Self {
+        Self {
+            markup: font_style.markup,
+            ..self
+        }
+    }
+
+    /// Returns this display with its subscripts replaced by `subscripts`.
+    pub fn with_subscripts(self, subscripts: &'a [String]) -> Self {
+        Self { subscripts, ..self }
+    }
+
+    /// Returns this display with its footnotes replaced by `footnotes`.
+    pub fn with_footnotes(self, footnotes: &'a [Arc<Footnote>]) -> Self {
+        Self { footnotes, ..self }
+    }
+
+    /// Returns true if there is no body, no subscripts, and no footnotes
+    /// (whether or not the footnotes are marked to be shown).
+    pub fn is_empty(&self) -> bool {
+        self.inner.is_empty() && self.subscripts.is_empty() && self.footnotes.is_empty()
+    }
+
+    /// Magnitude threshold from the display options, below which nonzero
+    /// numbers may be shown in E format (see the `Display` implementation).
+    fn small(&self) -> f64 {
+        self.options.small
+    }
+
+    /// Returns [VarType::Numeric] for a number displayed without a label and
+    /// [VarType::String] for everything else.
+    pub fn var_type(&self) -> VarType {
+        match self.inner {
+            ValueInner::Number(NumberValue { .. }) if self.show_label.is_none() => VarType::Numeric,
+            _ => VarType::String,
+        }
+    }
+
+    /// Expands `template` into `f`, substituting values from `args`.
+    ///
+    /// - `\n` writes a new-line; `\` followed by any other character writes
+    ///   that character.
+    /// - `^N` writes the first value of argument `N` (1-based).  An index
+    ///   that is missing or out of range writes nothing.
+    /// - `[` introduces two inner templates, each obtained with
+    ///   [extract_inner_template], followed by an optional `]` and an
+    ///   argument index.  If the first inner template is nonempty it is
+    ///   expanded once with `%` as its escape; after that (or from the start
+    ///   if it is empty) the second is expanded with `^` as its escape,
+    ///   repeatedly, until the argument's values are used up or an expansion
+    ///   consumes none.
+    /// - Anything else is copied through unchanged.
+    fn template(
+        &self,
+        f: &mut std::fmt::Formatter<'_>,
+        template: &str,
+        args: &[Vec<Value>],
+    ) -> std::fmt::Result {
+        // Iterate by `char`, not by byte, so that literal text (including
+        // non-ASCII text) is written as text.  Every character with special
+        // meaning is ASCII, so the byte-oriented helpers below always return
+        // a remainder that starts on a character boundary.
+        let mut chars = template.chars();
+        while let Some(c) = chars.next() {
+            match c {
+                '\\' => {
+                    let c = chars.next().unwrap_or('\\');
+                    let c = if c == 'n' { '\n' } else { c };
+                    write!(f, "{c}")?;
+                }
+                '^' => {
+                    let tail = chars.as_str();
+                    let (index, rest) = consume_int(tail.as_bytes());
+                    chars = tail[tail.len() - rest.len()..].chars();
+                    // Index 0 wraps to `usize::MAX`, which is never in range.
+                    let Some(arg) = args.get(index.wrapping_sub(1)) else {
+                        continue;
+                    };
+                    if let Some(arg) = arg.first() {
+                        write!(f, "{}", arg.display(self.options))?;
+                    }
+                }
+                '[' => {
+                    let tail = chars.as_str();
+                    let (a, rest) = extract_inner_template(tail.as_bytes());
+                    let (b, rest) = extract_inner_template(rest);
+                    let rest = rest.strip_prefix(b"]").unwrap_or(rest);
+                    let (index, rest) = consume_int(rest);
+                    chars = tail[tail.len() - rest.len()..].chars();
+
+                    let Some(mut args) = args.get(index.wrapping_sub(1)).map(|vec| vec.as_slice())
+                    else {
+                        continue;
+                    };
+                    let (mut template, mut escape) =
+                        if !a.is_empty() { (a, b'%') } else { (b, b'^') };
+                    while !args.is_empty() {
+                        let n_consumed = self.inner_template(f, template, escape, args)?;
+                        if n_consumed == 0 {
+                            // Avoid looping forever on a template that uses
+                            // no arguments.
+                            break;
+                        }
+                        args = &args[n_consumed..];
+
+                        template = b;
+                        escape = b'^';
+                    }
+                }
+                c => write!(f, "{c}")?,
+            }
+        }
+        Ok(())
+    }
+
+    /// Expands the inner `template` into `f`, where `escape` followed by a
+    /// 1-based index substitutes the corresponding element of `args`.
+    ///
+    /// Returns the highest argument index that was substituted, which is the
+    /// number of leading elements of `args` the caller should treat as
+    /// consumed, or 0 if nothing was substituted.
+    fn inner_template(
+        &self,
+        f: &mut std::fmt::Formatter<'_>,
+        template: &[u8],
+        escape: u8,
+        args: &[Value],
+    ) -> Result<usize, std::fmt::Error> {
+        // `template` is a piece of a `&str` cut at ASCII delimiters, so this
+        // conversion borrows and does not allocate.
+        let template = String::from_utf8_lossy(template);
+        let escape = char::from(escape);
+        let mut chars = template.chars();
+        let mut args_consumed = 0;
+        while let Some(c) = chars.next() {
+            if c == '\\' {
+                let c = chars.next().unwrap_or('\\');
+                let c = if c == 'n' { '\n' } else { c };
+                write!(f, "{c}")?;
+            } else if c == escape {
+                let tail = chars.as_str();
+                let (index, rest) = consume_int(tail.as_bytes());
+                chars = tail[tail.len() - rest.len()..].chars();
+                let Some(arg) = args.get(index.wrapping_sub(1)) else {
+                    continue;
+                };
+                args_consumed = args_consumed.max(index);
+                write!(f, "{}", arg.display(self.options))?;
+            } else {
+                write!(f, "{c}")?;
+            }
+        }
+        Ok(args_consumed)
+    }
+}
+
+/// Parses a run of ASCII decimal digits at the start of `input`.
+///
+/// Returns the parsed number and the rest of `input` after the digits.  If
+/// `input` does not start with a digit, the number is 0 and all of `input` is
+/// returned.  A number too large for `usize` saturates at `usize::MAX`
+/// instead of overflowing.
+fn consume_int(input: &[u8]) -> (usize, &[u8]) {
+    let n_digits = input
+        .iter()
+        .position(|c| !c.is_ascii_digit())
+        .unwrap_or(input.len());
+    let (digits, rest) = input.split_at(n_digits);
+    let n = digits.iter().fold(0usize, |n, c| {
+        n.saturating_mul(10).saturating_add(usize::from(c - b'0'))
+    });
+    (n, rest)
+}
+
+/// Splits `input` at its first unescaped `:`.
+///
+/// Returns `(template, rest)`, where `template` is everything before the `:`
+/// and `rest` is everything after it.  The `:` itself is in neither part, so
+/// `rest` can be passed straight back in to extract the next inner template.
+/// If there is no unescaped `:`, all of `input` is the template and `rest` is
+/// empty.
+///
+/// A backslash escapes the byte that follows it, so `\:` does not end the
+/// template, while the `:` in `\\:` (an escaped backslash, then a colon) does.
+fn extract_inner_template(input: &[u8]) -> (&[u8], &[u8]) {
+    let mut index = 0;
+    while let Some(&c) = input.get(index) {
+        match c {
+            // Skip the backslash together with whatever it escapes.
+            b'\\' => index += 2,
+            b':' => return (&input[..index], &input[index + 1..]),
+            _ => index += 1,
+        }
+    }
+    (input, &[])
+}
+
+/// Decides whether to show a value, its `label`, or both.
+///
+/// The setting is taken from `value_show` if it is set, otherwise from
+/// `table_show`, otherwise from `global_show`, which is only called when
+/// neither of the others is set.  Returns `(show_value, show_label)`.
+fn interpret_show(
+    global_show: impl Fn() -> Show,
+    table_show: Option<Show>,
+    value_show: Option<Show>,
+    label: &str,
+) -> (bool, Option<&str>) {
+    let show = match value_show.or(table_show) {
+        Some(show) => show,
+        None => global_show(),
+    };
+    match show {
+        Show::Value => (true, None),
+        Show::Label => (false, Some(label)),
+        Show::Both => (true, Some(label)),
+    }
+}
+
+/// Writes the body of the value, then its subscripts, then its footnote
+/// markers.
+impl Display for DisplayValue<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self.inner {
+            ValueInner::Number(NumberValue {
+                format,
+                honor_small,
+                value,
+                ..
+            }) => {
+                if self.show_value {
+                    // A nonzero number in F format whose magnitude is below
+                    // the `small` threshold is shown in E format instead
+                    // (width 40, same number of decimals), if the value asks
+                    // for that with `honor_small`.
+                    let format = if format.type_() == Type::F
+                        && *honor_small
+                        && value.is_some_and(|value| value != 0.0 && value.abs() < self.small())
+                    {
+                        UncheckedFormat::new(Type::E, 40, format.d() as u8).fix()
+                    } else {
+                        *format
+                    };
+                    // Format into a scratch buffer so that leading spaces can
+                    // be dropped before writing to `f`.
+                    let mut buf = SmallString::<[u8; 40]>::new();
+                    write!(
+                        &mut buf,
+                        "{}",
+                        Datum::<&str>::Number(*value).display(format)
+                    )
+                    .unwrap();
+                    write!(f, "{}", buf.trim_start_matches(' '))?;
+                }
+                if let Some(label) = self.show_label {
+                    // A space separates the value from its label when both
+                    // are shown.
+                    if self.show_value {
+                        write!(f, " ")?;
+                    }
+                    f.write_str(label)?;
+                }
+                Ok(())
+            }
+
+            // For a variable, the "value" is the variable's name.
+            ValueInner::String(StringValue { s, .. })
+            | ValueInner::Variable(VariableValue { var_name: s, .. }) => {
+                match (self.show_value, self.show_label) {
+                    (true, None) => write!(f, "{s}"),
+                    (false, Some(label)) => write!(f, "{label}"),
+                    (true, Some(label)) => write!(f, "{s} {label}"),
+                    // `ValueInner::display` never produces this combination:
+                    // it hides the value only when there is a label to show.
+                    (false, None) => unreachable!(),
+                }
+            }
+
+            ValueInner::Text(TextValue {
+                localized: local, ..
+            }) => {
+                /*
+                if self
+                    .inner
+                    .styling
+                    .as_ref()
+                    .is_some_and(|styling| styling.style.font_style.markup)
+                {
+                    todo!();
+                }*/
+                f.write_str(local)
+            }
+
+            ValueInner::Template(TemplateValue {
+                args,
+                localized: local,
+                ..
+            }) => self.template(f, local, args),
+
+            ValueInner::Empty => Ok(()),
+        }?;
+
+        // The first subscript is introduced by `_` and each later one by `,`.
+        for (subscript, delimiter) in self.subscripts.iter().zip(once('_').chain(repeat(','))) {
+            write!(f, "{delimiter}{subscript}")?;
+        }
+
+        // Each footnote marker is written in square brackets.
+        //
+        // NOTE(review): this writes every footnote, whereas
+        // `DisplayValue::footnotes()` skips footnotes whose `show` flag is
+        // false.  Confirm whether hidden footnotes are meant to appear here.
+        for footnote in self.footnotes {
+            write!(f, "[{}]", footnote.display_marker(self.options))?;
+        }
+
+        Ok(())
+    }
+}
+
+impl Value {
+    /// Returns an object that will format this value, including subscripts
+    /// and footnotes when the value has styling.  `options` controls whether
+    /// variable and value labels are included.
+    pub fn display(&self, options: impl IntoValueOptions) -> DisplayValue<'_> {
+        let options = options.into_value_options();
+        let body = self.inner.display(options);
+        if let Some(styling) = &self.styling {
+            body.with_styling(styling)
+        } else {
+            body
+        }
+    }
+}
+
+/// Debug output is the value's displayed text (with default options), shown
+/// as a quoted string.
+impl Debug for Value {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let rendered = self.display(()).to_string();
+        write!(f, "{rendered:?}")
+    }
+}
+
+/// A numeric value, with the information needed to format and label it.
+#[derive(Clone, Debug)]
+pub struct NumberValue {
+    /// The numerical value, or `None` if it is a missing value.
+    pub value: Option<f64>,
+    /// The format used to display `value`.
+    pub format: Format,
+    /// Per-value setting for whether to show the value, its label, or both.
+    /// When set, it takes precedence over the table-wide and global settings.
+    pub show: Option<Show>,
+    /// If true and `format` is an F format, a nonzero value whose magnitude
+    /// is below the `small` display option is shown in E format instead.
+    pub honor_small: bool,
+    /// Presumably the name of the variable that the value belongs to, if any
+    /// (TODO confirm against callers).
+    pub variable: Option<String>,
+    /// The label for the value, if any.
+    pub value_label: Option<String>,
+}
+
+/// Serializes as just the number when the format is an F format and there is
+/// no variable or value label; otherwise, as a map that includes the format
+/// and whichever of the optional members are present.
+impl Serialize for NumberValue {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        let is_plain = self.format.type_() == Type::F
+            && self.variable.is_none()
+            && self.value_label.is_none();
+        if is_plain {
+            return self.value.serialize(serializer);
+        }
+
+        let mut map = serializer.serialize_map(None)?;
+        map.serialize_entry("value", &self.value)?;
+        map.serialize_entry("format", &self.format)?;
+        if let Some(show) = self.show {
+            map.serialize_entry("show", &show)?;
+        }
+        // `honor_small` is only written when it is true.
+        if self.honor_small {
+            map.serialize_entry("honor_small", &self.honor_small)?;
+        }
+        if let Some(variable) = &self.variable {
+            map.serialize_entry("variable", variable)?;
+        }
+        if let Some(value_label) = &self.value_label {
+            map.serialize_entry("value_label", value_label)?;
+        }
+        map.end()
+    }
+}
+
+impl NumberValue {
+    /// Serializes just the number, ignoring the format and labels.
+    ///
+    /// A value that is a whole number with magnitude at most 2**53 is
+    /// serialized as an integer; anything else (including a missing value)
+    /// is serialized as the `Option<f64>` itself.
+    pub fn serialize_bare<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        if let Some(number) = self.value
+            && number.trunc() == number
+            && number >= -(1i64 << 53) as f64
+            && number <= (1i64 << 53) as f64
+        {
+            // Negative integers need a signed type: a float-to-`u64` cast
+            // saturates them to 0.  Non-negative integers keep using `u64`
+            // as before.
+            if number >= 0.0 {
+                (number as u64).serialize(serializer)
+            } else {
+                (number as i64).serialize(serializer)
+            }
+        } else {
+            self.value.serialize(serializer)
+        }
+    }
+}
+
+/// Wrapper that serializes a [NumberValue] with
+/// [NumberValue::serialize_bare] instead of its regular `Serialize`
+/// implementation.
+#[derive(Serialize)]
+pub struct BareNumberValue<'a>(
+    #[serde(serialize_with = "NumberValue::serialize_bare")] pub &'a NumberValue,
+);
+
+/// A string value, with the information needed to label it.
+#[derive(Clone, Debug, Serialize)]
+pub struct StringValue {
+    /// The string value.
+    ///
+    /// If `hex` is true, this should contain hex digits, not raw binary data
+    /// (otherwise it would be impossible to encode non-UTF-8 data).
+    pub s: String,
+
+    /// True if `s` is hex digits.
+    pub hex: bool,
+
+    /// Per-value setting for whether to show the value, its label, or both.
+    /// When set, it takes precedence over the table-wide and global settings.
+    pub show: Option<Show>,
+
+    /// Presumably the name of the variable that the value belongs to, if any
+    /// (TODO confirm against callers).
+    pub var_name: Option<String>,
+    /// The label for the value, if any.
+    pub value_label: Option<String>,
+}
+
+/// A value that refers to a variable.
+#[derive(Clone, Debug, Serialize)]
+pub struct VariableValue {
+    /// Per-value setting for whether to show the variable's name, its label,
+    /// or both.  When set, it takes precedence over the table-wide and global
+    /// settings.
+    pub show: Option<Show>,
+    /// The variable's name, which is what is displayed as the "value".
+    pub var_name: String,
+    /// The variable's label, if any.
+    pub variable_label: Option<String>,
+}
+
+/// A text string, possibly available in several forms.
+#[derive(Clone, Debug)]
+pub struct TextValue {
+    /// Presumably true if the text was supplied by the user rather than
+    /// generated by the program (TODO confirm).  When this is true and `c`
+    /// and `id` are both `None`, the value serializes as a plain string.
+    pub user_provided: bool,
+    /// Localized text.  This is the form that is displayed.
+    pub localized: String,
+    /// English text.  `TextValue::c` falls back to `localized` when this is
+    /// `None`.
+    pub c: Option<String>,
+    /// Identifier.  `TextValue::id` falls back to `localized` when this is
+    /// `None`.
+    pub id: Option<String>,
+}
+
+/// Serializes as a plain string when the text is user-provided and has no
+/// separate English or identifier form; otherwise, as a struct that lists the
+/// members that are present.
+impl Serialize for TextValue {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        if self.user_provided && self.c.is_none() && self.id.is_none() {
+            return serializer.serialize_str(&self.localized);
+        }
+
+        // Two members are always written; `c` and `id` only when present.
+        let n_fields = 2 + usize::from(self.c.is_some()) + usize::from(self.id.is_some());
+        let mut fields = serializer.serialize_struct("TextValue", n_fields)?;
+        fields.serialize_field("user_provided", &self.user_provided)?;
+        fields.serialize_field("localized", &self.localized)?;
+        if let Some(c) = &self.c {
+            fields.serialize_field("c", c)?;
+        }
+        if let Some(id) = &self.id {
+            fields.serialize_field("id", id)?;
+        }
+        fields.end()
+    }
+}
+
+impl TextValue {
+    /// Returns the localized text.
+    pub fn localized(&self) -> &str {
+        &self.localized
+    }
+    /// Returns the English text, or the localized text if there is no
+    /// separate English text.
+    pub fn c(&self) -> &str {
+        self.c.as_deref().unwrap_or(&self.localized)
+    }
+    /// Returns the identifier, or the localized text if there is no separate
+    /// identifier.
+    pub fn id(&self) -> &str {
+        self.id.as_deref().unwrap_or(&self.localized)
+    }
+}
+
+/// A template string together with the arguments to substitute into it.
+#[derive(Clone, Debug, Serialize)]
+pub struct TemplateValue {
+    /// The arguments.  Each argument is a list of values; templates refer to
+    /// arguments by 1-based index.
+    pub args: Vec<Vec<Value>>,
+    /// The template string that is expanded for display.
+    pub localized: String,
+    /// Identifier (presumably a non-localized form of the template; TODO
+    /// confirm).
+    pub id: String,
+}
+
+/// The content of a [Value], without its styling.
+#[derive(Clone, Debug, Default, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ValueInner {
+    /// A number.
+    Number(NumberValue),
+    /// A string.
+    String(StringValue),
+    /// A reference to a variable.
+    Variable(VariableValue),
+    /// A text string.
+    Text(TextValue),
+    /// A template with arguments to substitute.
+    Template(TemplateValue),
+
+    /// No content.  This is the default.
+    #[default]
+    Empty,
+}
+
+impl ValueInner {
+    /// Returns true if this is [ValueInner::Empty].
+    pub const fn is_empty(&self) -> bool {
+        matches!(self, Self::Empty)
+    }
+
+    /// Returns the per-value show setting.  Only numbers, strings, and
+    /// variables have one.
+    fn show(&self) -> Option<Show> {
+        match self {
+            Self::Number(number) => number.show,
+            Self::String(string) => string.show,
+            Self::Variable(variable) => variable.show,
+            _ => None,
+        }
+    }
+
+    /// Returns the value label if there is one, otherwise the variable label
+    /// if there is one.
+    fn label(&self) -> Option<&str> {
+        match self.value_label() {
+            Some(label) => Some(label),
+            None => self.variable_label(),
+        }
+    }
+
+    /// Returns the value label.  Only numbers and strings can have one.
+    fn value_label(&self) -> Option<&str> {
+        match self {
+            Self::Number(number) => number.value_label.as_deref(),
+            Self::String(string) => string.value_label.as_deref(),
+            _ => None,
+        }
+    }
+
+    /// Returns the variable label.  Only variables can have one.
+    fn variable_label(&self) -> Option<&str> {
+        match self {
+            Self::Variable(variable) => variable.variable_label.as_deref(),
+            _ => None,
+        }
+    }
+}
+
+/// Styling attached to a [Value]: an optional area style plus the value's
+/// subscripts and footnotes.
+#[derive(Clone, Debug, Default)]
+pub struct ValueStyle {
+    /// Cell and font style, if any is set for this value.
+    pub style: Option<AreaStyle>,
+    /// Subscripts, displayed after the value's body.
+    pub subscripts: Vec<String>,
+    /// Footnotes that the value refers to.
+    pub footnotes: Vec<Arc<Footnote>>,
+}
+
+impl ValueStyle {
+    /// Returns true if there is no style, no subscripts, and no footnotes.
+    pub fn is_empty(&self) -> bool {
+        let Self {
+            style,
+            subscripts,
+            footnotes,
+        } = self;
+        style.is_none() && subscripts.is_empty() && footnotes.is_empty()
+    }
+}
+
+impl ValueInner {
+    /// Returns an object that will format this value, without any subscripts
+    /// or footnotes.  Settings on `options` control whether variable and
+    /// value labels are included.
+    pub fn display(&self, options: impl IntoValueOptions) -> DisplayValue<'_> {
+        let options = options.into_value_options();
+        // A value label takes precedence over a variable label.  Without
+        // either kind of label, only the value itself can be shown.
+        let (show_value, show_label) = match (self.value_label(), self.variable_label()) {
+            (Some(value_label), _) => interpret_show(
+                || Settings::global().show_values,
+                options.show_values,
+                self.show(),
+                value_label,
+            ),
+            (None, Some(variable_label)) => interpret_show(
+                || Settings::global().show_variables,
+                options.show_variables,
+                self.show(),
+                variable_label,
+            ),
+            (None, None) => (true, None),
+        };
+        DisplayValue {
+            inner: self,
+            markup: false,
+            subscripts: &[],
+            footnotes: &[],
+            options,
+            show_value,
+            show_label,
+        }
+    }
+}
+
+/// A named node in a tree of metadata.
+pub struct MetadataEntry {
+    /// The node's name.
+    pub name: Value,
+    /// The node's content: a single value or a group of child entries.
+    pub value: MetadataValue,
+}
+
+/// The content of a [MetadataEntry].
+pub enum MetadataValue {
+    /// A single value.
+    Leaf(Value),
+    /// A group of child entries.
+    Group(Vec<MetadataEntry>),
+}
+
+impl MetadataEntry {
+    /// Converts this metadata tree into a pivot table with a single row
+    /// dimension.  Each leaf becomes a category whose cell holds the leaf's
+    /// value; empty values get no cell.
+    ///
+    /// If this entry is itself a leaf, it is wrapped in a group named
+    /// "Metadata" with its label shown.
+    pub fn into_pivot_table(self) -> PivotTable {
+        let mut cells = Vec::new();
+        let root = match self.visit(&mut cells) {
+            Category::Group(group) => group,
+            Category::Leaf(leaf) => Group::new("Metadata").with(leaf).with_label_shown(),
+        };
+        // `cells` holds one value per leaf, in the order the leaves were
+        // visited, so a value's position is its row index.
+        let rows = cells
+            .into_iter()
+            .enumerate()
+            .filter(|(_row, value)| !value.is_empty())
+            .map(|(row, value)| ([row], value));
+        PivotTable::new([(Axis3::Y, Dimension::new(root))]).with_data(rows)
+    }
+
+    /// Converts this entry into a category, appending the value of each leaf
+    /// beneath it (or of the entry itself, if it is a leaf) to `data`.
+    fn visit(self, data: &mut Vec<Value>) -> Category {
+        let Self { name, value } = self;
+        match value {
+            MetadataValue::Leaf(value) => {
+                data.push(value);
+                Leaf::new(name).into()
+            }
+            MetadataValue::Group(items) => {
+                let group = Group::with_capacity(name, items.len());
+                let children = items.into_iter().map(|item| item.visit(data));
+                group.with_multiple(children).into()
+            }
+        }
+    }
+}
+
+/// Serializes a leaf as its bare value and a group as a map from each child's
+/// displayed name to the child's value.
+impl Serialize for MetadataValue {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        let items = match self {
+            MetadataValue::Leaf(value) => return value.serialize_bare(serializer),
+            MetadataValue::Group(items) => items,
+        };
+        let mut map = serializer.serialize_map(Some(items.len()))?;
+        items.iter().try_for_each(|item| {
+            let name = item.name.display(()).to_string();
+            map.serialize_entry(&name, &item.value)
+        })?;
+        map.end()
+    }
+}
+/// Serializes a leaf entry as a one-element map from its displayed name to
+/// its bare value.  A group entry is serialized as a map of its children; the
+/// group's own name is not written.
+impl Serialize for MetadataEntry {
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        match &self.value {
+            MetadataValue::Leaf(value) => {
+                let name = self.name.display(()).to_string();
+                let mut map = serializer.serialize_map(Some(1))?;
+                map.serialize_entry(&name, &BareValue(value))?;
+                map.end()
+            }
+            // `MetadataValue` serializes a group as exactly the map of
+            // children wanted here.
+            group @ MetadataValue::Group(_) => group.serialize(serializer),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::output::pivot::{Display26Adic, MetadataEntry, MetadataValue, Value};
+
+    /// Checks the lowercase and uppercase letter sequences produced by
+    /// `Display26Adic`, including the empty string for 0 and the points where
+    /// the output grows by a letter.
+    #[test]
+    fn display_26adic() {
+        for (number, lowercase, uppercase) in [
+            (0, "", ""),
+            (1, "a", "A"),
+            (2, "b", "B"),
+            (26, "z", "Z"),
+            (27, "aa", "AA"),
+            (28, "ab", "AB"),
+            (29, "ac", "AC"),
+            (18278, "zzz", "ZZZ"),
+            (18279, "aaaa", "AAAA"),
+            (19010, "abcd", "ABCD"),
+        ] {
+            assert_eq!(Display26Adic::new_lowercase(number).to_string(), lowercase);
+            assert_eq!(Display26Adic::new_uppercase(number).to_string(), uppercase);
+        }
+    }
+
+    /// Checks both renderings of a metadata tree with a nested group: its
+    /// JSON serialization and its conversion to a pivot table.
+    #[test]
+    fn metadata_entry() {
+        let tree = MetadataEntry {
+            name: Value::from("Group"),
+            value: MetadataValue::Group(vec![
+                MetadataEntry {
+                    name: Value::from("Name 1"),
+                    value: MetadataValue::Leaf(Value::from("Value 1")),
+                },
+                MetadataEntry {
+                    name: Value::from("Subgroup 1"),
+                    value: MetadataValue::Group(vec![
+                        MetadataEntry {
+                            name: Value::from("Subname 1"),
+                            value: MetadataValue::Leaf(Value::from("Subvalue 1")),
+                        },
+                        MetadataEntry {
+                            name: Value::from("Subname 2"),
+                            value: MetadataValue::Leaf(Value::from("Subvalue 2")),
+                        },
+                        MetadataEntry {
+                            name: Value::from("Subname 3"),
+                            value: MetadataValue::Leaf(Value::new_integer(Some(3.0))),
+                        },
+                    ]),
+                },
+                MetadataEntry {
+                    name: Value::from("Name 2"),
+                    value: MetadataValue::Leaf(Value::from("Value 2")),
+                },
+            ]),
+        };
+        // The name of the top-level group does not appear in the JSON, and
+        // the integer leaf is written as a bare number.
+        assert_eq!(
+            serde_json::to_string_pretty(&tree).unwrap(),
+            r#"{
+  "Name 1": "Value 1",
+  "Subgroup 1": {
+    "Subname 1": "Subvalue 1",
+    "Subname 2": "Subvalue 2",
+    "Subname 3": 3
+  },
+  "Name 2": "Value 2"
+}"#
+        );
+
+        // In the table, each leaf is a row and the subgroup's name labels
+        // its rows.
+        assert_eq!(
+            tree.into_pivot_table().to_string(),
+            r#"╭────────────────────┬──────────╮
+│           Name 1   │Value 1   │
+├────────────────────┼──────────┤
+│Subgroup 1 Subname 1│Subvalue 1│
+│           Subname 2│Subvalue 2│
+│           Subname 3│         3│
+├────────────────────┼──────────┤
+│           Name 2   │Value 2   │
+╰────────────────────┴──────────╯
+"#
+        );
+    }
+}
diff --git a/rust/pspp/src/output/pivot/mod.rs b/rust/pspp/src/output/pivot/mod.rs

deleted file mode 100644 (file)

index 92133e2..0000000
--- a/rust/pspp/src/output/pivot/mod.rs
+++ /dev/null
@@ -1,2859 +0,0 @@
-// PSPP - a program for statistical analysis.
-// Copyright (C) 2025 Free Software Foundation, Inc.
-//
-// This program is free software: you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free Software
-// Foundation, either version 3 of the License, or (at your option) any later
-// version.
-//
-// This program is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
-// details.
-//
-// You should have received a copy of the GNU General Public License along with
-// this program.  If not, see <http://www.gnu.org/licenses/>.
-
-//! Pivot tables.
-//!
-//! Pivot tables are PSPP's primary form of output.  They are analogous to the
-//! pivot tables you might be familiar with from spreadsheets and databases.
-//! See <https://en.wikipedia.org/wiki/Pivot_table> for a brief introduction to
-//! the overall concept of a pivot table.
-//!
-//! In PSPP, the most important internal pieces of a pivot table are:
-//!
-//! - Title.  Every pivot table has a title that is displayed above it.  It also
-//!   has an optional caption (displayed below it) and corner text (displayed in
-//!   the upper left corner).
-//!
-//! - Dimensions.  A dimension consists of zero or more categories.  A category
-//!   has a label, such as "df" or "Asymp. Sig." or 123 or a variable name.  The
-//!   categories are the leaves of a tree whose non-leaf nodes form groups of
-//!   categories.  The tree always has a root group whose label is the name of
-//!   the dimension.
-//!
-//! - Axes.  A table has three axes: column, row, and layer.  Each dimension is
-//!   assigned to an axis, and each axis has zero or more dimensions.  When an
-//!   axis has more than one dimension, they are ordered from innermost to
-//!   outermost.
-//!
-//! - Data.  A table's data consists of zero or more cells.  Each cell maps from
-//!   a category for each dimension to a value, which is commonly a number but
-//!   could also be a variable name or an arbitrary text string.
-
-use std::{
-    collections::HashMap,
-    fmt::{Debug, Display, Write},
-    io::Read,
-    iter::{once, repeat, repeat_n, FusedIterator},
-    ops::{Index, IndexMut, Not, Range, RangeInclusive},
-    str::{from_utf8, FromStr, Utf8Error},
-    sync::{Arc, OnceLock},
-};
-
-use binrw::Error as BinError;
-use chrono::NaiveDateTime;
-pub use color::ParseError as ParseColorError;
-use color::{palette::css::TRANSPARENT, AlphaColor, Rgba8, Srgb};
-use enum_iterator::Sequence;
-use enum_map::{enum_map, Enum, EnumMap};
-use look_xml::TableProperties;
-use quick_xml::{de::from_str, DeError};
-use serde::{
-    de::Visitor,
-    ser::{SerializeMap, SerializeStruct},
-    Deserialize, Serialize, Serializer,
-};
-use smallstr::SmallString;
-use smallvec::SmallVec;
-use thiserror::Error as ThisError;
-use tlo::parse_tlo;
-
-use crate::{
-    data::{ByteString, Datum, EncodedString, RawString},
-    format::{Decimal, Format, Settings as FormatSettings, Type, UncheckedFormat},
-    settings::{Settings, Show},
-    util::ToSmallString,
-    variable::{VarType, Variable},
-};
-
-pub mod output;
-
-mod look_xml;
-#[cfg(test)]
-pub mod test;
-mod tlo;
-
-/// Areas of a pivot table for styling purposes.
-#[derive(Copy, Clone, Debug, Default, Enum, PartialEq, Eq)]
-pub enum Area {
-    Title,
-    Caption,
-
-    /// Footnotes,
-    Footer,
-
-    // Top-left corner.
-    Corner,
-
-    /// Labels for columns ([Axis2::X]) and rows ([Axis2::Y]).
-    Labels(Axis2),
-
-    #[default]
-    Data,
-
-    /// Layer indication.
-    Layers,
-}
-
-impl Display for Area {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Area::Title => write!(f, "title"),
-            Area::Caption => write!(f, "caption"),
-            Area::Footer => write!(f, "footer"),
-            Area::Corner => write!(f, "corner"),
-            Area::Labels(axis2) => write!(f, "labels({axis2})"),
-            Area::Data => write!(f, "data"),
-            Area::Layers => write!(f, "layers"),
-        }
-    }
-}
-
-impl Serialize for Area {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        serializer.serialize_str(&self.to_small_string::<16>())
-    }
-}
-
-impl Area {
-    fn default_cell_style(self) -> CellStyle {
-        use HorzAlign::*;
-        use VertAlign::*;
-        let (horz_align, vert_align, hmargins, vmargins) = match self {
-            Area::Title => (Some(Center), Middle, [8, 11], [1, 8]),
-            Area::Caption => (Some(Left), Top, [8, 11], [1, 1]),
-            Area::Footer => (Some(Left), Top, [11, 8], [2, 3]),
-            Area::Corner => (Some(Left), Bottom, [8, 11], [1, 1]),
-            Area::Labels(Axis2::X) => (Some(Center), Top, [8, 11], [1, 3]),
-            Area::Labels(Axis2::Y) => (Some(Left), Top, [8, 11], [1, 3]),
-            Area::Data => (None, Top, [8, 11], [1, 1]),
-            Area::Layers => (Some(Left), Bottom, [8, 11], [1, 3]),
-        };
-        CellStyle {
-            horz_align,
-            vert_align,
-            margins: enum_map! { Axis2::X => hmargins, Axis2::Y => vmargins },
-        }
-    }
-
-    fn default_font_style(self) -> FontStyle {
-        FontStyle {
-            bold: self == Area::Title,
-            italic: false,
-            underline: false,
-            markup: false,
-            font: String::from("Sans Serif"),
-            fg: [Color::BLACK; 2],
-            bg: [Color::WHITE; 2],
-            size: 9,
-        }
-    }
-
-    fn default_area_style(self) -> AreaStyle {
-        AreaStyle {
-            cell_style: self.default_cell_style(),
-            font_style: self.default_font_style(),
-        }
-    }
-}
-
-/// Table borders for styling purposes.
-#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq)]
-pub enum Border {
-    Title,
-    OuterFrame(BoxBorder),
-    InnerFrame(BoxBorder),
-    Dimension(RowColBorder),
-    Category(RowColBorder),
-    DataLeft,
-    DataTop,
-}
-
-impl Border {
-    pub fn default_stroke(self) -> Stroke {
-        match self {
-            Self::InnerFrame(_) | Self::DataLeft | Self::DataTop => Stroke::Thick,
-            Self::Dimension(
-                RowColBorder(HeadingRegion::Columns, _) | RowColBorder(_, Axis2::X),
-            )
-            | Self::Category(RowColBorder(HeadingRegion::Columns, _)) => Stroke::Solid,
-            _ => Stroke::None,
-        }
-    }
-    pub fn default_border_style(self) -> BorderStyle {
-        BorderStyle {
-            stroke: self.default_stroke(),
-            color: Color::BLACK,
-        }
-    }
-
-    fn fallback(self) -> Self {
-        match self {
-            Self::Title
-            | Self::OuterFrame(_)
-            | Self::InnerFrame(_)
-            | Self::DataLeft
-            | Self::DataTop
-            | Self::Category(_) => self,
-            Self::Dimension(row_col_border) => Self::Category(row_col_border),
-        }
-    }
-}
-
-impl Display for Border {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Border::Title => write!(f, "title"),
-            Border::OuterFrame(box_border) => write!(f, "outer_frame({box_border})"),
-            Border::InnerFrame(box_border) => write!(f, "inner_frame({box_border})"),
-            Border::Dimension(row_col_border) => write!(f, "dimension({row_col_border})"),
-            Border::Category(row_col_border) => write!(f, "category({row_col_border})"),
-            Border::DataLeft => write!(f, "data(left)"),
-            Border::DataTop => write!(f, "data(top)"),
-        }
-    }
-}
-
-impl Serialize for Border {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        serializer.serialize_str(&self.to_small_string::<32>())
-    }
-}
-
-/// The borders on a box.
-#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq, Serialize)]
-#[serde(rename_all = "snake_case")]
-pub enum BoxBorder {
-    Left,
-    Top,
-    Right,
-    Bottom,
-}
-
-impl BoxBorder {
-    fn as_str(&self) -> &'static str {
-        match self {
-            BoxBorder::Left => "left",
-            BoxBorder::Top => "top",
-            BoxBorder::Right => "right",
-            BoxBorder::Bottom => "bottom",
-        }
-    }
-}
-
-impl Display for BoxBorder {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.write_str(self.as_str())
-    }
-}
-
-/// Borders between rows and columns.
-#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq, Serialize)]
-#[serde(rename_all = "snake_case")]
-pub struct RowColBorder(
-    /// Row or column headings.
-    pub HeadingRegion,
-    /// Horizontal ([Axis2::X]) or vertical ([Axis2::Y]) borders.
-    pub Axis2,
-);
-
-impl Display for RowColBorder {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}:{}", self.0, self.1)
-    }
-}
-
-/// Sizing for rows or columns of a rendered table.
-///
-/// The comments below talk about columns and their widths but they apply
-/// equally to rows and their heights.
-#[derive(Default, Clone, Debug, Serialize)]
-pub struct Sizing {
-    /// Specific column widths, in 1/96" units.
-    widths: Vec<i32>,
-
-    /// Specific page breaks: 0-based columns after which a page break must
-    /// occur, e.g. a value of 1 requests a break after the second column.
-    breaks: Vec<usize>,
-
-    /// Keeps: columns to keep together on a page if possible.
-    keeps: Vec<Range<usize>>,
-}
-
-#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq, Sequence, Serialize)]
-#[serde(rename_all = "snake_case")]
-pub enum Axis3 {
-    X,
-    Y,
-    Z,
-}
-
-impl Axis3 {
-    fn transpose(&self) -> Option<Self> {
-        match self {
-            Axis3::X => Some(Axis3::Y),
-            Axis3::Y => Some(Axis3::X),
-            Axis3::Z => None,
-        }
-    }
-}
-
-impl From<Axis2> for Axis3 {
-    fn from(axis2: Axis2) -> Self {
-        match axis2 {
-            Axis2::X => Self::X,
-            Axis2::Y => Self::Y,
-        }
-    }
-}
-
-/// An axis within a pivot table.
-///
-/// An axis is described by the dimensions assigned to it.  The derived
-/// [Default] is an axis with no dimensions.
-#[derive(Clone, Debug, Default, Serialize)]
-pub struct Axis {
-    /// `dimensions[0]` is the innermost dimension.
-    ///
-    /// NOTE(review): each element is presumably an index into
-    /// `PivotTable::dimensions`; confirm against the code that fills it in.
-    pub dimensions: Vec<usize>,
-}
-
-/// Iterator that yields every combination of indexes along an axis.
-///
-/// Each item is a vector with one index per position.  Position 0 varies
-/// fastest: it is incremented first and wraps back to zero when it reaches
-/// its length, at which point the next position is incremented.
-pub struct AxisIterator {
-    /// The combination that the next call to `next` will yield.
-    indexes: SmallVec<[usize; 4]>,
-    /// Exclusive upper bound for the index at each position.
-    lengths: SmallVec<[usize; 4]>,
-    /// Set once every combination has been yielded (or if there are none).
-    done: bool,
-}
-
-impl FusedIterator for AxisIterator {}
-impl Iterator for AxisIterator {
-    type Item = SmallVec<[usize; 4]>;
-
-    fn next(&mut self) -> Option<Self::Item> {
-        if self.done {
-            None
-        } else {
-            let retval = self.indexes.clone();
-            for (index, len) in self.indexes.iter_mut().zip(self.lengths.iter().copied()) {
-                *index += 1;
-                if *index < len {
-                    return Some(retval);
-                };
-                *index = 0;
-            }
-            self.done = true;
-            Some(retval)
-        }
-    }
-}
-
-impl PivotTable {
-    pub fn with_look(mut self, look: Arc<Look>) -> Self {
-        self.look = look;
-        self
-    }
-    pub fn insert_number(&mut self, data_indexes: &[usize], number: Option<f64>, class: Class) {
-        let format = match class {
-            Class::Other => Settings::global().default_format,
-            Class::Integer => Format::F40,
-            Class::Correlations => Format::F40_3,
-            Class::Significance => Format::F40_3,
-            Class::Percent => Format::PCT40_1,
-            Class::Residual => Format::F40_2,
-            Class::Count => Format::F40, // XXX
-        };
-        let value = Value::new(ValueInner::Number(NumberValue {
-            show: None,
-            format,
-            honor_small: class == Class::Other,
-            value: number,
-            variable: None,
-            value_label: None,
-        }));
-        self.insert(data_indexes, value);
-    }
-
-    pub fn with_footnotes(mut self, footnotes: Footnotes) -> Self {
-        debug_assert!(self.footnotes.is_empty());
-        self.footnotes = footnotes;
-        self
-    }
-    fn axis_values(&self, axis: Axis3) -> AxisIterator {
-        AxisIterator {
-            indexes: repeat_n(0, self.axes[axis].dimensions.len()).collect(),
-            lengths: self.axis_dimensions(axis).map(|d| d.len()).collect(),
-            done: self.axis_extent(axis) == 0,
-        }
-    }
-
-    fn axis_extent(&self, axis: Axis3) -> usize {
-        self.axis_dimensions(axis).map(|d| d.len()).product()
-    }
-}
-
-/// Dimensions.
-///
-/// A [Dimension] identifies the categories associated with a single dimension
-/// within a multidimensional pivot table.
-///
-/// A dimension contains a collection of categories, which are the leaves in a
-/// tree of groups.
-///
-/// (A dimension or a group can contain zero categories, but this is unusual.
-/// If a dimension contains no categories, then its table cannot contain any
-/// data.)
-#[derive(Clone, Debug, Serialize)]
-pub struct Dimension {
-    /// Hierarchy of categories within the dimension.  The groups and categories
-    /// are sorted in the order that should be used for display.  This might be
-    /// different from the original order produced for output if the user
-    /// adjusted it.
-    ///
-    /// The root must always be a group, although it is allowed to have no
-    /// subcategories.
-    pub root: Group,
-
-    /// Ordering of leaves for presentation.
-    ///
-    /// This is a permutation of `0..n` where `n` is the number of leaves.  It
-    /// maps from an index in presentation order to an index in data order.
-    pub presentation_order: Vec<usize>,
-
-    /// Display setting.  [Dimension::new] initializes it to `false`.
-    ///
-    /// NOTE(review): presumably hides every label in this dimension when set;
-    /// the code that consumes it is not visible here, so confirm.
-    pub hide_all_labels: bool,
-}
-
-/// A list of borrowed groups, as accumulated by `leaf_path`.
-pub type GroupVec<'a> = SmallVec<[&'a Group; 4]>;
-/// A leaf together with the groups that enclose it.
-///
-/// `leaf_path` pushes each group before descending into its child, so
-/// `groups` is ordered from the outermost group to the innermost one.
-pub struct Path<'a> {
-    groups: GroupVec<'a>,
-    leaf: &'a Leaf,
-}
-
-impl Dimension {
-    pub fn new(root: Group) -> Self {
-        Dimension {
-            presentation_order: (0..root.len()).collect(),
-            root,
-            hide_all_labels: false,
-        }
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.len() == 0
-    }
-
-    /// Returns the number of (leaf) categories in this dimension.
-    pub fn len(&self) -> usize {
-        self.root.len()
-    }
-
-    pub fn nth_leaf(&self, index: usize) -> Option<&Leaf> {
-        self.root.nth_leaf(index)
-    }
-
-    pub fn leaf_path(&self, index: usize) -> Option<Path<'_>> {
-        self.root.leaf_path(index, SmallVec::new())
-    }
-
-    pub fn with_all_labels_hidden(self) -> Self {
-        Self {
-            hide_all_labels: true,
-            ..self
-        }
-    }
-}
-
-/// A named group of categories within a [Dimension].
-#[derive(Clone, Debug, Serialize)]
-pub struct Group {
-    /// Number of leaves under this group, counting nested groups.
-    ///
-    /// [Group::push] keeps this up to date.  `children` is public, so code
-    /// that modifies it directly bypasses that bookkeeping.  Not serialized.
-    #[serde(skip)]
-    len: usize,
-    /// The group's name.
-    pub name: Box<Value>,
-
-    /// The child categories.
-    ///
-    /// A group usually has multiple children, but it is allowed to have
-    /// only one or even (pathologically) none.
-    pub children: Vec<Category>,
-
-    /// Whether to show the group's label.
-    pub show_label: bool,
-}
-
-impl Group {
-    pub fn new(name: impl Into<Value>) -> Self {
-        Self::with_capacity(name, 0)
-    }
-
-    pub fn with_capacity(name: impl Into<Value>, capacity: usize) -> Self {
-        Self {
-            len: 0,
-            name: Box::new(name.into()),
-            children: Vec::with_capacity(capacity),
-            show_label: false,
-        }
-    }
-
-    pub fn push(&mut self, child: impl Into<Category>) {
-        let mut child = child.into();
-        if let Category::Group(group) = &mut child {
-            group.show_label = true;
-        }
-        self.len += child.len();
-        self.children.push(child);
-    }
-
-    pub fn with(mut self, child: impl Into<Category>) -> Self {
-        self.push(child);
-        self
-    }
-
-    pub fn with_multiple<C>(mut self, children: impl IntoIterator<Item = C>) -> Self
-    where
-        C: Into<Category>,
-    {
-        self.extend(children);
-        self
-    }
-
-    pub fn with_label_shown(self) -> Self {
-        self.with_show_label(true)
-    }
-
-    pub fn with_show_label(mut self, show_label: bool) -> Self {
-        self.show_label = show_label;
-        self
-    }
-
-    pub fn nth_leaf(&self, mut index: usize) -> Option<&Leaf> {
-        for child in &self.children {
-            let len = child.len();
-            if index < len {
-                return child.nth_leaf(index);
-            }
-            index -= len;
-        }
-        None
-    }
-
-    pub fn leaf_path<'a>(&'a self, mut index: usize, mut groups: GroupVec<'a>) -> Option<Path<'a>> {
-        for child in &self.children {
-            let len = child.len();
-            if index < len {
-                groups.push(self);
-                return child.leaf_path(index, groups);
-            }
-            index -= len;
-        }
-        None
-    }
-
-    pub fn len(&self) -> usize {
-        self.len
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.len() == 0
-    }
-
-    pub fn name(&self) -> &Value {
-        &self.name
-    }
-}
-
-impl<C> Extend<C> for Group
-where
-    C: Into<Category>,
-{
-    fn extend<T: IntoIterator<Item = C>>(&mut self, children: T) {
-        let children = children.into_iter();
-        self.children.reserve(children.size_hint().0);
-        for child in children {
-            self.push(child);
-        }
-    }
-}
-
-/// A list of shared footnotes.
-///
-/// [Footnotes::push] assigns each footnote its position in the list as its
-/// index.
-#[derive(Clone, Debug, Default, Serialize)]
-pub struct Footnotes(pub Vec<Arc<Footnote>>);
-
-impl Footnotes {
-    pub fn new() -> Self {
-        Self::default()
-    }
-
-    pub fn push(&mut self, footnote: Footnote) -> Arc<Footnote> {
-        let footnote = Arc::new(footnote.with_index(self.0.len()));
-        self.0.push(footnote.clone());
-        footnote
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.0.is_empty()
-    }
-}
-
-/// A leaf category: a category that is not a group.
-///
-/// A leaf serializes as just its name (see its `Serialize` implementation).
-#[derive(Clone, Debug)]
-pub struct Leaf {
-    /// The leaf's name.
-    name: Box<Value>,
-}
-
-impl Leaf {
-    pub fn new(name: Value) -> Self {
-        Self {
-            name: Box::new(name),
-        }
-    }
-    pub fn name(&self) -> &Value {
-        &self.name
-    }
-}
-
-impl Serialize for Leaf {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        self.name.serialize(serializer)
-    }
-}
-
-/// Pivot result classes.
-///
-/// These are used to mark [Leaf] categories as having particular types of data,
-/// to set their numeric formats.
-///
-/// The formats named below are the ones that `PivotTable::insert_number`
-/// assigns for each class.
-#[derive(Clone, Debug, PartialEq, Eq)]
-pub enum Class {
-    /// Uses the default format from the global settings.
-    Other,
-    /// Uses `Format::F40`.
-    Integer,
-    /// Uses `Format::F40_3`.
-    Correlations,
-    /// Uses `Format::F40_3`.
-    Significance,
-    /// Uses `Format::PCT40_1`.
-    Percent,
-    /// Uses `Format::F40_2`.
-    Residual,
-    /// Uses `Format::F40` (marked `XXX` in `insert_number`).
-    Count,
-}
-
-/// A category within a dimension: either a [Leaf] or a [Group] of further
-/// categories.
-#[derive(Clone, Debug, Serialize)]
-pub enum Category {
-    /// A group, which can contain leaves and other groups.
-    Group(Group),
-    /// A single leaf.
-    Leaf(Leaf),
-}
-
-impl Category {
-    pub fn name(&self) -> &Value {
-        match self {
-            Category::Group(group) => &group.name,
-            Category::Leaf(leaf) => &leaf.name,
-        }
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.len() == 0
-    }
-
-    pub fn len(&self) -> usize {
-        match self {
-            Category::Group(group) => group.len,
-            Category::Leaf(_) => 1,
-        }
-    }
-
-    pub fn nth_leaf(&self, index: usize) -> Option<&Leaf> {
-        match self {
-            Category::Group(group) => group.nth_leaf(index),
-            Category::Leaf(leaf) => {
-                if index == 0 {
-                    Some(leaf)
-                } else {
-                    None
-                }
-            }
-        }
-    }
-
-    pub fn leaf_path<'a>(&'a self, index: usize, groups: GroupVec<'a>) -> Option<Path<'a>> {
-        match self {
-            Category::Group(group) => group.leaf_path(index, groups),
-            Category::Leaf(leaf) => {
-                if index == 0 {
-                    Some(Path { groups, leaf })
-                } else {
-                    None
-                }
-            }
-        }
-    }
-
-    pub fn show_label(&self) -> bool {
-        match self {
-            Category::Group(group) => group.show_label,
-            Category::Leaf(_) => true,
-        }
-    }
-}
-
-impl From<Group> for Category {
-    fn from(group: Group) -> Self {
-        Self::Group(group)
-    }
-}
-
-impl From<Leaf> for Category {
-    fn from(group: Leaf) -> Self {
-        Self::Leaf(group)
-    }
-}
-
-impl From<Value> for Category {
-    fn from(name: Value) -> Self {
-        Leaf::new(name).into()
-    }
-}
-
-impl From<&Variable> for Category {
-    fn from(variable: &Variable) -> Self {
-        Value::new_variable(variable).into()
-    }
-}
-
-impl From<&str> for Category {
-    fn from(name: &str) -> Self {
-        Self::Leaf(Leaf::new(Value::new_text(name)))
-    }
-}
-
-impl From<String> for Category {
-    fn from(name: String) -> Self {
-        Self::Leaf(Leaf::new(Value::new_text(name)))
-    }
-}
-
-impl From<&String> for Category {
-    fn from(name: &String) -> Self {
-        Self::Leaf(Leaf::new(Value::new_text(name)))
-    }
-}
-
-/// Styling for a pivot table.
-///
-/// The division between this and the style information in [PivotTable] seems
-/// fairly arbitrary.  The ultimate reason for the division is simply because
-/// that's how SPSS documentation and file formats do it.
-#[derive(Clone, Debug, Serialize)]
-pub struct Look {
-    /// Name of the look, if it has one.
-    pub name: Option<String>,
-
-    /// Whether to hide rows or columns whose cells are all empty.
-    pub hide_empty: bool,
-
-    /// Position of group labels for rows (see [LabelPosition]).
-    pub row_label_position: LabelPosition,
-
-    /// Ranges of column widths in the two heading regions, in 1/96" units.
-    pub heading_widths: EnumMap<HeadingRegion, RangeInclusive<usize>>,
-
-    /// Kind of markers to use for footnotes.
-    pub footnote_marker_type: FootnoteMarkerType,
-
-    /// Where to put the footnote markers.
-    pub footnote_marker_position: FootnoteMarkerPosition,
-
-    /// Styles for areas of the pivot table.
-    pub areas: EnumMap<Area, AreaStyle>,
-
-    /// Styles for borders in the pivot table.
-    pub borders: EnumMap<Border, BorderStyle>,
-
-    /// Set by `PivotTable::with_all_layers` and cleared by
-    /// `PivotTable::with_layer`.
-    ///
-    /// NOTE(review): presumably selects printing every layer instead of only
-    /// the current one; confirm against the output drivers.
-    pub print_all_layers: bool,
-
-    /// NOTE(review): presumably whether each layer goes on its own page;
-    /// confirm.
-    pub paginate_layers: bool,
-
-    /// Per-axis flag.
-    ///
-    /// NOTE(review): presumably whether to shrink the table to fit the page
-    /// along that axis; confirm.
-    pub shrink_to_fit: EnumMap<Axis2, bool>,
-
-    /// NOTE(review): presumably whether to show continuation text at the top
-    /// of a table continued from a previous page; confirm.
-    pub top_continuation: bool,
-
-    /// NOTE(review): presumably whether to show continuation text at the
-    /// bottom of a table that continues on the next page; confirm.
-    pub bottom_continuation: bool,
-
-    /// NOTE(review): presumably the continuation text itself; confirm.
-    pub continuation: Option<String>,
-
-    /// NOTE(review): presumably a minimum number of lines to keep together
-    /// when a table breaks across pages; confirm.
-    pub n_orphan_lines: usize,
-}
-
-impl Look {
-    pub fn with_omit_empty(mut self, omit_empty: bool) -> Self {
-        self.hide_empty = omit_empty;
-        self
-    }
-    pub fn with_row_label_position(mut self, row_label_position: LabelPosition) -> Self {
-        self.row_label_position = row_label_position;
-        self
-    }
-    pub fn with_borders(mut self, borders: EnumMap<Border, BorderStyle>) -> Self {
-        self.borders = borders;
-        self
-    }
-}
-
-impl Default for Look {
-    fn default() -> Self {
-        Self {
-            name: None,
-            hide_empty: true,
-            row_label_position: LabelPosition::default(),
-            heading_widths: EnumMap::from_fn(|region| match region {
-                HeadingRegion::Rows => 36..=72,
-                HeadingRegion::Columns => 36..=120,
-            }),
-            footnote_marker_type: FootnoteMarkerType::default(),
-            footnote_marker_position: FootnoteMarkerPosition::default(),
-            areas: EnumMap::from_fn(Area::default_area_style),
-            borders: EnumMap::from_fn(Border::default_border_style),
-            print_all_layers: false,
-            paginate_layers: false,
-            shrink_to_fit: EnumMap::from_fn(|_| false),
-            top_continuation: false,
-            bottom_continuation: false,
-            continuation: None,
-            n_orphan_lines: 0,
-        }
-    }
-}
-
-/// An error encountered while reading a [Look].
-///
-/// Every variant transparently wraps the underlying error.
-#[derive(ThisError, Debug)]
-pub enum ParseLookError {
-    /// XML deserialization failed (see [Look::from_xml]).
-    #[error(transparent)]
-    XmlError(#[from] DeError),
-
-    /// The data was not valid UTF-8 (see [Look::from_data]).
-    #[error(transparent)]
-    Utf8Error(#[from] Utf8Error),
-
-    /// NOTE(review): presumably a failure to parse the binary format (see
-    /// [Look::from_binary]); confirm against `parse_tlo`.
-    #[error(transparent)]
-    BinError(#[from] BinError),
-
-    /// Reading from the input failed (see [Look::from_reader]).
-    #[error(transparent)]
-    IoError(#[from] std::io::Error),
-}
-
-impl Look {
-    pub fn shared_default() -> Arc<Look> {
-        static LOOK: OnceLock<Arc<Look>> = OnceLock::new();
-        LOOK.get_or_init(|| Arc::new(Look::default())).clone()
-    }
-
-    pub fn from_xml(xml: &str) -> Result<Self, ParseLookError> {
-        Ok(from_str::<TableProperties>(xml)
-            .map_err(ParseLookError::from)?
-            .into())
-    }
-
-    pub fn from_binary(tlo: &[u8]) -> Result<Self, ParseLookError> {
-        parse_tlo(tlo).map_err(ParseLookError::from)
-    }
-
-    pub fn from_data(data: &[u8]) -> Result<Self, ParseLookError> {
-        if data.starts_with(b"\xff\xff\0\0") {
-            Self::from_binary(data)
-        } else {
-            Self::from_xml(from_utf8(data).map_err(ParseLookError::from)?)
-        }
-    }
-
-    pub fn from_reader<R>(mut reader: R) -> Result<Self, ParseLookError>
-    where
-        R: Read,
-    {
-        let mut buffer = Vec::new();
-        reader
-            .read_to_end(&mut buffer)
-            .map_err(ParseLookError::from)?;
-        Self::from_data(&buffer)
-    }
-}
-
-/// Position for group labels.
-///
-/// The default is [LabelPosition::Corner].
-#[derive(Copy, Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
-pub enum LabelPosition {
-    /// Hierarchically enclosing the categories.
-    ///
-    /// For column labels, group labels appear above the categories.  For row
-    /// labels, group labels appear to the left of the categories.
-    ///
-    /// ```text
-    /// ┌────┬──────────────┐   ┌─────────┬──────────┐
-    /// │    │    nested    │   │         │ columns  │
-    /// │    ├────┬────┬────┤   ├──────┬──┼──────────┤
-    /// │    │ a1 │ a2 │ a3 │   │      │a1│...data...│
-    /// ├────┼────┼────┼────┤   │nested│a2│...data...│
-    /// │    │data│data│data│   │      │a3│...data...│
-    /// │    │ .  │ .  │ .  │   └──────┴──┴──────────┘
-    /// │rows│ .  │ .  │ .  │
-    /// │    │ .  │ .  │ .  │
-    /// └────┴────┴────┴────┘
-    /// ```
-    #[serde(rename = "nested")]
-    Nested,
-
-    /// In the corner (row labels only).
-    ///
-    /// ```text
-    /// ┌──────┬──────────┐
-    /// │corner│ columns  │
-    /// ├──────┼──────────┤
-    /// │    a1│...data...│
-    /// │    a2│...data...│
-    /// │    a3│...data...│
-    /// └──────┴──────────┘
-    /// ```
-    #[default]
-    #[serde(rename = "inCorner")]
-    Corner,
-}
-
-/// The heading region of a rendered pivot table:
-///
-/// ```text
-/// ┌──────────────────┬─────────────────────────────────────────────────┐
-/// │                  │                  column headings                │
-/// │                  ├─────────────────────────────────────────────────┤
-/// │      corner      │                                                 │
-/// │       and        │                                                 │
-/// │   row headings   │                      data                       │
-/// │                  │                                                 │
-/// │                  │                                                 │
-/// └──────────────────┴─────────────────────────────────────────────────┘
-/// ```
-#[derive(Copy, Clone, Debug, PartialEq, Eq, Enum, Serialize)]
-#[serde(rename_all = "snake_case")]
-pub enum HeadingRegion {
-    /// The row headings, displayed as `rows`.
-    Rows,
-    /// The column headings, displayed as `columns`.
-    Columns,
-}
-
-impl HeadingRegion {
-    pub fn as_str(&self) -> &'static str {
-        match self {
-            HeadingRegion::Rows => "rows",
-            HeadingRegion::Columns => "columns",
-        }
-    }
-}
-
-impl Display for HeadingRegion {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}", self.as_str())
-    }
-}
-
-impl From<Axis2> for HeadingRegion {
-    fn from(axis: Axis2) -> Self {
-        match axis {
-            Axis2::X => HeadingRegion::Columns,
-            Axis2::Y => HeadingRegion::Rows,
-        }
-    }
-}
-
-/// The style for one area of a pivot table: a cell style plus a font style.
-#[derive(Clone, Debug, Serialize)]
-pub struct AreaStyle {
-    /// Alignment and margins.
-    pub cell_style: CellStyle,
-    /// Font, colors, and text attributes.
-    pub font_style: FontStyle,
-}
-
-/// Alignment and margins for a cell.
-#[derive(Clone, Debug, Serialize)]
-pub struct CellStyle {
-    /// `None` means "mixed" alignment: align strings to the left, numbers to
-    /// the right.
-    pub horz_align: Option<HorzAlign>,
-    /// Vertical alignment.
-    pub vert_align: VertAlign,
-
-    /// Margins in 1/96" units.
-    ///
-    /// `margins[Axis2::X][0]` is the left margin.
-    /// `margins[Axis2::X][1]` is the right margin.
-    /// `margins[Axis2::Y][0]` is the top margin.
-    /// `margins[Axis2::Y][1]` is the bottom margin.
-    pub margins: EnumMap<Axis2, [i32; 2]>,
-}
-
-/// Horizontal alignment of a cell's content.
-#[derive(Copy, Clone, Debug, PartialEq, Deserialize, Serialize)]
-#[serde(rename_all = "snake_case")]
-pub enum HorzAlign {
-    /// Right aligned.
-    Right,
-
-    /// Left aligned.
-    Left,
-
-    /// Centered.
-    Center,
-
-    /// Align the decimal point at the specified position.
-    Decimal {
-        /// Decimal offset from the right side of the cell, in 1/96" units.
-        offset: f64,
-
-        /// Decimal character.
-        decimal: Decimal,
-    },
-}
-
-impl HorzAlign {
-    pub fn for_mixed(var_type: VarType) -> Self {
-        match var_type {
-            VarType::Numeric => Self::Right,
-            VarType::String => Self::Left,
-        }
-    }
-}
-
-/// Vertical alignment of a cell's content.
-#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize)]
-#[serde(rename_all = "snake_case")]
-pub enum VertAlign {
-    /// Top alignment.
-    Top,
-
-    /// Centered.
-    Middle,
-
-    /// Bottom alignment.
-    Bottom,
-}
-
-/// Font, colors, and text attributes for an area of a pivot table.
-#[derive(Clone, Debug, Serialize)]
-pub struct FontStyle {
-    /// Whether text is bold.
-    pub bold: bool,
-    /// Whether text is italic.
-    pub italic: bool,
-    /// Whether text is underlined.
-    pub underline: bool,
-    /// NOTE(review): presumably whether the text is interpreted as markup
-    /// rather than plain text; confirm against the renderers.
-    pub markup: bool,
-    /// NOTE(review): presumably the font (typeface) name; confirm.
-    pub font: String,
-
-    /// `fg[0]` is the usual foreground color.
-    ///
-    /// `fg[1]` is used only in [Area::Data] for odd-numbered rows.
-    pub fg: [Color; 2],
-
-    /// `bg[0]` is the usual background color.
-    ///
-    /// `bg[1]` is used only in [Area::Data] for odd-numbered rows.
-    pub bg: [Color; 2],
-
-    /// In 1/72" units.
-    pub size: i32,
-}
-
-/// A color with 8-bit red, green, blue, and alpha components.
-///
-/// An alpha of 255 is what [Color::new] assigns and what
-/// [Color::without_alpha] restores; [Color::TRANSPARENT] has an alpha of 0.
-#[derive(Copy, Clone, PartialEq, Eq)]
-pub struct Color {
-    /// Alpha component.
-    pub alpha: u8,
-    /// Red component.
-    pub r: u8,
-    /// Green component.
-    pub g: u8,
-    /// Blue component.
-    pub b: u8,
-}
-
-impl Color {
-    pub const BLACK: Color = Color::new(0, 0, 0);
-    pub const WHITE: Color = Color::new(255, 255, 255);
-    pub const RED: Color = Color::new(255, 0, 0);
-    pub const BLUE: Color = Color::new(0, 0, 255);
-    pub const TRANSPARENT: Color = Color::new(0, 0, 0).with_alpha(0);
-
-    pub const fn new(r: u8, g: u8, b: u8) -> Self {
-        Self {
-            alpha: 255,
-            r,
-            g,
-            b,
-        }
-    }
-
-    pub const fn with_alpha(self, alpha: u8) -> Self {
-        Self { alpha, ..self }
-    }
-
-    pub const fn without_alpha(self) -> Self {
-        self.with_alpha(255)
-    }
-
-    pub fn display_css(&self) -> DisplayCss {
-        DisplayCss(*self)
-    }
-}
-
-impl Debug for Color {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}", self.display_css())
-    }
-}
-
-impl From<Rgba8> for Color {
-    fn from(Rgba8 { r, g, b, a }: Rgba8) -> Self {
-        Self::new(r, g, b).with_alpha(a)
-    }
-}
-
-impl FromStr for Color {
-    type Err = ParseColorError;
-
-    fn from_str(s: &str) -> Result<Self, Self::Err> {
-        fn is_bare_hex(s: &str) -> bool {
-            let s = s.trim();
-            s.chars().count() == 6 && s.chars().all(|c| c.is_ascii_hexdigit())
-        }
-        let color: AlphaColor<Srgb> = match s.parse() {
-            Err(ParseColorError::UnknownColorSyntax) if is_bare_hex(s) => {
-                ("#".to_owned() + s).parse()
-            }
-            Err(ParseColorError::UnknownColorSyntax)
-                if s.trim().eq_ignore_ascii_case("transparent") =>
-            {
-                Ok(TRANSPARENT)
-            }
-            other => other,
-        }?;
-        Ok(color.to_rgba8().into())
-    }
-}
-
-impl Serialize for Color {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        serializer.serialize_str(&self.display_css().to_small_string::<32>())
-    }
-}
-
-impl<'de> Deserialize<'de> for Color {
-    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
-    where
-        D: serde::Deserializer<'de>,
-    {
-        struct ColorVisitor;
-
-        impl<'de> Visitor<'de> for ColorVisitor {
-            type Value = Color;
-
-            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
-                formatter.write_str("\"#rrggbb\" or \"rrggbb\" or web color name")
-            }
-
-            fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
-            where
-                E: serde::de::Error,
-            {
-                v.parse().map_err(E::custom)
-            }
-        }
-
-        deserializer.deserialize_str(ColorVisitor)
-    }
-}
-
-/// Wrapper around a [Color], returned by [Color::display_css], whose
-/// [Display] implementation writes the color as `#rrggbb` when its alpha is
-/// 255 and otherwise in a functional `rgb(...)` form that includes the alpha.
-pub struct DisplayCss(Color);
-
-impl Display for DisplayCss {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let Color { alpha, r, g, b } = self.0;
-        match alpha {
-            255 => write!(f, "#{r:02x}{g:02x}{b:02x}"),
-            _ => write!(f, "rgb({r}, {g}, {b}, {:.2})", alpha as f64 / 255.0),
-        }
-    }
-}
-
-/// The style of a border: its stroke and its color.
-///
-/// Deserialization reads the fields under the names `@borderStyleType` and
-/// `@color` (NOTE(review): presumably XML attributes; confirm against the
-/// XML deserializer in use).  Serialization is implemented separately and
-/// uses the names `stroke` and `color`.
-#[derive(Copy, Clone, Debug, Deserialize)]
-pub struct BorderStyle {
-    /// The kind of line to draw.
-    #[serde(rename = "@borderStyleType")]
-    pub stroke: Stroke,
-
-    /// The color of the line.
-    #[serde(rename = "@color")]
-    pub color: Color,
-}
-
-impl Serialize for BorderStyle {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        let mut s = serializer.serialize_struct("BorderStyle", 2)?;
-        s.serialize_field("stroke", &self.stroke)?;
-        s.serialize_field("color", &self.color)?;
-        s.end()
-    }
-}
-
-impl BorderStyle {
-    pub const fn none() -> Self {
-        Self {
-            stroke: Stroke::None,
-            color: Color::BLACK,
-        }
-    }
-
-    pub fn is_none(&self) -> bool {
-        self.stroke.is_none()
-    }
-
-    /// Returns a border style that "combines" the two arguments, that is, that
-    /// gives a reasonable choice for a rule for different reasons should have
-    /// both styles.
-    pub fn combine(self, other: BorderStyle) -> Self {
-        Self {
-            stroke: self.stroke.combine(other.stroke),
-            color: self.color,
-        }
-    }
-}
-
-/// The kind of line used to draw a border.
-///
-/// The order of the variants matters: the derived [Ord] follows declaration
-/// order and [Stroke::combine] picks the greater of two strokes, so later
-/// variants take precedence over earlier ones.
-#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Enum, Deserialize, Serialize)]
-#[serde(rename_all = "camelCase")]
-pub enum Stroke {
-    /// No line.
-    None,
-    Solid,
-    Dashed,
-    Thick,
-    Thin,
-    Double,
-}
-
-impl Stroke {
-    pub fn is_none(&self) -> bool {
-        self == &Self::None
-    }
-
-    /// Returns a stroke that "combines" the two arguments, that is, that gives
-    /// a reasonable stroke choice for a rule for different reasons should have
-    /// both styles.
-    pub fn combine(self, other: Stroke) -> Self {
-        self.max(other)
-    }
-}
-
-/// An axis of a 2-dimensional table.
-///
-/// Negating an axis with `!` yields the other axis.
-#[derive(Copy, Clone, Debug, Enum, PartialEq, Eq, Serialize, Deserialize)]
-#[serde(rename_all = "snake_case")]
-pub enum Axis2 {
-    /// The horizontal axis, displayed as `x`.
-    X,
-    /// The vertical axis, displayed as `y`.
-    Y,
-}
-
-impl Axis2 {
-    pub fn new_enum<T>(x: T, y: T) -> EnumMap<Axis2, T> {
-        EnumMap::from_array([x, y])
-    }
-
-    pub fn as_str(&self) -> &'static str {
-        match self {
-            Axis2::X => "x",
-            Axis2::Y => "y",
-        }
-    }
-}
-
-impl Display for Axis2 {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}", self.as_str())
-    }
-}
-
-impl Not for Axis2 {
-    type Output = Self;
-
-    fn not(self) -> Self::Output {
-        match self {
-            Self::X => Self::Y,
-            Self::Y => Self::X,
-        }
-    }
-}
-
-/// A 2-dimensional `(x,y)` pair.
-///
-/// The pair can be indexed by [Axis2] to read or modify one coordinate.  The
-/// derived [Default] is `(0,0)`.
-#[derive(Copy, Clone, Debug, Default, PartialEq, Eq, Hash)]
-pub struct Coord2(pub EnumMap<Axis2, usize>);
-
-impl Coord2 {
-    pub fn new(x: usize, y: usize) -> Self {
-        use Axis2::*;
-        Self(enum_map! {
-            X => x,
-            Y => y
-        })
-    }
-
-    pub fn for_axis((a, az): (Axis2, usize), bz: usize) -> Self {
-        let mut coord = Self::default();
-        coord[a] = az;
-        coord[!a] = bz;
-        coord
-    }
-
-    pub fn from_fn<F>(f: F) -> Self
-    where
-        F: FnMut(Axis2) -> usize,
-    {
-        Self(EnumMap::from_fn(f))
-    }
-
-    pub fn x(&self) -> usize {
-        self.0[Axis2::X]
-    }
-
-    pub fn y(&self) -> usize {
-        self.0[Axis2::Y]
-    }
-
-    pub fn get(&self, axis: Axis2) -> usize {
-        self.0[axis]
-    }
-}
-
-impl From<EnumMap<Axis2, usize>> for Coord2 {
-    fn from(value: EnumMap<Axis2, usize>) -> Self {
-        Self(value)
-    }
-}
-
-impl Index<Axis2> for Coord2 {
-    type Output = usize;
-
-    fn index(&self, index: Axis2) -> &Self::Output {
-        &self.0[index]
-    }
-}
-
-impl IndexMut<Axis2> for Coord2 {
-    fn index_mut(&mut self, index: Axis2) -> &mut Self::Output {
-        &mut self.0[index]
-    }
-}
-
-/// A 2-dimensional rectangle, represented as a half-open [Range] along each
-/// axis.
-///
-/// The rectangle can be indexed by [Axis2] to read or modify the range along
-/// one axis.
-#[derive(Clone, Debug, Default)]
-pub struct Rect2(pub EnumMap<Axis2, Range<usize>>);
-
-impl Rect2 {
-    pub fn new(x_range: Range<usize>, y_range: Range<usize>) -> Self {
-        Self(enum_map! {
-            Axis2::X => x_range.clone(),
-            Axis2::Y => y_range.clone(),
-        })
-    }
-    pub fn for_cell(cell: Coord2) -> Self {
-        Self::new(cell.x()..cell.x() + 1, cell.y()..cell.y() + 1)
-    }
-    pub fn for_ranges((a, a_range): (Axis2, Range<usize>), b_range: Range<usize>) -> Self {
-        let b = !a;
-        let mut ranges = EnumMap::default();
-        ranges[a] = a_range;
-        ranges[b] = b_range;
-        Self(ranges)
-    }
-    pub fn top_left(&self) -> Coord2 {
-        use Axis2::*;
-        Coord2::new(self[X].start, self[Y].start)
-    }
-    pub fn from_fn<F>(f: F) -> Self
-    where
-        F: FnMut(Axis2) -> Range<usize>,
-    {
-        Self(EnumMap::from_fn(f))
-    }
-    pub fn translate(self, offset: Coord2) -> Rect2 {
-        Self::from_fn(|axis| self[axis].start + offset[axis]..self[axis].end + offset[axis])
-    }
-    pub fn is_empty(&self) -> bool {
-        self[Axis2::X].is_empty() || self[Axis2::Y].is_empty()
-    }
-}
-
-impl From<EnumMap<Axis2, Range<usize>>> for Rect2 {
-    fn from(value: EnumMap<Axis2, Range<usize>>) -> Self {
-        Self(value)
-    }
-}
-
-impl Index<Axis2> for Rect2 {
-    type Output = Range<usize>;
-
-    fn index(&self, index: Axis2) -> &Self::Output {
-        &self.0[index]
-    }
-}
-
-impl IndexMut<Axis2> for Rect2 {
-    fn index_mut(&mut self, index: Axis2) -> &mut Self::Output {
-        &mut self.0[index]
-    }
-}
-
-/// Kind of markers to use for footnotes.
-///
-/// The default is [FootnoteMarkerType::Alphabetic].
-#[derive(Copy, Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
-#[serde(rename_all = "camelCase")]
-pub enum FootnoteMarkerType {
-    /// a, b, c, ...
-    #[default]
-    Alphabetic,
-
-    /// 1, 2, 3, ...
-    Numeric,
-}
-
-/// Where to put footnote markers.
-///
-/// The default is [FootnoteMarkerPosition::Subscript].
-#[derive(Copy, Clone, Debug, Default, Deserialize, Serialize, PartialEq, Eq)]
-#[serde(rename_all = "camelCase")]
-pub enum FootnoteMarkerPosition {
-    /// Subscripts.
-    #[default]
-    Subscript,
-
-    /// Superscripts.
-    Superscript,
-}
-
-/// Options that are obtained from a table (or defaulted) through
-/// [IntoValueOptions].
-///
-/// NOTE(review): presumably these control how values are formatted for
-/// display; the code that consumes them is not visible here.
-#[derive(Copy, Clone, Debug)]
-pub struct ValueOptions {
-    /// Defaults to `None`.
-    ///
-    /// NOTE(review): presumably `None` defers the choice to some other
-    /// setting; confirm against the code that displays values.
-    pub show_values: Option<Show>,
-
-    /// Defaults to `None`.
-    ///
-    /// NOTE(review): presumably `None` defers the choice to some other
-    /// setting; confirm against the code that displays values.
-    pub show_variables: Option<Show>,
-
-    /// Defaults to 0.0001.
-    ///
-    /// NOTE(review): presumably the threshold below which a number counts as
-    /// "small" for values that set `honor_small`; confirm.
-    pub small: f64,
-
-    /// Kind of markers to use for footnotes.
-    pub footnote_marker_type: FootnoteMarkerType,
-}
-
-impl Default for ValueOptions {
-    fn default() -> Self {
-        Self {
-            show_values: None,
-            show_variables: None,
-            small: 0.0001,
-            footnote_marker_type: FootnoteMarkerType::default(),
-        }
-    }
-}
-
-/// Conversion into [ValueOptions].
-///
-/// This lets a function accept `()` (for the defaults), a [PivotTable]
-/// reference, or [ValueOptions] themselves, by value or by reference.
-pub trait IntoValueOptions {
-    /// Converts `self` into [ValueOptions].
-    fn into_value_options(self) -> ValueOptions;
-}
-
-impl IntoValueOptions for () {
-    fn into_value_options(self) -> ValueOptions {
-        ValueOptions::default()
-    }
-}
-
-impl IntoValueOptions for &PivotTable {
-    fn into_value_options(self) -> ValueOptions {
-        self.value_options()
-    }
-}
-
-/// A reference to [ValueOptions] converts by copying the options.
-impl IntoValueOptions for &ValueOptions {
-    fn into_value_options(self) -> ValueOptions {
-        *self
-    }
-}
-
-/// [ValueOptions] convert into themselves unchanged.
-impl IntoValueOptions for ValueOptions {
-    fn into_value_options(self) -> ValueOptions {
-        self
-    }
-}
-
-/// A pivot table: a multidimensional table of values together with the
-/// settings that control how it is displayed.
-#[derive(Clone, Debug, Serialize)]
-pub struct PivotTable {
-    /// Styling.  The look is shared; `look_mut` uses [Arc::make_mut] to get
-    /// mutable access to it.
-    pub look: Arc<Look>,
-
-    /// NOTE(review): presumably whether to rotate the labels of the
-    /// innermost column dimension; confirm against the renderer.
-    pub rotate_inner_column_labels: bool,
-
-    /// NOTE(review): presumably whether to rotate the labels of the
-    /// outermost row dimension; confirm against the renderer.
-    pub rotate_outer_row_labels: bool,
-
-    /// NOTE(review): presumably whether to draw grid lines; confirm.
-    pub show_grid_lines: bool,
-
-    /// Whether to show the title.  `with_title` sets this to true.
-    pub show_title: bool,
-
-    /// Whether to show the caption.  `with_caption` sets this to true.
-    pub show_caption: bool,
-
-    /// NOTE(review): see the same-named field in [ValueOptions]; semantics
-    /// are not visible here.
-    pub show_values: Option<Show>,
-
-    /// NOTE(review): see the same-named field in [ValueOptions]; semantics
-    /// are not visible here.
-    pub show_variables: Option<Show>,
-
-    /// NOTE(review): presumably the format used for weights; confirm.
-    pub weight_format: Format,
-
-    /// Current layer indexes, with `axes[Axis3::Z].dimensions.len()` elements.
-    /// `current_layer[i]` is an offset into
-    /// `axes[Axis3::Z].dimensions[i].data_leaves[]`, except that a dimension
-    /// can have zero leaves, in which case `current_layer[i]` is zero and
-    /// there's no corresponding leaf.
-    pub current_layer: Vec<usize>,
-
-    /// Column and row sizing and page breaks.
-    pub sizing: EnumMap<Axis2, Option<Box<Sizing>>>,
-
-    /// Format settings.
-    pub settings: FormatSettings,
-
-    /// Numeric grouping character (usually `.` or `,`).
-    pub grouping: Option<char>,
-
-    /// NOTE(review): see the same-named field in [ValueOptions]; semantics
-    /// are not visible here.
-    pub small: f64,
-
-    // NOTE(review): the following optional strings and the date look like
-    // descriptive metadata about how the table was produced; their exact
-    // meanings are not visible here.
-    pub command_local: Option<String>,
-    pub command_c: Option<String>,
-    pub language: Option<String>,
-    pub locale: Option<String>,
-    pub dataset: Option<String>,
-    pub datafile: Option<String>,
-    pub date: Option<NaiveDateTime>,
-    /// The table's footnotes.
-    pub footnotes: Footnotes,
-    /// The title, if any.  `label` uses it, falling back to `"Table"`.
-    pub title: Option<Box<Value>>,
-    /// The subtype, if any (set by `with_subtype`).
-    pub subtype: Option<Box<Value>>,
-    /// The corner text, if any (set by `with_corner_text`).
-    pub corner_text: Option<Box<Value>>,
-    /// The caption, if any (set by `with_caption`).
-    pub caption: Option<Box<Value>>,
-    /// NOTE(review): presumably free-form notes attached to the table.
-    pub notes: Option<String>,
-    /// The table's dimensions.
-    pub dimensions: Vec<Dimension>,
-    /// The dimensions assigned to each axis.
-    pub axes: EnumMap<Axis3, Axis>,
-    /// The table's cell values.
-    ///
-    /// NOTE(review): the key is presumably a single index computed from a
-    /// cell's per-dimension data indexes; confirm against `insert`.
-    pub cells: HashMap<usize, Value>,
-}
-
-impl PivotTable {
-    pub fn with_title(mut self, title: impl Into<Value>) -> Self {
-        self.title = Some(Box::new(title.into()));
-        self.show_title = true;
-        self
-    }
-
-    pub fn with_caption(mut self, caption: impl Into<Value>) -> Self {
-        self.caption = Some(Box::new(caption.into()));
-        self.show_caption = true;
-        self
-    }
-
-    pub fn with_corner_text(mut self, corner_text: impl Into<Value>) -> Self {
-        self.corner_text = Some(Box::new(corner_text.into()));
-        self
-    }
-
-    pub fn with_subtype(self, subtype: impl Into<Value>) -> Self {
-        Self {
-            subtype: Some(Box::new(subtype.into())),
-            ..self
-        }
-    }
-
-    pub fn with_show_title(mut self, show_title: bool) -> Self {
-        self.show_title = show_title;
-        self
-    }
-
-    pub fn with_show_caption(mut self, show_caption: bool) -> Self {
-        self.show_caption = show_caption;
-        self
-    }
-
-    pub fn with_layer(mut self, layer: &[usize]) -> Self {
-        debug_assert_eq!(layer.len(), self.current_layer.len());
-        if self.look.print_all_layers {
-            self.look_mut().print_all_layers = false;
-        }
-        self.current_layer.clear();
-        self.current_layer.extend_from_slice(layer);
-        self
-    }
-
-    pub fn with_all_layers(mut self) -> Self {
-        if !self.look.print_all_layers {
-            self.look_mut().print_all_layers = true;
-        }
-        self
-    }
-
-    pub fn look_mut(&mut self) -> &mut Look {
-        Arc::make_mut(&mut self.look)
-    }
-
-    pub fn with_show_empty(mut self) -> Self {
-        if self.look.hide_empty {
-            self.look_mut().hide_empty = false;
-        }
-        self
-    }
-
-    pub fn with_hide_empty(mut self) -> Self {
-        if !self.look.hide_empty {
-            self.look_mut().hide_empty = true;
-        }
-        self
-    }
-
-    pub fn label(&self) -> String {
-        match &self.title {
-            Some(title) => title.display(self).to_string(),
-            None => String::from("Table"),
-        }
-    }
-
-    pub fn title(&self) -> &Value {
-        match &self.title {
-            Some(title) => title,
-            None => {
-                static EMPTY: Value = Value::empty();
-                &EMPTY
-            }
-        }
-    }
-
-    pub fn subtype(&self) -> &Value {
-        match &self.subtype {
-            Some(subtype) => subtype,
-            None => {
-                static EMPTY: Value = Value::empty();
-                &EMPTY
-            }
-        }
-    }
-}
-
-impl Default for PivotTable {
-    fn default() -> Self {
-        Self {
-            look: Look::shared_default(),
-            rotate_inner_column_labels: false,
-            rotate_outer_row_labels: false,
-            show_grid_lines: false,
-            show_title: true,
-            show_caption: true,
-            show_values: None,
-            show_variables: None,
-            weight_format: Format::F40,
-            current_layer: Vec::new(),
-            sizing: EnumMap::default(),
-            settings: FormatSettings::default(), // XXX from settings
-            grouping: None,
-            small: 0.0001, // XXX from settings.
-            command_local: None,
-            command_c: None, // XXX from current command name.
-            language: None,
-            locale: None,
-            dataset: None,
-            datafile: None,
-            date: None,
-            footnotes: Footnotes::new(),
-            subtype: None,
-            title: None,
-            corner_text: None,
-            caption: None,
-            notes: None,
-            dimensions: Vec::new(),
-            axes: EnumMap::default(),
-            cells: HashMap::new(),
-        }
-    }
-}
-
-fn cell_index<I>(data_indexes: &[usize], dimensions: I) -> usize
-where
-    I: ExactSizeIterator<Item = usize>,
-{
-    debug_assert_eq!(data_indexes.len(), dimensions.len());
-    let mut index = 0;
-    for (dimension, data_index) in dimensions.zip(data_indexes.iter()) {
-        debug_assert!(*data_index < dimension);
-        index = dimension * index + data_index;
-    }
-    index
-}
-
-impl PivotTable {
-    pub fn new(axes_and_dimensions: impl IntoIterator<Item = (Axis3, Dimension)>) -> Self {
-        let mut dimensions = Vec::new();
-        let mut axes = EnumMap::<Axis3, Axis>::default();
-        for (axis, dimension) in axes_and_dimensions {
-            axes[axis].dimensions.push(dimensions.len());
-            dimensions.push(dimension);
-        }
-        Self {
-            look: Settings::global().look.clone(),
-            current_layer: repeat_n(0, axes[Axis3::Z].dimensions.len()).collect(),
-            axes,
-            dimensions,
-            ..Self::default()
-        }
-    }
-    fn cell_index(&self, data_indexes: &[usize]) -> usize {
-        cell_index(data_indexes, self.dimensions.iter().map(|d| d.len()))
-    }
-
-    pub fn insert(&mut self, data_indexes: &[usize], value: impl Into<Value>) {
-        self.cells
-            .insert(self.cell_index(data_indexes), value.into());
-    }
-
-    pub fn get(&self, data_indexes: &[usize]) -> Option<&Value> {
-        self.cells.get(&self.cell_index(data_indexes))
-    }
-
-    pub fn with_data<I>(mut self, iter: impl IntoIterator<Item = (I, Value)>) -> Self
-    where
-        I: AsRef<[usize]>,
-    {
-        self.extend(iter);
-        self
-    }
-
-    /// Converts per-axis presentation-order indexes in `presentation_indexes`,
-    /// into data indexes for each dimension.
-    fn convert_indexes_ptod(
-        &self,
-        presentation_indexes: EnumMap<Axis3, &[usize]>,
-    ) -> SmallVec<[usize; 4]> {
-        let mut data_indexes = SmallVec::from_elem(0, self.dimensions.len());
-        for (axis, presentation_indexes) in presentation_indexes {
-            for (&dim_index, &pindex) in self.axes[axis]
-                .dimensions
-                .iter()
-                .zip(presentation_indexes.iter())
-            {
-                data_indexes[dim_index] = self.dimensions[dim_index].presentation_order[pindex];
-            }
-        }
-        data_indexes
-    }
-
-    /// Returns an iterator for the layer axis:
-    ///
-    /// - If `print` is true and `self.look.print_all_layers`, then the iterator
-    ///   will visit all values of the layer axis.
-    ///
-    /// - Otherwise, the iterator will just visit `self.current_layer`.
-    pub fn layers(&self, print: bool) -> Box<dyn Iterator<Item = SmallVec<[usize; 4]>>> {
-        if print && self.look.print_all_layers {
-            Box::new(self.axis_values(Axis3::Z))
-        } else {
-            Box::new(once(SmallVec::from_slice(&self.current_layer)))
-        }
-    }
-
-    pub fn value_options(&self) -> ValueOptions {
-        ValueOptions {
-            show_values: self.show_values,
-            show_variables: self.show_variables,
-            small: self.small,
-            footnote_marker_type: self.look.footnote_marker_type,
-        }
-    }
-
-    pub fn transpose(&mut self) {
-        self.axes.swap(Axis3::X, Axis3::Y);
-    }
-
-    pub fn axis_dimensions(
-        &self,
-        axis: Axis3,
-    ) -> impl DoubleEndedIterator<Item = &Dimension> + ExactSizeIterator {
-        self.axes[axis]
-            .dimensions
-            .iter()
-            .copied()
-            .map(|index| &self.dimensions[index])
-    }
-
-    fn find_dimension(&self, dim_index: usize) -> Option<(Axis3, usize)> {
-        debug_assert!(dim_index < self.dimensions.len());
-        for axis in enum_iterator::all::<Axis3>() {
-            for (position, dimension) in self.axes[axis].dimensions.iter().copied().enumerate() {
-                if dimension == dim_index {
-                    return Some((axis, position));
-                }
-            }
-        }
-        None
-    }
-    pub fn move_dimension(&mut self, dim_index: usize, new_axis: Axis3, new_position: usize) {
-        let (old_axis, old_position) = self.find_dimension(dim_index).unwrap();
-        if old_axis == new_axis && old_position == new_position {
-            return;
-        }
-
-        // Update the current layer, if necessary.  If we're moving within the
-        // layer axis, preserve the current layer.
-        match (old_axis, new_axis) {
-            (Axis3::Z, Axis3::Z) => {
-                // Rearrange the layer axis.
-                if old_position < new_position {
-                    self.current_layer[old_position..=new_position].rotate_left(1);
-                } else {
-                    self.current_layer[new_position..=old_position].rotate_right(1);
-                }
-            }
-            (Axis3::Z, _) => {
-                // A layer is becoming a row or column.
-                self.current_layer.remove(old_position);
-            }
-            (_, Axis3::Z) => {
-                // A row or column is becoming a layer.
-                self.current_layer.insert(new_position, 0);
-            }
-            _ => (),
-        }
-
-        self.axes[old_axis].dimensions.remove(old_position);
-        self.axes[new_axis]
-            .dimensions
-            .insert(new_position, dim_index);
-    }
-}
-
-impl<I> Extend<(I, Value)> for PivotTable
-where
-    I: AsRef<[usize]>,
-{
-    fn extend<T: IntoIterator<Item = (I, Value)>>(&mut self, iter: T) {
-        for (data_indexes, value) in iter {
-            self.insert(data_indexes.as_ref(), value);
-        }
-    }
-}
-
-#[derive(Clone, Debug, Serialize)]
-pub struct Footnote {
-    #[serde(skip)]
-    index: usize,
-    pub content: Box<Value>,
-    pub marker: Option<Box<Value>>,
-    pub show: bool,
-}
-
-impl Footnote {
-    pub fn new(content: impl Into<Value>) -> Self {
-        Self {
-            index: 0,
-            content: Box::new(content.into()),
-            marker: None,
-            show: true,
-        }
-    }
-    pub fn with_marker(mut self, marker: impl Into<Value>) -> Self {
-        self.marker = Some(Box::new(marker.into()));
-        self
-    }
-
-    pub fn with_show(mut self, show: bool) -> Self {
-        self.show = show;
-        self
-    }
-
-    pub fn with_index(mut self, index: usize) -> Self {
-        self.index = index;
-        self
-    }
-
-    pub fn display_marker(&self, options: impl IntoValueOptions) -> DisplayMarker<'_> {
-        DisplayMarker {
-            footnote: self,
-            options: options.into_value_options(),
-        }
-    }
-
-    pub fn display_content(&self, options: impl IntoValueOptions) -> DisplayValue<'_> {
-        self.content.display(options)
-    }
-
-    pub fn index(&self) -> usize {
-        self.index
-    }
-}
-
-pub struct DisplayMarker<'a> {
-    footnote: &'a Footnote,
-    options: ValueOptions,
-}
-
-impl Display for DisplayMarker<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        if let Some(marker) = &self.footnote.marker {
-            write!(f, "{}", marker.display(self.options).without_suffixes())
-        } else {
-            let i = self.footnote.index + 1;
-            match self.options.footnote_marker_type {
-                FootnoteMarkerType::Alphabetic => write!(f, "{}", Display26Adic::new_lowercase(i)),
-                FootnoteMarkerType::Numeric => write!(f, "{i}"),
-            }
-        }
-    }
-}
-
-/// Displays a number in 26adic notation.
-///
-/// Zero is displayed as the empty string, 1 through 26 as `a` through `z`, 27
-/// through 52 as `aa` through `az`, and so on.
-pub struct Display26Adic {
-    value: usize,
-    base: u8,
-}
-
-impl Display26Adic {
-    /// Constructs a `Display26Adic` for `value`, with letters in lowercase.
-    pub fn new_lowercase(value: usize) -> Self {
-        Self { value, base: b'a' }
-    }
-
-    /// Constructs a `Display26Adic` for `value`, with letters in uppercase.
-    pub fn new_uppercase(value: usize) -> Self {
-        Self { value, base: b'A' }
-    }
-}
-
-impl Display for Display26Adic {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let mut output = SmallVec::<[u8; 16]>::new();
-        let mut number = self.value;
-        while number > 0 {
-            number -= 1;
-            let digit = (number % 26) as u8;
-            output.push(digit + self.base);
-            number /= 26;
-        }
-        output.reverse();
-        write!(f, "{}", from_utf8(&output).unwrap())
-    }
-}
-
-/// The content of a single pivot table cell.
-///
-/// A [Value] is also a pivot table's title, caption, footnote marker and
-/// contents, and so on.
-///
-/// A given [Value] is one of:
-///
-/// 1. A number resulting from a calculation.
-///
-///    A number has an associated display format (usually [F] or [Pct]).  This
-///    format can be set directly, but that is not usually the easiest way.
-///    Instead, it is usually true that all of the values in a single category
-///    should have the same format (e.g. all "Significance" values might use
-///    format `F40.3`), so PSPP makes it easy to set the default format for a
-///    category while creating the category.  See pivot_dimension_create() for
-///    more details.
-///
-///    [F]: crate::format::Type::F
-///    [Pct]: crate::format::Type::Pct
-///
-/// 2. A numeric or string value obtained from data ([ValueInner::Number] or
-///    [ValueInner::String]).  If such a value corresponds to a variable, then the
-///    variable's name can be attached to the pivot_value.  If the value has a
-///    value label, then that can also be attached.  When a label is present,
-///    the user can control whether to show the value or the label or both.
-///
-/// 3. A variable name ([ValueInner::Variable]).  The variable label, if any, can
-///    be attached too, and again the user can control whether to show the value
-///    or the label or both.
-///
-/// 4. A text string ([ValueInner::Text).  The value stores the string in English
-///    and translated into the output language (localized).  Use
-///    pivot_value_new_text() or pivot_value_new_text_format() for those cases.
-///    In some cases, only an English or a localized version is available for
-///    one reason or another, although this is regrettable; in those cases, use
-///    pivot_value_new_user_text() or pivot_value_new_user_text_nocopy().
-///
-/// 5. A template. PSPP doesn't create these itself yet, but it can read and
-///    interpret those created by SPSS.
-#[derive(Clone, Default)]
-pub struct Value {
-    pub inner: ValueInner,
-    pub styling: Option<Box<ValueStyle>>,
-}
-
-impl Serialize for Value {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        self.inner.serialize(serializer)
-    }
-}
-
-/// Wrapper for [Value] that uses [Value::serialize_bare] for serialization.
-#[derive(Serialize)]
-struct BareValue<'a>(#[serde(serialize_with = "Value::serialize_bare")] pub &'a Value);
-
-impl Value {
-    pub fn serialize_bare<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: Serializer,
-    {
-        match &self.inner {
-            ValueInner::Number(number_value) => number_value.serialize_bare(serializer),
-            ValueInner::String(string_value) => string_value.s.serialize(serializer),
-            ValueInner::Variable(variable_value) => variable_value.var_name.serialize(serializer),
-            ValueInner::Text(text_value) => text_value.localized.serialize(serializer),
-            ValueInner::Template(template_value) => template_value.localized.serialize(serializer),
-            ValueInner::Empty => serializer.serialize_none(),
-        }
-    }
-
-    fn new(inner: ValueInner) -> Self {
-        Self {
-            inner,
-            styling: None,
-        }
-    }
-    pub fn new_number_with_format(x: Option<f64>, format: Format) -> Self {
-        Self::new(ValueInner::Number(NumberValue {
-            show: None,
-            format,
-            honor_small: false,
-            value: x,
-            variable: None,
-            value_label: None,
-        }))
-    }
-    pub fn new_variable(variable: &Variable) -> Self {
-        Self::new(ValueInner::Variable(VariableValue {
-            show: None,
-            var_name: String::from(variable.name.as_str()),
-            variable_label: variable.label.clone(),
-        }))
-    }
-    pub fn new_datum<B>(value: &Datum<B>) -> Self
-    where
-        B: EncodedString,
-    {
-        match value {
-            Datum::Number(number) => Self::new_number(*number),
-            Datum::String(string) => Self::new_user_text(string.as_str()),
-        }
-    }
-    pub fn new_variable_value(variable: &Variable, value: &Datum<ByteString>) -> Self {
-        let var_name = Some(variable.name.as_str().into());
-        let value_label = variable.value_labels.get(value).map(String::from);
-        match value {
-            Datum::Number(number) => Self::new(ValueInner::Number(NumberValue {
-                show: None,
-                format: match variable.print_format.var_type() {
-                    VarType::Numeric => variable.print_format,
-                    VarType::String => {
-                        #[cfg(debug_assertions)]
-                        panic!("cannot create numeric pivot value with string format");
-
-                        #[cfg(not(debug_assertions))]
-                        Format::F8_2
-                    }
-                },
-                honor_small: false,
-                value: *number,
-                variable: var_name,
-                value_label,
-            })),
-            Datum::String(string) => Self::new(ValueInner::String(StringValue {
-                show: None,
-                hex: variable.print_format.type_() == Type::AHex,
-                s: string
-                    .as_ref()
-                    .with_encoding(variable.encoding())
-                    .into_string(),
-                var_name,
-                value_label,
-            })),
-        }
-    }
-    pub fn new_number(x: Option<f64>) -> Self {
-        Self::new_number_with_format(x, Format::F8_2)
-    }
-    pub fn new_integer(x: Option<f64>) -> Self {
-        Self::new_number_with_format(x, Format::F40)
-    }
-    pub fn new_text(s: impl Into<String>) -> Self {
-        Self::new_user_text(s)
-    }
-    pub fn new_user_text(s: impl Into<String>) -> Self {
-        let s: String = s.into();
-        if s.is_empty() {
-            Self::default()
-        } else {
-            Self::new(ValueInner::Text(TextValue {
-                user_provided: true,
-                localized: s.clone(),
-                c: None,
-                id: None,
-            }))
-        }
-    }
-    pub fn with_footnote(mut self, footnote: &Arc<Footnote>) -> Self {
-        self.add_footnote(footnote);
-        self
-    }
-    pub fn add_footnote(&mut self, footnote: &Arc<Footnote>) {
-        let footnotes = &mut self.styling.get_or_insert_default().footnotes;
-        footnotes.push(footnote.clone());
-        footnotes.sort_by_key(|f| f.index);
-    }
-    pub fn with_show_value_label(mut self, show: Option<Show>) -> Self {
-        let new_show = show;
-        match &mut self.inner {
-            ValueInner::Number(NumberValue { show, .. })
-            | ValueInner::String(StringValue { show, .. }) => {
-                *show = new_show;
-            }
-            _ => (),
-        }
-        self
-    }
-    pub fn with_show_variable_label(mut self, show: Option<Show>) -> Self {
-        if let ValueInner::Variable(variable_value) = &mut self.inner {
-            variable_value.show = show;
-        }
-        self
-    }
-    pub fn with_value_label(mut self, label: Option<String>) -> Self {
-        match &mut self.inner {
-            ValueInner::Number(NumberValue { value_label, .. })
-            | ValueInner::String(StringValue { value_label, .. }) => *value_label = label.clone(),
-            _ => (),
-        }
-        self
-    }
-    pub const fn empty() -> Self {
-        Value {
-            inner: ValueInner::Empty,
-            styling: None,
-        }
-    }
-    pub const fn is_empty(&self) -> bool {
-        self.inner.is_empty() && self.styling.is_none()
-    }
-}
-
-impl From<&str> for Value {
-    fn from(value: &str) -> Self {
-        Self::new_text(value)
-    }
-}
-
-impl From<String> for Value {
-    fn from(value: String) -> Self {
-        Self::new_text(value)
-    }
-}
-
-impl From<&Variable> for Value {
-    fn from(variable: &Variable) -> Self {
-        Self::new_variable(variable)
-    }
-}
-
-pub struct DisplayValue<'a> {
-    inner: &'a ValueInner,
-    markup: bool,
-    subscripts: &'a [String],
-    footnotes: &'a [Arc<Footnote>],
-    options: ValueOptions,
-    show_value: bool,
-    show_label: Option<&'a str>,
-}
-
-impl<'a> DisplayValue<'a> {
-    pub fn subscripts(&self) -> impl Iterator<Item = &str> {
-        self.subscripts.iter().map(String::as_str)
-    }
-
-    pub fn has_subscripts(&self) -> bool {
-        !self.subscripts.is_empty()
-    }
-
-    pub fn footnotes(&self) -> impl Iterator<Item = DisplayMarker<'_>> {
-        self.footnotes
-            .iter()
-            .filter(|f| f.show)
-            .map(|f| f.display_marker(self.options))
-    }
-
-    pub fn has_footnotes(&self) -> bool {
-        self.footnotes().next().is_some()
-    }
-
-    pub fn without_suffixes(self) -> Self {
-        Self {
-            subscripts: &[],
-            footnotes: &[],
-            ..self
-        }
-    }
-
-    /// Returns this display split into `(body, suffixes)` where `suffixes` is
-    /// subscripts and footnotes and `body` is everything else.
-    pub fn split_suffixes(self) -> (Self, Self) {
-        let suffixes = Self {
-            inner: &ValueInner::Empty,
-            ..self
-        };
-        (self.without_suffixes(), suffixes)
-    }
-
-    pub fn with_styling(mut self, styling: &'a ValueStyle) -> Self {
-        if let Some(area_style) = &styling.style {
-            self.markup = area_style.font_style.markup;
-        }
-        self.subscripts = styling.subscripts.as_slice();
-        self.footnotes = styling.footnotes.as_slice();
-        self
-    }
-
-    pub fn with_font_style(self, font_style: &FontStyle) -> Self {
-        Self {
-            markup: font_style.markup,
-            ..self
-        }
-    }
-
-    pub fn with_subscripts(self, subscripts: &'a [String]) -> Self {
-        Self { subscripts, ..self }
-    }
-
-    pub fn with_footnotes(self, footnotes: &'a [Arc<Footnote>]) -> Self {
-        Self { footnotes, ..self }
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.inner.is_empty() && self.subscripts.is_empty() && self.footnotes.is_empty()
-    }
-
-    fn small(&self) -> f64 {
-        self.options.small
-    }
-
-    pub fn var_type(&self) -> VarType {
-        match self.inner {
-            ValueInner::Number(NumberValue { .. }) if self.show_label.is_none() => VarType::Numeric,
-            _ => VarType::String,
-        }
-    }
-
-    fn template(
-        &self,
-        f: &mut std::fmt::Formatter<'_>,
-        template: &str,
-        args: &[Vec<Value>],
-    ) -> std::fmt::Result {
-        let mut iter = template.as_bytes().iter();
-        while let Some(c) = iter.next() {
-            match c {
-                b'\\' => {
-                    let c = *iter.next().unwrap_or(&b'\\') as char;
-                    let c = if c == 'n' { '\n' } else { c };
-                    write!(f, "{c}")?;
-                }
-                b'^' => {
-                    let (index, rest) = consume_int(iter.as_slice());
-                    iter = rest.iter();
-                    let Some(arg) = args.get(index.wrapping_sub(1)) else {
-                        continue;
-                    };
-                    if let Some(arg) = arg.first() {
-                        write!(f, "{}", arg.display(self.options))?;
-                    }
-                }
-                b'[' => {
-                    let (a, rest) = extract_inner_template(iter.as_slice());
-                    let (b, rest) = extract_inner_template(rest);
-                    let rest = rest.strip_prefix(b"]").unwrap_or(rest);
-                    let (index, rest) = consume_int(rest);
-                    iter = rest.iter();
-
-                    let Some(mut args) = args.get(index.wrapping_sub(1)).map(|vec| vec.as_slice())
-                    else {
-                        continue;
-                    };
-                    let (mut template, mut escape) =
-                        if !a.is_empty() { (a, b'%') } else { (b, b'^') };
-                    while !args.is_empty() {
-                        let n_consumed = self.inner_template(f, template, escape, args)?;
-                        if n_consumed == 0 {
-                            break;
-                        }
-                        args = &args[n_consumed..];
-
-                        template = b;
-                        escape = b'^';
-                    }
-                }
-                c => write!(f, "{c}")?,
-            }
-        }
-        Ok(())
-    }
-
-    fn inner_template(
-        &self,
-        f: &mut std::fmt::Formatter<'_>,
-        template: &[u8],
-        escape: u8,
-        args: &[Value],
-    ) -> Result<usize, std::fmt::Error> {
-        let mut iter = template.iter();
-        let mut args_consumed = 0;
-        while let Some(c) = iter.next() {
-            match c {
-                b'\\' => {
-                    let c = *iter.next().unwrap_or(&b'\\') as char;
-                    let c = if c == 'n' { '\n' } else { c };
-                    write!(f, "{c}")?;
-                }
-                c if *c == escape => {
-                    let (index, rest) = consume_int(iter.as_slice());
-                    iter = rest.iter();
-                    let Some(arg) = args.get(index.wrapping_sub(1)) else {
-                        continue;
-                    };
-                    args_consumed = args_consumed.max(index);
-                    write!(f, "{}", arg.display(self.options))?;
-                }
-                c => write!(f, "{c}")?,
-            }
-        }
-        Ok(args_consumed)
-    }
-}
-
-fn consume_int(input: &[u8]) -> (usize, &[u8]) {
-    let mut n = 0;
-    for (index, c) in input.iter().enumerate() {
-        if !c.is_ascii_digit() {
-            return (n, &input[index..]);
-        }
-        n = n * 10 + (c - b'0') as usize;
-    }
-    (n, &[])
-}
-
-fn extract_inner_template(input: &[u8]) -> (&[u8], &[u8]) {
-    for (index, c) in input.iter().copied().enumerate() {
-        if c == b':' && (index == 0 || input[index - 1] != b'\\') {
-            return input.split_at(index);
-        }
-    }
-    (input, &[])
-}
-
-fn interpret_show(
-    global_show: impl Fn() -> Show,
-    table_show: Option<Show>,
-    value_show: Option<Show>,
-    label: &str,
-) -> (bool, Option<&str>) {
-    match value_show.or(table_show).unwrap_or_else(global_show) {
-        Show::Value => (true, None),
-        Show::Label => (false, Some(label)),
-        Show::Both => (true, Some(label)),
-    }
-}
-
-impl Display for DisplayValue<'_> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self.inner {
-            ValueInner::Number(NumberValue {
-                format,
-                honor_small,
-                value,
-                ..
-            }) => {
-                if self.show_value {
-                    let format = if format.type_() == Type::F
-                        && *honor_small
-                        && value.is_some_and(|value| value != 0.0 && value.abs() < self.small())
-                    {
-                        UncheckedFormat::new(Type::E, 40, format.d() as u8).fix()
-                    } else {
-                        *format
-                    };
-                    let mut buf = SmallString::<[u8; 40]>::new();
-                    write!(
-                        &mut buf,
-                        "{}",
-                        Datum::<&str>::Number(*value).display(format)
-                    )
-                    .unwrap();
-                    write!(f, "{}", buf.trim_start_matches(' '))?;
-                }
-                if let Some(label) = self.show_label {
-                    if self.show_value {
-                        write!(f, " ")?;
-                    }
-                    f.write_str(label)?;
-                }
-                Ok(())
-            }
-
-            ValueInner::String(StringValue { s, .. })
-            | ValueInner::Variable(VariableValue { var_name: s, .. }) => {
-                match (self.show_value, self.show_label) {
-                    (true, None) => write!(f, "{s}"),
-                    (false, Some(label)) => write!(f, "{label}"),
-                    (true, Some(label)) => write!(f, "{s} {label}"),
-                    (false, None) => unreachable!(),
-                }
-            }
-
-            ValueInner::Text(TextValue {
-                localized: local, ..
-            }) => {
-                /*
-                if self
-                    .inner
-                    .styling
-                    .as_ref()
-                    .is_some_and(|styling| styling.style.font_style.markup)
-                {
-                    todo!();
-                }*/
-                f.write_str(local)
-            }
-
-            ValueInner::Template(TemplateValue {
-                args,
-                localized: local,
-                ..
-            }) => self.template(f, local, args),
-
-            ValueInner::Empty => Ok(()),
-        }?;
-
-        for (subscript, delimiter) in self.subscripts.iter().zip(once('_').chain(repeat(','))) {
-            write!(f, "{delimiter}{subscript}")?;
-        }
-
-        for footnote in self.footnotes {
-            write!(f, "[{}]", footnote.display_marker(self.options))?;
-        }
-
-        Ok(())
-    }
-}
-
-impl Value {
-    // Returns an object that will format this value, including subscripts and
-    // superscripts and footnotes.  `options` controls whether variable and
-    // value labels are included.
-    pub fn display(&self, options: impl IntoValueOptions) -> DisplayValue<'_> {
-        let display = self.inner.display(options.into_value_options());
-        match &self.styling {
-            Some(styling) => display.with_styling(styling),
-            None => display,
-        }
-    }
-}
-
-impl Debug for Value {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{:?}", self.display(()).to_string())
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct NumberValue {
-    /// The numerical value, or `None` if it is a missing value.
-    pub value: Option<f64>,
-    pub format: Format,
-    pub show: Option<Show>,
-    pub honor_small: bool,
-    pub variable: Option<String>,
-    pub value_label: Option<String>,
-}
-
-impl Serialize for NumberValue {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        if self.format.type_() == Type::F && self.variable.is_none() && self.value_label.is_none() {
-            self.value.serialize(serializer)
-        } else {
-            let mut s = serializer.serialize_map(None)?;
-            s.serialize_entry("value", &self.value)?;
-            s.serialize_entry("format", &self.format)?;
-            if let Some(show) = self.show {
-                s.serialize_entry("show", &show)?;
-            }
-            if self.honor_small {
-                s.serialize_entry("honor_small", &self.honor_small)?;
-            }
-            if let Some(variable) = &self.variable {
-                s.serialize_entry("variable", variable)?;
-            }
-            if let Some(value_label) = &self.value_label {
-                s.serialize_entry("value_label", value_label)?;
-            }
-            s.end()
-        }
-    }
-}
-
-impl NumberValue {
-    pub fn serialize_bare<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: Serializer,
-    {
-        if let Some(number) = self.value
-            && number.trunc() == number
-            && number >= -(1i64 << 53) as f64
-            && number <= (1i64 << 53) as f64
-        {
-            (number as u64).serialize(serializer)
-        } else {
-            self.value.serialize(serializer)
-        }
-    }
-}
-
-#[derive(Serialize)]
-pub struct BareNumberValue<'a>(
-    #[serde(serialize_with = "NumberValue::serialize_bare")] pub &'a NumberValue,
-);
-
-#[derive(Clone, Debug, Serialize)]
-pub struct StringValue {
-    /// The string value.
-    ///
-    /// If `hex` is true, this should contain hex digits, not raw binary data
-    /// (otherwise it would be impossible to encode non-UTF-8 data).
-    pub s: String,
-
-    /// True if `s` is hex digits.
-    pub hex: bool,
-
-    pub show: Option<Show>,
-
-    pub var_name: Option<String>,
-    pub value_label: Option<String>,
-}
-
-#[derive(Clone, Debug, Serialize)]
-pub struct VariableValue {
-    pub show: Option<Show>,
-    pub var_name: String,
-    pub variable_label: Option<String>,
-}
-
-#[derive(Clone, Debug)]
-pub struct TextValue {
-    pub user_provided: bool,
-    /// Localized.
-    pub localized: String,
-    /// English.
-    pub c: Option<String>,
-    /// Identifier.
-    pub id: Option<String>,
-}
-
-impl Serialize for TextValue {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        if self.user_provided && self.c.is_none() && self.id.is_none() {
-            serializer.serialize_str(&self.localized)
-        } else {
-            let mut s = serializer.serialize_struct(
-                "TextValue",
-                2 + self.c.is_some() as usize + self.id.is_some() as usize,
-            )?;
-            s.serialize_field("user_provided", &self.user_provided)?;
-            s.serialize_field("localized", &self.localized)?;
-            if let Some(c) = &self.c {
-                s.serialize_field("c", &c)?;
-            }
-            if let Some(id) = &self.id {
-                s.serialize_field("id", &id)?;
-            }
-            s.end()
-        }
-    }
-}
-
-impl TextValue {
-    pub fn localized(&self) -> &str {
-        self.localized.as_str()
-    }
-    pub fn c(&self) -> &str {
-        self.c.as_ref().unwrap_or(&self.localized).as_str()
-    }
-    pub fn id(&self) -> &str {
-        self.id.as_ref().unwrap_or(&self.localized).as_str()
-    }
-}
-
-#[derive(Clone, Debug, Serialize)]
-pub struct TemplateValue {
-    pub args: Vec<Vec<Value>>,
-    pub localized: String,
-    pub id: String,
-}
-
-#[derive(Clone, Debug, Default, Serialize)]
-#[serde(rename_all = "snake_case")]
-pub enum ValueInner {
-    Number(NumberValue),
-    String(StringValue),
-    Variable(VariableValue),
-    Text(TextValue),
-    Template(TemplateValue),
-
-    #[default]
-    Empty,
-}
-
-impl ValueInner {
-    pub const fn is_empty(&self) -> bool {
-        matches!(self, Self::Empty)
-    }
-    fn show(&self) -> Option<Show> {
-        match self {
-            ValueInner::Number(NumberValue { show, .. })
-            | ValueInner::String(StringValue { show, .. })
-            | ValueInner::Variable(VariableValue { show, .. }) => *show,
-            _ => None,
-        }
-    }
-
-    fn label(&self) -> Option<&str> {
-        self.value_label().or_else(|| self.variable_label())
-    }
-
-    fn value_label(&self) -> Option<&str> {
-        match self {
-            ValueInner::Number(NumberValue { value_label, .. })
-            | ValueInner::String(StringValue { value_label, .. }) => {
-                value_label.as_ref().map(String::as_str)
-            }
-            _ => None,
-        }
-    }
-
-    fn variable_label(&self) -> Option<&str> {
-        match self {
-            ValueInner::Variable(VariableValue { variable_label, .. }) => {
-                variable_label.as_ref().map(String::as_str)
-            }
-            _ => None,
-        }
-    }
-}
-
-#[derive(Clone, Debug, Default)]
-pub struct ValueStyle {
-    pub style: Option<AreaStyle>,
-    pub subscripts: Vec<String>,
-    pub footnotes: Vec<Arc<Footnote>>,
-}
-
-impl ValueStyle {
-    pub fn is_empty(&self) -> bool {
-        self.style.is_none() && self.subscripts.is_empty() && self.footnotes.is_empty()
-    }
-}
-
-impl ValueInner {
-    // Returns an object that will format this value.  Settings on `options`
-    // control whether variable and value labels are included.
-    pub fn display(&self, options: impl IntoValueOptions) -> DisplayValue<'_> {
-        let options = options.into_value_options();
-        let (show_value, show_label) = if let Some(value_label) = self.value_label() {
-            interpret_show(
-                || Settings::global().show_values,
-                options.show_values,
-                self.show(),
-                value_label,
-            )
-        } else if let Some(variable_label) = self.variable_label() {
-            interpret_show(
-                || Settings::global().show_variables,
-                options.show_variables,
-                self.show(),
-                variable_label,
-            )
-        } else {
-            (true, None)
-        };
-        DisplayValue {
-            inner: self,
-            markup: false,
-            subscripts: &[],
-            footnotes: &[],
-            options,
-            show_value,
-            show_label,
-        }
-    }
-}
-
-pub struct MetadataEntry {
-    pub name: Value,
-    pub value: MetadataValue,
-}
-
-pub enum MetadataValue {
-    Leaf(Value),
-    Group(Vec<MetadataEntry>),
-}
-
-impl MetadataEntry {
-    pub fn into_pivot_table(self) -> PivotTable {
-        let mut data = Vec::new();
-        let group = match self.visit(&mut data) {
-            Category::Group(group) => group,
-            Category::Leaf(leaf) => Group::new("Metadata").with(leaf).with_label_shown(),
-        };
-        PivotTable::new([(Axis3::Y, Dimension::new(group))]).with_data(
-            data.into_iter()
-                .enumerate()
-                .filter(|(_row, value)| !value.is_empty())
-                .map(|(row, value)| ([row], value)),
-        )
-    }
-    fn visit(self, data: &mut Vec<Value>) -> Category {
-        match self.value {
-            MetadataValue::Leaf(value) => {
-                data.push(value);
-                Leaf::new(self.name).into()
-            }
-            MetadataValue::Group(items) => Group::with_capacity(self.name, items.len())
-                .with_multiple(items.into_iter().map(|item| item.visit(data)))
-                .into(),
-        }
-    }
-}
-
-impl Serialize for MetadataValue {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        match self {
-            MetadataValue::Leaf(value) => value.serialize_bare(serializer),
-            MetadataValue::Group(items) => {
-                let mut map = serializer.serialize_map(Some(items.len()))?;
-                for item in items {
-                    let name = item.name.display(()).to_string();
-                    map.serialize_entry(&name, &item.value)?;
-                }
-                map.end()
-            }
-        }
-    }
-}
-impl Serialize for MetadataEntry {
-    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
-    where
-        S: serde::Serializer,
-    {
-        match &self.value {
-            MetadataValue::Leaf(value) => {
-                let mut map = serializer.serialize_map(Some(1))?;
-                let name = self.name.display(()).to_string();
-                map.serialize_entry(&name, &BareValue(value))?;
-                map.end()
-            }
-            MetadataValue::Group(items) => {
-                let mut map = serializer.serialize_map(Some(items.len()))?;
-                for item in items {
-                    let name = item.name.display(()).to_string();
-                    map.serialize_entry(&name, &item.value)?;
-                }
-                map.end()
-            }
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use crate::output::pivot::{Display26Adic, MetadataEntry, MetadataValue, Value};
-
-    #[test]
-    fn display_26adic() {
-        for (number, lowercase, uppercase) in [
-            (0, "", ""),
-            (1, "a", "A"),
-            (2, "b", "B"),
-            (26, "z", "Z"),
-            (27, "aa", "AA"),
-            (28, "ab", "AB"),
-            (29, "ac", "AC"),
-            (18278, "zzz", "ZZZ"),
-            (18279, "aaaa", "AAAA"),
-            (19010, "abcd", "ABCD"),
-        ] {
-            assert_eq!(Display26Adic::new_lowercase(number).to_string(), lowercase);
-            assert_eq!(Display26Adic::new_uppercase(number).to_string(), uppercase);
-        }
-    }
-
-    #[test]
-    fn metadata_entry() {
-        let tree = MetadataEntry {
-            name: Value::from("Group"),
-            value: MetadataValue::Group(vec![
-                MetadataEntry {
-                    name: Value::from("Name 1"),
-                    value: MetadataValue::Leaf(Value::from("Value 1")),
-                },
-                MetadataEntry {
-                    name: Value::from("Subgroup 1"),
-                    value: MetadataValue::Group(vec![
-                        MetadataEntry {
-                            name: Value::from("Subname 1"),
-                            value: MetadataValue::Leaf(Value::from("Subvalue 1")),
-                        },
-                        MetadataEntry {
-                            name: Value::from("Subname 2"),
-                            value: MetadataValue::Leaf(Value::from("Subvalue 2")),
-                        },
-                        MetadataEntry {
-                            name: Value::from("Subname 3"),
-                            value: MetadataValue::Leaf(Value::new_integer(Some(3.0))),
-                        },
-                    ]),
-                },
-                MetadataEntry {
-                    name: Value::from("Name 2"),
-                    value: MetadataValue::Leaf(Value::from("Value 2")),
-                },
-            ]),
-        };
-        assert_eq!(
-            serde_json::to_string_pretty(&tree).unwrap(),
-            r#"{
-  "Name 1": "Value 1",
-  "Subgroup 1": {
-    "Subname 1": "Subvalue 1",
-    "Subname 2": "Subvalue 2",
-    "Subname 3": 3
-  },
-  "Name 2": "Value 2"
-}"#
-        );
-
-        assert_eq!(
-            tree.into_pivot_table().to_string(),
-            r#"╭────────────────────┬──────────╮
-│           Name 1   │Value 1   │
-├────────────────────┼──────────┤
-│Subgroup 1 Subname 1│Subvalue 1│
-│           Subname 2│Subvalue 2│
-│           Subname 3│         3│
-├────────────────────┼──────────┤
-│           Name 2   │Value 2   │
-╰────────────────────┴──────────╯
-"#
-        );
-    }
-}
diff --git a/rust/pspp/src/sys.rs b/rust/pspp/src/sys.rs

new file mode 100644 (file)

index 0000000..4f59614
--- /dev/null
+++ b/rust/pspp/src/sys.rs
@@ -0,0 +1,55 @@
+// PSPP - a program for statistical analysis.
+// Copyright (C) 2025 Free Software Foundation, Inc.
+//
+// This program is free software: you can redistribute it and/or modify it under
+// the terms of the GNU General Public License as published by the Free Software
+// Foundation, either version 3 of the License, or (at your option) any later
+// version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+// details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program.  If not, see <http://www.gnu.org/licenses/>.
+
+//! Reading and writing system files.
+//!
+//! This module enables reading and writing "system files", the binary format
+//! for SPSS data files.  The system file format dates back 40+ years and has
+//! evolved greatly over that time to support new features, but in a way to
+//! facilitate interchange between even the oldest and newest versions of
+//! software.
+//!
+//! Use [ReadOptions] to read a system file in the simplest way.
+//! Use [WriteOptions] to write a system file.
+
+// Warn about missing docs, but not for items declared with `#[cfg(test)]`.
+#![cfg_attr(not(test), warn(missing_docs))]
+
+mod cooked;
+use binrw::Endian;
+pub use cooked::*;
+pub mod encoding;
+pub mod raw;
+
+#[cfg(test)]
+pub mod sack;
+
+mod write;
+use serde::Serializer;
+pub use write::{SystemFileVersion, WriteOptions, Writer};
+
+#[cfg(test)]
+mod test;
+
+fn serialize_endian<S>(endian: &Endian, serializer: S) -> Result<S::Ok, S::Error>
+where
+    S: Serializer,
+{
+    match endian {
+        Endian::Big => serializer.serialize_unit_variant("Endian", 0, "Big"),
+        Endian::Little => serializer.serialize_unit_variant("Endian", 1, "Little"),
+    }
+}
diff --git a/rust/pspp/src/sys/mod.rs b/rust/pspp/src/sys/mod.rs

deleted file mode 100644 (file)

index 4f59614..0000000
--- a/rust/pspp/src/sys/mod.rs
+++ /dev/null
@@ -1,55 +0,0 @@
-// PSPP - a program for statistical analysis.
-// Copyright (C) 2025 Free Software Foundation, Inc.
-//
-// This program is free software: you can redistribute it and/or modify it under
-// the terms of the GNU General Public License as published by the Free Software
-// Foundation, either version 3 of the License, or (at your option) any later
-// version.
-//
-// This program is distributed in the hope that it will be useful, but WITHOUT
-// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-// FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
-// details.
-//
-// You should have received a copy of the GNU General Public License along with
-// this program.  If not, see <http://www.gnu.org/licenses/>.
-
-//! Reading and writing system files.
-//!
-//! This module enables reading and writing "system files", the binary format
-//! for SPSS data files.  The system file format dates back 40+ years and has
-//! evolved greatly over that time to support new features, but in a way to
-//! facilitate interchange between even the oldest and newest versions of
-//! software.
-//!
-//! Use [ReadOptions] to read a system file in the simplest way.
-//! Use [WriteOptions] to write a system file.
-
-// Warn about missing docs, but not for items declared with `#[cfg(test)]`.
-#![cfg_attr(not(test), warn(missing_docs))]
-
-mod cooked;
-use binrw::Endian;
-pub use cooked::*;
-pub mod encoding;
-pub mod raw;
-
-#[cfg(test)]
-pub mod sack;
-
-mod write;
-use serde::Serializer;
-pub use write::{SystemFileVersion, WriteOptions, Writer};
-
-#[cfg(test)]
-mod test;
-
-fn serialize_endian<S>(endian: &Endian, serializer: S) -> Result<S::Ok, S::Error>
-where
-    S: Serializer,
-{
-    match endian {
-        Endian::Big => serializer.serialize_unit_variant("Endian", 0, "Big"),
-        Endian::Little => serializer.serialize_unit_variant("Endian", 1, "Little"),
-    }
-}
author	Ben Pfaff <blp@cs.stanford.edu>
	Wed, 3 Sep 2025 18:24:28 +0000 (11:24 -0700)
committer	Ben Pfaff <blp@cs.stanford.edu>
	Wed, 3 Sep 2025 18:24:28 +0000 (11:24 -0700)
rust/pspp/src/command.rs	[new file with mode: 0644]	patch \| blob
rust/pspp/src/command/mod.rs	[deleted file]	patch \| blob \| history
rust/pspp/src/crypto.rs	[new file with mode: 0644]	patch \| blob
rust/pspp/src/crypto/mod.rs	[deleted file]	patch \| blob \| history
rust/pspp/src/format.rs	[new file with mode: 0644]	patch \| blob
rust/pspp/src/format/display.rs	[new file with mode: 0644]	patch \| blob
rust/pspp/src/format/display/mod.rs	[deleted file]	patch \| blob \| history
rust/pspp/src/format/mod.rs	[deleted file]	patch \| blob \| history
rust/pspp/src/lex.rs	[new file with mode: 0644]	patch \| blob
rust/pspp/src/lex/mod.rs	[deleted file]	patch \| blob \| history
rust/pspp/src/lex/scan.rs	[new file with mode: 0644]	patch \| blob
rust/pspp/src/lex/scan/mod.rs	[deleted file]	patch \| blob \| history
rust/pspp/src/lex/segment.rs	[new file with mode: 0644]	patch \| blob
rust/pspp/src/lex/segment/mod.rs	[deleted file]	patch \| blob \| history
rust/pspp/src/output.rs	[new file with mode: 0644]	patch \| blob
rust/pspp/src/output/cairo.rs	[new file with mode: 0644]	patch \| blob
rust/pspp/src/output/cairo/mod.rs	[deleted file]	patch \| blob \| history
rust/pspp/src/output/mod.rs	[deleted file]	patch \| blob \| history
rust/pspp/src/output/pivot.rs	[new file with mode: 0644]	patch \| blob
rust/pspp/src/output/pivot/mod.rs	[deleted file]	patch \| blob \| history
rust/pspp/src/sys.rs	[new file with mode: 0644]	patch \| blob
rust/pspp/src/sys/mod.rs	[deleted file]	patch \| blob \| history