work
authorBen Pfaff <blp@cs.stanford.edu>
Tue, 9 Dec 2025 16:58:31 +0000 (08:58 -0800)
committerBen Pfaff <blp@cs.stanford.edu>
Tue, 9 Dec 2025 16:58:31 +0000 (08:58 -0800)
rust/doc/src/spv/light-detail.md
rust/pspp/src/format/display.rs
rust/pspp/src/output/drivers/csv.rs
rust/pspp/src/output/pivot.rs
rust/pspp/src/sys/cooked.rs

index bd335a5e6e501471cd7b3ec5570aed85e1ba61d5..0cb32535649dc79c02da7888b1106949eb43d365 100644 (file)
@@ -1029,7 +1029,7 @@ the first nonzero byte in the encoding.
   * `^I`  
     Expands to a formatted version of argument `I`, which must have
     only a single value.  For example, `^1` expands to the first
-    argument's `value`.
+    argument.
 
   * `[:A:]I`  
     Expands `A` for each of the values in `I`.  `A` should contain one
@@ -1044,11 +1044,11 @@ the first nonzero byte in the encoding.
       new-line.
 
     * `[:^1 = ^2:]2`  
-      Expands to `X = Y` where X is the second argument's first alue
-      and Y is its second value.  (This would be used only if the
-      argument has two values.  If there were more values, the second
-      and third values would be directly concatenated, which would
-      look funny.)
+      Expands to `X = Y` where `X` is the second argument's first
+      value and `Y` is its second value.  (This would be used only if
+      the argument has two values.  If there were more values, the
+      second and third values would be directly concatenated, which
+      would look funny.)
 
   * `[A:B:]I`  
     This extends the previous form so that the first values are
index 5c619e1705d7ba03a9c50484c2d60ecd71b5aaba..c101ce74645a4329432359f10288e95fb94acca1 100644 (file)
@@ -43,10 +43,25 @@ pub struct DisplayDatum<'b, B> {
     endian: EndianSettings,
     datum: Datum<B>,
 
-    /// If true, the output will remove leading and trailing spaces from numeric
-    /// values, and trailing spaces from string values.  (This might make the
-    /// output narrower than the requested width.)
-    trim_spaces: bool,
+    /// If false, the output will omit leading spaces, except for string
+    /// values.
+    ///
+    /// Omitting leading spaces also causes the overflow indication to be
+    /// output as just `*` instead of enough to fill the output width.
+    ///
+    /// Omitting leading spaces can make the output narrower than the requested
+    /// width.
+    leading_spaces: bool,
+
+    /// If false, the output will omit trailing spaces.  For numeric values,
+    /// in practice this only affects output of missing values.
+    ///
+    /// Omitting trailing spaces also causes the overflow indication to be
+    /// output as just `*` instead of enough to fill the output width.
+    ///
+    /// Omitting trailing spaces can make the output narrower than the requested
+    /// width.
+    trailing_spaces: bool,
 
     /// If true, the output will include a double quote before and after string
     /// values.
@@ -61,7 +76,8 @@ impl<'b, B> DisplayDatum<'b, B> {
         match self.format.type_.category() {
             Category::Basic | Category::Custom => Self {
                 format: self.format.with_max_width(),
-                trim_spaces: true,
+                leading_spaces: false,
+                trailing_spaces: false,
                 ..self
             },
             _ => self,
@@ -165,7 +181,7 @@ where
                 } else {
                     let quote = if self.quote_strings { "\"" } else { "" };
                     let s = string.as_str();
-                    let s = if self.trim_spaces {
+                    let s = if !self.trailing_spaces {
                         s.trim_end_matches(' ')
                     } else {
                         &s
@@ -226,7 +242,8 @@ where
             datum,
             settings: &settings.formats,
             endian: settings.endian,
-            trim_spaces: false,
+            leading_spaces: true,
+            trailing_spaces: true,
             quote_strings: false,
         }
     }
@@ -236,9 +253,22 @@ where
     pub fn with_endian(self, endian: EndianSettings) -> Self {
         Self { endian, ..self }
     }
-    pub fn with_trimming(self) -> Self {
+    pub fn without_spaces(self) -> Self {
+        Self {
+            leading_spaces: false,
+            trailing_spaces: false,
+            ..self
+        }
+    }
+    pub fn without_leading_spaces(self) -> Self {
+        Self {
+            leading_spaces: false,
+            ..self
+        }
+    }
+    pub fn without_trailing_spaces(self) -> Self {
         Self {
-            trim_spaces: true,
+            trailing_spaces: false,
             ..self
         }
     }
@@ -290,7 +320,12 @@ where
             } else {
                 "Unknown"
             };
-            let w = if self.trim_spaces { 0 } else { self.format.w() };
+            // XXX does this width trick really work?
+            let w = if self.leading_spaces {
+                self.format.w()
+            } else {
+                0
+            };
             write!(f, "{s:>w$.w$}")
         } else {
             self.overflow(f)
@@ -304,10 +339,6 @@ where
             _ => (),
         }
 
-        if self.trim_spaces {
-            return write!(f, ".");
-        }
-
         let w = self.format.w() as isize;
         let d = self.format.d() as isize;
         let dot_position = match self.format.type_ {
@@ -318,22 +349,26 @@ where
         };
         let dot_position = dot_position.max(0) as u16;
 
-        for i in 0..self.format.w {
-            if i == dot_position {
-                write!(f, ".")?;
-            } else {
-                write!(f, " ")?;
+        if self.leading_spaces {
+            for _ in 0..dot_position {
+                f.write_char(' ')?;
+            }
+        }
+        f.write_char('.')?;
+        if self.trailing_spaces {
+            for _ in dot_position + 1..self.format.w {
+                f.write_char(' ')?;
             }
         }
         Ok(())
     }
 
     fn overflow(&self, f: &mut Formatter<'_>) -> FmtResult {
-        if self.trim_spaces {
-            write!(f, "*")?;
+        if !self.leading_spaces || !self.trailing_spaces {
+            f.write_char('*')?;
         } else {
             for _ in 0..self.format.w {
-                write!(f, "*")?;
+                f.write_char('*')?;
             }
         }
         Ok(())
@@ -383,7 +418,7 @@ where
             // Assemble number.
             let magnitude = rounder.format(decimals as usize);
             let mut output = SmallString::<[u8; 40]>::new();
-            if !self.trim_spaces {
+            if self.leading_spaces {
                 for _ in width..self.format.w() {
                     output.push(' ');
                 }
@@ -420,7 +455,7 @@ where
                 }
             }
 
-            debug_assert!(self.trim_spaces || output.len() >= self.format.w());
+            debug_assert!(!self.leading_spaces || output.len() >= self.format.w());
             debug_assert!(output.len() <= self.format.w() + style.extra_bytes);
             f.write_str(&output)?;
             return Ok(true);
@@ -460,7 +495,7 @@ where
         width += fraction_width;
 
         let mut output = SmallString::<[u8; 40]>::new();
-        if !self.trim_spaces {
+        if self.leading_spaces {
             for _ in width..self.format.w() {
                 output.push(' ');
             }
@@ -666,7 +701,7 @@ where
                 _ => unreachable!(),
             }
         }
-        if !self.trim_spaces {
+        if self.leading_spaces {
             write!(f, "{:>1$}", &output, self.format.w())
         } else {
             f.write_str(&output)
@@ -675,7 +710,7 @@ where
 
     fn month(&self, f: &mut Formatter<'_>, number: f64) -> FmtResult {
         if let Some(month) = month_name(number as u32) {
-            if !self.trim_spaces {
+            if self.leading_spaces {
                 write!(f, "{month:.*}", self.format.w())
             } else {
                 f.write_str(month)
index d27a424ed2b75aade4e794dfe038097bae48ff43..749e50535a52ea462eac16854f5d611b289f0389 100644 (file)
@@ -119,7 +119,7 @@ impl CsvOptions {
                 self.field(
                     &datum
                         .display(variable.print_format)
-                        .with_trimming()
+                        .without_spaces()
                         .to_small_string::<64>(),
                 )
             )
index 95a8e80c8dea065c679c032c35a4f7b9acd3b016..448e95f92c20ae7d71bc9763828bac9a0dd54b07 100644 (file)
@@ -44,7 +44,7 @@
 
 use std::{
     collections::HashMap,
-    fmt::{Debug, Display, Write},
+    fmt::{Debug, Display},
     io::Read,
     iter::{FusedIterator, once, repeat, repeat_n},
     ops::{Index, IndexMut, Not, Range, RangeInclusive},
@@ -66,7 +66,6 @@ use serde::{
     de::Visitor,
     ser::{SerializeMap, SerializeStruct},
 };
-use smallstr::SmallString;
 use smallvec::SmallVec;
 use thiserror::Error as ThisError;
 pub use tlo::parse_bool;
@@ -2100,19 +2099,6 @@ impl CellIndex for PrecomputedIndex {
     }
 }
 
-fn cell_index<I>(data_indexes: &[usize], dimensions: I) -> usize
-where
-    I: ExactSizeIterator<Item = usize>,
-{
-    debug_assert_eq!(data_indexes.len(), dimensions.len());
-    let mut index = 0;
-    for (dimension, data_index) in dimensions.zip(data_indexes.iter()) {
-        debug_assert!(*data_index < dimension);
-        index = dimension * index + data_index;
-    }
-    index
-}
-
 impl PivotTable {
     pub fn new(axes_and_dimensions: impl IntoIterator<Item = (Axis3, Dimension)>) -> Self {
         let mut dimensions = Vec::new();
@@ -2768,161 +2754,12 @@ impl<'a> DisplayValue<'a> {
             _ => VarType::String,
         }
     }
-
-    fn template(
-        &self,
-        f: &mut std::fmt::Formatter<'_>,
-        template: &str,
-        args: &[Vec<Value>],
-    ) -> std::fmt::Result {
-        let mut iter = template.as_bytes().iter();
-        while let Some(c) = iter.next() {
-            match c {
-                b'\\' => {
-                    let c = *iter.next().unwrap_or(&b'\\') as char;
-                    let c = if c == 'n' { '\n' } else { c };
-                    write!(f, "{c}")?;
-                }
-                b'^' => {
-                    let (index, rest) = consume_int(iter.as_slice());
-                    iter = rest.iter();
-                    let Some(arg) = args.get(index.wrapping_sub(1)) else {
-                        continue;
-                    };
-                    if let Some(arg) = arg.first() {
-                        write!(f, "{}", arg.display(self.options))?;
-                    }
-                }
-                b'[' => {
-                    let (a, rest) = extract_inner_template(iter.as_slice());
-                    let (b, rest) = extract_inner_template(rest);
-                    let rest = rest.strip_prefix(b"]").unwrap_or(rest);
-                    let (index, rest) = consume_int(rest);
-                    iter = rest.iter();
-
-                    let Some(mut args) = args.get(index.wrapping_sub(1)).map(|vec| vec.as_slice())
-                    else {
-                        continue;
-                    };
-                    let (mut template, mut escape) =
-                        if !a.is_empty() { (a, b'%') } else { (b, b'^') };
-                    while !args.is_empty() {
-                        let n_consumed = self.inner_template(f, template, escape, args)?;
-                        if n_consumed == 0 {
-                            break;
-                        }
-                        args = &args[n_consumed..];
-
-                        template = b;
-                        escape = b'^';
-                    }
-                }
-                c => write!(f, "{c}")?,
-            }
-        }
-        Ok(())
-    }
-
-    fn inner_template(
-        &self,
-        f: &mut std::fmt::Formatter<'_>,
-        template: &[u8],
-        escape: u8,
-        args: &[Value],
-    ) -> Result<usize, std::fmt::Error> {
-        let mut iter = template.iter();
-        let mut args_consumed = 0;
-        while let Some(c) = iter.next() {
-            match c {
-                b'\\' => {
-                    let c = *iter.next().unwrap_or(&b'\\') as char;
-                    let c = if c == 'n' { '\n' } else { c };
-                    write!(f, "{c}")?;
-                }
-                c if *c == escape => {
-                    let (index, rest) = consume_int(iter.as_slice());
-                    iter = rest.iter();
-                    let Some(arg) = args.get(index.wrapping_sub(1)) else {
-                        continue;
-                    };
-                    args_consumed = args_consumed.max(index);
-                    write!(f, "{}", arg.display(self.options))?;
-                }
-                c => write!(f, "{c}")?,
-            }
-        }
-        Ok(args_consumed)
-    }
-}
-
-fn consume_int(input: &[u8]) -> (usize, &[u8]) {
-    let mut n = 0;
-    for (index, c) in input.iter().enumerate() {
-        if !c.is_ascii_digit() {
-            return (n, &input[index..]);
-        }
-        n = n * 10 + (c - b'0') as usize;
-    }
-    (n, &[])
-}
-
-fn extract_inner_template(input: &[u8]) -> (&[u8], &[u8]) {
-    for (index, c) in input.iter().copied().enumerate() {
-        if c == b':' && (index == 0 || input[index - 1] != b'\\') {
-            return input.split_at(index);
-        }
-    }
-    (input, &[])
-}
-
-fn interpret_show(
-    global_show: impl Fn() -> Show,
-    table_show: Option<Show>,
-    value_show: Option<Show>,
-    label: &str,
-) -> (bool, Option<&str>) {
-    match value_show.or(table_show).unwrap_or_else(global_show) {
-        Show::Value => (true, None),
-        Show::Label => (false, Some(label)),
-        Show::Both => (true, Some(label)),
-    }
 }
 
 impl Display for DisplayValue<'_> {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self.inner {
-            ValueInner::Number(NumberValue {
-                format,
-                honor_small,
-                value,
-                ..
-            }) => {
-                if self.show_value {
-                    let format = if format.type_() == Type::F
-                        && *honor_small
-                        && value.is_some_and(|value| value != 0.0 && value.abs() < self.small())
-                    {
-                        UncheckedFormat::new(Type::E, 40, format.d() as u8).fix()
-                    } else {
-                        *format
-                    };
-                    let mut buf = SmallString::<[u8; 40]>::new();
-                    write!(
-                        &mut buf,
-                        "{}",
-                        Datum::<&str>::Number(*value).display(format)
-                    )
-                    .unwrap();
-                    write!(f, "{}", buf.trim_start_matches(' '))?;
-                }
-                if let Some(label) = self.show_label {
-                    if self.show_value {
-                        write!(f, " ")?;
-                    }
-                    f.write_str(label)?;
-                }
-                Ok(())
-            }
+            ValueInner::Number(number_value) => number_value.display(self, f),
 
             ValueInner::String(StringValue { s, .. })
             | ValueInner::Variable(VariableValue { var_name: s, .. }) => {
@@ -2940,11 +2777,7 @@ impl Display for DisplayValue<'_> {
                 localized: local, ..
             }) => f.write_str(local),
 
-            ValueInner::Template(TemplateValue {
-                args,
-                localized: local,
-                ..
-            }) => self.template(f, local, args),
+            ValueInner::Template(template_value) => template_value.display(self, f),
 
             ValueInner::Empty => Ok(()),
         }?;
@@ -3036,6 +2869,36 @@ impl Serialize for NumberValue {
 }
 
 impl NumberValue {
+    pub fn display<'a>(
+        &self,
+        display: &DisplayValue<'a>,
+        f: &mut std::fmt::Formatter<'_>,
+    ) -> std::fmt::Result {
+        if display.show_value {
+            let format = if self.format.type_() == Type::F
+                && self.honor_small
+                && let Some(value) = self.value
+                && value != 0.0
+                && value.abs() < display.small()
+            {
+                UncheckedFormat::new(Type::E, 40, self.format.d() as u8).fix()
+            } else {
+                self.format
+            };
+            Datum::<&str>::Number(self.value)
+                .display(format)
+                .without_leading_spaces()
+                .fmt(f)?;
+        }
+        if let Some(label) = display.show_label {
+            if display.show_value {
+                write!(f, " ")?;
+            }
+            f.write_str(label)?;
+        }
+        Ok(())
+    }
+
     pub fn serialize_bare<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
     where
         S: Serializer,
@@ -3136,6 +2999,119 @@ pub struct TemplateValue {
     pub id: Option<String>,
 }
 
+impl TemplateValue {
+    fn display<'a>(
+        &self,
+        display: &DisplayValue<'a>,
+        f: &mut std::fmt::Formatter<'_>,
+    ) -> std::fmt::Result {
+        fn extract_inner_template(input: &str) -> (&str, &str) {
+            let mut prev = None;
+            for (index, c) in input.char_indices() {
+                if c == ':' && prev != Some('\\') {
+                    return (&input[..index], &input[index + 1..]);
+                }
+                prev = Some(c);
+            }
+            (input, "")
+        }
+
+        let mut iter = self.localized.chars();
+        while let Some(c) = iter.next() {
+            match c {
+                '\\' => {
+                    let c = iter.next().unwrap_or('\\') as char;
+                    let c = if c == 'n' { '\n' } else { c };
+                    write!(f, "{c}")?;
+                }
+                '^' => {
+                    let (index, rest) = Self::consume_int(iter.as_str());
+                    iter = rest.chars();
+                    let Some(arg) = self.args.get(index.wrapping_sub(1)) else {
+                        continue;
+                    };
+                    if let Some(arg) = arg.first() {
+                        write!(f, "{}", arg.display(display.options))?;
+                    }
+                }
+                '[' => {
+                    let (a, rest) = extract_inner_template(iter.as_str());
+                    let (b, rest) = extract_inner_template(rest);
+                    let rest = rest.strip_prefix("]").unwrap_or(rest);
+                    let (index, rest) = Self::consume_int(rest);
+                    iter = rest.chars();
+
+                    dbg!((a, b, index));
+                    let Some(mut args) = self
+                        .args
+                        .get(index.wrapping_sub(1))
+                        .map(|vec| vec.as_slice())
+                    else {
+                        continue;
+                    };
+                    let (mut template, mut escape) =
+                        if !a.is_empty() { (a, '%') } else { (b, '^') };
+                    while !args.is_empty() {
+                        let n_consumed = self.inner_template(display, f, template, escape, args)?;
+                        if n_consumed == 0 {
+                            break;
+                        }
+                        args = &args[n_consumed..];
+
+                        template = b;
+                        escape = '^';
+                    }
+                }
+                c => write!(f, "{c}")?,
+            }
+        }
+        Ok(())
+    }
+
+    fn inner_template<'a>(
+        &self,
+        display: &DisplayValue<'a>,
+        f: &mut std::fmt::Formatter<'_>,
+        template: &str,
+        escape: char,
+        args: &[Value],
+    ) -> Result<usize, std::fmt::Error> {
+        let mut iter = template.chars();
+        let mut args_consumed = 0;
+        while let Some(c) = iter.next() {
+            match c {
+                '\\' => {
+                    let c = iter.next().unwrap_or('\\') as char;
+                    let c = if c == 'n' { '\n' } else { c };
+                    write!(f, "{c}")?;
+                }
+                c if c == escape => {
+                    let (index, rest) = Self::consume_int(iter.as_str());
+                    iter = rest.chars();
+                    let Some(arg) = args.get(index.wrapping_sub(1)) else {
+                        continue;
+                    };
+                    args_consumed = args_consumed.max(index);
+                    write!(f, "{}", arg.display(display.options))?;
+                }
+                c => write!(f, "{c}")?,
+            }
+        }
+        Ok(args_consumed)
+    }
+
+    fn consume_int(input: &str) -> (usize, &str) {
+        let mut n = 0;
+        for (index, c) in input.char_indices() {
+            match c.to_digit(10) {
+                Some(digit) => n = n * 10 + digit as usize,
+                None => return (n, &input[index..]),
+            }
+        }
+        (n, "")
+    }
+}
+
 #[derive(Clone, Debug, Default, Serialize, PartialEq)]
 #[serde(rename_all = "snake_case")]
 pub enum ValueInner {
@@ -3215,6 +3191,19 @@ impl ValueInner {
     // Returns an object that will format this value.  Settings on `options`
     // control whether variable and value labels are included.
     pub fn display(&self, options: impl IntoValueOptions) -> DisplayValue<'_> {
+        fn interpret_show(
+            global_show: impl Fn() -> Show,
+            table_show: Option<Show>,
+            value_show: Option<Show>,
+            label: &str,
+        ) -> (bool, Option<&str>) {
+            match value_show.or(table_show).unwrap_or_else(global_show) {
+                Show::Value => (true, None),
+                Show::Label => (false, Some(label)),
+                Show::Both => (true, Some(label)),
+            }
+        }
+
         let options = options.into_value_options();
         let (show_value, show_label) = if let Some(value_label) = self.value_label() {
             interpret_show(
index 6b09440ac8a9f8e9852abb4b5ce41e22f248e10d..dfe40a1ce6f2d16debf5b55c5baab02f2d040129 100644 (file)
@@ -1046,7 +1046,7 @@ impl Records {
                                     .decode(variable.width)
                                     .as_encoded(variable.encoding())
                                     .display(variable.print_format)
-                                    .with_trimming()
+                                    .without_spaces()
                                     .with_quoted_string()
                                     .to_string()
                             })