--- /dev/null
+use chardetng::EncodingDetector;
+use encoding_rs::{Decoder, Encoding};
+use std::io::{BufRead, Read, Result};
+
+/// Wraps a byte reader and works out the text encoding of its content.
+///
+/// Bytes from `inner` are staged in `buffer` and shown to a `chardetng`
+/// detector; `state` records whether the encoding is still undecided or a
+/// decoder has already been chosen.
+///
+/// The `R: Read` requirement is stated on the `impl` blocks rather than on the
+/// type itself, so code that merely names or stores an `Autodecode<R>` does
+/// not have to repeat the bound.
+struct Autodecode<R> {
+    /// Underlying source of raw bytes.
+    inner: R,
+    /// Fixed-size staging area for bytes read from `inner`.
+    buffer: Box<[u8]>,
+    /// Detection progress: still guessing, or decoding with a known encoding.
+    state: State,
+}
+
+/// Detection progress of an `Autodecode` reader.
+enum State {
+ /// Stream encoding is not yet known.
+ ///
+ /// The three offsets index into the owning reader's `buffer`; `fill_buf`
+ /// hands out `buffer[back..ascii]` while in this state.
+ Auto {
+ /// Detector that is fed the bytes as they are read into the buffer.
+ detector: EncodingDetector,
+ /// Start of the bytes that `fill_buf` hands out next.
+ // NOTE(review): presumably advanced by `consume`, which is still a
+ // `todo!()` -- confirm once it is implemented.
+ back: usize,
+ /// End of the valid data in the buffer (number of bytes read into it).
+ front: usize,
+ /// Value returned by `feed`: the offset of the first non-ASCII byte
+ /// once the detector reports non-ASCII input, otherwise the number of
+ /// bytes fed.
+ ascii: usize,
+ },
+
+ /// Stream encoding is known.
+ Decode(Decoder),
+}
+
+/// Reads from `reader` until `buffer` is full or the reader reports end of
+/// input.
+///
+/// A single `Read::read` call may return fewer bytes than requested, so this
+/// keeps calling it until the buffer is full. A return value smaller than
+/// `buffer.len()` therefore means that end of input was reached.
+///
+/// Reads that fail with `std::io::ErrorKind::Interrupted` are retried, as the
+/// `Read` contract recommends; any other error is returned to the caller.
+///
+/// Returns the number of bytes stored at the start of `buffer`.
+fn read_fully<R>(reader: &mut R, buffer: &mut [u8]) -> Result<usize>
+where
+    R: Read,
+{
+    let mut len = 0;
+    while len < buffer.len() {
+        match reader.read(&mut buffer[len..]) {
+            // Zero bytes with a non-empty destination means end of input.
+            Ok(0) => break,
+            Ok(n) => len += n,
+            // An interrupted read transferred nothing and is safe to retry.
+            Err(error) if error.kind() == std::io::ErrorKind::Interrupted => {}
+            Err(error) => return Err(error),
+        }
+    }
+    Ok(len)
+}
+
+impl<R> Autodecode<R>
+where
+    R: Read,
+{
+    /// Creates a reader around `inner` with an 8192-byte buffer.
+    fn new(inner: R) -> Result<Self> {
+        Self::with_capacity(8192, inner)
+    }
+
+    /// Creates a reader around `inner` with a buffer of `capacity` bytes and
+    /// immediately reads the first buffer-full from it.
+    ///
+    /// If the buffer is filled completely, the reader starts out in
+    /// `State::Auto` and detection continues on later reads. If the input
+    /// ends before the buffer is full, everything has been seen, so the
+    /// encoding is guessed right away and the reader starts in
+    /// `State::Decode`.
+    ///
+    /// Returns any I/O error raised by that initial read.
+    fn with_capacity(capacity: usize, mut inner: R) -> Result<Self> {
+        let mut buffer = vec![0u8; capacity].into_boxed_slice();
+        let len = read_fully(&mut inner, &mut buffer[..])?;
+
+        let mut detector = EncodingDetector::new();
+        let state = if len >= buffer.len() {
+            // Full buffer: more input may follow, so keep detecting.
+            let ascii = feed(&mut detector, &buffer[..len], false);
+            State::Auto {
+                detector,
+                back: 0,
+                front: len,
+                ascii,
+            }
+        } else {
+            // Short read: this is the whole input, so decide now.
+            detector.feed(&buffer[..len], true);
+            let encoding = detector.guess(None, true);
+            State::Decode(encoding.new_decoder_with_bom_removal())
+        };
+
+        Ok(Self {
+            inner,
+            buffer,
+            state,
+        })
+    }
+}
+
+impl<R> Read for Autodecode<R>
+where
+    R: Read,
+{
+    /// Copies as many currently buffered bytes as fit into `outbuf`, marks
+    /// them as consumed and returns how many were copied.
+    fn read(&mut self, outbuf: &mut [u8]) -> Result<usize> {
+        // Reading from the slice returned by `fill_buf` copies
+        // `min(available, outbuf.len())` bytes.
+        let copied = self.fill_buf()?.read(outbuf)?;
+        self.consume(copied);
+        Ok(copied)
+    }
+}
+
+impl<R> BufRead for Autodecode<R>
+where
+ R: Read,
+{
+ /// Returns the buffered bytes that can currently be handed to the caller.
+ ///
+ /// While the encoding is undecided (`State::Auto`), only the ASCII bytes
+ /// in `buffer[back..ascii]` are exposed. When that run is used up, either
+ /// a fresh buffer is read (no non-ASCII byte seen yet) or the buffer is
+ /// topped up starting at the non-ASCII byte and the detector's guess
+ /// selects a decoder, switching to `State::Decode`.
+ ///
+ /// Returns any I/O error raised while refilling the buffer.
+ // NOTE(review): `State::Decode` is still a `todo!()`, so every path that
+ // switches to it and then re-enters `fill_buf` currently panics.
+ fn fill_buf(&mut self) -> Result<&[u8]> {
+ match &mut self.state {
+ State::Auto {
+ detector,
+ back,
+ front,
+ ascii,
+ } => {
+ if back < ascii {
+ // Hand out the data up to the first non-ASCII byte.
+ Ok(&self.buffer[*back..*ascii])
+ } else if ascii < front {
+ // We had a non-ASCII byte and we consumed everything up to
+ // it. We want to get a full buffer starting at the
+ // non-ASCII byte before we decide on the encoding.
+ debug_assert_eq!(ascii, back);
+
+ // Shift buffered data to the beginning of the buffer to
+ // make room to get a full buffer.
+ self.buffer.copy_within(*back..*front, 0);
+ *front -= *back;
+ *back = 0;
+ *ascii = 0;
+
+ // Fill up the remainder of the buffer.
+ let old_front = *front;
+ *front += read_fully(&mut self.inner, &mut self.buffer[*front..])?;
+ // Only the newly read bytes are fed: everything before
+ // `old_front` was given to the detector when it was first
+ // read. A buffer that could not be filled means end of
+ // input, which is passed as the "last chunk" flag.
+ detector.feed(&self.buffer[old_front..*front], *front < self.buffer.len());
+ self.state = State::Decode(
+ detector.guess(None, true).new_decoder_with_bom_removal(),
+ );
+ self.fill_buf()
+ } else {
+ // We have not had a non-ASCII byte yet but we consumed the
+ // whole buffer. Read a new one.
+ *back = 0;
+ *front = 0;
+ *ascii = 0;
+ *front += read_fully(&mut self.inner, &mut self.buffer[*front..])?;
+ let eof = *front < self.buffer.len();
+ *ascii = feed(detector, &self.buffer[..*front], eof);
+ // Decide now if the input has ended, or if the fresh
+ // buffer has no leading ASCII bytes to hand out.
+ if eof || *ascii == 0 {
+ self.state = State::Decode(
+ detector.guess(None, true).new_decoder_with_bom_removal(),
+ );
+ self.fill_buf()
+ } else {
+ Ok(&self.buffer[..*ascii])
+ }
+ }
+ }
+ // NOTE(review): decoding with the chosen encoding is not
+ // implemented yet.
+ State::Decode(_) => todo!(),
+ }
+ }
+
+ /// Marks `n` bytes of the slice returned by `fill_buf` as consumed.
+ // NOTE(review): not implemented yet; `Read::read` above calls this, so
+ // reading currently panics. Presumably this should advance `back` by `n`
+ // while in `State::Auto` -- confirm.
+ fn consume(&mut self, n: usize) {
+ todo!()
+ }
+}
+
+/// Feeds `buffer` to `detector` and returns the length of the prefix of
+/// `buffer` that is known to be ASCII.
+///
+/// `last` is forwarded to the detector to mark the final chunk of input. If
+/// the detector reports non-ASCII input, the result is the offset of the
+/// first non-ASCII byte in `buffer`; otherwise it is `buffer.len()`.
+fn feed(detector: &mut EncodingDetector, buffer: &[u8], last: bool) -> usize {
+    let saw_non_ascii = detector.feed(buffer, last);
+    if !saw_non_ascii {
+        return buffer.len();
+    }
+    Encoding::ascii_valid_up_to(buffer)
+}
+/*
+ } else {
+ debug_assert_eq!(ascii, back);
+ debug_assert_eq!(back, front);
+ *back = 0;
+ *front = 0;
+ *ascii = 0;
+ *front += read_fully(&mut self.inner, &mut self.buffer[..])?;
+ *ascii = feed(detector, &self.buffer[..*front], *front < self.buffer.len());
+ Ok(&self.buffer[*back..*ascii])
+ }
+*/
--- /dev/null
+use std::io::Read;
+
+use encoding_rs::Encoding;
+
+use crate::prompt::PromptStyle;
+
+use super::segment::Mode;
+
+/// Error handling for a [`Reader`].
+///
+/// Selects how processing proceeds after an error in the input.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ErrorHandling {
+    /// Discard input line and continue reading.
+    Terminal,
+
+    /// Continue to next command, except for cascading failures.
+    Continue,
+
+    /// Continue, even for cascading failures.
+    Ignore,
+
+    /// Stop processing.
+    Stop,
+}
+
+/// Reads a single syntax file as a stream of bytes encoded in UTF-8.
+pub struct Reader {
+    /// Segmentation mode.
+    mode: Mode,
+
+    /// Error-handling mode.
+    error_handling: ErrorHandling,
+
+    /// Encoding (although the reader must always produce UTF-8).
+    encoding: &'static Encoding,
+
+    /// `None` if this reader is not associated with a file.
+    file_name: Option<String>,
+
+    /// Zero if there's no line number.
+    line_number: u32,
+
+    /// True if we've reached EOF already.
+    eof: bool,
+
+    /// Reads UTF-8 bytes.
+    ///
+    /// Boxed so that `Reader` stays a sized type: with a bare `dyn LexRead`
+    /// field the struct would be dynamically sized and could not be built
+    /// with a struct expression or held by value.
+    reader: Box<dyn LexRead>,
+}
+
+/// A [`Read`] source that can also be told which prompt style suits its next
+/// read.
+///
+/// `set_prompt_style` has a default implementation that does nothing, so
+/// implementors only override it when they display a prompt.
+pub trait LexRead: Read {
+ /// Tells the reader what kind of prompt is appropriate for the next
+ /// read. Non-interactive readers can ignore this.
+ fn set_prompt_style(&mut self, _prompt: PromptStyle) {}
+}