experimental

This commit is contained in:
Zacharias-Brohn
2025-12-18 18:25:28 +01:00
parent 05e94ac655
commit d47ee0d1ef
9 changed files with 5025 additions and 1982 deletions
+127 -73
View File
@@ -9,12 +9,14 @@
//! 2. Pass decoded codepoints to the text handler, not raw bytes
//! 3. Control characters (LF, CR, TAB, BS, etc.) are handled inline in text drawing
//! 4. Only ESC triggers state machine transitions
//! 5. Use SIMD-accelerated byte search for finding escape sequence terminators
/// Maximum number of CSI parameters.
pub const MAX_CSI_PARAMS: usize = 256;
/// Maximum length of an OSC string.
const MAX_OSC_LEN: usize = 4096;
/// Maximum length of an OSC string (same as escape length - no separate limit needed).
/// Kitty doesn't have a separate OSC limit, just the overall escape sequence limit.
const MAX_OSC_LEN: usize = 262144; // 256KB, same as MAX_ESCAPE_LEN
/// Maximum length of an escape sequence before we give up.
const MAX_ESCAPE_LEN: usize = 262144; // 256KB like Kitty
@@ -103,10 +105,11 @@ impl Utf8Decoder {
let prev_state = self.state;
match decode_utf8(&mut self.state, &mut self.codep, byte) {
UTF8_ACCEPT => {
// Safe because we control the codepoint values from valid UTF-8
if let Some(c) = char::from_u32(self.codep) {
output.push(c);
}
// SAFETY: The DFA decoder guarantees valid Unicode codepoints when
// state is ACCEPT. This is the same guarantee that Kitty relies on.
// Using unchecked avoids a redundant validity check in the hot path.
let c = unsafe { char::from_u32_unchecked(self.codep) };
output.push(c);
}
UTF8_REJECT => {
// Invalid UTF-8 sequence
@@ -211,9 +214,13 @@ impl Default for CsiParams {
impl CsiParams {
/// Reset for a new CSI sequence.
/// Note: We don't zero the params/is_sub_param arrays since they're written before being read.
/// This avoids zeroing 1280 bytes on every CSI sequence.
#[inline]
pub fn reset(&mut self) {
self.params = [0; MAX_CSI_PARAMS];
self.is_sub_param = [false; MAX_CSI_PARAMS];
// Don't zero arrays - individual elements are written before being read
// self.params = [0; MAX_CSI_PARAMS]; // Skip - saves 1024 bytes memset
// self.is_sub_param = [false; MAX_CSI_PARAMS]; // Skip - saves 256 bytes memset
self.num_params = 0;
self.primary = 0;
self.secondary = 0;
@@ -672,96 +679,103 @@ impl Parser {
handler.csi(&self.csi);
}
/// Process OSC sequence bytes.
/// Process OSC sequence bytes using SIMD-accelerated terminator search.
/// Like Kitty's find_st_terminator + accumulate_st_terminated_esc_code.
fn consume_osc<H: Handler>(&mut self, bytes: &[u8], pos: usize, handler: &mut H) -> usize {
let mut consumed = 0;
let remaining = &bytes[pos..];
while pos + consumed < bytes.len() {
let ch = bytes[pos + consumed];
consumed += 1;
self.escape_len += 1;
// Use SIMD-accelerated search to find BEL (0x07), ESC (0x1B), or C1 ST (0x9C)
// memchr2 finds either of two bytes; we check ESC specially for ESC \ sequence
// First, try to find BEL or C1 ST (the simple terminators)
if let Some(term_pos) = memchr::memchr3(0x07, 0x1B, 0x9C, remaining) {
let terminator = remaining[term_pos];
// Check for max length
if self.escape_len > MAX_ESCAPE_LEN || self.osc_buffer.len() > MAX_OSC_LEN {
// Check max length before accepting
if self.escape_len + term_pos > MAX_ESCAPE_LEN || self.osc_buffer.len() + term_pos > MAX_OSC_LEN {
log::debug!("OSC sequence too long, aborting");
self.state = State::Normal;
return consumed;
return remaining.len();
}
match ch {
// BEL terminates OSC
match terminator {
0x07 => {
// BEL terminator - copy data in bulk and dispatch
self.osc_buffer.extend_from_slice(&remaining[..term_pos]);
handler.osc(&self.osc_buffer);
self.state = State::Normal;
return consumed;
self.escape_len += term_pos + 1;
return term_pos + 1;
}
0x9C => {
// C1 ST terminator - copy data in bulk and dispatch
self.osc_buffer.extend_from_slice(&remaining[..term_pos]);
handler.osc(&self.osc_buffer);
self.state = State::Normal;
self.escape_len += term_pos + 1;
return term_pos + 1;
}
// ESC \ (ST) terminates OSC
0x1B => {
// Need to peek at next byte
if pos + consumed < bytes.len() && bytes[pos + consumed] == b'\\' {
consumed += 1;
// ESC found - check if followed by \ for ST
if term_pos + 1 < remaining.len() && remaining[term_pos + 1] == b'\\' {
// ESC \ (ST) terminator
self.osc_buffer.extend_from_slice(&remaining[..term_pos]);
handler.osc(&self.osc_buffer);
self.state = State::Normal;
return consumed;
} else {
// ESC not followed by \, dispatch what we have
self.escape_len += term_pos + 2;
return term_pos + 2;
} else if term_pos + 1 < remaining.len() {
// ESC not followed by \ - this is a new escape sequence
// Copy everything before ESC and transition to Escape state
self.osc_buffer.extend_from_slice(&remaining[..term_pos]);
handler.osc(&self.osc_buffer);
self.state = State::Escape;
return consumed;
self.escape_len += term_pos + 1;
return term_pos + 1;
} else {
// ESC at end of buffer, need more data
// Copy everything before ESC, keep ESC for next parse
self.osc_buffer.extend_from_slice(&remaining[..term_pos]);
self.escape_len += term_pos;
return term_pos;
}
}
// C1 ST (0x9C) terminates OSC
0x9C => {
handler.osc(&self.osc_buffer);
self.state = State::Normal;
return consumed;
}
_ => {
self.osc_buffer.push(ch);
}
_ => unreachable!(),
}
} else {
// No terminator found - check max length
if self.escape_len + remaining.len() > MAX_ESCAPE_LEN || self.osc_buffer.len() + remaining.len() > MAX_OSC_LEN {
log::debug!("OSC sequence too long, aborting");
self.state = State::Normal;
return remaining.len();
}
// Buffer all remaining bytes for next parse call
self.osc_buffer.extend_from_slice(remaining);
self.escape_len += remaining.len();
return remaining.len();
}
consumed
}
/// Process DCS/APC/PM/SOS sequence bytes (string commands terminated by ST).
/// Process DCS/APC/PM/SOS sequence bytes using SIMD-accelerated terminator search.
/// Like Kitty's find_st_terminator + accumulate_st_terminated_esc_code.
fn consume_string_command<H: Handler>(&mut self, bytes: &[u8], pos: usize, handler: &mut H) -> usize {
let mut consumed = 0;
let remaining = &bytes[pos..];
while pos + consumed < bytes.len() {
let ch = bytes[pos + consumed];
consumed += 1;
self.escape_len += 1;
// Use SIMD-accelerated search to find ESC (0x1B) or C1 ST (0x9C)
if let Some(term_pos) = memchr::memchr2(0x1B, 0x9C, remaining) {
let terminator = remaining[term_pos];
// Check for max length
if self.escape_len > MAX_ESCAPE_LEN {
// Check max length before accepting
if self.escape_len + term_pos > MAX_ESCAPE_LEN {
log::debug!("String command too long, aborting");
self.state = State::Normal;
return consumed;
return remaining.len();
}
match ch {
// ESC \ (ST) terminates
0x1B => {
if pos + consumed < bytes.len() && bytes[pos + consumed] == b'\\' {
consumed += 1;
// Dispatch based on original state
match self.state {
State::Dcs => handler.dcs(&self.string_buffer),
State::Apc => handler.apc(&self.string_buffer),
State::Pm => handler.pm(&self.string_buffer),
State::Sos => handler.sos(&self.string_buffer),
_ => {}
}
self.state = State::Normal;
return consumed;
} else {
self.string_buffer.push(ch);
}
}
// C1 ST (0x9C) terminates
match terminator {
0x9C => {
// C1 ST terminator - copy data in bulk and dispatch
self.string_buffer.extend_from_slice(&remaining[..term_pos]);
match self.state {
State::Dcs => handler.dcs(&self.string_buffer),
State::Apc => handler.apc(&self.string_buffer),
@@ -770,15 +784,55 @@ impl Parser {
_ => {}
}
self.state = State::Normal;
return consumed;
self.escape_len += term_pos + 1;
return term_pos + 1;
}
_ => {
self.string_buffer.push(ch);
0x1B => {
// ESC found - check if followed by \ for ST
if term_pos + 1 < remaining.len() && remaining[term_pos + 1] == b'\\' {
// ESC \ (ST) terminator
self.string_buffer.extend_from_slice(&remaining[..term_pos]);
match self.state {
State::Dcs => handler.dcs(&self.string_buffer),
State::Apc => handler.apc(&self.string_buffer),
State::Pm => handler.pm(&self.string_buffer),
State::Sos => handler.sos(&self.string_buffer),
_ => {}
}
self.state = State::Normal;
self.escape_len += term_pos + 2;
return term_pos + 2;
} else if term_pos + 1 < remaining.len() {
// ESC not followed by \ - include ESC in data and continue
// (Unlike OSC, string commands include raw ESC that isn't ST)
self.string_buffer.extend_from_slice(&remaining[..=term_pos]);
self.escape_len += term_pos + 1;
// Continue searching from after this ESC
let consumed = term_pos + 1;
return consumed + self.consume_string_command(bytes, pos + consumed, handler);
} else {
// ESC at end of buffer, need more data
// Copy everything before ESC, keep ESC for next parse
self.string_buffer.extend_from_slice(&remaining[..term_pos]);
self.escape_len += term_pos;
return term_pos;
}
}
_ => unreachable!(),
}
} else {
// No terminator found - check max length
if self.escape_len + remaining.len() > MAX_ESCAPE_LEN {
log::debug!("String command too long, aborting");
self.state = State::Normal;
return remaining.len();
}
// Buffer all remaining bytes for next parse call
self.string_buffer.extend_from_slice(remaining);
self.escape_len += remaining.len();
return remaining.len();
}
consumed
}
}