1992 lines
67 KiB
Rust
1992 lines
67 KiB
Rust
//! VT Parser - A high-performance terminal escape sequence parser.
|
|
//!
|
|
//! Based on Kitty's vt-parser.c design, this parser uses explicit state tracking
|
|
//! to enable fast-path processing of normal text while correctly handling
|
|
//! escape sequences.
|
|
//!
|
|
//! Key design principles from Kitty:
|
|
//! 1. UTF-8 decode until ESC sentinel is found (not byte-by-byte parsing)
|
|
//! 2. Pass decoded codepoints to the text handler, not raw bytes
|
|
//! 3. Control characters (LF, CR, TAB, BS, etc.) are handled inline in text drawing
|
|
//! 4. Only ESC triggers state machine transitions
|
|
//! 5. Buffer is integrated into parser - I/O writes directly here
|
|
//! 6. Lock is released during parsing - I/O can continue while main parses
|
|
|
|
use crate::simd_utf8::SimdUtf8Decoder;
|
|
use std::sync::Mutex;
|
|
|
|
/// Size in bytes of the shared I/O buffer: 1 MiB, like Kitty.
pub const BUF_SIZE: usize = 1 << 20;

/// Upper bound on the number of parameters collected for one CSI sequence.
pub const MAX_CSI_PARAMS: usize = 1 << 8;

/// Upper bound on the length of an OSC string.
/// Kitty has no OSC-specific limit, only the overall escape-sequence limit,
/// so this is defined as that limit (256 KiB).
const MAX_OSC_LEN: usize = MAX_ESCAPE_LEN;

/// Longest escape sequence accepted before the parser gives up: 256 KiB, like Kitty.
const MAX_ESCAPE_LEN: usize = 256 * 1024;
|
|
|
|
/// Top-level parser state.
///
/// `Normal` is the default (ground) state; every other variant names the kind
/// of escape sequence currently being collected.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum State {
    /// Normal text processing mode.
    #[default]
    Normal,
    /// Just saw ESC, waiting for next character.
    Escape,
    /// ESC plus an intermediate byte seen (e.g., the `(` of `ESC ( B`);
    /// the payload is that intermediate byte, and the final byte is awaited.
    EscapeIntermediate(u8),
    /// Processing CSI sequence (ESC [).
    Csi,
    /// Processing OSC sequence (ESC ]).
    Osc,
    /// Processing DCS sequence (ESC P).
    Dcs,
    /// Processing APC sequence (ESC _).
    Apc,
    /// Processing PM sequence (ESC ^).
    Pm,
    /// Processing SOS sequence (ESC X).
    Sos,
}
|
|
|
|
/// Sub-state used while the parameters of a CSI sequence are collected.
#[derive(Default, Debug, Clone, Copy, Eq, PartialEq)]
enum CsiState {
    /// Nothing after `ESC [` has been interpreted yet (the default state).
    #[default]
    Start,
    /// Collecting parameters, separators and modifiers.
    Body,
    /// A secondary modifier was seen; only a final character may follow.
    PostSecondary,
}
|
|
|
|
/// Multipliers for reverse-order digit accumulation (like Kitty).
///
/// Entry `k` holds `10^(16 - k)`: the first digit of a parameter is scaled by
/// the largest power and each later digit by the next smaller one. The
/// accumulated sum is divided once when the parameter is committed, so the
/// per-digit work is a table lookup rather than a running multiply.
static DIGIT_MULTIPLIERS: [i64; 16] = {
    let mut table = [0i64; 16];
    let mut power: i64 = 10;
    let mut slot = table.len();
    // Fill from the last slot (10) up to the first slot (10^16).
    while slot > 0 {
        slot -= 1;
        table[slot] = power;
        power *= 10;
    }
    table
};
|
|
|
|
/// Parsed CSI sequence data.
|
|
#[derive(Debug, Clone)]
|
|
pub struct CsiParams {
|
|
/// Collected parameters.
|
|
pub params: [i32; MAX_CSI_PARAMS],
|
|
/// Which parameters are sub-parameters (colon-separated).
|
|
pub is_sub_param: [bool; MAX_CSI_PARAMS],
|
|
/// Number of collected parameters.
|
|
pub num_params: usize,
|
|
/// Primary modifier (e.g., '?' in CSI ? Ps h).
|
|
pub primary: u8,
|
|
/// Secondary modifier (e.g., '$' in CSI Ps $ p).
|
|
pub secondary: u8,
|
|
/// Final character (e.g., 'm' in CSI 1 m).
|
|
pub final_char: u8,
|
|
/// Whether the sequence is valid.
|
|
pub is_valid: bool,
|
|
// Internal parsing state
|
|
state: CsiState,
|
|
accumulator: i64,
|
|
multiplier: i32,
|
|
num_digits: usize,
|
|
}
|
|
|
|
impl Default for CsiParams {
|
|
fn default() -> Self {
|
|
Self {
|
|
params: [0; MAX_CSI_PARAMS],
|
|
is_sub_param: [false; MAX_CSI_PARAMS],
|
|
num_params: 0,
|
|
primary: 0,
|
|
secondary: 0,
|
|
final_char: 0,
|
|
is_valid: false,
|
|
state: CsiState::Start,
|
|
accumulator: 0,
|
|
multiplier: 1,
|
|
num_digits: 0,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl CsiParams {
|
|
/// Reset for a new CSI sequence.
|
|
/// Note: We don't zero the params/is_sub_param arrays since they're written before being read.
|
|
/// This avoids zeroing 1280 bytes on every CSI sequence.
|
|
#[inline]
|
|
pub fn reset(&mut self) {
|
|
// Don't zero arrays - individual elements are written before being read
|
|
// self.params = [0; MAX_CSI_PARAMS]; // Skip - saves 1024 bytes memset
|
|
// self.is_sub_param = [false; MAX_CSI_PARAMS]; // Skip - saves 256 bytes memset
|
|
self.num_params = 0;
|
|
self.primary = 0;
|
|
self.secondary = 0;
|
|
self.final_char = 0;
|
|
self.is_valid = false;
|
|
self.state = CsiState::Start;
|
|
self.accumulator = 0;
|
|
self.multiplier = 1;
|
|
self.num_digits = 0;
|
|
}
|
|
|
|
/// Get parameter at index, or default value if not present.
|
|
#[inline]
|
|
pub fn get(&self, index: usize, default: i32) -> i32 {
|
|
if index < self.num_params && self.params[index] != 0 {
|
|
self.params[index]
|
|
} else {
|
|
default
|
|
}
|
|
}
|
|
|
|
/// Add a digit to the current parameter.
|
|
/// Uses Kitty's reverse-order accumulation with lookup table.
|
|
#[inline(always)]
|
|
fn add_digit(&mut self, digit: u8) {
|
|
// Like Kitty: accumulate with multipliers, divide at commit
|
|
if self.num_digits < DIGIT_MULTIPLIERS.len() {
|
|
self.accumulator +=
|
|
(digit - b'0') as i64 * DIGIT_MULTIPLIERS[self.num_digits];
|
|
self.num_digits += 1;
|
|
}
|
|
}
|
|
|
|
/// Commit the current parameter.
|
|
#[inline]
|
|
fn commit_param(&mut self) -> bool {
|
|
if self.num_params >= MAX_CSI_PARAMS {
|
|
return false;
|
|
}
|
|
// Convert reverse-order accumulator to final value
|
|
// Like Kitty: accumulator / digit_multipliers[num_digits - 1]
|
|
let value = if self.num_digits == 0 {
|
|
0
|
|
} else {
|
|
// Division converts from reverse-order accumulation
|
|
(self.accumulator / DIGIT_MULTIPLIERS[self.num_digits - 1]) as i32
|
|
* self.multiplier
|
|
};
|
|
self.params[self.num_params] = value;
|
|
self.num_params += 1;
|
|
self.accumulator = 0;
|
|
self.multiplier = 1;
|
|
self.num_digits = 0;
|
|
true
|
|
}
|
|
}
|
|
|
|
/// VT Parser with Kitty-style state tracking.
|
|
#[derive(Debug)]
|
|
pub struct Parser {
|
|
/// Current parser state.
|
|
pub state: State,
|
|
/// CSI parameters being collected.
|
|
pub csi: CsiParams,
|
|
/// UTF-8 decoder for text (SIMD-optimized).
|
|
utf8: SimdUtf8Decoder,
|
|
/// Decoded codepoint buffer (reused to avoid allocation).
|
|
codepoint_buf: Vec<u32>,
|
|
/// OSC string buffer.
|
|
osc_buffer: Vec<u8>,
|
|
/// DCS/APC/PM/SOS string buffer.
|
|
string_buffer: Vec<u8>,
|
|
/// Number of bytes consumed in current escape sequence (for max length check).
|
|
escape_len: usize,
|
|
}
|
|
|
|
impl Default for Parser {
|
|
fn default() -> Self {
|
|
Self {
|
|
state: State::Normal,
|
|
csi: CsiParams::default(),
|
|
utf8: SimdUtf8Decoder::new(),
|
|
// Pre-allocate to match typical read buffer sizes (1MB) to avoid reallocation
|
|
codepoint_buf: Vec::with_capacity(1024 * 1024),
|
|
osc_buffer: Vec::new(),
|
|
string_buffer: Vec::new(),
|
|
escape_len: 0,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Bookkeeping shared between the I/O thread and the parsing thread.
/// Tracks read/write positions like Kitty's PS struct.
struct BufferState {
    // Read tracking (like Kitty's `read` struct).
    /// Current parse position (advances as input is parsed).
    read_pos: usize,
    /// Bytes that can be discarded (complete sequences only).
    read_consumed: usize,
    /// Total valid bytes in the buffer.
    read_sz: usize,

    // Write tracking (like Kitty's `write` struct).
    /// Bytes written by the I/O thread but not yet visible to the reader.
    write_pending: usize,
    /// Offset at which the I/O thread is writing (for compaction fixup).
    write_offset: usize,
    /// Size of the outstanding write buffer (0 if none is outstanding).
    write_sz: usize,
}
|
|
|
|
/// Kitty-style shared parser with integrated 1MB buffer.
|
|
///
|
|
/// Like Kitty's PS struct, this owns the buffer AND all parser state.
|
|
/// I/O thread writes directly to this buffer, main thread parses in-place.
|
|
///
|
|
/// Critical: Lock is RELEASED during parsing so I/O can continue writing.
|
|
pub struct SharedParser {
|
|
/// The 1MB buffer - I/O writes to end, main reads from front
|
|
buf: std::cell::UnsafeCell<Box<[u8; BUF_SIZE]>>,
|
|
/// Buffer state protected by mutex
|
|
state: Mutex<BufferState>,
|
|
/// Eventfd for waking I/O thread when space available
|
|
wakeup_fd: i32,
|
|
|
|
// ========== Parser state (main thread only, not behind mutex) ==========
|
|
// These are copies of read_pos/read_sz/read_consumed for use while lock is released
|
|
/// Current parse position (main thread working copy)
|
|
parse_pos: std::cell::UnsafeCell<usize>,
|
|
/// Total valid bytes (main thread working copy)
|
|
parse_sz: std::cell::UnsafeCell<usize>,
|
|
/// Bytes that can be discarded (main thread working copy)
|
|
parse_consumed: std::cell::UnsafeCell<usize>,
|
|
/// Current parser state
|
|
vte_state: std::cell::UnsafeCell<State>,
|
|
/// CSI parameters being collected
|
|
csi: std::cell::UnsafeCell<CsiParams>,
|
|
/// UTF-8 decoder for text (SIMD-optimized)
|
|
utf8: std::cell::UnsafeCell<SimdUtf8Decoder>,
|
|
/// Decoded codepoint buffer (reused to avoid allocation)
|
|
codepoint_buf: std::cell::UnsafeCell<Vec<u32>>,
|
|
/// OSC string buffer
|
|
osc_buffer: std::cell::UnsafeCell<Vec<u8>>,
|
|
/// DCS/APC/PM/SOS string buffer
|
|
string_buffer: std::cell::UnsafeCell<Vec<u8>>,
|
|
/// Number of bytes consumed in current escape sequence (for max length check)
|
|
escape_len: std::cell::UnsafeCell<usize>,
|
|
}
|
|
|
|
// SAFETY: I/O thread only writes to buf[read_sz+write_pending..], main thread
|
|
// only reads buf[read_pos..read_sz]. Parser state is only used by main thread.
|
|
unsafe impl Sync for SharedParser {}
|
|
unsafe impl Send for SharedParser {}
|
|
|
|
impl SharedParser {
|
|
/// Create a new shared parser with integrated buffer.
|
|
pub fn new() -> Self {
|
|
let wakeup_fd =
|
|
unsafe { libc::eventfd(0, libc::EFD_NONBLOCK | libc::EFD_CLOEXEC) };
|
|
if wakeup_fd < 0 {
|
|
panic!(
|
|
"Failed to create eventfd: {}",
|
|
std::io::Error::last_os_error()
|
|
);
|
|
}
|
|
|
|
Self {
|
|
buf: std::cell::UnsafeCell::new(Box::new([0u8; BUF_SIZE])),
|
|
state: Mutex::new(BufferState {
|
|
read_pos: 0,
|
|
read_consumed: 0,
|
|
read_sz: 0,
|
|
write_pending: 0,
|
|
write_offset: 0,
|
|
write_sz: 0,
|
|
}),
|
|
wakeup_fd,
|
|
// Parser state - working copies for use while lock is released
|
|
parse_pos: std::cell::UnsafeCell::new(0),
|
|
parse_sz: std::cell::UnsafeCell::new(0),
|
|
parse_consumed: std::cell::UnsafeCell::new(0),
|
|
vte_state: std::cell::UnsafeCell::new(State::Normal),
|
|
csi: std::cell::UnsafeCell::new(CsiParams::default()),
|
|
utf8: std::cell::UnsafeCell::new(SimdUtf8Decoder::new()),
|
|
codepoint_buf: std::cell::UnsafeCell::new(Vec::with_capacity(
|
|
BUF_SIZE,
|
|
)),
|
|
osc_buffer: std::cell::UnsafeCell::new(Vec::new()),
|
|
string_buffer: std::cell::UnsafeCell::new(Vec::new()),
|
|
escape_len: std::cell::UnsafeCell::new(0),
|
|
}
|
|
}
|
|
|
|
/// Get the wakeup fd for I/O thread to poll on.
|
|
pub fn wakeup_fd(&self) -> i32 {
|
|
self.wakeup_fd
|
|
}
|
|
|
|
// ========== I/O Thread API ==========
|
|
|
|
/// Check if there's space for writing. Called by I/O thread.
|
|
pub fn has_space(&self) -> bool {
|
|
let state = self.state.lock().unwrap();
|
|
state.read_sz + state.write_pending < BUF_SIZE
|
|
}
|
|
|
|
/// Get write buffer for I/O thread. Returns (ptr, available_bytes).
|
|
/// Caller MUST call commit_write() after writing.
|
|
/// Like Kitty's vt_parser_create_write_buffer().
|
|
pub fn create_write_buffer(&self) -> (*mut u8, usize) {
|
|
let mut state = self.state.lock().unwrap();
|
|
|
|
if state.write_sz > 0 {
|
|
log::error!(
|
|
"create_write_buffer called with existing write buffer"
|
|
);
|
|
return (std::ptr::null_mut(), 0);
|
|
}
|
|
|
|
let write_offset = state.read_sz + state.write_pending;
|
|
let available = BUF_SIZE.saturating_sub(write_offset);
|
|
|
|
if available == 0 {
|
|
return (std::ptr::null_mut(), 0);
|
|
}
|
|
|
|
state.write_offset = write_offset;
|
|
state.write_sz = available;
|
|
|
|
let ptr = unsafe { (*self.buf.get()).as_mut_ptr().add(write_offset) };
|
|
(ptr, available)
|
|
}
|
|
|
|
/// Commit bytes written by I/O thread.
|
|
/// Like Kitty's vt_parser_commit_write() - handles compaction that happened
|
|
/// between create_write_buffer and commit_write by moving data if needed.
|
|
pub fn commit_write(&self, len: usize) {
|
|
let mut state = self.state.lock().unwrap();
|
|
let current_offset = state.read_sz + state.write_pending;
|
|
|
|
if state.write_offset > current_offset {
|
|
unsafe {
|
|
let buf = &mut *self.buf.get();
|
|
std::ptr::copy(
|
|
buf.as_ptr().add(state.write_offset),
|
|
buf.as_mut_ptr().add(current_offset),
|
|
len,
|
|
);
|
|
}
|
|
}
|
|
|
|
state.write_pending += len;
|
|
state.write_sz = 0;
|
|
}
|
|
|
|
/// Read from PTY fd into buffer. Returns bytes read, -1 for error.
|
|
pub fn read_from_fd(&self, fd: i32) -> isize {
|
|
let (ptr, available) = self.create_write_buffer();
|
|
if available == 0 {
|
|
return 0;
|
|
}
|
|
|
|
let result =
|
|
unsafe { libc::read(fd, ptr as *mut libc::c_void, available) };
|
|
|
|
if result > 0 {
|
|
self.commit_write(result as usize);
|
|
} else {
|
|
self.cancel_write();
|
|
}
|
|
result
|
|
}
|
|
|
|
/// Cancel a pending write buffer (on error/EOF).
|
|
fn cancel_write(&self) {
|
|
let mut state = self.state.lock().unwrap();
|
|
state.write_sz = 0;
|
|
}
|
|
|
|
/// Drain the wakeup eventfd.
|
|
pub fn drain_wakeup(&self) {
|
|
let mut buf = 0u64;
|
|
unsafe {
|
|
libc::read(
|
|
self.wakeup_fd,
|
|
&mut buf as *mut u64 as *mut libc::c_void,
|
|
8,
|
|
);
|
|
}
|
|
}
|
|
|
|
// ========== Main Thread API ==========
|
|
|
|
/// Run a parse pass. This is the Kitty-style run_worker():
|
|
/// 1. Lock, make pending visible
|
|
/// 2. UNLOCK during actual parsing (consume_input)
|
|
/// 3. Re-lock, add new pending, check for more data, repeat
|
|
/// 4. Final compaction and wake I/O if space created
|
|
///
|
|
/// Returns true if any data was parsed.
|
|
pub fn run_parse_pass<H: Handler>(&self, handler: &mut H) -> bool {
|
|
let mut parsed_any = false;
|
|
|
|
// Lock for initial bookkeeping
|
|
let mut state = self.state.lock().unwrap();
|
|
|
|
// Make pending writes visible (like Kitty: self->read.sz += self->write.pending)
|
|
state.read_sz += state.write_pending;
|
|
state.write_pending = 0;
|
|
|
|
// Check if there's data to parse (like Kitty: read.pos < read.sz)
|
|
let has_pending_input = state.read_pos < state.read_sz;
|
|
if !has_pending_input {
|
|
return false;
|
|
}
|
|
|
|
// Track if buffer was ever full during this parse pass (for wakeup decision)
|
|
// Like Kitty: pd->write_space_created = self->read.sz >= BUF_SZ (checked BEFORE compaction)
|
|
let mut buffer_was_ever_full = state.read_sz >= BUF_SIZE;
|
|
|
|
// Reset consumed counter for this parse pass (like Kitty: self->read.consumed = 0)
|
|
state.read_consumed = 0;
|
|
|
|
// Copy positions to UnsafeCell fields for use while lock is released
|
|
unsafe {
|
|
*self.parse_pos.get() = state.read_pos;
|
|
*self.parse_sz.get() = state.read_sz;
|
|
*self.parse_consumed.get() = state.read_pos;
|
|
}
|
|
|
|
// Like Kitty's do { ... } while (self->read.pos < self->read.sz)
|
|
loop {
|
|
let parse_pos = unsafe { *self.parse_pos.get() };
|
|
let parse_sz = unsafe { *self.parse_sz.get() };
|
|
|
|
if parse_pos >= parse_sz {
|
|
break;
|
|
}
|
|
|
|
// RELEASE LOCK during parsing - I/O can continue writing!
|
|
drop(state);
|
|
|
|
// Like Kitty line 1516: consume_input(self, ...)
|
|
let made_progress = self.consume_input(handler);
|
|
parsed_any = true;
|
|
|
|
// Re-acquire lock
|
|
state = self.state.lock().unwrap();
|
|
|
|
// CRITICAL: Like Kitty line 1518, add new pending data INSIDE the loop
|
|
// This allows us to process data that arrived while we were parsing
|
|
state.read_sz += state.write_pending;
|
|
state.write_pending = 0;
|
|
|
|
// Update buffer_was_ever_full if buffer is now full
|
|
if state.read_sz >= BUF_SIZE {
|
|
buffer_was_ever_full = true;
|
|
}
|
|
|
|
// Update state with new positions from parsing
|
|
state.read_pos = unsafe { *self.parse_pos.get() };
|
|
state.read_consumed = unsafe { *self.parse_consumed.get() };
|
|
|
|
// Update parse_sz to include new data for next iteration
|
|
unsafe {
|
|
*self.parse_sz.get() = state.read_sz;
|
|
}
|
|
|
|
// Like Kitty: while read.pos < read.sz
|
|
if state.read_pos >= state.read_sz || !made_progress {
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Compaction - remove consumed bytes (like Kitty)
|
|
if state.read_consumed > 0 {
|
|
// Like Kitty: pos -= consumed, sz -= consumed, memmove
|
|
state.read_pos = state.read_pos.saturating_sub(state.read_consumed);
|
|
state.read_sz = state.read_sz.saturating_sub(state.read_consumed);
|
|
|
|
// memmove remaining data to front
|
|
if state.read_sz > 0 {
|
|
unsafe {
|
|
let buf = &mut *self.buf.get();
|
|
std::ptr::copy(
|
|
buf.as_ptr().add(state.read_consumed),
|
|
buf.as_mut_ptr(),
|
|
state.read_sz,
|
|
);
|
|
}
|
|
}
|
|
|
|
state.read_consumed = 0;
|
|
|
|
// Wake I/O thread if buffer was ever full during this pass and we freed space
|
|
// Like Kitty: if (pd.write_space_created) wakeup_io_loop()
|
|
if buffer_was_ever_full && state.read_sz < BUF_SIZE {
|
|
drop(state);
|
|
let val = 1u64;
|
|
unsafe {
|
|
libc::write(
|
|
self.wakeup_fd,
|
|
&val as *const u64 as *const libc::c_void,
|
|
8,
|
|
);
|
|
}
|
|
return parsed_any;
|
|
}
|
|
} else if buffer_was_ever_full {
|
|
// Buffer was full but nothing consumed - stuck in partial sequence?
|
|
log::warn!("[PARSE] Buffer was full but read_consumed=0! read_pos={} read_sz={}",
|
|
state.read_pos, state.read_sz);
|
|
}
|
|
|
|
drop(state);
|
|
parsed_any
|
|
}
|
|
|
|
/// Check if there's pending data (for tick scheduling).
|
|
pub fn has_pending_data(&self) -> bool {
|
|
let state = self.state.lock().unwrap();
|
|
state.read_pos < state.read_sz || state.write_pending > 0
|
|
}
|
|
|
|
// ========== Internal parsing methods (main thread only) ==========
|
|
|
|
/// Main parsing dispatch - like Kitty's consume_input().
|
|
/// Processes ONE state case per call and returns, like Kitty lines 1458-1490.
|
|
/// The outer loop in run_parse_pass() calls this repeatedly until buffer exhausted.
|
|
///
|
|
/// Returns true if we made progress (consumed some bytes or changed state).
|
|
fn consume_input<H: Handler>(&self, handler: &mut H) -> bool {
|
|
#[cfg(feature = "render_timing")]
|
|
let start = std::time::Instant::now();
|
|
|
|
let parse_pos = unsafe { &mut *self.parse_pos.get() };
|
|
let parse_sz = unsafe { *self.parse_sz.get() };
|
|
let parse_consumed = unsafe { &mut *self.parse_consumed.get() };
|
|
let vte_state = unsafe { &mut *self.vte_state.get() };
|
|
let csi = unsafe { &mut *self.csi.get() };
|
|
let utf8 = unsafe { &mut *self.utf8.get() };
|
|
let codepoint_buf = unsafe { &mut *self.codepoint_buf.get() };
|
|
let osc_buffer = unsafe { &mut *self.osc_buffer.get() };
|
|
let string_buffer = unsafe { &mut *self.string_buffer.get() };
|
|
let escape_len = unsafe { &mut *self.escape_len.get() };
|
|
let buf = unsafe { &*self.buf.get() };
|
|
|
|
if *parse_pos >= parse_sz {
|
|
#[cfg(feature = "render_timing")]
|
|
handler.add_vt_parser_ns(start.elapsed().as_nanos() as u64);
|
|
return false;
|
|
}
|
|
|
|
let made_progress = match *vte_state {
|
|
State::Normal => {
|
|
// Like Kitty line 1460: consume_normal(self); self->read.consumed = self->read.pos; break;
|
|
Self::consume_normal_impl(
|
|
handler,
|
|
buf,
|
|
parse_pos,
|
|
parse_sz,
|
|
utf8,
|
|
codepoint_buf,
|
|
vte_state,
|
|
escape_len,
|
|
);
|
|
*parse_consumed = *parse_pos;
|
|
true
|
|
}
|
|
State::Escape => {
|
|
// Like Kitty lines 1461-1463:
|
|
// case VTE_ESC: if (consume_esc(self)) { self->read.consumed = self->read.pos; } break;
|
|
if Self::consume_escape_impl(
|
|
handler,
|
|
buf,
|
|
parse_pos,
|
|
parse_sz,
|
|
*parse_consumed,
|
|
vte_state,
|
|
csi,
|
|
osc_buffer,
|
|
string_buffer,
|
|
escape_len,
|
|
) {
|
|
*parse_consumed = *parse_pos;
|
|
}
|
|
true
|
|
}
|
|
State::EscapeIntermediate(_) => {
|
|
if Self::consume_escape_intermediate_impl(
|
|
handler, buf, parse_pos, parse_sz, vte_state,
|
|
) {
|
|
*parse_consumed = *parse_pos;
|
|
}
|
|
true
|
|
}
|
|
State::Csi => {
|
|
// Like Kitty lines 1465-1466:
|
|
// if (consume_csi(self)) { self->read.consumed = self->read.pos; if (self->csi.is_valid) dispatch_csi(self); SET_STATE(NORMAL); }
|
|
if Self::consume_csi_impl(
|
|
handler,
|
|
buf,
|
|
parse_pos,
|
|
parse_sz,
|
|
*parse_consumed,
|
|
csi,
|
|
escape_len,
|
|
) {
|
|
*parse_consumed = *parse_pos;
|
|
if csi.is_valid {
|
|
handler.csi(csi);
|
|
}
|
|
*vte_state = State::Normal;
|
|
true
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
State::Osc => {
|
|
if Self::consume_osc_impl(
|
|
handler, buf, parse_pos, parse_sz, vte_state, osc_buffer,
|
|
escape_len,
|
|
) {
|
|
*parse_consumed = *parse_pos;
|
|
*vte_state = State::Normal;
|
|
true
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
State::Dcs | State::Apc | State::Pm | State::Sos => {
|
|
if Self::consume_string_impl(
|
|
handler,
|
|
buf,
|
|
parse_pos,
|
|
parse_sz,
|
|
vte_state,
|
|
string_buffer,
|
|
escape_len,
|
|
) {
|
|
*parse_consumed = *parse_pos;
|
|
*vte_state = State::Normal;
|
|
true
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
};
|
|
|
|
#[cfg(feature = "render_timing")]
|
|
handler.add_vt_parser_ns(start.elapsed().as_nanos() as u64);
|
|
|
|
made_progress
|
|
}
|
|
|
|
/// Consume normal text - like Kitty's consume_normal().
|
|
/// UTF-8 decodes until ESC is found using SIMD-optimized decoder.
|
|
///
|
|
/// Like Kitty: processes text until ESC found, then sets state to Escape and returns.
|
|
/// The outer loop will call consume_input again to handle the Escape state.
|
|
#[inline]
|
|
fn consume_normal_impl<H: Handler>(
|
|
handler: &mut H,
|
|
buf: &[u8; BUF_SIZE],
|
|
parse_pos: &mut usize,
|
|
parse_sz: usize,
|
|
utf8: &mut SimdUtf8Decoder,
|
|
codepoint_buf: &mut Vec<u32>,
|
|
vte_state: &mut State,
|
|
escape_len: &mut usize,
|
|
) {
|
|
// Like Kitty's consume_normal() inner loop
|
|
loop {
|
|
if *parse_pos >= parse_sz {
|
|
break;
|
|
}
|
|
|
|
let remaining = &buf[*parse_pos..parse_sz];
|
|
let (consumed, found_esc) =
|
|
utf8.decode_to_esc(remaining, codepoint_buf);
|
|
|
|
if !codepoint_buf.is_empty() {
|
|
handler.text(codepoint_buf);
|
|
}
|
|
|
|
// Like Kitty: self->read.pos += self->utf8_decoder.num_consumed
|
|
*parse_pos += consumed;
|
|
|
|
if found_esc {
|
|
// Like Kitty: if (sentinel_found) { SET_STATE(ESC); break; }
|
|
*vte_state = State::Escape;
|
|
*escape_len = 0;
|
|
break;
|
|
}
|
|
}
|
|
// Like Kitty line 1460: consume_normal(self); self->read.consumed = self->read.pos; break;
|
|
// The caller (consume_input) will set parse_consumed = parse_pos
|
|
}
|
|
|
|
/// Consume escape sequence start - like Kitty's consume_esc().
|
|
/// Returns true if sequence is complete (consumed = pos).
|
|
#[inline]
|
|
fn consume_escape_impl<H: Handler>(
|
|
handler: &mut H,
|
|
buf: &[u8; BUF_SIZE],
|
|
parse_pos: &mut usize,
|
|
parse_sz: usize,
|
|
parse_consumed: usize,
|
|
vte_state: &mut State,
|
|
csi: &mut CsiParams,
|
|
osc_buffer: &mut Vec<u8>,
|
|
string_buffer: &mut Vec<u8>,
|
|
escape_len: &mut usize,
|
|
) -> bool {
|
|
if *parse_pos >= parse_sz {
|
|
return false;
|
|
}
|
|
|
|
let ch = buf[*parse_pos];
|
|
*parse_pos += 1;
|
|
*escape_len += 1;
|
|
|
|
// Like Kitty: is_first_char = read.pos - read.consumed == 1
|
|
let is_first_char = *parse_pos - parse_consumed == 1;
|
|
|
|
if is_first_char {
|
|
match ch {
|
|
// Multi-byte sequences: return false so parse_consumed is NOT updated.
|
|
// This prevents ESC[ from being discarded on buffer compaction before
|
|
// the full sequence completes.
|
|
b'[' => {
|
|
*vte_state = State::Csi;
|
|
csi.reset();
|
|
}
|
|
b']' => {
|
|
*vte_state = State::Osc;
|
|
osc_buffer.clear();
|
|
}
|
|
b'P' => {
|
|
*vte_state = State::Dcs;
|
|
string_buffer.clear();
|
|
}
|
|
b'_' => {
|
|
*vte_state = State::Apc;
|
|
string_buffer.clear();
|
|
}
|
|
b'^' => {
|
|
*vte_state = State::Pm;
|
|
string_buffer.clear();
|
|
}
|
|
b'X' => {
|
|
*vte_state = State::Sos;
|
|
string_buffer.clear();
|
|
}
|
|
// Two-byte escape sequences - return false like Kitty's IS_ESCAPED_CHAR
|
|
b'(' | b')' | b'*' | b'+' | b'-' | b'.' | b'/' | b'%'
|
|
| b'#' | b' ' => {
|
|
*vte_state = State::EscapeIntermediate(ch);
|
|
return false;
|
|
}
|
|
b'7' => {
|
|
handler.save_cursor();
|
|
*vte_state = State::Normal;
|
|
}
|
|
b'8' => {
|
|
handler.restore_cursor();
|
|
*vte_state = State::Normal;
|
|
}
|
|
b'c' => {
|
|
handler.reset();
|
|
*vte_state = State::Normal;
|
|
}
|
|
b'D' => {
|
|
handler.index();
|
|
*vte_state = State::Normal;
|
|
}
|
|
b'E' => {
|
|
handler.newline();
|
|
*vte_state = State::Normal;
|
|
}
|
|
b'H' => {
|
|
handler.set_tab_stop();
|
|
*vte_state = State::Normal;
|
|
}
|
|
b'M' => {
|
|
handler.reverse_index();
|
|
*vte_state = State::Normal;
|
|
}
|
|
b'=' => {
|
|
handler.set_keypad_mode(true);
|
|
*vte_state = State::Normal;
|
|
}
|
|
b'>' => {
|
|
handler.set_keypad_mode(false);
|
|
*vte_state = State::Normal;
|
|
}
|
|
b'\\' => {
|
|
*vte_state = State::Normal;
|
|
}
|
|
_ => {
|
|
log::debug!("Unknown escape sequence: ESC {:02x}", ch);
|
|
*vte_state = State::Normal;
|
|
}
|
|
}
|
|
return true;
|
|
} else {
|
|
// Second char of two-char sequence - like Kitty's else branch
|
|
let prev_ch = buf[*parse_pos - 2];
|
|
*vte_state = State::Normal;
|
|
|
|
match prev_ch {
|
|
b'(' | b')' => {
|
|
let set = if prev_ch == b'(' { 0 } else { 1 };
|
|
handler.designate_charset(set, ch);
|
|
}
|
|
b'#' => {
|
|
if ch == b'8' {
|
|
handler.screen_alignment();
|
|
}
|
|
}
|
|
_ => {}
|
|
}
|
|
return true;
|
|
}
|
|
}
|
|
|
|
/// Consume second byte of two-char escape sequence.
|
|
fn consume_escape_intermediate_impl<H: Handler>(
|
|
handler: &mut H,
|
|
buf: &[u8; BUF_SIZE],
|
|
parse_pos: &mut usize,
|
|
parse_sz: usize,
|
|
vte_state: &mut State,
|
|
) -> bool {
|
|
if *parse_pos >= parse_sz {
|
|
return false;
|
|
}
|
|
|
|
let ch = buf[*parse_pos];
|
|
*parse_pos += 1;
|
|
|
|
let intermediate = match *vte_state {
|
|
State::EscapeIntermediate(i) => i,
|
|
_ => {
|
|
*vte_state = State::Normal;
|
|
return true;
|
|
}
|
|
};
|
|
|
|
*vte_state = State::Normal;
|
|
|
|
match intermediate {
|
|
b'(' | b')' => {
|
|
let set = if intermediate == b'(' { 0 } else { 1 };
|
|
handler.designate_charset(set, ch);
|
|
}
|
|
b'#' => {
|
|
if ch == b'8' {
|
|
handler.screen_alignment();
|
|
}
|
|
}
|
|
_ => {}
|
|
}
|
|
|
|
true
|
|
}
|
|
|
|
/// Consume CSI sequence - like Kitty's csi_parse_loop().
|
|
/// Returns true when sequence is complete.
|
|
#[inline]
|
|
fn consume_csi_impl<H: Handler>(
|
|
handler: &mut H,
|
|
buf: &[u8; BUF_SIZE],
|
|
parse_pos: &mut usize,
|
|
parse_sz: usize,
|
|
parse_consumed: usize,
|
|
csi: &mut CsiParams,
|
|
escape_len: &mut usize,
|
|
) -> bool {
|
|
while *parse_pos < parse_sz {
|
|
let ch = buf[*parse_pos];
|
|
*parse_pos += 1;
|
|
*escape_len += 1;
|
|
|
|
// Handle embedded control characters
|
|
if ch <= 0x1F && ch != 0x1B {
|
|
handler.control(ch);
|
|
continue;
|
|
}
|
|
|
|
match csi.state {
|
|
CsiState::Start => match ch {
|
|
b';' => {
|
|
csi.params[csi.num_params] = 0;
|
|
csi.num_params += 1;
|
|
csi.state = CsiState::Body;
|
|
}
|
|
b'0'..=b'9' => {
|
|
csi.add_digit(ch);
|
|
csi.state = CsiState::Body;
|
|
}
|
|
b'?' | b'>' | b'<' | b'=' => {
|
|
csi.primary = ch;
|
|
csi.state = CsiState::Body;
|
|
}
|
|
b'-' => {
|
|
csi.multiplier = -1;
|
|
csi.num_digits = 1;
|
|
csi.state = CsiState::Body;
|
|
}
|
|
b' ' | b'\'' | b'"' | b'!' | b'$' | b'#' | b'*' => {
|
|
csi.secondary = ch;
|
|
csi.state = CsiState::PostSecondary;
|
|
}
|
|
b'@'..=b'~' => {
|
|
csi.final_char = ch;
|
|
csi.is_valid = true;
|
|
return true;
|
|
}
|
|
_ => {
|
|
log::debug!("Invalid CSI character: {:02x}", ch);
|
|
return true;
|
|
}
|
|
},
|
|
CsiState::Body => match ch {
|
|
b'0'..=b'9' => {
|
|
csi.add_digit(ch);
|
|
}
|
|
b';' => {
|
|
if csi.num_digits == 0 {
|
|
csi.num_digits = 1;
|
|
}
|
|
if !csi.commit_param() {
|
|
return true;
|
|
}
|
|
csi.is_sub_param[csi.num_params] = false;
|
|
}
|
|
b':' => {
|
|
if !csi.commit_param() {
|
|
return true;
|
|
}
|
|
csi.is_sub_param[csi.num_params] = true;
|
|
}
|
|
b'-' if csi.num_digits == 0 => {
|
|
csi.multiplier = -1;
|
|
csi.num_digits = 1;
|
|
}
|
|
b' ' | b'\'' | b'"' | b'!' | b'$' | b'#' | b'*' => {
|
|
if !csi.commit_param() {
|
|
return true;
|
|
}
|
|
csi.secondary = ch;
|
|
csi.state = CsiState::PostSecondary;
|
|
}
|
|
b'@'..=b'~' => {
|
|
if csi.num_digits > 0 || csi.num_params > 0 {
|
|
csi.commit_param();
|
|
}
|
|
csi.final_char = ch;
|
|
csi.is_valid = true;
|
|
return true;
|
|
}
|
|
_ => {
|
|
log::debug!("Invalid CSI body character: {:02x}", ch);
|
|
return true;
|
|
}
|
|
},
|
|
CsiState::PostSecondary => match ch {
|
|
b'@'..=b'~' => {
|
|
csi.final_char = ch;
|
|
csi.is_valid = true;
|
|
return true;
|
|
}
|
|
_ => {
|
|
log::debug!(
|
|
"Invalid CSI post-secondary character: {:02x}",
|
|
ch
|
|
);
|
|
return true;
|
|
}
|
|
},
|
|
}
|
|
}
|
|
|
|
// Check max length
|
|
if *parse_pos - parse_consumed > MAX_ESCAPE_LEN {
|
|
log::debug!("CSI escape too long, ignoring");
|
|
return true;
|
|
}
|
|
|
|
false
|
|
}
|
|
|
|
/// Consume OSC sequence.
|
|
fn consume_osc_impl<H: Handler>(
|
|
handler: &mut H,
|
|
buf: &[u8; BUF_SIZE],
|
|
parse_pos: &mut usize,
|
|
parse_sz: usize,
|
|
vte_state: &mut State,
|
|
osc_buffer: &mut Vec<u8>,
|
|
escape_len: &mut usize,
|
|
) -> bool {
|
|
while *parse_pos < parse_sz {
|
|
let ch = buf[*parse_pos];
|
|
|
|
match ch {
|
|
0x07 => {
|
|
// BEL terminator
|
|
*parse_pos += 1;
|
|
handler.osc(osc_buffer);
|
|
return true;
|
|
}
|
|
0x9C => {
|
|
// C1 ST terminator
|
|
*parse_pos += 1;
|
|
handler.osc(osc_buffer);
|
|
return true;
|
|
}
|
|
0x1B => {
|
|
// Check for ESC \
|
|
if *parse_pos + 1 < parse_sz && buf[*parse_pos + 1] == b'\\'
|
|
{
|
|
*parse_pos += 2;
|
|
handler.osc(osc_buffer);
|
|
return true;
|
|
} else if *parse_pos + 1 < parse_sz {
|
|
// ESC followed by something else - abort OSC, start new escape
|
|
*parse_pos += 1;
|
|
handler.osc(osc_buffer);
|
|
*vte_state = State::Escape;
|
|
*escape_len = 0;
|
|
return false;
|
|
} else {
|
|
// ESC at end of buffer - need more data
|
|
return false;
|
|
}
|
|
}
|
|
_ => {
|
|
osc_buffer.push(ch);
|
|
*parse_pos += 1;
|
|
*escape_len += 1;
|
|
}
|
|
}
|
|
|
|
if *escape_len > MAX_ESCAPE_LEN {
|
|
log::debug!("OSC sequence too long, aborting");
|
|
return true;
|
|
}
|
|
}
|
|
|
|
false
|
|
}
|
|
|
|
/// Consume DCS/APC/PM/SOS string sequence.
|
|
fn consume_string_impl<H: Handler>(
|
|
handler: &mut H,
|
|
buf: &[u8; BUF_SIZE],
|
|
parse_pos: &mut usize,
|
|
parse_sz: usize,
|
|
vte_state: &mut State,
|
|
string_buffer: &mut Vec<u8>,
|
|
escape_len: &mut usize,
|
|
) -> bool {
|
|
while *parse_pos < parse_sz {
|
|
let ch = buf[*parse_pos];
|
|
|
|
match ch {
|
|
0x9C => {
|
|
// C1 ST terminator
|
|
*parse_pos += 1;
|
|
Self::dispatch_string_command(
|
|
handler,
|
|
vte_state,
|
|
string_buffer,
|
|
);
|
|
return true;
|
|
}
|
|
0x1B => {
|
|
// Check for ESC \
|
|
if *parse_pos + 1 < parse_sz && buf[*parse_pos + 1] == b'\\'
|
|
{
|
|
*parse_pos += 2;
|
|
Self::dispatch_string_command(
|
|
handler,
|
|
vte_state,
|
|
string_buffer,
|
|
);
|
|
return true;
|
|
} else if *parse_pos + 1 < parse_sz {
|
|
// ESC not followed by \ - include in buffer
|
|
string_buffer.push(ch);
|
|
*parse_pos += 1;
|
|
*escape_len += 1;
|
|
} else {
|
|
// ESC at end of buffer - need more data
|
|
return false;
|
|
}
|
|
}
|
|
_ => {
|
|
string_buffer.push(ch);
|
|
*parse_pos += 1;
|
|
*escape_len += 1;
|
|
}
|
|
}
|
|
|
|
if *escape_len > MAX_ESCAPE_LEN {
|
|
log::debug!("String command too long, aborting");
|
|
return true;
|
|
}
|
|
}
|
|
|
|
false
|
|
}
|
|
|
|
/// Dispatch string command to handler.
|
|
fn dispatch_string_command<H: Handler>(
|
|
handler: &mut H,
|
|
vte_state: &State,
|
|
string_buffer: &[u8],
|
|
) {
|
|
match vte_state {
|
|
State::Dcs => handler.dcs(string_buffer),
|
|
State::Apc => handler.apc(string_buffer),
|
|
State::Pm => handler.pm(string_buffer),
|
|
State::Sos => handler.sos(string_buffer),
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Drop for SharedParser {
|
|
fn drop(&mut self) {
|
|
unsafe {
|
|
libc::close(self.wakeup_fd);
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Parser {
|
|
/// Create a new parser.
|
|
pub fn new() -> Self {
|
|
Self::default()
|
|
}
|
|
|
|
/// Check if parser is in normal (ground) state.
|
|
#[inline]
|
|
pub fn is_normal(&self) -> bool {
|
|
self.state == State::Normal
|
|
}
|
|
|
|
/// Reset parser to normal state.
|
|
pub fn reset(&mut self) {
|
|
self.state = State::Normal;
|
|
self.csi.reset();
|
|
self.utf8.reset();
|
|
self.codepoint_buf.clear();
|
|
self.osc_buffer.clear();
|
|
self.string_buffer.clear();
|
|
self.escape_len = 0;
|
|
}
|
|
|
|
/// Process a buffer of bytes, calling the handler for each action.
|
|
/// Returns the number of bytes consumed.
|
|
pub fn parse<H: Handler>(
|
|
&mut self,
|
|
bytes: &[u8],
|
|
handler: &mut H,
|
|
) -> usize {
|
|
let mut pos = 0;
|
|
|
|
while pos < bytes.len() {
|
|
match self.state {
|
|
State::Normal => {
|
|
// Fast path: UTF-8 decode until ESC using SIMD
|
|
let (consumed, found_esc) = self
|
|
.utf8
|
|
.decode_to_esc(&bytes[pos..], &mut self.codepoint_buf);
|
|
|
|
// Process decoded codepoints (text + control chars)
|
|
if !self.codepoint_buf.is_empty() {
|
|
handler.text(&self.codepoint_buf);
|
|
}
|
|
|
|
pos += consumed;
|
|
|
|
if found_esc {
|
|
self.state = State::Escape;
|
|
self.escape_len = 0;
|
|
}
|
|
}
|
|
State::Escape => {
|
|
pos += self.consume_escape(bytes, pos, handler);
|
|
}
|
|
State::EscapeIntermediate(_) => {
|
|
pos +=
|
|
self.consume_escape_intermediate(bytes, pos, handler);
|
|
}
|
|
State::Csi => {
|
|
pos += self.consume_csi(bytes, pos, handler);
|
|
}
|
|
State::Osc => {
|
|
pos += self.consume_osc(bytes, pos, handler);
|
|
}
|
|
State::Dcs | State::Apc | State::Pm | State::Sos => {
|
|
pos += self.consume_string_command(bytes, pos, handler);
|
|
}
|
|
}
|
|
}
|
|
|
|
pos
|
|
}
|
|
|
|
/// Process bytes after ESC.
|
|
fn consume_escape<H: Handler>(
|
|
&mut self,
|
|
bytes: &[u8],
|
|
pos: usize,
|
|
handler: &mut H,
|
|
) -> usize {
|
|
if pos >= bytes.len() {
|
|
return 0;
|
|
}
|
|
|
|
let ch = bytes[pos];
|
|
self.escape_len += 1;
|
|
|
|
match ch {
|
|
// CSI: ESC [
|
|
b'[' => {
|
|
self.state = State::Csi;
|
|
self.csi.reset();
|
|
1
|
|
}
|
|
// OSC: ESC ]
|
|
b']' => {
|
|
self.state = State::Osc;
|
|
self.osc_buffer.clear();
|
|
1
|
|
}
|
|
// DCS: ESC P
|
|
b'P' => {
|
|
self.state = State::Dcs;
|
|
self.string_buffer.clear();
|
|
1
|
|
}
|
|
// APC: ESC _
|
|
b'_' => {
|
|
self.state = State::Apc;
|
|
self.string_buffer.clear();
|
|
1
|
|
}
|
|
// PM: ESC ^
|
|
b'^' => {
|
|
self.state = State::Pm;
|
|
self.string_buffer.clear();
|
|
1
|
|
}
|
|
// SOS: ESC X
|
|
b'X' => {
|
|
self.state = State::Sos;
|
|
self.string_buffer.clear();
|
|
1
|
|
}
|
|
// Two-char sequences: ESC ( ESC ) ESC # ESC % ESC SP etc.
|
|
b'(' | b')' | b'*' | b'+' | b'-' | b'.' | b'/' | b'%' | b'#'
|
|
| b' ' => {
|
|
self.state = State::EscapeIntermediate(ch);
|
|
1
|
|
}
|
|
// Single-char escape sequences
|
|
b'7' => {
|
|
// DECSC - Save cursor
|
|
handler.save_cursor();
|
|
self.state = State::Normal;
|
|
1
|
|
}
|
|
b'8' => {
|
|
// DECRC - Restore cursor
|
|
handler.restore_cursor();
|
|
self.state = State::Normal;
|
|
1
|
|
}
|
|
b'c' => {
|
|
// RIS - Full reset
|
|
handler.reset();
|
|
self.state = State::Normal;
|
|
1
|
|
}
|
|
b'D' => {
|
|
// IND - Index (move down, scroll if needed)
|
|
handler.index();
|
|
self.state = State::Normal;
|
|
1
|
|
}
|
|
b'E' => {
|
|
// NEL - Next line
|
|
handler.newline();
|
|
self.state = State::Normal;
|
|
1
|
|
}
|
|
b'H' => {
|
|
// HTS - Horizontal tab set
|
|
handler.set_tab_stop();
|
|
self.state = State::Normal;
|
|
1
|
|
}
|
|
b'M' => {
|
|
// RI - Reverse index
|
|
handler.reverse_index();
|
|
self.state = State::Normal;
|
|
1
|
|
}
|
|
b'=' => {
|
|
// DECKPAM - Application keypad mode
|
|
handler.set_keypad_mode(true);
|
|
self.state = State::Normal;
|
|
1
|
|
}
|
|
b'>' => {
|
|
// DECKPNM - Normal keypad mode
|
|
handler.set_keypad_mode(false);
|
|
self.state = State::Normal;
|
|
1
|
|
}
|
|
b'\\' => {
|
|
// ST - String terminator (ignore if not in string mode)
|
|
self.state = State::Normal;
|
|
1
|
|
}
|
|
_ => {
|
|
// Unknown escape sequence, ignore and return to normal
|
|
log::debug!("Unknown escape sequence: ESC {:02x}", ch);
|
|
self.state = State::Normal;
|
|
1
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Process second byte of two-char escape sequence.
|
|
fn consume_escape_intermediate<H: Handler>(
|
|
&mut self,
|
|
bytes: &[u8],
|
|
pos: usize,
|
|
handler: &mut H,
|
|
) -> usize {
|
|
if pos >= bytes.len() {
|
|
return 0;
|
|
}
|
|
|
|
let ch = bytes[pos];
|
|
// Extract intermediate from state enum (eliminates redundant self.intermediate field)
|
|
let intermediate = match self.state {
|
|
State::EscapeIntermediate(i) => i,
|
|
_ => return 0, // Should never happen
|
|
};
|
|
self.escape_len += 1;
|
|
self.state = State::Normal;
|
|
|
|
match intermediate {
|
|
b'(' | b')' => {
|
|
// Designate character set G0/G1
|
|
let set = if intermediate == b'(' { 0 } else { 1 };
|
|
handler.designate_charset(set, ch);
|
|
}
|
|
b'#' => {
|
|
if ch == b'8' {
|
|
// DECALN - Screen alignment test
|
|
handler.screen_alignment();
|
|
}
|
|
}
|
|
b'%' => {
|
|
// Character set selection (we always use UTF-8)
|
|
}
|
|
b' ' => {
|
|
// S7C1T / S8C1T - we ignore these
|
|
}
|
|
_ => {}
|
|
}
|
|
|
|
1
|
|
}
|
|
|
|
/// Process CSI sequence bytes.
|
|
fn consume_csi<H: Handler>(
|
|
&mut self,
|
|
bytes: &[u8],
|
|
pos: usize,
|
|
handler: &mut H,
|
|
) -> usize {
|
|
let mut consumed = 0;
|
|
|
|
while pos + consumed < bytes.len() {
|
|
let ch = bytes[pos + consumed];
|
|
consumed += 1;
|
|
self.escape_len += 1;
|
|
|
|
// Check for max length
|
|
if self.escape_len > MAX_ESCAPE_LEN {
|
|
log::debug!("CSI sequence too long, aborting");
|
|
self.state = State::Normal;
|
|
return consumed;
|
|
}
|
|
|
|
// Handle control characters embedded in CSI (common to all states)
|
|
if ch <= 0x1F && ch != 0x1B {
|
|
handler.control(ch);
|
|
continue;
|
|
}
|
|
|
|
match self.csi.state {
|
|
CsiState::Start => {
|
|
match ch {
|
|
b';' => {
|
|
// Empty parameter = 0
|
|
self.csi.params[self.csi.num_params] = 0;
|
|
self.csi.num_params += 1;
|
|
self.csi.state = CsiState::Body;
|
|
}
|
|
b'0'..=b'9' => {
|
|
self.csi.add_digit(ch);
|
|
self.csi.state = CsiState::Body;
|
|
}
|
|
b'?' | b'>' | b'<' | b'=' => {
|
|
self.csi.primary = ch;
|
|
self.csi.state = CsiState::Body;
|
|
}
|
|
b' ' | b'\'' | b'"' | b'!' | b'$' | b'#' | b'*' => {
|
|
self.csi.secondary = ch;
|
|
self.csi.state = CsiState::PostSecondary;
|
|
}
|
|
b'-' => {
|
|
self.csi.multiplier = -1;
|
|
self.csi.num_digits = 1;
|
|
self.csi.state = CsiState::Body;
|
|
}
|
|
// Final byte
|
|
b'@'..=b'~' => {
|
|
self.csi.final_char = ch;
|
|
self.csi.is_valid = true;
|
|
self.dispatch_csi(handler);
|
|
self.state = State::Normal;
|
|
return consumed;
|
|
}
|
|
_ => {
|
|
log::debug!("Invalid CSI character: {:02x}", ch);
|
|
self.state = State::Normal;
|
|
return consumed;
|
|
}
|
|
}
|
|
}
|
|
CsiState::Body => {
|
|
match ch {
|
|
b'0'..=b'9' => {
|
|
self.csi.add_digit(ch);
|
|
}
|
|
b';' => {
|
|
if self.csi.num_digits == 0 {
|
|
self.csi.num_digits = 1; // Empty = 0
|
|
}
|
|
if !self.csi.commit_param() {
|
|
self.state = State::Normal;
|
|
return consumed;
|
|
}
|
|
self.csi.is_sub_param[self.csi.num_params] = false;
|
|
}
|
|
b':' => {
|
|
if !self.csi.commit_param() {
|
|
self.state = State::Normal;
|
|
return consumed;
|
|
}
|
|
self.csi.is_sub_param[self.csi.num_params] = true;
|
|
}
|
|
b' ' | b'\'' | b'"' | b'!' | b'$' | b'#' | b'*' => {
|
|
if !self.csi.commit_param() {
|
|
self.state = State::Normal;
|
|
return consumed;
|
|
}
|
|
self.csi.secondary = ch;
|
|
self.csi.state = CsiState::PostSecondary;
|
|
}
|
|
b'-' if self.csi.num_digits == 0 => {
|
|
self.csi.multiplier = -1;
|
|
self.csi.num_digits = 1;
|
|
}
|
|
// Final byte
|
|
b'@'..=b'~' => {
|
|
if self.csi.num_digits > 0
|
|
|| self.csi.num_params > 0
|
|
{
|
|
self.csi.commit_param();
|
|
}
|
|
self.csi.final_char = ch;
|
|
self.csi.is_valid = true;
|
|
self.dispatch_csi(handler);
|
|
self.state = State::Normal;
|
|
return consumed;
|
|
}
|
|
_ => {
|
|
log::debug!(
|
|
"Invalid CSI body character: {:02x}",
|
|
ch
|
|
);
|
|
self.state = State::Normal;
|
|
return consumed;
|
|
}
|
|
}
|
|
}
|
|
CsiState::PostSecondary => {
|
|
match ch {
|
|
// Final byte
|
|
b'@'..=b'~' => {
|
|
self.csi.final_char = ch;
|
|
self.csi.is_valid = true;
|
|
self.dispatch_csi(handler);
|
|
self.state = State::Normal;
|
|
return consumed;
|
|
}
|
|
_ => {
|
|
log::debug!(
|
|
"Invalid CSI post-secondary character: {:02x}",
|
|
ch
|
|
);
|
|
self.state = State::Normal;
|
|
return consumed;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
consumed
|
|
}
|
|
|
|
/// Dispatch a complete CSI sequence to the handler.
|
|
fn dispatch_csi<H: Handler>(&mut self, handler: &mut H) {
|
|
handler.csi(&self.csi);
|
|
}
|
|
|
|
/// Process OSC sequence bytes using SIMD-accelerated terminator search.
|
|
/// Like Kitty's find_st_terminator + accumulate_st_terminated_esc_code.
|
|
fn consume_osc<H: Handler>(
|
|
&mut self,
|
|
bytes: &[u8],
|
|
pos: usize,
|
|
handler: &mut H,
|
|
) -> usize {
|
|
let remaining = &bytes[pos..];
|
|
|
|
// Use SIMD-accelerated search to find BEL (0x07), ESC (0x1B), or C1 ST (0x9C)
|
|
// memchr2 finds either of two bytes; we check ESC specially for ESC \ sequence
|
|
// First, try to find BEL or C1 ST (the simple terminators)
|
|
if let Some(term_pos) = memchr::memchr3(0x07, 0x1B, 0x9C, remaining) {
|
|
let terminator = remaining[term_pos];
|
|
|
|
// Check max length before accepting
|
|
if self.escape_len + term_pos > MAX_ESCAPE_LEN
|
|
|| self.osc_buffer.len() + term_pos > MAX_OSC_LEN
|
|
{
|
|
log::debug!("OSC sequence too long, aborting");
|
|
self.state = State::Normal;
|
|
return remaining.len();
|
|
}
|
|
|
|
match terminator {
|
|
0x07 => {
|
|
// BEL terminator - copy data in bulk and dispatch
|
|
self.osc_buffer.extend_from_slice(&remaining[..term_pos]);
|
|
handler.osc(&self.osc_buffer);
|
|
self.state = State::Normal;
|
|
self.escape_len += term_pos + 1;
|
|
return term_pos + 1;
|
|
}
|
|
0x9C => {
|
|
// C1 ST terminator - copy data in bulk and dispatch
|
|
self.osc_buffer.extend_from_slice(&remaining[..term_pos]);
|
|
handler.osc(&self.osc_buffer);
|
|
self.state = State::Normal;
|
|
self.escape_len += term_pos + 1;
|
|
return term_pos + 1;
|
|
}
|
|
0x1B => {
|
|
// ESC found - check if followed by \ for ST
|
|
if term_pos + 1 < remaining.len()
|
|
&& remaining[term_pos + 1] == b'\\'
|
|
{
|
|
// ESC \ (ST) terminator
|
|
self.osc_buffer
|
|
.extend_from_slice(&remaining[..term_pos]);
|
|
handler.osc(&self.osc_buffer);
|
|
self.state = State::Normal;
|
|
self.escape_len += term_pos + 2;
|
|
return term_pos + 2;
|
|
} else if term_pos + 1 < remaining.len() {
|
|
// ESC not followed by \ - this is a new escape sequence
|
|
// Copy everything before ESC and transition to Escape state
|
|
self.osc_buffer
|
|
.extend_from_slice(&remaining[..term_pos]);
|
|
handler.osc(&self.osc_buffer);
|
|
self.state = State::Escape;
|
|
self.escape_len += term_pos + 1;
|
|
return term_pos + 1;
|
|
} else {
|
|
// ESC at end of buffer, need more data
|
|
// Copy everything before ESC, keep ESC for next parse
|
|
self.osc_buffer
|
|
.extend_from_slice(&remaining[..term_pos]);
|
|
self.escape_len += term_pos;
|
|
return term_pos;
|
|
}
|
|
}
|
|
_ => unreachable!(),
|
|
}
|
|
} else {
|
|
// No terminator found - check max length
|
|
if self.escape_len + remaining.len() > MAX_ESCAPE_LEN
|
|
|| self.osc_buffer.len() + remaining.len() > MAX_OSC_LEN
|
|
{
|
|
log::debug!("OSC sequence too long, aborting");
|
|
self.state = State::Normal;
|
|
return remaining.len();
|
|
}
|
|
|
|
// Buffer all remaining bytes for next parse call
|
|
self.osc_buffer.extend_from_slice(remaining);
|
|
self.escape_len += remaining.len();
|
|
return remaining.len();
|
|
}
|
|
}
|
|
|
|
/// Dispatch the string command to the appropriate handler method.
|
|
#[inline]
|
|
fn dispatch_string_command<H: Handler>(&self, handler: &mut H) {
|
|
match self.state {
|
|
State::Dcs => handler.dcs(&self.string_buffer),
|
|
State::Apc => handler.apc(&self.string_buffer),
|
|
State::Pm => handler.pm(&self.string_buffer),
|
|
State::Sos => handler.sos(&self.string_buffer),
|
|
_ => {
|
|
unreachable!("dispatch_string_command called in invalid state")
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Process DCS/APC/PM/SOS sequence bytes using SIMD-accelerated terminator search.
|
|
/// Like Kitty's find_st_terminator + accumulate_st_terminated_esc_code.
|
|
/// Uses iterative approach to avoid stack overflow on malformed input.
|
|
fn consume_string_command<H: Handler>(
|
|
&mut self,
|
|
bytes: &[u8],
|
|
pos: usize,
|
|
handler: &mut H,
|
|
) -> usize {
|
|
let mut current_pos = pos;
|
|
let mut total_consumed = 0;
|
|
|
|
loop {
|
|
let remaining = &bytes[current_pos..];
|
|
|
|
// Use SIMD-accelerated search to find ESC (0x1B) or C1 ST (0x9C)
|
|
if let Some(term_pos) = memchr::memchr2(0x1B, 0x9C, remaining) {
|
|
let terminator = remaining[term_pos];
|
|
|
|
// Check max length before accepting
|
|
if self.escape_len + term_pos > MAX_ESCAPE_LEN {
|
|
log::debug!("String command too long, aborting");
|
|
self.state = State::Normal;
|
|
return total_consumed + remaining.len();
|
|
}
|
|
|
|
match terminator {
|
|
0x9C => {
|
|
// C1 ST terminator - copy data in bulk and dispatch
|
|
self.string_buffer
|
|
.extend_from_slice(&remaining[..term_pos]);
|
|
self.dispatch_string_command(handler);
|
|
self.state = State::Normal;
|
|
self.escape_len += term_pos + 1;
|
|
return total_consumed + term_pos + 1;
|
|
}
|
|
0x1B => {
|
|
// ESC found - check if followed by \ for ST
|
|
if term_pos + 1 < remaining.len()
|
|
&& remaining[term_pos + 1] == b'\\'
|
|
{
|
|
// ESC \ (ST) terminator
|
|
self.string_buffer
|
|
.extend_from_slice(&remaining[..term_pos]);
|
|
self.dispatch_string_command(handler);
|
|
self.state = State::Normal;
|
|
self.escape_len += term_pos + 2;
|
|
return total_consumed + term_pos + 2;
|
|
} else if term_pos + 1 < remaining.len() {
|
|
// ESC not followed by \ - include ESC in data and continue
|
|
// (Unlike OSC, string commands include raw ESC that isn't ST)
|
|
self.string_buffer
|
|
.extend_from_slice(&remaining[..=term_pos]);
|
|
self.escape_len += term_pos + 1;
|
|
// Continue searching from after this ESC (iterative, not recursive)
|
|
let consumed = term_pos + 1;
|
|
total_consumed += consumed;
|
|
current_pos += consumed;
|
|
continue;
|
|
} else {
|
|
// ESC at end of buffer, need more data
|
|
// Copy everything before ESC, keep ESC for next parse
|
|
self.string_buffer
|
|
.extend_from_slice(&remaining[..term_pos]);
|
|
self.escape_len += term_pos;
|
|
return total_consumed + term_pos;
|
|
}
|
|
}
|
|
_ => unreachable!(),
|
|
}
|
|
} else {
|
|
// No terminator found - check max length
|
|
if self.escape_len + remaining.len() > MAX_ESCAPE_LEN {
|
|
log::debug!("String command too long, aborting");
|
|
self.state = State::Normal;
|
|
return total_consumed + remaining.len();
|
|
}
|
|
|
|
// Buffer all remaining bytes for next parse call
|
|
self.string_buffer.extend_from_slice(remaining);
|
|
self.escape_len += remaining.len();
|
|
return total_consumed + remaining.len();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Handler trait for responding to parsed escape sequences.
|
|
///
|
|
/// Unlike the vte crate's Perform trait, this trait receives decoded characters
|
|
/// (not bytes) for text, and control characters are expected to be handled
|
|
/// inline in the text() method (like Kitty does).
|
|
pub trait Handler {
|
|
/// Handle a chunk of decoded text (Unicode codepoints as u32).
|
|
///
|
|
/// This includes control characters (0x00-0x1F except ESC).
|
|
/// The handler should process control chars like:
|
|
/// - LF (0x0A), VT (0x0B), FF (0x0C): line feed
|
|
/// - CR (0x0D): carriage return
|
|
/// - HT (0x09): tab
|
|
/// - BS (0x08): backspace
|
|
/// - BEL (0x07): bell
|
|
///
|
|
/// ESC is never passed to this method - it triggers state transitions.
|
|
///
|
|
/// Codepoints are passed as u32 for efficiency (avoiding char validation).
|
|
/// All codepoints are guaranteed to be valid Unicode (validated during UTF-8 decode).
|
|
fn text(&mut self, codepoints: &[u32]);
|
|
|
|
/// Handle a single control character embedded in a CSI/OSC sequence.
|
|
/// This is called for control chars (0x00-0x1F) that appear inside
|
|
/// escape sequences, which should still be processed.
|
|
fn control(&mut self, byte: u8);
|
|
|
|
/// Handle a complete CSI sequence.
|
|
fn csi(&mut self, params: &CsiParams);
|
|
|
|
/// Handle a complete OSC sequence.
|
|
fn osc(&mut self, data: &[u8]);
|
|
|
|
/// Handle a DCS sequence.
|
|
fn dcs(&mut self, _data: &[u8]) {}
|
|
|
|
/// Handle an APC sequence.
|
|
fn apc(&mut self, _data: &[u8]) {}
|
|
|
|
/// Handle a PM sequence.
|
|
fn pm(&mut self, _data: &[u8]) {}
|
|
|
|
/// Handle a SOS sequence.
|
|
fn sos(&mut self, _data: &[u8]) {}
|
|
|
|
/// Save cursor position (DECSC).
|
|
fn save_cursor(&mut self) {}
|
|
|
|
/// Restore cursor position (DECRC).
|
|
fn restore_cursor(&mut self) {}
|
|
|
|
/// Full terminal reset (RIS).
|
|
fn reset(&mut self) {}
|
|
|
|
/// Index - move cursor down, scroll if at bottom (IND).
|
|
fn index(&mut self) {}
|
|
|
|
/// Newline - carriage return + line feed (NEL).
|
|
fn newline(&mut self) {}
|
|
|
|
/// Reverse index - move cursor up, scroll if at top (RI).
|
|
fn reverse_index(&mut self) {}
|
|
|
|
/// Set tab stop at current position (HTS).
|
|
fn set_tab_stop(&mut self) {}
|
|
|
|
/// Set keypad application/normal mode.
|
|
fn set_keypad_mode(&mut self, _application: bool) {}
|
|
|
|
/// Designate character set.
|
|
fn designate_charset(&mut self, _set: u8, _charset: u8) {}
|
|
|
|
/// Screen alignment test (DECALN).
|
|
fn screen_alignment(&mut self) {}
|
|
|
|
/// Add VT parser time (for performance tracking).
|
|
/// Called by the parser to report time spent in consume_input.
|
|
fn add_vt_parser_ns(&mut self, _ns: u64) {}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Handler that records what the parser reports so tests can inspect it.
    struct TestHandler {
        text_chunks: Vec<Vec<u32>>,
        csi_count: usize,
        osc_count: usize,
        control_chars: Vec<u8>,
    }

    impl TestHandler {
        fn new() -> Self {
            Self {
                text_chunks: Vec::new(),
                csi_count: 0,
                osc_count: 0,
                control_chars: Vec::new(),
            }
        }
    }

    impl Handler for TestHandler {
        fn text(&mut self, codepoints: &[u32]) {
            self.text_chunks.push(codepoints.to_vec());
        }

        fn control(&mut self, byte: u8) {
            self.control_chars.push(byte);
        }

        fn csi(&mut self, _params: &CsiParams) {
            self.csi_count += 1;
        }

        fn osc(&mut self, _data: &[u8]) {
            self.osc_count += 1;
        }
    }

    /// Run `input` through a fresh parser and return the recording handler.
    fn parse_all(input: &[u8]) -> TestHandler {
        let mut parser = Parser::new();
        let mut handler = TestHandler::new();
        parser.parse(input, &mut handler);
        handler
    }

    /// Turn one recorded text chunk back into a `String`.
    fn chunk_text(chunk: &[u32]) -> String {
        chunk.iter().filter_map(|&cp| char::from_u32(cp)).collect()
    }

    #[test]
    fn test_plain_text() {
        let handler = parse_all(b"Hello, World!");

        assert_eq!(handler.text_chunks.len(), 1);
        assert_eq!(chunk_text(&handler.text_chunks[0]), "Hello, World!");
    }

    #[test]
    fn test_utf8_text() {
        let handler = parse_all("Hello, 世界!".as_bytes());

        assert_eq!(handler.text_chunks.len(), 1);
        assert_eq!(chunk_text(&handler.text_chunks[0]), "Hello, 世界!");
    }

    #[test]
    fn test_control_chars_in_text() {
        // LF and CR travel inside the text chunk rather than splitting it.
        let handler = parse_all(b"Hello\nWorld\r!");

        assert_eq!(handler.text_chunks.len(), 1);
        assert_eq!(chunk_text(&handler.text_chunks[0]), "Hello\nWorld\r!");
    }

    #[test]
    fn test_csi_sequence() {
        // ESC [ 1 ; 2 m (SGR bold + dim)
        let handler = parse_all(b"\x1b[1;2m");

        assert_eq!(handler.csi_count, 1);
    }

    #[test]
    fn test_mixed_text_and_csi() {
        let handler = parse_all(b"Hello\x1b[1mWorld");

        assert_eq!(handler.text_chunks.len(), 2);
        assert_eq!(chunk_text(&handler.text_chunks[0]), "Hello");
        assert_eq!(chunk_text(&handler.text_chunks[1]), "World");
        assert_eq!(handler.csi_count, 1);
    }

    #[test]
    fn test_osc_sequence() {
        // OSC 0 ; title BEL
        let handler = parse_all(b"\x1b]0;My Title\x07");

        assert_eq!(handler.osc_count, 1);
    }

    #[test]
    fn test_csi_with_subparams() {
        // CSI 38:2:255:128:64 m (RGB foreground with colon separators)
        let handler = parse_all(b"\x1b[38:2:255:128:64m");

        assert_eq!(handler.csi_count, 1);
    }
}
|