refactor for background process + spawning gui

This commit is contained in:
Zacharias-Brohn
2025-12-15 12:20:37 +01:00
parent 5d47177fbf
commit e4d742cadf
19 changed files with 5928 additions and 4384 deletions
+982
View File
@@ -0,0 +1,982 @@
//! VT Parser - A high-performance terminal escape sequence parser.
//!
//! Based on Kitty's vt-parser.c design, this parser uses explicit state tracking
//! to enable fast-path processing of normal text while correctly handling
//! escape sequences.
//!
//! Key design principles from Kitty:
//! 1. UTF-8 decode until ESC sentinel is found (not byte-by-byte parsing)
//! 2. Pass decoded codepoints to the text handler, not raw bytes
//! 3. Control characters (LF, CR, TAB, BS, etc.) are handled inline in text drawing
//! 4. Only ESC triggers state machine transitions
/// Upper bound on the number of CSI parameters (sub-parameters included)
/// stored for a single sequence.
pub const MAX_CSI_PARAMS: usize = 256;
/// Upper bound, in bytes, on a buffered OSC payload (4 KiB).
const MAX_OSC_LEN: usize = 4 * 1024;
/// Upper bound, in bytes, on one escape sequence before the parser gives up
/// on it (256 KiB, like Kitty).
const MAX_ESCAPE_LEN: usize = 256 * 1024;
/// Replacement character emitted for every malformed UTF-8 sequence.
const REPLACEMENT_CHAR: char = '\u{FFFD}';
/// DFA state: a complete, valid codepoint has just been decoded.
const UTF8_ACCEPT: u8 = 0;
/// DFA state: the bytes seen so far cannot be valid UTF-8.
const UTF8_REJECT: u8 = 12;
/// UTF-8 character class and state transition tables.
/// Based on Bjoern Hoehrmann's DFA decoder.
///
/// Entries `0..256` map a byte to its character class; the remaining entries
/// are the transition table, indexed by `256 + state + class`.
static UTF8_DECODE_TABLE: [u8; 364] = [
    // Character class lookup (0-255)
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
    8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
    // State transition table
    0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
    12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
    12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
    12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
    12,36,12,12,12,12,12,12,12,12,12,12,
];
/// Feed one byte to the UTF-8 DFA.
///
/// Updates `state` and accumulates payload bits into `codep`. Returns the new
/// state: `UTF8_ACCEPT` when `codep` holds a finished codepoint,
/// `UTF8_REJECT` when the input is malformed, anything else while a
/// multi-byte sequence is still in progress.
#[inline]
fn decode_utf8(state: &mut u8, codep: &mut u32, byte: u8) -> u8 {
    let char_class = UTF8_DECODE_TABLE[byte as usize];
    *codep = if *state == UTF8_ACCEPT {
        // Lead byte: the class doubles as the count of marker bits to strip.
        (0xFF >> char_class) as u32 & byte as u32
    } else {
        // Continuation byte: append its six payload bits.
        (byte as u32 & 0x3F) | (*codep << 6)
    };
    *state = UTF8_DECODE_TABLE[256 + *state as usize + char_class as usize];
    *state
}
/// Incremental UTF-8 decoder that stops at the first ESC (0x1B).
///
/// The DFA state is kept between calls, so a multi-byte character split
/// across two input chunks is decoded correctly.
#[derive(Debug, Default)]
pub struct Utf8Decoder {
    /// Current DFA state (`UTF8_ACCEPT` when idle).
    state: u8,
    /// Codepoint bits accumulated so far.
    codep: u32,
}
impl Utf8Decoder {
    /// Create a decoder in the idle state.
    pub fn new() -> Self {
        Self::default()
    }
    /// Drop any partially decoded sequence and return to the idle state.
    pub fn reset(&mut self) {
        self.state = UTF8_ACCEPT;
        self.codep = 0;
    }
    /// Decode UTF-8 bytes until ESC is found or `src` is exhausted.
    ///
    /// `output` is cleared and then filled with the decoded codepoints; each
    /// malformed sequence contributes one U+FFFD.
    ///
    /// Returns `(bytes_consumed, found_esc)`. When `found_esc` is true the
    /// ESC byte itself is included in `bytes_consumed` and is not emitted.
    /// Otherwise all of `src` has been consumed.
    #[inline]
    pub fn decode_to_esc(&mut self, src: &[u8], output: &mut Vec<char>) -> (usize, bool) {
        output.clear();
        // Worst case is one char per byte (pure ASCII); reserve once up front.
        output.reserve(src.len());
        let mut idx = 0;
        while idx < src.len() {
            let byte = src[idx];
            if byte == 0x1B {
                // An ESC that interrupts a multi-byte sequence invalidates it.
                if self.state != UTF8_ACCEPT {
                    output.push(REPLACEMENT_CHAR);
                }
                self.reset();
                return (idx + 1, true);
            }
            let was_mid_sequence = self.state != UTF8_ACCEPT;
            match decode_utf8(&mut self.state, &mut self.codep, byte) {
                UTF8_ACCEPT => {
                    // The DFA rejects surrogates and out-of-range values, so
                    // this conversion is expected to succeed.
                    if let Some(c) = char::from_u32(self.codep) {
                        output.push(c);
                    }
                }
                UTF8_REJECT => {
                    output.push(REPLACEMENT_CHAR);
                    self.state = UTF8_ACCEPT;
                    if was_mid_sequence {
                        // This byte only cut a sequence short; it may itself
                        // be valid. Look at it again from the idle state
                        // WITHOUT advancing. The retry cannot loop forever:
                        // it starts idle, so the byte is consumed next time.
                        continue;
                    }
                    // Otherwise it was a bad lead byte and is consumed here.
                }
                _ => {
                    // Still accumulating a multi-byte sequence.
                }
            }
            idx += 1;
        }
        (src.len(), false)
    }
}
/// Parser state.
///
/// `Normal` is the default; the default is derived rather than hand-written,
/// matching how `CsiState` declares its default.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum State {
    /// Normal text processing mode.
    #[default]
    Normal,
    /// Just saw ESC, waiting for next character.
    Escape,
    /// ESC plus an intermediate byte seen (e.g. the `(` of ESC ( B); waiting
    /// for the final byte. Carries the intermediate byte.
    EscapeIntermediate(u8),
    /// Processing CSI sequence (ESC [).
    Csi,
    /// Processing OSC sequence (ESC ]).
    Osc,
    /// Processing DCS sequence (ESC P).
    Dcs,
    /// Processing APC sequence (ESC _).
    Apc,
    /// Processing PM sequence (ESC ^).
    Pm,
    /// Processing SOS sequence (ESC X).
    Sos,
}
/// CSI parsing sub-state.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
enum CsiState {
    /// Nothing but control bytes seen since `ESC [`.
    #[default]
    Start,
    /// Collecting parameters and separators.
    Body,
    /// A secondary modifier was seen; only the final byte may follow.
    PostSecondary,
}
/// Parsed CSI sequence data.
#[derive(Debug, Clone)]
pub struct CsiParams {
    /// Collected parameters.
    pub params: [i32; MAX_CSI_PARAMS],
    /// Which parameters are sub-parameters (colon-separated).
    pub is_sub_param: [bool; MAX_CSI_PARAMS],
    /// Number of collected parameters.
    pub num_params: usize,
    /// Primary modifier (e.g., '?' in CSI ? Ps h), or 0 if absent.
    pub primary: u8,
    /// Secondary modifier (e.g., '$' in CSI Ps $ p), or 0 if absent.
    pub secondary: u8,
    /// Final character (e.g., 'm' in CSI 1 m).
    pub final_char: u8,
    /// Whether the sequence is valid.
    pub is_valid: bool,
    // Internal parsing state
    /// Where in the sequence the parser currently is.
    state: CsiState,
    /// Magnitude of the parameter being read (never negative).
    accumulator: i64,
    /// Sign of the parameter being read: 1 or -1.
    multiplier: i32,
    /// Digits (or a leading '-') seen for the parameter being read.
    num_digits: usize,
}
impl Default for CsiParams {
    fn default() -> Self {
        Self {
            params: [0; MAX_CSI_PARAMS],
            is_sub_param: [false; MAX_CSI_PARAMS],
            num_params: 0,
            primary: 0,
            secondary: 0,
            final_char: 0,
            is_valid: false,
            state: CsiState::Start,
            accumulator: 0,
            multiplier: 1,
            num_digits: 0,
        }
    }
}
impl CsiParams {
    /// Reset for a new CSI sequence.
    ///
    /// Reuses `Default` so the two can never drift apart.
    pub fn reset(&mut self) {
        *self = Self::default();
    }
    /// Get parameter at index, or `default` if it is absent.
    ///
    /// A stored value of 0 counts as absent, so an explicit `0` also yields
    /// `default`.
    #[inline]
    pub fn get(&self, index: usize, default: i32) -> i32 {
        if index < self.num_params && self.params[index] != 0 {
            self.params[index]
        } else {
            default
        }
    }
    /// Add a decimal digit (`b'0'..=b'9'`) to the current parameter.
    ///
    /// The accumulator saturates instead of overflowing.
    #[inline]
    fn add_digit(&mut self, digit: u8) {
        self.accumulator = self.accumulator.saturating_mul(10).saturating_add((digit - b'0') as i64);
        self.num_digits += 1;
    }
    /// Commit the current parameter and start a new one.
    ///
    /// Returns `false`, storing nothing, when `MAX_CSI_PARAMS` parameters
    /// are already held.
    fn commit_param(&mut self) -> bool {
        if self.num_params >= MAX_CSI_PARAMS {
            return false;
        }
        // Clamp BEFORE narrowing: a plain `as i32` keeps only the low 32
        // bits, turning oversized parameters into arbitrary values.
        let magnitude = self.accumulator.min(i64::from(i32::MAX)) as i32;
        let value = magnitude.saturating_mul(self.multiplier);
        self.params[self.num_params] = value;
        self.num_params += 1;
        self.accumulator = 0;
        self.multiplier = 1;
        self.num_digits = 0;
        true
    }
}
/// VT Parser with Kitty-style state tracking.
///
/// Holds the current state plus the scratch buffers needed to resume a
/// sequence that is split across several `parse` calls.
#[derive(Debug)]
pub struct Parser {
/// Current parser state.
pub state: State,
/// CSI parameters being collected; passed to `Handler::csi` when a CSI
/// sequence completes.
pub csi: CsiParams,
/// UTF-8 decoder for text; keeps partial-character state between calls.
utf8: Utf8Decoder,
/// Decoded character buffer (reused to avoid allocation).
char_buf: Vec<char>,
/// OSC payload bytes collected so far.
osc_buffer: Vec<u8>,
/// DCS/APC/PM/SOS payload bytes collected so far.
string_buffer: Vec<u8>,
/// Intermediate byte for two-char escape sequences (same value that
/// `State::EscapeIntermediate` carries).
intermediate: u8,
/// Number of bytes consumed in current escape sequence (for max length check).
escape_len: usize,
}
impl Default for Parser {
fn default() -> Self {
Self {
state: State::Normal,
csi: CsiParams::default(),
utf8: Utf8Decoder::new(),
// Pre-allocate to match typical read buffer sizes (1MB) to avoid reallocation
char_buf: Vec::with_capacity(1024 * 1024),
osc_buffer: Vec::new(),
string_buffer: Vec::new(),
intermediate: 0,
escape_len: 0,
}
}
}
impl Parser {
/// Create a new parser.
pub fn new() -> Self {
Self::default()
}
/// Report whether the parser is in the normal (ground) state, i.e. not in
/// the middle of an escape sequence.
#[inline]
pub fn is_normal(&self) -> bool {
    matches!(self.state, State::Normal)
}
/// Return the parser to the normal state, discarding any partially parsed
/// sequence. Buffers are emptied but keep their allocations.
pub fn reset(&mut self) {
    // Scalar bookkeeping.
    self.escape_len = 0;
    self.intermediate = 0;
    self.state = State::Normal;
    // Nested decoders.
    self.utf8.reset();
    self.csi.reset();
    // Scratch buffers.
    self.string_buffer.clear();
    self.osc_buffer.clear();
    self.char_buf.clear();
}
/// Feed a chunk of terminal output to the parser.
///
/// Text runs are UTF-8 decoded up to the next ESC and passed to
/// `handler.text`; escape sequences go to the per-state `consume_*`
/// routines. State carries over between calls, so a sequence may be split
/// across chunks.
///
/// Returns the number of bytes consumed.
pub fn parse<H: Handler>(&mut self, bytes: &[u8], handler: &mut H) -> usize {
    let total = bytes.len();
    let mut offset = 0;
    while offset < total {
        let advanced = match self.state {
            State::Normal => {
                // Fast path: decode straight through to the next ESC.
                let (used, hit_esc) = self.utf8.decode_to_esc(&bytes[offset..], &mut self.char_buf);
                // Decoded text includes inline control characters.
                if !self.char_buf.is_empty() {
                    handler.text(&self.char_buf);
                }
                if hit_esc {
                    self.state = State::Escape;
                    self.escape_len = 0;
                }
                used
            }
            State::Escape => self.consume_escape(bytes, offset, handler),
            State::EscapeIntermediate(_) => self.consume_escape_intermediate(bytes, offset, handler),
            State::Csi => self.consume_csi(bytes, offset, handler),
            State::Osc => self.consume_osc(bytes, offset, handler),
            State::Dcs | State::Apc | State::Pm | State::Sos => {
                self.consume_string_command(bytes, offset, handler)
            }
        };
        offset += advanced;
    }
    offset
}
/// Handle the byte that immediately follows ESC.
///
/// Sequence introducers (`[`, `]`, `P`, `_`, `^`, `X`) and intermediate
/// bytes move to the matching state. Single-byte sequences call their
/// handler method and return to normal. Unknown bytes are logged and
/// ignored.
///
/// Returns the number of bytes consumed: 1, or 0 if `pos` is past the end.
fn consume_escape<H: Handler>(&mut self, bytes: &[u8], pos: usize, handler: &mut H) -> usize {
    let ch = match bytes.get(pos) {
        Some(&b) => b,
        None => return 0,
    };
    self.escape_len += 1;
    self.state = match ch {
        // CSI: ESC [
        b'[' => {
            self.csi.reset();
            State::Csi
        }
        // OSC: ESC ]
        b']' => {
            self.osc_buffer.clear();
            State::Osc
        }
        // String commands share one payload buffer:
        // DCS (ESC P), APC (ESC _), PM (ESC ^), SOS (ESC X).
        b'P' | b'_' | b'^' | b'X' => {
            self.string_buffer.clear();
            match ch {
                b'P' => State::Dcs,
                b'_' => State::Apc,
                b'^' => State::Pm,
                _ => State::Sos,
            }
        }
        // Two-char sequences: ESC ( ESC ) ESC # ESC % ESC SP etc.
        b'(' | b')' | b'*' | b'+' | b'-' | b'.' | b'/' | b'%' | b'#' | b' ' => {
            self.intermediate = ch;
            State::EscapeIntermediate(ch)
        }
        // Everything else is a complete one-byte sequence.
        _ => {
            match ch {
                // DECSC - Save cursor
                b'7' => handler.save_cursor(),
                // DECRC - Restore cursor
                b'8' => handler.restore_cursor(),
                // RIS - Full reset
                b'c' => handler.reset(),
                // IND - Index (move down, scroll if needed)
                b'D' => handler.index(),
                // NEL - Next line
                b'E' => handler.newline(),
                // HTS - Horizontal tab set
                b'H' => handler.set_tab_stop(),
                // RI - Reverse index
                b'M' => handler.reverse_index(),
                // DECKPAM - Application keypad mode
                b'=' => handler.set_keypad_mode(true),
                // DECKPNM - Normal keypad mode
                b'>' => handler.set_keypad_mode(false),
                // ST - String terminator with no string open: nothing to do
                b'\\' => {}
                _ => {
                    log::debug!("Unknown escape sequence: ESC {:02x}", ch);
                }
            }
            State::Normal
        }
    };
    1
}
/// Handle the final byte of a two-character escape sequence.
///
/// Only charset designation for G0/G1 (`ESC (` / `ESC )`) and DECALN
/// (`ESC # 8`) reach the handler; every other combination is consumed and
/// ignored. The parser always returns to the normal state.
///
/// Returns the number of bytes consumed: 1, or 0 if `pos` is past the end.
fn consume_escape_intermediate<H: Handler>(&mut self, bytes: &[u8], pos: usize, handler: &mut H) -> usize {
    let final_byte = match bytes.get(pos) {
        Some(&b) => b,
        None => return 0,
    };
    self.escape_len += 1;
    self.state = State::Normal;
    match (self.intermediate, final_byte) {
        // Designate character set G0
        (b'(', charset) => handler.designate_charset(0, charset),
        // Designate character set G1
        (b')', charset) => handler.designate_charset(1, charset),
        // DECALN - Screen alignment test
        (b'#', b'8') => handler.screen_alignment(),
        // ESC % (charset selection; we always use UTF-8), ESC SP (S7C1T /
        // S8C1T) and anything else: ignored.
        _ => {}
    }
    1
}
/// Process CSI sequence bytes.
///
/// Consumes bytes from `bytes[pos..]` until the sequence is dispatched,
/// aborted, or the input runs out; in the last case parsing resumes on the
/// next call. Returns the number of bytes consumed.
///
/// * C0 control bytes inside the sequence go to `handler.control`; an
///   embedded ESC is dropped.
/// * A final byte (`@`..=`~`) dispatches the sequence via `handler.csi`.
/// * An unexpected byte, too many parameters, or exceeding
///   `MAX_ESCAPE_LEN` aborts the sequence and returns to the normal state.
fn consume_csi<H: Handler>(&mut self, bytes: &[u8], pos: usize, handler: &mut H) -> usize {
    /// Outcome of examining one byte.
    enum Step {
        /// Keep reading the sequence.
        Continue,
        /// The byte is the final byte: dispatch and finish.
        Dispatch,
        /// The sequence is malformed: drop it.
        Abort,
    }

    let mut consumed = 0;
    while pos + consumed < bytes.len() {
        let ch = bytes[pos + consumed];
        consumed += 1;
        self.escape_len += 1;
        // Check for max length
        if self.escape_len > MAX_ESCAPE_LEN {
            log::debug!("CSI sequence too long, aborting");
            self.state = State::Normal;
            return consumed;
        }
        // Control characters are treated the same in every sub-state:
        // executed in place, except ESC, which is ignored.
        if ch <= 0x1F {
            if ch != 0x1B {
                handler.control(ch);
            }
            continue;
        }
        let step = match self.csi.state {
            CsiState::Start => match ch {
                b';' => {
                    // Empty first parameter = 0. Nothing has been committed
                    // yet in this sub-state, so the index is in bounds.
                    self.csi.params[self.csi.num_params] = 0;
                    self.csi.num_params += 1;
                    self.csi.state = CsiState::Body;
                    Step::Continue
                }
                b'0'..=b'9' => {
                    self.csi.add_digit(ch);
                    self.csi.state = CsiState::Body;
                    Step::Continue
                }
                b'?' | b'>' | b'<' | b'=' => {
                    self.csi.primary = ch;
                    self.csi.state = CsiState::Body;
                    Step::Continue
                }
                b' ' | b'\'' | b'"' | b'!' | b'$' => {
                    self.csi.secondary = ch;
                    self.csi.state = CsiState::PostSecondary;
                    Step::Continue
                }
                b'-' => {
                    self.csi.multiplier = -1;
                    self.csi.num_digits = 1;
                    self.csi.state = CsiState::Body;
                    Step::Continue
                }
                // Final byte
                b'@'..=b'~' => Step::Dispatch,
                _ => {
                    log::debug!("Invalid CSI character: {:02x}", ch);
                    Step::Abort
                }
            },
            CsiState::Body => match ch {
                b'0'..=b'9' => {
                    self.csi.add_digit(ch);
                    Step::Continue
                }
                // ';' starts a new parameter, ':' a new sub-parameter. An
                // empty parameter commits as 0.
                b';' | b':' => {
                    if self.csi.commit_param() {
                        // Flag the slot the NEXT parameter will occupy. Once
                        // the table is full, `num_params` equals
                        // MAX_CSI_PARAMS and there is no such slot; direct
                        // indexing would panic on input the remote side
                        // controls.
                        if let Some(flag) = self.csi.is_sub_param.get_mut(self.csi.num_params) {
                            *flag = ch == b':';
                        }
                        Step::Continue
                    } else {
                        Step::Abort
                    }
                }
                b' ' | b'\'' | b'"' | b'!' | b'$' | b'#' | b'*' => {
                    if self.csi.commit_param() {
                        self.csi.secondary = ch;
                        self.csi.state = CsiState::PostSecondary;
                        Step::Continue
                    } else {
                        Step::Abort
                    }
                }
                // A minus sign is only accepted at the start of a parameter.
                b'-' if self.csi.num_digits == 0 => {
                    self.csi.multiplier = -1;
                    self.csi.num_digits = 1;
                    Step::Continue
                }
                // Final byte
                b'@'..=b'~' => {
                    if self.csi.num_digits > 0 || self.csi.num_params > 0 {
                        // If the table is already full the trailing
                        // parameter is dropped and the rest is dispatched.
                        self.csi.commit_param();
                    }
                    Step::Dispatch
                }
                _ => {
                    log::debug!("Invalid CSI body character: {:02x}", ch);
                    Step::Abort
                }
            },
            CsiState::PostSecondary => match ch {
                // Final byte
                b'@'..=b'~' => Step::Dispatch,
                _ => {
                    log::debug!("Invalid CSI post-secondary character: {:02x}", ch);
                    Step::Abort
                }
            },
        };
        match step {
            Step::Continue => {}
            Step::Dispatch => {
                self.csi.final_char = ch;
                self.csi.is_valid = true;
                self.dispatch_csi(handler);
                self.state = State::Normal;
                return consumed;
            }
            Step::Abort => {
                self.state = State::Normal;
                return consumed;
            }
        }
    }
    consumed
}
/// Pass the collected CSI parameters to the handler's `csi` callback.
fn dispatch_csi<H: Handler>(&mut self, handler: &mut H) {
    let sequence = &self.csi;
    handler.csi(sequence);
}
/// Process OSC sequence bytes.
///
/// Buffers payload bytes until a terminator arrives, then passes the
/// payload to `handler.osc`. Returns the number of bytes consumed; if the
/// input ends first, parsing resumes on the next call.
///
/// Terminators:
/// * BEL (0x07).
/// * `ESC \` (ST). Any ESC ends the OSC; if it is not immediately followed
///   by `\` the parser continues in the escape state, which also covers a
///   `\` that arrives in the next chunk.
/// * C1 ST, as `C2 9C` (UTF-8 for U+009C) or as a bare 0x9C that is not
///   part of a multi-byte UTF-8 character.
///
/// A 0x9C continuation byte inside a UTF-8 character (for example `✓`,
/// encoded `E2 9C 93`) is payload, not a terminator.
fn consume_osc<H: Handler>(&mut self, bytes: &[u8], pos: usize, handler: &mut H) -> usize {
    /// True when `payload` ends inside an unfinished multi-byte UTF-8
    /// character, i.e. the next byte is expected to be a continuation byte.
    fn utf8_continuation_pending(payload: &[u8]) -> bool {
        // A lead byte is followed by at most three continuation bytes, so
        // only the last three bytes can hold an unfinished lead.
        for (trailing, &b) in payload.iter().rev().take(3).enumerate() {
            match b {
                0x80..=0xBF => continue,
                0xC2..=0xDF => return trailing < 1,
                0xE0..=0xEF => return trailing < 2,
                0xF0..=0xF4 => return trailing < 3,
                _ => return false,
            }
        }
        false
    }

    let mut consumed = 0;
    while pos + consumed < bytes.len() {
        let ch = bytes[pos + consumed];
        consumed += 1;
        self.escape_len += 1;
        // Check for max length
        if self.escape_len > MAX_ESCAPE_LEN || self.osc_buffer.len() > MAX_OSC_LEN {
            log::debug!("OSC sequence too long, aborting");
            self.state = State::Normal;
            return consumed;
        }
        match ch {
            // BEL terminates OSC
            0x07 => {
                handler.osc(&self.osc_buffer);
                self.state = State::Normal;
                return consumed;
            }
            // ESC \ (ST) terminates OSC
            0x1B => {
                if bytes.get(pos + consumed) == Some(&b'\\') {
                    consumed += 1;
                    self.state = State::Normal;
                } else {
                    // ESC not followed by `\` here: dispatch what we have
                    // and let the escape state interpret the next byte.
                    self.state = State::Escape;
                }
                handler.osc(&self.osc_buffer);
                return consumed;
            }
            // C1 ST (0x9C), unless it belongs to a UTF-8 character
            0x9C => {
                if self.osc_buffer.last() == Some(&0xC2) {
                    // `C2 9C` is U+009C; its lead byte is not payload.
                    self.osc_buffer.pop();
                } else if utf8_continuation_pending(&self.osc_buffer) {
                    self.osc_buffer.push(ch);
                    continue;
                }
                handler.osc(&self.osc_buffer);
                self.state = State::Normal;
                return consumed;
            }
            _ => {
                self.osc_buffer.push(ch);
            }
        }
    }
    consumed
}
/// Process DCS/APC/PM/SOS sequence bytes (string commands terminated by ST).
///
/// Buffers payload bytes until ST arrives, then passes the payload to the
/// handler method for the current state (`dcs`, `apc`, `pm` or `sos`).
/// Returns the number of bytes consumed; if the input ends first, parsing
/// resumes on the next call.
///
/// Terminators:
/// * `ESC \`. The ESC is buffered when seen and removed again when the `\`
///   follows, so the pair is recognised even when the two bytes arrive in
///   different `parse` calls.
/// * C1 ST, as `C2 9C` (UTF-8 for U+009C) or as a bare 0x9C that is not
///   part of a multi-byte UTF-8 character.
///
/// An ESC not followed by `\` stays in the payload. Exceeding
/// `MAX_ESCAPE_LEN` aborts the sequence.
fn consume_string_command<H: Handler>(&mut self, bytes: &[u8], pos: usize, handler: &mut H) -> usize {
    /// True when `payload` ends inside an unfinished multi-byte UTF-8
    /// character, i.e. the next byte is expected to be a continuation byte.
    fn utf8_continuation_pending(payload: &[u8]) -> bool {
        // A lead byte is followed by at most three continuation bytes, so
        // only the last three bytes can hold an unfinished lead.
        for (trailing, &b) in payload.iter().rev().take(3).enumerate() {
            match b {
                0x80..=0xBF => continue,
                0xC2..=0xDF => return trailing < 1,
                0xE0..=0xEF => return trailing < 2,
                0xF0..=0xF4 => return trailing < 3,
                _ => return false,
            }
        }
        false
    }

    let mut consumed = 0;
    while pos + consumed < bytes.len() {
        let ch = bytes[pos + consumed];
        consumed += 1;
        self.escape_len += 1;
        // Check for max length
        if self.escape_len > MAX_ESCAPE_LEN {
            log::debug!("String command too long, aborting");
            self.state = State::Normal;
            return consumed;
        }
        let terminated = match ch {
            // ESC \ (ST): the ESC is the last buffered byte, whether it came
            // in this chunk or at the very end of the previous one.
            b'\\' if self.string_buffer.last() == Some(&0x1B) => {
                self.string_buffer.pop();
                true
            }
            // C1 ST (0x9C), unless it belongs to a UTF-8 character
            0x9C => {
                if self.string_buffer.last() == Some(&0xC2) {
                    // `C2 9C` is U+009C; its lead byte is not payload.
                    self.string_buffer.pop();
                    true
                } else {
                    !utf8_continuation_pending(&self.string_buffer)
                }
            }
            _ => false,
        };
        if !terminated {
            self.string_buffer.push(ch);
            continue;
        }
        // Dispatch based on which string command was open.
        match self.state {
            State::Dcs => handler.dcs(&self.string_buffer),
            State::Apc => handler.apc(&self.string_buffer),
            State::Pm => handler.pm(&self.string_buffer),
            State::Sos => handler.sos(&self.string_buffer),
            // `parse` only routes the four string states here.
            State::Normal
            | State::Escape
            | State::EscapeIntermediate(_)
            | State::Csi
            | State::Osc => {}
        }
        self.state = State::Normal;
        return consumed;
    }
    consumed
}
}
/// Handler trait for responding to parsed escape sequences.
///
/// Unlike the vte crate's Perform trait, this trait receives decoded characters
/// (not bytes) for text, and control characters are expected to be handled
/// inline in the text() method (like Kitty does).
///
/// `text`, `control`, `csi` and `osc` must be implemented; every other
/// method has an empty default body.
pub trait Handler {
/// Handle a chunk of decoded text (Unicode codepoints).
///
/// This includes control characters (0x00-0x1F except ESC).
/// The handler should process control chars like:
/// - LF (0x0A), VT (0x0B), FF (0x0C): line feed
/// - CR (0x0D): carriage return
/// - HT (0x09): tab
/// - BS (0x08): backspace
/// - BEL (0x07): bell
///
/// ESC is never passed to this method - it triggers state transitions.
/// Malformed UTF-8 arrives as U+FFFD.
fn text(&mut self, chars: &[char]);
/// Handle a single control character (0x00-0x1F, never ESC) that appears
/// inside an escape sequence and should still be processed.
///
/// In this file it is called only while parsing CSI sequences; control
/// bytes inside OSC and other string payloads are buffered as data.
fn control(&mut self, byte: u8);
/// Handle a complete CSI sequence. `params` holds the parameters,
/// modifiers and final byte.
fn csi(&mut self, params: &CsiParams);
/// Handle a complete OSC sequence. `data` is the raw payload without the
/// `ESC ]` introducer or the terminator.
fn osc(&mut self, data: &[u8]);
/// Handle a DCS sequence (raw payload, terminator excluded).
fn dcs(&mut self, _data: &[u8]) {}
/// Handle an APC sequence (raw payload, terminator excluded).
fn apc(&mut self, _data: &[u8]) {}
/// Handle a PM sequence (raw payload, terminator excluded).
fn pm(&mut self, _data: &[u8]) {}
/// Handle a SOS sequence (raw payload, terminator excluded).
fn sos(&mut self, _data: &[u8]) {}
/// Save cursor position (DECSC, `ESC 7`).
fn save_cursor(&mut self) {}
/// Restore cursor position (DECRC, `ESC 8`).
fn restore_cursor(&mut self) {}
/// Full terminal reset (RIS, `ESC c`).
fn reset(&mut self) {}
/// Index - move cursor down, scroll if at bottom (IND, `ESC D`).
fn index(&mut self) {}
/// Newline - carriage return + line feed (NEL, `ESC E`).
fn newline(&mut self) {}
/// Reverse index - move cursor up, scroll if at top (RI, `ESC M`).
fn reverse_index(&mut self) {}
/// Set tab stop at current position (HTS, `ESC H`).
fn set_tab_stop(&mut self) {}
/// Set keypad mode: `true` for application (`ESC =`), `false` for normal
/// (`ESC >`).
fn set_keypad_mode(&mut self, _application: bool) {}
/// Designate character set. `_set` is 0 for G0 (`ESC (`) or 1 for G1
/// (`ESC )`); `_charset` is the final byte of the sequence.
fn designate_charset(&mut self, _set: u8, _charset: u8) {}
/// Screen alignment test (DECALN, `ESC # 8`).
fn screen_alignment(&mut self) {}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Handler that records what the parser reports.
    struct TestHandler {
        text_chunks: Vec<Vec<char>>,
        csi_count: usize,
        osc_count: usize,
        control_chars: Vec<u8>,
    }

    impl TestHandler {
        fn new() -> Self {
            Self {
                text_chunks: Vec::new(),
                csi_count: 0,
                osc_count: 0,
                control_chars: Vec::new(),
            }
        }
    }

    impl Handler for TestHandler {
        fn text(&mut self, chars: &[char]) {
            self.text_chunks.push(chars.to_vec());
        }
        fn control(&mut self, byte: u8) {
            self.control_chars.push(byte);
        }
        fn csi(&mut self, _params: &CsiParams) {
            self.csi_count += 1;
        }
        fn osc(&mut self, _data: &[u8]) {
            self.osc_count += 1;
        }
    }

    /// Run `input` through a fresh parser and return the recording handler.
    fn run(input: &[u8]) -> TestHandler {
        let mut parser = Parser::new();
        let mut handler = TestHandler::new();
        parser.parse(input, &mut handler);
        handler
    }

    /// Collect the `index`-th recorded text chunk into a `String`.
    fn chunk(handler: &TestHandler, index: usize) -> String {
        handler.text_chunks[index].iter().collect()
    }

    #[test]
    fn test_plain_text() {
        let recorded = run(b"Hello, World!");
        assert_eq!(recorded.text_chunks.len(), 1);
        assert_eq!(chunk(&recorded, 0), "Hello, World!");
    }

    #[test]
    fn test_utf8_text() {
        let recorded = run("Hello, 世界!".as_bytes());
        assert_eq!(recorded.text_chunks.len(), 1);
        assert_eq!(chunk(&recorded, 0), "Hello, 世界!");
    }

    #[test]
    fn test_control_chars_in_text() {
        // LF and CR are delivered inline as part of the text chunk.
        let recorded = run(b"Hello\nWorld\r!");
        assert_eq!(recorded.text_chunks.len(), 1);
        assert_eq!(chunk(&recorded, 0), "Hello\nWorld\r!");
    }

    #[test]
    fn test_csi_sequence() {
        // ESC [ 1 ; 2 m (SGR bold + dim)
        let recorded = run(b"\x1b[1;2m");
        assert_eq!(recorded.csi_count, 1);
    }

    #[test]
    fn test_mixed_text_and_csi() {
        let recorded = run(b"Hello\x1b[1mWorld");
        assert_eq!(recorded.text_chunks.len(), 2);
        assert_eq!(chunk(&recorded, 0), "Hello");
        assert_eq!(chunk(&recorded, 1), "World");
        assert_eq!(recorded.csi_count, 1);
    }

    #[test]
    fn test_osc_sequence() {
        // OSC 0 ; title BEL
        let recorded = run(b"\x1b]0;My Title\x07");
        assert_eq!(recorded.osc_count, 1);
    }

    #[test]
    fn test_csi_with_subparams() {
        // CSI 38:2:255:128:64 m (RGB foreground with colon separators)
        let recorded = run(b"\x1b[38:2:255:128:64m");
        assert_eq!(recorded.csi_count, 1);
    }
}