mirror of
https://github.com/kovidgoyal/kitty
synced 2026-06-06 01:05:48 +02:00
Implement UTF-8 decoding for screen_draw()
This commit is contained in:
@@ -5,7 +5,6 @@
|
||||
* Distributed under terms of the GPL3 license.
|
||||
*/
|
||||
|
||||
// TODO: Implement utf-8 parsing for screen_draw with reset
|
||||
// TODO: Fix dump_commands for OSC and DCS commands that used to take strings but now take memoryview
|
||||
// TODO: Test clipboard kitten with 52 and 5522
|
||||
// TODO: Test shell integration with secondary prompts
|
||||
@@ -13,6 +12,7 @@
|
||||
// TODO: Test that C1 characters are ignored by screen_draw()
|
||||
|
||||
#include "vt-parser.h"
|
||||
#include "charsets.h"
|
||||
#include "screen.h"
|
||||
#include "base64.h"
|
||||
#include "control-codes.h"
|
||||
@@ -26,7 +26,7 @@
|
||||
|
||||
#define RESTORE_INPUT_DATA self->input_data = orig_input_data; self->input_sz = orig_input_sz; self->input_pos = orig_input_pos
|
||||
|
||||
#define SET_STATE(state) self->vte_state = state; self->parser_buf_pos = 0;
|
||||
#define SET_STATE(state) self->vte_state = state; self->parser_buf_pos = 0; self->utf8_state = UTF8_ACCEPT;
|
||||
|
||||
#define IS_DIGIT \
|
||||
case '0': \
|
||||
@@ -149,9 +149,14 @@ typedef enum VTEState {
|
||||
|
||||
typedef struct PS {
|
||||
id_type window_id;
|
||||
|
||||
unsigned parser_buf_pos;
|
||||
bool extended_osc_code;
|
||||
UTF8State utf8_state;
|
||||
VTEState vte_state;
|
||||
|
||||
// this is used only during dispatch of a single byte, its present here just to avoid adding an extra parameter to accumulate_osc()
|
||||
bool extended_osc_code;
|
||||
|
||||
struct {
|
||||
monotonic_t activated_at, wait_time;
|
||||
unsigned stop_escape_code_type;
|
||||
@@ -170,6 +175,20 @@ typedef struct PS {
|
||||
|
||||
// Normal mode {{{
|
||||
|
||||
static void
|
||||
draw_byte(PS *self, uint8_t b) {
|
||||
uint32_t ch;
|
||||
switch (decode_utf8(&self->utf8_state, &ch, b)) {
|
||||
case UTF8_ACCEPT:
|
||||
REPORT_DRAW(ch);
|
||||
screen_draw(self->screen, ch, true);
|
||||
break;
|
||||
case UTF8_REJECT:
|
||||
self->utf8_state = UTF8_ACCEPT;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
dispatch_normal_mode_byte(PS *self) {
|
||||
#define CALL_SCREEN_HANDLER(name) REPORT_COMMAND(name); name(self->screen); break;
|
||||
@@ -197,8 +216,7 @@ dispatch_normal_mode_byte(PS *self) {
|
||||
case DEL:
|
||||
break; // no-op
|
||||
default:
|
||||
REPORT_DRAW(ch);
|
||||
screen_draw(self->screen, ch, true);
|
||||
draw_byte(self, ch);
|
||||
break;
|
||||
}
|
||||
#undef CALL_SCREEN_HANDLER
|
||||
@@ -224,9 +242,9 @@ screen_nel(Screen *screen) { screen_carriage_return(screen); screen_linefeed(scr
|
||||
|
||||
static void
|
||||
dispatch_esc_mode_byte(PS *self) {
|
||||
#define CALL_ED(name) REPORT_COMMAND(name); name(self->screen); SET_STATE(0);
|
||||
#define CALL_ED1(name, ch) REPORT_COMMAND(name, ch); name(self->screen, ch); SET_STATE(0);
|
||||
#define CALL_ED2(name, a, b) REPORT_COMMAND(name, a, b); name(self->screen, a, b); SET_STATE(0);
|
||||
#define CALL_ED(name) REPORT_COMMAND(name); name(self->screen); SET_STATE(VTE_NORMAL);
|
||||
#define CALL_ED1(name, ch) REPORT_COMMAND(name, ch); name(self->screen, ch); SET_STATE(VTE_NORMAL);
|
||||
#define CALL_ED2(name, a, b) REPORT_COMMAND(name, a, b); name(self->screen, a, b); SET_STATE(VTE_NORMAL);
|
||||
uint8_t ch = self->input_data[self->input_pos++];
|
||||
switch(self->parser_buf_pos) {
|
||||
case 0:
|
||||
@@ -264,7 +282,7 @@ dispatch_esc_mode_byte(PS *self) {
|
||||
break;
|
||||
default:
|
||||
REPORT_ERROR("%s0x%x", "Unknown char after ESC: ", ch);
|
||||
SET_STATE(0); break;
|
||||
SET_STATE(VTE_NORMAL); break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@@ -311,7 +329,7 @@ dispatch_esc_mode_byte(PS *self) {
|
||||
default:
|
||||
REPORT_ERROR("Unhandled charset related escape code: 0x%x 0x%x", self->parser_buf[0], ch); break;
|
||||
}
|
||||
SET_STATE(0);
|
||||
SET_STATE(VTE_NORMAL);
|
||||
break;
|
||||
}
|
||||
#undef CALL_ED
|
||||
@@ -530,7 +548,7 @@ END_ALLOW_CASE_RANGE
|
||||
if (self->parser_buf_pos > 0 && self->parser_buf[self->parser_buf_pos-1] == ESC) {
|
||||
if (ch == '\\') { self->parser_buf_pos--; return true; }
|
||||
REPORT_ERROR("DCS sequence contained ESC without trailing \\ at pos: %u ignoring the sequence", self->parser_buf_pos);
|
||||
SET_STATE(ESC); return false;
|
||||
SET_STATE(VTE_ESC); return false;
|
||||
}
|
||||
if (self->parser_buf_pos >= PARSER_BUF_SZ - 1) {
|
||||
REPORT_ERROR("DCS sequence too long, truncating.");
|
||||
@@ -660,7 +678,7 @@ accumulate_csi(PS *self) {
|
||||
#define ENSURE_SPACE \
|
||||
if (self->parser_buf_pos > PARSER_BUF_SZ - 1) { \
|
||||
REPORT_ERROR("CSI sequence too long, ignoring"); \
|
||||
SET_STATE(0); \
|
||||
SET_STATE(VTE_NORMAL); \
|
||||
return false; \
|
||||
}
|
||||
|
||||
@@ -679,7 +697,7 @@ accumulate_csi(PS *self) {
|
||||
case '=':
|
||||
if (self->parser_buf_pos != 0) {
|
||||
REPORT_ERROR("Invalid character in CSI: 0x%x, ignoring the sequence", ch);
|
||||
SET_STATE(0);
|
||||
SET_STATE(VTE_NORMAL);
|
||||
return false;
|
||||
}
|
||||
ENSURE_SPACE;
|
||||
@@ -711,11 +729,11 @@ END_ALLOW_CASE_RANGE
|
||||
break;
|
||||
case NUL:
|
||||
case DEL:
|
||||
SET_STATE(0);
|
||||
SET_STATE(VTE_NORMAL);
|
||||
break; // no-op
|
||||
default:
|
||||
REPORT_ERROR("Invalid character in CSI: 0x%x, ignoring the sequence", ch);
|
||||
SET_STATE(0);
|
||||
SET_STATE(VTE_NORMAL);
|
||||
return false;
|
||||
|
||||
}
|
||||
@@ -1319,26 +1337,26 @@ accumulate_oth(PS *self) {
|
||||
dispatch##_esc_mode_byte(self); \
|
||||
break; \
|
||||
case VTE_CSI: \
|
||||
if (accumulate_csi(self)) { dispatch##_csi(self); SET_STATE(0); watch_for_pending; } \
|
||||
if (accumulate_csi(self)) { dispatch##_csi(self); SET_STATE(VTE_NORMAL); watch_for_pending; } \
|
||||
break; \
|
||||
case VTE_OSC: \
|
||||
{ \
|
||||
if (accumulate_osc(self)) { \
|
||||
dispatch##_osc(self); \
|
||||
if (self->extended_osc_code) { \
|
||||
if (accumulate_osc(self)) { dispatch##_osc(self); SET_STATE(0); } \
|
||||
} else { SET_STATE(0); } \
|
||||
if (accumulate_osc(self)) { dispatch##_osc(self); SET_STATE(VTE_NORMAL); } \
|
||||
} else { SET_STATE(VTE_NORMAL); } \
|
||||
} \
|
||||
} \
|
||||
break; \
|
||||
case VTE_APC: \
|
||||
if (accumulate_oth(self)) { dispatch##_apc(self); SET_STATE(0); } \
|
||||
if (accumulate_oth(self)) { dispatch##_apc(self); SET_STATE(VTE_NORMAL); } \
|
||||
break; \
|
||||
case VTE_PM: \
|
||||
if (accumulate_oth(self)) { dispatch##_pm(self); SET_STATE(0); } \
|
||||
if (accumulate_oth(self)) { dispatch##_pm(self); SET_STATE(VTE_NORMAL); } \
|
||||
break; \
|
||||
case VTE_DCS: \
|
||||
if (accumulate_dcs(self)) { dispatch##_dcs(self); SET_STATE(0); watch_for_pending; } \
|
||||
if (accumulate_dcs(self)) { dispatch##_dcs(self); SET_STATE(VTE_NORMAL); watch_for_pending; } \
|
||||
if (self->vte_state == ESC) { self->input_pos--; dispatch##_esc_mode_byte(self); } \
|
||||
break; \
|
||||
case VTE_NORMAL: \
|
||||
@@ -1606,10 +1624,12 @@ free_vt_parser(Parser* self) {
|
||||
|
||||
static void
|
||||
reset(PS *self) {
|
||||
self->parser_buf_pos = 0;
|
||||
self->extended_osc_code = false;
|
||||
self->vte_state = VTE_NORMAL;
|
||||
self->utf8_state = UTF8_ACCEPT;
|
||||
self->parser_buf_pos = 0;
|
||||
|
||||
self->pending_mode.activated_at = 0;
|
||||
self->pending_mode.stop_escape_code_type = 0;
|
||||
}
|
||||
|
||||
void
|
||||
|
||||
@@ -6,7 +6,7 @@ from base64 import standard_b64encode
|
||||
from binascii import hexlify
|
||||
from functools import partial
|
||||
|
||||
from kitty.fast_data_types import CURSOR_BLOCK, base64_decode, base64_encode, parse_bytes, parse_bytes_dump
|
||||
from kitty.fast_data_types import CURSOR_BLOCK, base64_decode, base64_encode
|
||||
from kitty.notify import NotificationCommand, handle_notification_cmd, notification_activated, reset_registry
|
||||
|
||||
from . import BaseTest
|
||||
@@ -25,7 +25,7 @@ class TestParser(BaseTest):
|
||||
if isinstance(x, str):
|
||||
x = x.encode('utf-8')
|
||||
cmds = tuple(('draw', x) if isinstance(x, str) else x for x in cmds)
|
||||
parse_bytes_dump(cd, s, x)
|
||||
s.vt_parser.parse_bytes(s, x, cd)
|
||||
current = ''
|
||||
q = []
|
||||
for args in cd:
|
||||
@@ -65,7 +65,7 @@ class TestParser(BaseTest):
|
||||
self.ae(str(s.line(1)), '6')
|
||||
self.ae(str(s.line(2)), ' 123')
|
||||
self.ae(str(s.line(3)), '45')
|
||||
parse_bytes(s, b'\rabcde')
|
||||
s.vt_parser.parse_bytes(s, b'\rabcde')
|
||||
self.ae(str(s.line(3)), 'abcde')
|
||||
pb('\rßxyz1', ('screen_carriage_return',), 'ßxyz1')
|
||||
self.ae(str(s.line(3)), 'ßxyz1')
|
||||
@@ -331,7 +331,7 @@ class TestParser(BaseTest):
|
||||
for sgr in '0;34;102;1;2;3;4 0;38:5:200;58:2:10:11:12'.split():
|
||||
expected = set(sgr.split(';')) - {'0'}
|
||||
c.clear()
|
||||
parse_bytes(s, f'\033[{sgr}m\033P$qm\033\\'.encode('ascii'))
|
||||
s.vte_parser.parse_bytes(s, f'\033[{sgr}m\033P$qm\033\\'.encode('ascii'))
|
||||
r = c.wtcbuf.decode('ascii').partition('r')[2].partition('m')[0]
|
||||
self.ae(expected, set(r.split(';')))
|
||||
c.clear()
|
||||
|
||||
Reference in New Issue
Block a user