mirror of
https://github.com/kovidgoyal/kitty
synced 2026-06-08 22:28:24 +02:00
Implement UTF-8 decoding for screen_draw()
This commit is contained in:
@@ -5,7 +5,6 @@
|
|||||||
* Distributed under terms of the GPL3 license.
|
* Distributed under terms of the GPL3 license.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// TODO: Implement utf-8 parsing for screen_draw with reset
|
|
||||||
// TODO: Fix dump_commands for OSC and DCS commands that used to take strings but now take memoryview
|
// TODO: Fix dump_commands for OSC and DCS commands that used to take strings but now take memoryview
|
||||||
// TODO: Test clipboard kitten with 52 and 5522
|
// TODO: Test clipboard kitten with 52 and 5522
|
||||||
// TODO: Test shell integration with secondary prompts
|
// TODO: Test shell integration with secondary prompts
|
||||||
@@ -13,6 +12,7 @@
|
|||||||
// TODO: Test that C1 characters are ignored by screen_draw()
|
// TODO: Test that C1 characters are ignored by screen_draw()
|
||||||
|
|
||||||
#include "vt-parser.h"
|
#include "vt-parser.h"
|
||||||
|
#include "charsets.h"
|
||||||
#include "screen.h"
|
#include "screen.h"
|
||||||
#include "base64.h"
|
#include "base64.h"
|
||||||
#include "control-codes.h"
|
#include "control-codes.h"
|
||||||
@@ -26,7 +26,7 @@
|
|||||||
|
|
||||||
#define RESTORE_INPUT_DATA self->input_data = orig_input_data; self->input_sz = orig_input_sz; self->input_pos = orig_input_pos
|
#define RESTORE_INPUT_DATA self->input_data = orig_input_data; self->input_sz = orig_input_sz; self->input_pos = orig_input_pos
|
||||||
|
|
||||||
#define SET_STATE(state) self->vte_state = state; self->parser_buf_pos = 0;
|
#define SET_STATE(state) self->vte_state = state; self->parser_buf_pos = 0; self->utf8_state = UTF8_ACCEPT;
|
||||||
|
|
||||||
#define IS_DIGIT \
|
#define IS_DIGIT \
|
||||||
case '0': \
|
case '0': \
|
||||||
@@ -149,9 +149,14 @@ typedef enum VTEState {
|
|||||||
|
|
||||||
typedef struct PS {
|
typedef struct PS {
|
||||||
id_type window_id;
|
id_type window_id;
|
||||||
|
|
||||||
unsigned parser_buf_pos;
|
unsigned parser_buf_pos;
|
||||||
bool extended_osc_code;
|
UTF8State utf8_state;
|
||||||
VTEState vte_state;
|
VTEState vte_state;
|
||||||
|
|
||||||
|
// this is used only during dispatch of a single byte, its present here just to avoid adding an extra parameter to accumulate_osc()
|
||||||
|
bool extended_osc_code;
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
monotonic_t activated_at, wait_time;
|
monotonic_t activated_at, wait_time;
|
||||||
unsigned stop_escape_code_type;
|
unsigned stop_escape_code_type;
|
||||||
@@ -170,6 +175,20 @@ typedef struct PS {
|
|||||||
|
|
||||||
// Normal mode {{{
|
// Normal mode {{{
|
||||||
|
|
||||||
|
// Feed a single input byte b to decode_utf8(), using the decoder state stored
// in self->utf8_state, and draw the resulting code point once one is complete.
// The decoder state lives on the parser, so it carries over between calls.
static void
|
||||||
|
draw_byte(PS *self, uint8_t b) {
|
||||||
|
uint32_t ch; // written by decode_utf8(); only read in the UTF8_ACCEPT branch
|
||||||
|
switch (decode_utf8(&self->utf8_state, &ch, b)) {
|
||||||
|
case UTF8_ACCEPT: // decoder reports a complete code point in ch
|
||||||
|
REPORT_DRAW(ch);
|
||||||
|
screen_draw(self->screen, ch, true);
|
||||||
|
break;
|
||||||
|
case UTF8_REJECT: // decoder rejected the input: nothing is drawn for this byte
|
||||||
|
self->utf8_state = UTF8_ACCEPT; // reset so the next byte starts a fresh decode; b itself is not re-fed
|
||||||
|
break;
|
||||||
|
// Any other return value takes no action here.
// NOTE(review): presumably those values mean a multi-byte sequence is still in progress; confirm against decode_utf8().
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
dispatch_normal_mode_byte(PS *self) {
|
dispatch_normal_mode_byte(PS *self) {
|
||||||
#define CALL_SCREEN_HANDLER(name) REPORT_COMMAND(name); name(self->screen); break;
|
#define CALL_SCREEN_HANDLER(name) REPORT_COMMAND(name); name(self->screen); break;
|
||||||
@@ -197,8 +216,7 @@ dispatch_normal_mode_byte(PS *self) {
|
|||||||
case DEL:
|
case DEL:
|
||||||
break; // no-op
|
break; // no-op
|
||||||
default:
|
default:
|
||||||
REPORT_DRAW(ch);
|
draw_byte(self, ch);
|
||||||
screen_draw(self->screen, ch, true);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#undef CALL_SCREEN_HANDLER
|
#undef CALL_SCREEN_HANDLER
|
||||||
@@ -224,9 +242,9 @@ screen_nel(Screen *screen) { screen_carriage_return(screen); screen_linefeed(scr
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
dispatch_esc_mode_byte(PS *self) {
|
dispatch_esc_mode_byte(PS *self) {
|
||||||
#define CALL_ED(name) REPORT_COMMAND(name); name(self->screen); SET_STATE(0);
|
#define CALL_ED(name) REPORT_COMMAND(name); name(self->screen); SET_STATE(VTE_NORMAL);
|
||||||
#define CALL_ED1(name, ch) REPORT_COMMAND(name, ch); name(self->screen, ch); SET_STATE(0);
|
#define CALL_ED1(name, ch) REPORT_COMMAND(name, ch); name(self->screen, ch); SET_STATE(VTE_NORMAL);
|
||||||
#define CALL_ED2(name, a, b) REPORT_COMMAND(name, a, b); name(self->screen, a, b); SET_STATE(0);
|
#define CALL_ED2(name, a, b) REPORT_COMMAND(name, a, b); name(self->screen, a, b); SET_STATE(VTE_NORMAL);
|
||||||
uint8_t ch = self->input_data[self->input_pos++];
|
uint8_t ch = self->input_data[self->input_pos++];
|
||||||
switch(self->parser_buf_pos) {
|
switch(self->parser_buf_pos) {
|
||||||
case 0:
|
case 0:
|
||||||
@@ -264,7 +282,7 @@ dispatch_esc_mode_byte(PS *self) {
|
|||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
REPORT_ERROR("%s0x%x", "Unknown char after ESC: ", ch);
|
REPORT_ERROR("%s0x%x", "Unknown char after ESC: ", ch);
|
||||||
SET_STATE(0); break;
|
SET_STATE(VTE_NORMAL); break;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
@@ -311,7 +329,7 @@ dispatch_esc_mode_byte(PS *self) {
|
|||||||
default:
|
default:
|
||||||
REPORT_ERROR("Unhandled charset related escape code: 0x%x 0x%x", self->parser_buf[0], ch); break;
|
REPORT_ERROR("Unhandled charset related escape code: 0x%x 0x%x", self->parser_buf[0], ch); break;
|
||||||
}
|
}
|
||||||
SET_STATE(0);
|
SET_STATE(VTE_NORMAL);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#undef CALL_ED
|
#undef CALL_ED
|
||||||
@@ -530,7 +548,7 @@ END_ALLOW_CASE_RANGE
|
|||||||
if (self->parser_buf_pos > 0 && self->parser_buf[self->parser_buf_pos-1] == ESC) {
|
if (self->parser_buf_pos > 0 && self->parser_buf[self->parser_buf_pos-1] == ESC) {
|
||||||
if (ch == '\\') { self->parser_buf_pos--; return true; }
|
if (ch == '\\') { self->parser_buf_pos--; return true; }
|
||||||
REPORT_ERROR("DCS sequence contained ESC without trailing \\ at pos: %u ignoring the sequence", self->parser_buf_pos);
|
REPORT_ERROR("DCS sequence contained ESC without trailing \\ at pos: %u ignoring the sequence", self->parser_buf_pos);
|
||||||
SET_STATE(ESC); return false;
|
SET_STATE(VTE_ESC); return false;
|
||||||
}
|
}
|
||||||
if (self->parser_buf_pos >= PARSER_BUF_SZ - 1) {
|
if (self->parser_buf_pos >= PARSER_BUF_SZ - 1) {
|
||||||
REPORT_ERROR("DCS sequence too long, truncating.");
|
REPORT_ERROR("DCS sequence too long, truncating.");
|
||||||
@@ -660,7 +678,7 @@ accumulate_csi(PS *self) {
|
|||||||
#define ENSURE_SPACE \
|
#define ENSURE_SPACE \
|
||||||
if (self->parser_buf_pos > PARSER_BUF_SZ - 1) { \
|
if (self->parser_buf_pos > PARSER_BUF_SZ - 1) { \
|
||||||
REPORT_ERROR("CSI sequence too long, ignoring"); \
|
REPORT_ERROR("CSI sequence too long, ignoring"); \
|
||||||
SET_STATE(0); \
|
SET_STATE(VTE_NORMAL); \
|
||||||
return false; \
|
return false; \
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -679,7 +697,7 @@ accumulate_csi(PS *self) {
|
|||||||
case '=':
|
case '=':
|
||||||
if (self->parser_buf_pos != 0) {
|
if (self->parser_buf_pos != 0) {
|
||||||
REPORT_ERROR("Invalid character in CSI: 0x%x, ignoring the sequence", ch);
|
REPORT_ERROR("Invalid character in CSI: 0x%x, ignoring the sequence", ch);
|
||||||
SET_STATE(0);
|
SET_STATE(VTE_NORMAL);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
ENSURE_SPACE;
|
ENSURE_SPACE;
|
||||||
@@ -711,11 +729,11 @@ END_ALLOW_CASE_RANGE
|
|||||||
break;
|
break;
|
||||||
case NUL:
|
case NUL:
|
||||||
case DEL:
|
case DEL:
|
||||||
SET_STATE(0);
|
SET_STATE(VTE_NORMAL);
|
||||||
break; // no-op
|
break; // no-op
|
||||||
default:
|
default:
|
||||||
REPORT_ERROR("Invalid character in CSI: 0x%x, ignoring the sequence", ch);
|
REPORT_ERROR("Invalid character in CSI: 0x%x, ignoring the sequence", ch);
|
||||||
SET_STATE(0);
|
SET_STATE(VTE_NORMAL);
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -1319,26 +1337,26 @@ accumulate_oth(PS *self) {
|
|||||||
dispatch##_esc_mode_byte(self); \
|
dispatch##_esc_mode_byte(self); \
|
||||||
break; \
|
break; \
|
||||||
case VTE_CSI: \
|
case VTE_CSI: \
|
||||||
if (accumulate_csi(self)) { dispatch##_csi(self); SET_STATE(0); watch_for_pending; } \
|
if (accumulate_csi(self)) { dispatch##_csi(self); SET_STATE(VTE_NORMAL); watch_for_pending; } \
|
||||||
break; \
|
break; \
|
||||||
case VTE_OSC: \
|
case VTE_OSC: \
|
||||||
{ \
|
{ \
|
||||||
if (accumulate_osc(self)) { \
|
if (accumulate_osc(self)) { \
|
||||||
dispatch##_osc(self); \
|
dispatch##_osc(self); \
|
||||||
if (self->extended_osc_code) { \
|
if (self->extended_osc_code) { \
|
||||||
if (accumulate_osc(self)) { dispatch##_osc(self); SET_STATE(0); } \
|
if (accumulate_osc(self)) { dispatch##_osc(self); SET_STATE(VTE_NORMAL); } \
|
||||||
} else { SET_STATE(0); } \
|
} else { SET_STATE(VTE_NORMAL); } \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
break; \
|
break; \
|
||||||
case VTE_APC: \
|
case VTE_APC: \
|
||||||
if (accumulate_oth(self)) { dispatch##_apc(self); SET_STATE(0); } \
|
if (accumulate_oth(self)) { dispatch##_apc(self); SET_STATE(VTE_NORMAL); } \
|
||||||
break; \
|
break; \
|
||||||
case VTE_PM: \
|
case VTE_PM: \
|
||||||
if (accumulate_oth(self)) { dispatch##_pm(self); SET_STATE(0); } \
|
if (accumulate_oth(self)) { dispatch##_pm(self); SET_STATE(VTE_NORMAL); } \
|
||||||
break; \
|
break; \
|
||||||
case VTE_DCS: \
|
case VTE_DCS: \
|
||||||
if (accumulate_dcs(self)) { dispatch##_dcs(self); SET_STATE(0); watch_for_pending; } \
|
if (accumulate_dcs(self)) { dispatch##_dcs(self); SET_STATE(VTE_NORMAL); watch_for_pending; } \
|
||||||
if (self->vte_state == ESC) { self->input_pos--; dispatch##_esc_mode_byte(self); } \
|
if (self->vte_state == ESC) { self->input_pos--; dispatch##_esc_mode_byte(self); } \
|
||||||
break; \
|
break; \
|
||||||
case VTE_NORMAL: \
|
case VTE_NORMAL: \
|
||||||
@@ -1606,10 +1624,12 @@ free_vt_parser(Parser* self) {
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
reset(PS *self) {
|
reset(PS *self) {
|
||||||
self->parser_buf_pos = 0;
|
|
||||||
self->extended_osc_code = false;
|
|
||||||
self->vte_state = VTE_NORMAL;
|
self->vte_state = VTE_NORMAL;
|
||||||
|
self->utf8_state = UTF8_ACCEPT;
|
||||||
|
self->parser_buf_pos = 0;
|
||||||
|
|
||||||
self->pending_mode.activated_at = 0;
|
self->pending_mode.activated_at = 0;
|
||||||
|
self->pending_mode.stop_escape_code_type = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ from base64 import standard_b64encode
|
|||||||
from binascii import hexlify
|
from binascii import hexlify
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
from kitty.fast_data_types import CURSOR_BLOCK, base64_decode, base64_encode, parse_bytes, parse_bytes_dump
|
from kitty.fast_data_types import CURSOR_BLOCK, base64_decode, base64_encode
|
||||||
from kitty.notify import NotificationCommand, handle_notification_cmd, notification_activated, reset_registry
|
from kitty.notify import NotificationCommand, handle_notification_cmd, notification_activated, reset_registry
|
||||||
|
|
||||||
from . import BaseTest
|
from . import BaseTest
|
||||||
@@ -25,7 +25,7 @@ class TestParser(BaseTest):
|
|||||||
if isinstance(x, str):
|
if isinstance(x, str):
|
||||||
x = x.encode('utf-8')
|
x = x.encode('utf-8')
|
||||||
cmds = tuple(('draw', x) if isinstance(x, str) else x for x in cmds)
|
cmds = tuple(('draw', x) if isinstance(x, str) else x for x in cmds)
|
||||||
parse_bytes_dump(cd, s, x)
|
s.vt_parser.parse_bytes(s, x, cd)
|
||||||
current = ''
|
current = ''
|
||||||
q = []
|
q = []
|
||||||
for args in cd:
|
for args in cd:
|
||||||
@@ -65,7 +65,7 @@ class TestParser(BaseTest):
|
|||||||
self.ae(str(s.line(1)), '6')
|
self.ae(str(s.line(1)), '6')
|
||||||
self.ae(str(s.line(2)), ' 123')
|
self.ae(str(s.line(2)), ' 123')
|
||||||
self.ae(str(s.line(3)), '45')
|
self.ae(str(s.line(3)), '45')
|
||||||
parse_bytes(s, b'\rabcde')
|
s.vt_parser.parse_bytes(s, b'\rabcde')
|
||||||
self.ae(str(s.line(3)), 'abcde')
|
self.ae(str(s.line(3)), 'abcde')
|
||||||
pb('\rßxyz1', ('screen_carriage_return',), 'ßxyz1')
|
pb('\rßxyz1', ('screen_carriage_return',), 'ßxyz1')
|
||||||
self.ae(str(s.line(3)), 'ßxyz1')
|
self.ae(str(s.line(3)), 'ßxyz1')
|
||||||
@@ -331,7 +331,7 @@ class TestParser(BaseTest):
|
|||||||
for sgr in '0;34;102;1;2;3;4 0;38:5:200;58:2:10:11:12'.split():
|
for sgr in '0;34;102;1;2;3;4 0;38:5:200;58:2:10:11:12'.split():
|
||||||
expected = set(sgr.split(';')) - {'0'}
|
expected = set(sgr.split(';')) - {'0'}
|
||||||
c.clear()
|
c.clear()
|
||||||
parse_bytes(s, f'\033[{sgr}m\033P$qm\033\\'.encode('ascii'))
|
s.vte_parser.parse_bytes(s, f'\033[{sgr}m\033P$qm\033\\'.encode('ascii'))
|
||||||
r = c.wtcbuf.decode('ascii').partition('r')[2].partition('m')[0]
|
r = c.wtcbuf.decode('ascii').partition('r')[2].partition('m')[0]
|
||||||
self.ae(expected, set(r.split(';')))
|
self.ae(expected, set(r.split(';')))
|
||||||
c.clear()
|
c.clear()
|
||||||
|
|||||||
Reference in New Issue
Block a user