From 65aca5b140157654ee7bd199d96122a63c0e2f4b Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 8 Nov 2023 19:16:35 +0530 Subject: [PATCH] Speedup utoi by loading numbers in 8 byte chunks --- kitty/vt-parser.c | 66 ++++++++++++++++++++++++++++++++++--------- kitty_tests/parser.py | 3 +- 2 files changed, 54 insertions(+), 15 deletions(-) diff --git a/kitty/vt-parser.c b/kitty/vt-parser.c index ea755dadd..a6bc57b65 100644 --- a/kitty/vt-parser.c +++ b/kitty/vt-parser.c @@ -132,22 +132,60 @@ static const uint64_t pow_10_array[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 10000000000 }; +typedef struct byte_loader { + uint64_t m; + unsigned sz_of_next_load, digits_left, num_left; + const uint8_t *next_load_at; +} byte_loader; + +static void +byte_loader_init(byte_loader *self, const uint8_t *buf, const unsigned int sz) { + size_t s = MIN(sz, sizeof(self->m)); + self->next_load_at = buf + s; + self->num_left = sz; + self->digits_left = sizeof(self->m); + memcpy(&self->m, buf, sizeof(self->m)); + self->sz_of_next_load = sz - s; +} + + +static uint8_t +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +byte_loader_peek(const byte_loader *self) { return self->m & 0xff; } +#define SHIFT_OP >>= +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +// no idea if this correct needs testing +#define SHIFT_OP <<= +byte_loader_peek(const byte_loader *self) { return (self->m >> ((sizeof(self->m) - 1)*8)) & 0xff; } +#else +#error "Unsupported endianness" +#endif + +static uint8_t +byte_loader_next(byte_loader *self) { + uint8_t ans = byte_loader_peek(self); + self->num_left--; self->digits_left--; self->m SHIFT_OP 8; + if (!self->digits_left) byte_loader_init(self, self->next_load_at, self->sz_of_next_load); + return ans; +} + static int64_t -utoi(const uint8_t *buf, unsigned int sz) { +utoi(const uint8_t *buf, const unsigned int sz) { int64_t ans = 0; - const uint8_t *p = buf; int mult = 1; - if (sz && *p == '-') { - mult = -1; p++; sz--; - } - // Ignore leading zeros - while(sz > 0) { - if (*p == '0') { p++; sz--; } - else break; - } - if (sz < sizeof(pow_10_array)/sizeof(pow_10_array[0])) { - for (int i = sz-1, j=0; i >= 0; i--, j++) { - ans += (p[i] - '0') * pow_10_array[j]; + if (LIKELY(sz > 0)) { + byte_loader b; + byte_loader_init(&b, buf, sz); + uint8_t digit = byte_loader_peek(&b); + if (digit == '-') { mult = -1; byte_loader_next(&b); } + while (b.num_left) { // ignore leading zeros + digit = byte_loader_peek(&b); + if (digit != '0') break; + byte_loader_next(&b); + } + while (b.num_left) { + digit = byte_loader_next(&b); + ans += (digit - '0') * *(pow_10_array + b.num_left); } } return ans * mult; @@ -211,7 +249,7 @@ typedef struct PS { // The buffer struct { size_t consumed, pos, sz; } read; struct { size_t offset, sz; } write; - uint8_t buf[BUF_SZ]; + uint8_t buf[BUF_SZ + 64]; // The extra bytes are so loads of large integers such as for AVX 512 dont read past the end of the buffer } PS; static void diff --git a/kitty_tests/parser.py b/kitty_tests/parser.py index a5d3b5948..86352a790 100644 --- a/kitty_tests/parser.py +++ b/kitty_tests/parser.py @@ -213,7 +213,7 @@ class TestParser(BaseTest): self.ae(str(s.line(0)), 'xy bc') pb('x\033[2;7@y', 'x', ('CSI code @ has 2 > 1 parameters',), 'y') pb('x\033[2;-7@y', 'x', ('CSI code @ has 2 > 1 parameters',), 'y') - pb('x\033[-2@y', 'x', ('CSI code @ is not allowed to have negative parameter (-2)',), 'y') + pb('x\033[-0001234567890@y', 'x', ('CSI code @ is not allowed to have negative parameter (-1234567890)',), 'y') pb('x\033[2-3@y', 'x', ('Invalid character in CSI: 3 (0x33), ignoring the sequence',), '@y') pb('x\033[@y', 'x', ('screen_insert_characters', 1), 'y') pb('x\033[345@y', 'x', ('screen_insert_characters', 345), 'y') @@ -226,6 +226,7 @@ class TestParser(BaseTest): pb('\033[3;2;H', ('screen_cursor_position', 3, 2)) pb('\033[00000000003;0000000000000002H', ('screen_cursor_position', 3, 2)) self.ae(s.cursor.x, 1), self.ae(s.cursor.y, 2) + pb('\033[0001234567890H', ('screen_cursor_position', 1234567890, 1)) pb('\033[J', ('screen_erase_in_display', 0, 0)) pb('\033[?J', ('screen_erase_in_display', 0, 1)) pb('\033[?2J', ('screen_erase_in_display', 2, 1))