From e3d6aa2c60e7ef3b8e5db8129c1b7edeacac936c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 10 Nov 2023 11:53:36 +0530 Subject: [PATCH] Use simd in a few loops --- kitty/simd-string.c | 42 +++++++++++++++++++++++++++++++++++++----- kitty/simd-string.h | 4 ++++ 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/kitty/simd-string.c b/kitty/simd-string.c index 663e388c1..182a0517e 100644 --- a/kitty/simd-string.c +++ b/kitty/simd-string.c @@ -61,9 +61,9 @@ byte_loader_skip(ByteLoader *self) { #define prepare_for_hasvalue(n) (~0ULL/255 * (n)) #define hasvalue(x,n) (haszero((x) ^ (n))) -static uint8_t* -find_either_of_two_chars_simple(uint8_t *haystack, const size_t sz, const uint8_t x, const uint8_t y) { - ByteLoader it; byte_loader_init(&it, haystack, sz); +static const char* +find_either_of_two_bytes_simple(const char *haystack, const size_t sz, const uint8_t x, const uint8_t y) { + ByteLoader it; byte_loader_init(&it, (uint8_t*)haystack, sz); // first align by testing the first few bytes one at a time while (it.num_left && it.digits_left < sizeof(BYTE_LOADER_T)) { @@ -74,7 +74,7 @@ find_either_of_two_chars_simple(uint8_t *haystack, const size_t sz, const uint8_ const BYTE_LOADER_T a = prepare_for_hasvalue(x), b = prepare_for_hasvalue(y); while (it.num_left) { if (hasvalue(it.m, a) || hasvalue(it.m, b)) { - uint8_t *ans = haystack + sz - it.num_left, q = hasvalue(it.m, a) ? x : y; + const char *ans = haystack + sz - it.num_left, q = hasvalue(it.m, a) ? x : y; while (it.num_left) { if (byte_loader_next(&it) == q) return ans; ans++; @@ -87,7 +87,39 @@ find_either_of_two_chars_simple(uint8_t *haystack, const size_t sz, const uint8_ } #undef SHIFT_OP +static const char* +find_either_of_two_bytes_simd_impl(const char *haystack, const char* needle_, size_t sz) { + size_t extra = (uintptr_t)haystack % sizeof(__m128i); + if (extra) { // need aligned loads for performance so search first few bytes by hand + const char *ans = find_either_of_two_bytes_simple(haystack, MIN(sz, extra), needle_[0], needle_[1]); + if (ans) return ans; + extra = MIN(extra, sz); + sz -= extra; + haystack += extra; + if (!sz) return NULL; + } + const __m128i needle = _mm_load_si128((const __m128i *)needle_); + for (const char* limit = haystack + sz; haystack < limit; haystack += 16) { + const __m128i h = _mm_load_si128((const __m128i *)haystack); + int c = _mm_cmpistri(needle, h, _SIDD_CMP_EQUAL_ANY); + if (c != 16) { + return haystack + c; + } + } + return NULL; +} + +static uint8_t* +find_either_of_two_bytes_simd(uint8_t *haystack, const size_t sz, const uint8_t x, const uint8_t y) { + uint8_t before = haystack[sz]; + haystack[sz] = 0; + char needle[16] = {x, y, 0,0,0,0,0,0,0,0,0,0,0,0,0,0}; + uint8_t *ans = (uint8_t*)find_either_of_two_bytes_simd_impl((char*)haystack, needle, sz); + haystack[sz] = before; + return ans; +} + uint8_t* find_either_of_two_bytes(uint8_t *haystack, const size_t sz, const uint8_t a, const uint8_t b) { - return find_either_of_two_chars_simple(haystack, sz, a, b); + return find_either_of_two_bytes_simd(haystack, sz, a, b); } diff --git a/kitty/simd-string.h b/kitty/simd-string.h index 717494c12..74f373f51 100644 --- a/kitty/simd-string.h +++ b/kitty/simd-string.h @@ -20,4 +20,8 @@ typedef struct ByteLoader { uint8_t byte_loader_peek(const ByteLoader *self); void byte_loader_init(ByteLoader *self, const uint8_t *buf, unsigned int sz); uint8_t byte_loader_next(ByteLoader *self); + +// Requires haystack[sz] to be writable and 7 bytes to the left of haystack to +// be readable. Returns pointer to first position in haystack that contains +// either of the two chars or NULL if not found. uint8_t* find_either_of_two_bytes(uint8_t *haystack, const size_t sz, const uint8_t a, const uint8_t b);