From fd4c8e1e2d983097155c6e9715c0e2a45421957c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 17 Nov 2023 13:16:24 +0530 Subject: [PATCH] Get rid of ByteLoader Doesn't move the benchmarks --- kitty/simd-string.c | 96 ++++----------------------------------------- 1 file changed, 7 insertions(+), 89 deletions(-) diff --git a/kitty/simd-string.c b/kitty/simd-string.c index 1707de0ef..beb8d9d9e 100644 --- a/kitty/simd-string.c +++ b/kitty/simd-string.c @@ -19,93 +19,11 @@ _Pragma("clang diagnostic pop") static bool has_sse4_2 = false, has_avx2 = false; -// ByteLoader {{{ -#define BYTE_LOADER_T unsigned long long -typedef struct ByteLoader { - BYTE_LOADER_T m; - unsigned sz_of_next_load, digits_left, num_left; - const uint8_t *next_load_at; -} ByteLoader; -uint8_t byte_loader_peek(const ByteLoader *self); -void byte_loader_init(ByteLoader *self, const uint8_t *buf, unsigned int sz); -uint8_t byte_loader_next(ByteLoader *self); - - -uint8_t -byte_loader_peek(const ByteLoader *self) { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return self->m & 0xff; -#define SHIFT_OP >> -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - // no idea if this is correct needs testing - return (self->m >> ((sizeof(self->m) - 1)*8)) & 0xff; -#define SHIFT_OP << -#else -#error "Unsupported endianness" -#endif -} - -void -byte_loader_init(ByteLoader *self, const uint8_t *buf, unsigned int sz) { - size_t extra = ((uintptr_t)buf) % sizeof(BYTE_LOADER_T); - if (extra) { // align loading - buf -= extra; sz += extra; - } - size_t s = MIN(sz, sizeof(self->m)); - self->next_load_at = buf + s; - self->num_left = sz - extra; - self->digits_left = sizeof(self->m) - extra; - self->m = (*((BYTE_LOADER_T*)buf)) SHIFT_OP (8 * extra); - self->sz_of_next_load = sz - s; -} - -uint8_t -byte_loader_next(ByteLoader *self) { - uint8_t ans = byte_loader_peek(self); - self->num_left--; self->digits_left--; self->m = self->m SHIFT_OP 8; - if (!self->digits_left) byte_loader_init(self, self->next_load_at, 
self->sz_of_next_load); - return ans; -} - -static void -byte_loader_skip(ByteLoader *self) { - if (self->num_left >= sizeof(BYTE_LOADER_T)) { - self->m = *(BYTE_LOADER_T*)self->next_load_at; - self->num_left -= sizeof(BYTE_LOADER_T); - self->digits_left = sizeof(BYTE_LOADER_T); - self->next_load_at += sizeof(BYTE_LOADER_T); - } else { - self->num_left = 0; - } -} -// }}} - // find_either_of_two_bytes {{{ -#define haszero(v) (((v) - 0x0101010101010101ULL) & ~(v) & 0x8080808080808080ULL) -#define prepare_for_hasvalue(n) (~0ULL/255 * (n)) -#define hasvalue(x,n) (haszero((x) ^ (n))) - static const uint8_t* -find_either_of_two_bytes_simple(const uint8_t *haystack, const size_t sz, const uint8_t x, const uint8_t y) { - ByteLoader it; byte_loader_init(&it, (uint8_t*)haystack, sz); - - // first align by testing the first few bytes one at a time - while (it.num_left && it.digits_left < sizeof(BYTE_LOADER_T)) { - const uint8_t ch = byte_loader_next(&it); - if (ch == x || ch == y) return haystack + sz - it.num_left - 1; - } - - const BYTE_LOADER_T a = prepare_for_hasvalue(x), b = prepare_for_hasvalue(y); - while (it.num_left) { - if (hasvalue(it.m, a) || hasvalue(it.m, b)) { - const uint8_t *ans = haystack + sz - it.num_left, q = hasvalue(it.m, a) ? 
x : y; - while (it.num_left) { - if (byte_loader_next(&it) == q) return ans; - ans++; - } - return NULL; // happens for final word and it.num_left < sizeof(BYTE_LOADER_T) - } - byte_loader_skip(&it); +find_either_of_two_bytes_scalar(const uint8_t *haystack, const size_t sz, const uint8_t x, const uint8_t y) { + for (const uint8_t *limit = haystack + sz; haystack < limit; haystack++) { + if (*haystack == x || *haystack == y) return haystack; } return NULL; } @@ -151,17 +69,17 @@ find_either_of_two_bytes_simple(const uint8_t *haystack, const size_t sz, const static const uint8_t* find_either_of_two_bytes_sse4_2(const uint8_t *haystack, size_t sz, const uint8_t a, const uint8_t b) { - either_of_two(128, find_either_of_two_bytes_simple(haystack, es, a, b)); + either_of_two(128, find_either_of_two_bytes_scalar(haystack, es, a, b)); } static const uint8_t* find_either_of_two_bytes_avx2(const uint8_t *haystack, size_t sz, const uint8_t a, const uint8_t b) { - either_of_two(256, (has_sse4_2 && es > 15) ? find_either_of_two_bytes_sse4_2(haystack, es, a, b) : find_either_of_two_bytes_simple(haystack, es, a, b)); + either_of_two(256, (has_sse4_2 && es > 15) ? 
find_either_of_two_bytes_sse4_2(haystack, es, a, b) : find_either_of_two_bytes_scalar(haystack, es, a, b)); } -static const uint8_t* (*find_either_of_two_bytes_impl)(const uint8_t*, const size_t, const uint8_t, const uint8_t) = find_either_of_two_bytes_simple; +static const uint8_t* (*find_either_of_two_bytes_impl)(const uint8_t*, const size_t, const uint8_t, const uint8_t) = find_either_of_two_bytes_scalar; const uint8_t* find_either_of_two_bytes(const uint8_t *haystack, const size_t sz, const uint8_t a, const uint8_t b) { @@ -237,7 +155,7 @@ init_simd(void *x) { } if (has_sse4_2) { A(has_sse4_2, True); - if (find_either_of_two_bytes_impl == find_either_of_two_bytes_simple) find_either_of_two_bytes_impl = find_either_of_two_bytes_sse4_2; + if (find_either_of_two_bytes_impl == find_either_of_two_bytes_scalar) find_either_of_two_bytes_impl = find_either_of_two_bytes_sse4_2; } else { A(has_sse4_2, False); }