mirror of
https://github.com/kovidgoyal/kitty
synced 2026-06-11 19:19:35 +02:00
Use simd in a few loops
This commit is contained in:
@@ -61,9 +61,9 @@ byte_loader_skip(ByteLoader *self) {
|
||||
#define prepare_for_hasvalue(n) (~0ULL/255 * (n))
|
||||
#define hasvalue(x,n) (haszero((x) ^ (n)))
|
||||
|
||||
static uint8_t*
|
||||
find_either_of_two_chars_simple(uint8_t *haystack, const size_t sz, const uint8_t x, const uint8_t y) {
|
||||
ByteLoader it; byte_loader_init(&it, haystack, sz);
|
||||
static const char*
|
||||
find_either_of_two_bytes_simple(const char *haystack, const size_t sz, const uint8_t x, const uint8_t y) {
|
||||
ByteLoader it; byte_loader_init(&it, (uint8_t*)haystack, sz);
|
||||
|
||||
// first align by testing the first few bytes one at a time
|
||||
while (it.num_left && it.digits_left < sizeof(BYTE_LOADER_T)) {
|
||||
@@ -74,7 +74,7 @@ find_either_of_two_chars_simple(uint8_t *haystack, const size_t sz, const uint8_
|
||||
const BYTE_LOADER_T a = prepare_for_hasvalue(x), b = prepare_for_hasvalue(y);
|
||||
while (it.num_left) {
|
||||
if (hasvalue(it.m, a) || hasvalue(it.m, b)) {
|
||||
uint8_t *ans = haystack + sz - it.num_left, q = hasvalue(it.m, a) ? x : y;
|
||||
const char *ans = haystack + sz - it.num_left, q = hasvalue(it.m, a) ? x : y;
|
||||
while (it.num_left) {
|
||||
if (byte_loader_next(&it) == q) return ans;
|
||||
ans++;
|
||||
@@ -87,7 +87,39 @@ find_either_of_two_chars_simple(uint8_t *haystack, const size_t sz, const uint8_
|
||||
}
|
||||
#undef SHIFT_OP
|
||||
|
||||
static const char*
|
||||
find_either_of_two_bytes_simd_impl(const char *haystack, const char* needle_, size_t sz) {
|
||||
size_t extra = (uintptr_t)haystack % sizeof(__m128i);
|
||||
if (extra) { // need aligned loads for performance so search first few bytes by hand
|
||||
const char *ans = find_either_of_two_bytes_simple(haystack, MIN(sz, extra), needle_[0], needle_[1]);
|
||||
if (ans) return ans;
|
||||
extra = MIN(extra, sz);
|
||||
sz -= extra;
|
||||
haystack += extra;
|
||||
if (!sz) return NULL;
|
||||
}
|
||||
const __m128i needle = _mm_load_si128((const __m128i *)needle_);
|
||||
for (const char* limit = haystack + sz; haystack < limit; haystack += 16) {
|
||||
const __m128i h = _mm_load_si128((const __m128i *)haystack);
|
||||
int c = _mm_cmpistri(needle, h, _SIDD_CMP_EQUAL_ANY);
|
||||
if (c != 16) {
|
||||
return haystack + c;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static uint8_t*
|
||||
find_either_of_two_bytes_simd(uint8_t *haystack, const size_t sz, const uint8_t x, const uint8_t y) {
|
||||
uint8_t before = haystack[sz];
|
||||
haystack[sz] = 0;
|
||||
char needle[16] = {x, y, 0,0,0,0,0,0,0,0,0,0,0,0,0,0};
|
||||
uint8_t *ans = (uint8_t*)find_either_of_two_bytes_simd_impl((char*)haystack, needle, sz);
|
||||
haystack[sz] = before;
|
||||
return ans;
|
||||
}
|
||||
|
||||
uint8_t*
|
||||
find_either_of_two_bytes(uint8_t *haystack, const size_t sz, const uint8_t a, const uint8_t b) {
|
||||
return find_either_of_two_chars_simple(haystack, sz, a, b);
|
||||
return find_either_of_two_bytes_simd(haystack, sz, a, b);
|
||||
}
|
||||
|
||||
@@ -20,4 +20,8 @@ typedef struct ByteLoader {
|
||||
uint8_t byte_loader_peek(const ByteLoader *self);
|
||||
void byte_loader_init(ByteLoader *self, const uint8_t *buf, unsigned int sz);
|
||||
uint8_t byte_loader_next(ByteLoader *self);
|
||||
|
||||
// Requires haystack[sz] to be writable and 7 bytes to the left of haystack to
|
||||
// be readable. Returns pointer to first position in haystack that contains
|
||||
// either of the two chars or NULL if not found.
|
||||
uint8_t* find_either_of_two_bytes(uint8_t *haystack, const size_t sz, const uint8_t a, const uint8_t b);
|
||||
|
||||
Reference in New Issue
Block a user