mirror of
https://github.com/kovidgoyal/kitty
synced 2026-06-12 11:39:33 +02:00
Turns out the simde implementation of movemask is not slow enough to cancel out the speed gain from going 256-bit
This commit is contained in:
@@ -83,12 +83,6 @@ typedef int32_t find_mask_t;
|
||||
// Zero test for a vector: forwards to simde_mm_testz_si128(a, a).
// Per the _mm_testz_si128 contract that SIMDe emulates, the result is 1 when
// (a AND a) -- i.e. a itself -- has no bits set, and 0 otherwise.
static inline int
|
||||
FUNC(is_zero)(const integer_t a) { return simde_mm_testz_si128(a, a); }
|
||||
|
||||
// Thin wrapper: returns movemask_epi8(a) as a find_mask_t.
// NOTE(review): presumably the mask carries one bit per byte lane of a;
// confirm against the movemask_epi8 definition, which is not visible here.
static inline find_mask_t
|
||||
mask_for_find(const integer_t a) { return movemask_epi8(a); }
|
||||
|
||||
// Returns __builtin_ctz(m): the count of trailing zero bits in m.
// __builtin_ctz is undefined for an argument of 0, so m must be non-zero.
// NOTE(review): reading the result as a byte offset assumes the mask has one
// bit per byte -- confirm against how the mask is produced.
static inline unsigned
|
||||
bytes_to_first_match(const find_mask_t m) { return __builtin_ctz(m); }
|
||||
|
||||
#else
|
||||
|
||||
// Forwards x, cast to char, to simde_mm256_set1_epi8 (the 256-bit variant).
#define set1_epi8(x) simde_mm256_set1_epi8((char)(x))
|
||||
@@ -116,12 +110,6 @@ bytes_to_first_match(const find_mask_t m) { return __builtin_ctz(m); }
|
||||
// Zero test for a vector: forwards to simde_mm256_testz_si256(a, a).
// Per the _mm256_testz_si256 contract that SIMDe emulates, the result is 1
// when (a AND a) -- i.e. a itself -- has no bits set, and 0 otherwise.
static inline int
|
||||
FUNC(is_zero)(const integer_t a) { return simde_mm256_testz_si256(a, a); }
|
||||
|
||||
// Thin wrapper: returns movemask_epi8(a) as a find_mask_t.
// NOTE(review): presumably the mask carries one bit per byte lane of a;
// confirm against the movemask_epi8 definition, which is not visible here.
static inline find_mask_t
|
||||
mask_for_find(const integer_t a) { return movemask_epi8(a); }
|
||||
|
||||
// Returns __builtin_ctz(m): the count of trailing zero bits in m.
// __builtin_ctz is undefined for an argument of 0, so m must be non-zero.
// NOTE(review): reading the result as a byte offset assumes the mask has one
// bit per byte -- confirm against how the mask is produced.
static inline unsigned
|
||||
bytes_to_first_match(const find_mask_t m) { return __builtin_ctz(m); }
|
||||
|
||||
static inline integer_t
|
||||
shift_right_by_one_byte(const integer_t A) {
|
||||
return simde_mm256_alignr_epi8(A, simde_mm256_permute2x128_si256(A, A, _MM_SHUFFLE(0, 0, 2, 0)), 16 - 1);
|
||||
@@ -214,6 +202,13 @@ static inline integer_t shuffle_impl256(const integer_t value, const integer_t s
|
||||
// No-op definition of debug(): any arguments are discarded and the
// invocation expands to nothing.
#define debug(...)
|
||||
#endif
|
||||
|
||||
// Thin wrapper: returns movemask_epi8(a) as a find_mask_t.
// NOTE(review): presumably the mask carries one bit per byte lane of a;
// confirm against the movemask_epi8 definition, which is not visible here.
static inline find_mask_t
|
||||
mask_for_find(const integer_t a) { return movemask_epi8(a); }
|
||||
|
||||
// Returns __builtin_ctz(m): the count of trailing zero bits in m.
// __builtin_ctz is undefined for an argument of 0, so m must be non-zero.
// NOTE(review): reading the result as a byte offset assumes the mask has one
// bit per byte -- confirm against how the mask is produced.
static inline unsigned
|
||||
bytes_to_first_match(const find_mask_t m) { return __builtin_ctz(m); }
|
||||
|
||||
|
||||
// }}}
|
||||
|
||||
static inline integer_t
|
||||
|
||||
Reference in New Issue
Block a user