From 8aa1b112b834e96c756913418f2bcd91043b422f Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 27 Jan 2024 12:09:16 +0530
Subject: [PATCH] Turns out the simde implementation of movemask is not slow enough to compensate for the speed bump from 256 bit

---
 kitty/simd-string-impl.h | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/kitty/simd-string-impl.h b/kitty/simd-string-impl.h
index 52142043f..4a0fc3181 100644
--- a/kitty/simd-string-impl.h
+++ b/kitty/simd-string-impl.h
@@ -83,12 +83,6 @@ typedef int32_t find_mask_t;
 static inline int
 FUNC(is_zero)(const integer_t a) { return simde_mm_testz_si128(a, a); }
 
-static inline find_mask_t
-mask_for_find(const integer_t a) { return movemask_epi8(a); }
-
-static inline unsigned
-bytes_to_first_match(const find_mask_t m) { return __builtin_ctz(m); }
-
 #else
 
 #define set1_epi8(x) simde_mm256_set1_epi8((char)(x))
@@ -116,12 +110,6 @@ bytes_to_first_match(const find_mask_t m) { return __builtin_ctz(m); }
 static inline int
 FUNC(is_zero)(const integer_t a) { return simde_mm256_testz_si256(a, a); }
 
-static inline find_mask_t
-mask_for_find(const integer_t a) { return movemask_epi8(a); }
-
-static inline unsigned
-bytes_to_first_match(const find_mask_t m) { return __builtin_ctz(m); }
-
 static inline integer_t
 shift_right_by_one_byte(const integer_t A) {
     return simde_mm256_alignr_epi8(A, simde_mm256_permute2x128_si256(A, A, _MM_SHUFFLE(0, 0, 2, 0)), 16 - 1);
@@ -214,6 +202,13 @@ static inline integer_t shuffle_impl256(const integer_t value, const integer_t s
 #define debug(...)
 #endif
 
+static inline find_mask_t
+mask_for_find(const integer_t a) { return movemask_epi8(a); }
+
+static inline unsigned
+bytes_to_first_match(const find_mask_t m) { return __builtin_ctz(m); }
+
+
 // }}}
 
 static inline integer_t