diff --git a/kitty/simd-string-impl.h b/kitty/simd-string-impl.h index 44e99588d..ee1184a64 100644 --- a/kitty/simd-string-impl.h +++ b/kitty/simd-string-impl.h @@ -8,6 +8,8 @@ #define BITS 128 #endif +#include "simd-string.h" + #ifdef __clang__ _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wbitwise-instead-of-logical\"") #endif @@ -17,17 +19,18 @@ _Pragma("clang diagnostic pop") #endif +#define CONCAT(A, B) A##B +#define CONCAT_EXPAND(A, B) CONCAT(A,B) +#define FUNC(name) CONCAT_EXPAND(name##_, BITS) +#define integer_t CONCAT_EXPAND(CONCAT_EXPAND(__m, BITS), i) + #if BITS == 128 -#define FUNC(name) name##_##128 -#define integer_t __m128i #define set1_epi8 simde_mm_set1_epi8 #define load_unaligned simde_mm_loadu_si128 #define cmpeq_epi8 simde_mm_cmpeq_epi8 #define or_si simde_mm_or_si128 #define movemask_epi8 simde_mm_movemask_epi8 #else -#define FUNC(name) name##_##256 -#define integer_t __m256i #define set1_epi8 simde_mm256_set1_epi8 #define load_unaligned simde_mm256_loadu_si256 #define cmpeq_epi8 simde_mm256_cmpeq_epi8 @@ -53,6 +56,12 @@ FUNC(find_either_of_two_bytes)(const uint8_t *haystack, const size_t sz, const u return NULL; } +static inline unsigned +FUNC(utf8_decode_to_sentinel)(UTF8Decoder *d, const uint8_t *src, const size_t src_sz, const uint8_t sentinel) { + (void)d; (void)src; (void)src_sz; (void)sentinel; + return 0; +} + #undef FUNC #undef integer_t @@ -61,3 +70,5 @@ FUNC(find_either_of_two_bytes)(const uint8_t *haystack, const size_t sz, const u #undef cmpeq_epi8 #undef or_si #undef movemask_epi8 +#undef CONCAT +#undef CONCAT_EXPAND diff --git a/kitty/simd-string.c b/kitty/simd-string.c index b7997ba11..1dfb0c878 100644 --- a/kitty/simd-string.c +++ b/kitty/simd-string.c @@ -66,12 +66,6 @@ utf8_decode_to_sentinel_scalar(UTF8Decoder *d, const uint8_t *src, const size_t return num_consumed; } -static unsigned -utf8_decode_to_sentinel_sse4_2(UTF8Decoder *d, const uint8_t *src, const size_t src_sz, const uint8_t sentinel) { - (void)d; (void)src; (void)src_sz; (void)sentinel; - return 0; -} - static unsigned (*utf8_decode_to_sentinel_impl)(UTF8Decoder *d, const uint8_t *src, const size_t src_sz, const uint8_t sentinel) = utf8_decode_to_sentinel_scalar; unsigned @@ -116,7 +110,9 @@ test_utf8_decode_to_sentinel(PyObject *self UNUSED, PyObject *args) { case 1: consumed = utf8_decode_to_sentinel_scalar(&d, src, src_sz, sentinel); break; case 2: - consumed = utf8_decode_to_sentinel_sse4_2(&d, src, src_sz, sentinel); break; + consumed = utf8_decode_to_sentinel_128(&d, src, src_sz, sentinel); break; + case 3: + consumed = utf8_decode_to_sentinel_256(&d, src, src_sz, sentinel); break; default: consumed = utf8_decode_to_sentinel(&d, src, src_sz, sentinel); break; } @@ -153,13 +149,14 @@ init_simd(void *x) { if (has_avx2) { A(has_avx2, True); find_either_of_two_bytes_impl = find_either_of_two_bytes_256; + /* utf8_decode_to_sentinel_impl = utf8_decode_to_sentinel_256; */ } else { A(has_avx2, False); } if (has_sse4_2) { A(has_sse4_2, True); if (find_either_of_two_bytes_impl == find_either_of_two_bytes_scalar) find_either_of_two_bytes_impl = find_either_of_two_bytes_128; - /* if (utf8_decode_to_sentinel_impl == utf8_decode_to_sentinel_scalar) utf8_decode_to_sentinel_impl = utf8_decode_to_sentinel_sse4_2; */ + /* if (utf8_decode_to_sentinel_impl == utf8_decode_to_sentinel_scalar) utf8_decode_to_sentinel_impl = utf8_decode_to_sentinel_128; */ } else { A(has_sse4_2, False); }