diff --git a/kitty/simd-string-impl.h b/kitty/simd-string-impl.h index 4547a2542..f499181cc 100644 --- a/kitty/simd-string-impl.h +++ b/kitty/simd-string-impl.h @@ -249,9 +249,8 @@ bytes_to_first_match(const integer_t vec) { // }}} static inline integer_t -FUNC(zero_last_n_bytes)(integer_t vec, char n) { +FUNC(zero_last_n_bytes)(integer_t vec, const integer_t index, char n) { const integer_t threshold = set1_epi8(n); - const integer_t index = reverse_numbered_bytes(); const integer_t mask = cmpgt_epi8(threshold, index); return andnot_si(mask, vec); } @@ -431,8 +430,8 @@ FUNC(utf8_decode_to_esc)(UTF8Decoder *d, const uint8_t *src_data, size_t src_len src_data += d->num_consumed; src_len -= d->num_consumed; } const integer_t esc_vec = set1_epi8(0x1b); - const integer_t zero = create_zero_integer(), one = set1_epi8(1), two = set1_epi8(2), three = set1_epi8(3), four = set1_epi8(4); - const integer_t vec_c2 = set1_epi8(0xc2), vec_e3 = set1_epi8(0xe3), vec_f4 = set1_epi8(0xf4); + const integer_t zero = create_zero_integer(), one = set1_epi8(1), two = set1_epi8(2), three = set1_epi8(3), numbered = numbered_bytes(); + const integer_t reverse_numbered = reverse_numbered_bytes(); const uint8_t *limit = src_data + src_len, *p = src_data, *start_of_current_chunk = src_data; bool sentinel_found = false; unsigned chunk_src_sz = 0; @@ -453,7 +452,7 @@ FUNC(utf8_decode_to_esc)(UTF8Decoder *d, const uint8_t *src_data, size_t src_len if (!chunk_src_sz) continue; } else d->num_consumed += chunk_src_sz; - if (chunk_src_sz < sizeof(integer_t)) vec = zero_last_n_bytes(vec, sizeof(integer_t) - chunk_src_sz); + if (chunk_src_sz < sizeof(integer_t)) vec = zero_last_n_bytes(vec, reverse_numbered, sizeof(integer_t) - chunk_src_sz); num_of_trailing_bytes = 0; bool check_for_trailing_bytes = !sentinel_found; @@ -489,12 +488,12 @@ start_classification: const integer_t vec_signed = add_epi8(vec, state); // needed because cmplt_epi8 works only on signed chars const integer_t bytes_indicating_start_of_two_byte_sequence = cmplt_epi8(set1_epi8(0xc0 - 1 - 0x80), vec_signed); - state = blendv_epi8(state, vec_c2, bytes_indicating_start_of_two_byte_sequence); + state = blendv_epi8(state, set1_epi8(0xc2), bytes_indicating_start_of_two_byte_sequence); // state now has 0xc2 on all bytes that start a 2 or more byte sequence and 0x80 on the rest const integer_t bytes_indicating_start_of_three_byte_sequence = cmplt_epi8(set1_epi8(0xe0 - 1 - 0x80), vec_signed); - state = blendv_epi8(state, vec_e3, bytes_indicating_start_of_three_byte_sequence); + state = blendv_epi8(state, set1_epi8(0xe3), bytes_indicating_start_of_three_byte_sequence); const integer_t bytes_indicating_start_of_four_byte_sequence = cmplt_epi8(set1_epi8(0xf0 - 1 - 0x80), vec_signed); - state = blendv_epi8(state, vec_f4, bytes_indicating_start_of_four_byte_sequence); + state = blendv_epi8(state, set1_epi8(0xf4), bytes_indicating_start_of_four_byte_sequence); // state now has 0xc2 on all bytes that start a 2 byte sequence, 0xe3 on start of 3-byte, 0xf4 on 4-byte start and 0x80 on rest debug_register(state); const integer_t mask = and_si(state, set1_epi8(0xf8)); // keep upper 5 bits of state @@ -510,7 +509,7 @@ start_classification: // counts now contains the number of bytes remaining in each utf-8 sequence of 2 or more bytes debug_register(counts); // check for an incomplete trailing utf8 sequence - if (check_for_trailing_bytes && !is_zero(cmplt_epi8(one, and_si(counts, cmpeq_epi8(numbered_bytes(), set1_epi8(chunk_src_sz - 1)))))) { + if (check_for_trailing_bytes && !is_zero(cmplt_epi8(one, and_si(counts, cmpeq_epi8(numbered, set1_epi8(chunk_src_sz - 1)))))) { // The value of counts at the last byte is > 1 indicating we have a trailing incomplete sequence check_for_trailing_bytes = false; if (start_of_current_chunk[chunk_src_sz-1] >= 0xc0) num_of_trailing_bytes = 1; // 2-, 3- and 4-byte characters with only 1 byte left @@ -519,7 +518,7 @@ start_classification: chunk_src_sz -= num_of_trailing_bytes; d->num_consumed -= num_of_trailing_bytes; if (!chunk_src_sz) { abort_with_invalid_utf8(); } - vec = zero_last_n_bytes(vec, sizeof(integer_t) - chunk_src_sz); + vec = zero_last_n_bytes(vec, reverse_numbered, sizeof(integer_t) - chunk_src_sz); goto start_classification; } // Only ASCII chars should have corresponding byte of counts == 0 @@ -564,7 +563,7 @@ start_classification: // The last byte is made up of bits 5 and 6 from count == 3 and 3 bits from count == 4 integer_t output3 = and_si(three, shift_right_by_bits32(vec, 4)); // bits 5 and 6 from count == 3 - const integer_t count4_locations = cmpeq_epi8(counts, four); + const integer_t count4_locations = cmpeq_epi8(counts, set1_epi8(4)); // 3 bits from count == 4 locations, placed at count == 3 locations shifted left by 2 bits output3 = or_si(output3, and_si(set1_epi8(0xfc), @@ -603,7 +602,7 @@ start_classification: #endif #undef move // convert the shifts into a suitable mask for shuffle by adding the byte number to each byte - shifts = add_epi8(shifts, numbered_bytes()); + shifts = add_epi8(shifts, numbered); debug_register(shifts); output1 = shuffle_epi8(output1, shifts);