mirror of
https://github.com/kovidgoyal/kitty
synced 2026-06-06 01:05:48 +02:00
Use a branchless check for unicode range
This commit is contained in:
@@ -517,8 +517,7 @@ def get_types(sz: int) -> tuple[str, str]:
|
||||
|
||||
|
||||
def gen_multistage_table(
|
||||
c: Callable[..., None], g: Callable[..., None], t1: Sequence[int], t2: Sequence[int], t3: Sequence[Property], shift: int,
|
||||
for_go_type: str, maxval: int = 0
|
||||
c: Callable[..., None], g: Callable[..., None], t1: Sequence[int], t2: Sequence[int], t3: Sequence[Property], shift: int, input_sz: int
|
||||
) -> None:
|
||||
t1_type_sz = getsize(t1)
|
||||
ctype_t1, gotype_t1 = get_types(t1_type_sz)
|
||||
@@ -554,11 +553,10 @@ def gen_multistage_table(
|
||||
g(f'\t{items}')
|
||||
g('}')
|
||||
|
||||
check = f'x = max(0, min(x, {maxval}))' if maxval else ''
|
||||
input_type = get_types(getsize((input_sz,)))[1]
|
||||
g(f'''
|
||||
// Array accessor function that avoids bounds checking
|
||||
func {name}For(x {for_go_type}) {name} {{
|
||||
{check}
|
||||
func {lname}_for(x {input_type}) {name} {{
|
||||
t1 := uintptr(*(*{gotype_t1})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t1[0])) + uintptr(x>>{lname}_shift)*{t1_type_sz})))
|
||||
t1_shifted := (t1 << {lname}_shift) + (uintptr(x) & {lname}_mask)
|
||||
t2 := uintptr(*(*{gotype_t2})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t2[0])) + t1_shifted*{t2_type_sz})))
|
||||
@@ -1146,6 +1144,8 @@ def gen_char_props() -> None:
|
||||
gp = partial(print, file=gof)
|
||||
gp('package wcswidth')
|
||||
gp('import "unsafe"')
|
||||
gp(f'const MAX_UNICODE = {sys.maxunicode}')
|
||||
gp(f'const UNICODE_LIMIT = {sys.maxunicode + 1}')
|
||||
generate_enum(c, gp, 'GraphemeBreakProperty', *grapheme_segmentation_maps, prefix='GBP_')
|
||||
generate_enum(c, gp, 'IndicConjunctBreak', *incb_map, prefix='ICB_')
|
||||
cen('// UCBDeclaration {{''{')
|
||||
@@ -1156,8 +1156,8 @@ def gen_char_props() -> None:
|
||||
gp(make_bitfield('tools/wcswidth', 'GraphemeSegmentationResult', *GraphemeSegmentationResult.go_fields(), add_package=False)[1])
|
||||
gp(CharProps.go_extra())
|
||||
gp(GraphemeSegmentationResult.go_extra())
|
||||
gen_multistage_table(c, gp, t1, t2, t3, t_shift, 'rune', sys.maxunicode)
|
||||
gen_multistage_table(c, gp, g1, g2, g3, g_shift, 'uint16')
|
||||
gen_multistage_table(c, gp, t1, t2, t3, t_shift, len(prop_array)-1)
|
||||
gen_multistage_table(c, gp, g1, g2, g3, g_shift, len(gseg_results)-1)
|
||||
c(GraphemeSegmentationKey.code_to_convert_to_int())
|
||||
c(GraphemeSegmentationState.c_declaration())
|
||||
gp(GraphemeSegmentationKey.code_to_convert_to_int(for_go=True))
|
||||
|
||||
@@ -8,10 +8,18 @@
|
||||
#include "char-props.h"
|
||||
#include "char-props-data.h"
|
||||
|
||||
static char_type
|
||||
ensure_char_in_range(const char_type value) {
|
||||
// Branchless: if (value > MAX_UNICODE) value = 0
|
||||
const int64_t diff = ((int64_t)value) - ((int64_t)(MAX_UNICODE + 1u));
|
||||
// The right shift gives all ones for negative diff and all zeros for positive diff
|
||||
const char_type mask = diff >> 63;
|
||||
return value & mask;
|
||||
}
|
||||
|
||||
CharProps
|
||||
char_props_for(char_type ch) {
|
||||
if (ch > MAX_UNICODE) ch = 0;
|
||||
ch = ensure_char_in_range(ch);
|
||||
return CharProps_t3[CharProps_t2[(CharProps_t1[ch >> CharProps_shift] << CharProps_shift) + (ch & CharProps_mask)]];
|
||||
}
|
||||
|
||||
|
||||
9
tools/wcswidth/char-props-data.go
generated
9
tools/wcswidth/char-props-data.go
generated
@@ -2,6 +2,9 @@ package wcswidth
|
||||
|
||||
import "unsafe"
|
||||
|
||||
const MAX_UNICODE = 1114111
|
||||
const UNICODE_LIMIT = 1114112
|
||||
|
||||
type GraphemeBreakProperty uint8
|
||||
|
||||
const (
|
||||
@@ -387,8 +390,7 @@ var charprops_t3 = [109]CharProps{
|
||||
}
|
||||
|
||||
// Array accessor function that avoids bounds checking
|
||||
func CharPropsFor(x rune) CharProps {
|
||||
x = max(0, min(x, 1114111))
|
||||
func charprops_for(x uint32) CharProps {
|
||||
t1 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&charprops_t1[0])) + uintptr(x>>charprops_shift)*1)))
|
||||
t1_shifted := (t1 << charprops_shift) + (uintptr(x) & charprops_mask)
|
||||
t2 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&charprops_t2[0])) + t1_shifted*1)))
|
||||
@@ -1038,8 +1040,7 @@ var graphemesegmentationresult_t3 = [630]GraphemeSegmentationResult{
|
||||
}
|
||||
|
||||
// Array accessor function that avoids bounds checking
|
||||
func GraphemeSegmentationResultFor(x uint16) GraphemeSegmentationResult {
|
||||
|
||||
func graphemesegmentationresult_for(x uint16) GraphemeSegmentationResult {
|
||||
t1 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&graphemesegmentationresult_t1[0])) + uintptr(x>>graphemesegmentationresult_shift)*1)))
|
||||
t1_shifted := (t1 << graphemesegmentationresult_shift) + (uintptr(x) & graphemesegmentationresult_mask)
|
||||
t2 := uintptr(*(*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(&graphemesegmentationresult_t2[0])) + t1_shifted*2)))
|
||||
|
||||
@@ -7,6 +7,19 @@ import (
|
||||
|
||||
var _ = fmt.Print
|
||||
|
||||
func ensure_char_in_range(value uint32) uint32 {
|
||||
// Branchless: if (value > MAX_UNICODE) value = 0
|
||||
diff := int64(value) - UNICODE_LIMIT
|
||||
// The right shift gives all ones for negative diff and all zeros for positive diff
|
||||
mask := uint32(diff >> 63)
|
||||
return value & mask
|
||||
}
|
||||
|
||||
func CharPropsFor(ch rune) CharProps {
|
||||
q := ensure_char_in_range(uint32(ch))
|
||||
return charprops_for(q)
|
||||
}
|
||||
|
||||
func IteratorOverGraphemes(text string) iter.Seq[string] {
|
||||
var s GraphemeSegmentationResult
|
||||
start_pos := 0
|
||||
@@ -39,7 +52,7 @@ func (s *GraphemeSegmentationResult) Reset() {
|
||||
|
||||
func (s GraphemeSegmentationResult) Step(ch CharProps) GraphemeSegmentationResult {
|
||||
key := grapheme_segmentation_key(s, ch)
|
||||
return GraphemeSegmentationResultFor(key)
|
||||
return graphemesegmentationresult_for(key)
|
||||
}
|
||||
|
||||
func Runewidth(code rune) int {
|
||||
|
||||
Reference in New Issue
Block a user