diff --git a/gen/wcwidth.py b/gen/wcwidth.py index 945099feb..c4083766e 100755 --- a/gen/wcwidth.py +++ b/gen/wcwidth.py @@ -517,8 +517,7 @@ def get_types(sz: int) -> tuple[str, str]: def gen_multistage_table( - c: Callable[..., None], g: Callable[..., None], t1: Sequence[int], t2: Sequence[int], t3: Sequence[Property], shift: int, - for_go_type: str, maxval: int = 0 + c: Callable[..., None], g: Callable[..., None], t1: Sequence[int], t2: Sequence[int], t3: Sequence[Property], shift: int, input_sz: int ) -> None: t1_type_sz = getsize(t1) ctype_t1, gotype_t1 = get_types(t1_type_sz) @@ -554,11 +553,10 @@ def gen_multistage_table( g(f'\t{items}') g('}') - check = f'x = max(0, min(x, {maxval}))' if maxval else '' + input_type = get_types(getsize((input_sz,)))[1] g(f''' // Array accessor function that avoids bounds checking -func {name}For(x {for_go_type}) {name} {{ - {check} +func {lname}_for(x {input_type}) {name} {{ t1 := uintptr(*(*{gotype_t1})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t1[0])) + uintptr(x>>{lname}_shift)*{t1_type_sz}))) t1_shifted := (t1 << {lname}_shift) + (uintptr(x) & {lname}_mask) t2 := uintptr(*(*{gotype_t2})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t2[0])) + t1_shifted*{t2_type_sz}))) @@ -1146,6 +1144,8 @@ def gen_char_props() -> None: gp = partial(print, file=gof) gp('package wcswidth') gp('import "unsafe"') + gp(f'const MAX_UNICODE = {sys.maxunicode}') + gp(f'const UNICODE_LIMIT = {sys.maxunicode + 1}') generate_enum(c, gp, 'GraphemeBreakProperty', *grapheme_segmentation_maps, prefix='GBP_') generate_enum(c, gp, 'IndicConjunctBreak', *incb_map, prefix='ICB_') cen('// UCBDeclaration {{''{') @@ -1156,8 +1156,8 @@ def gen_char_props() -> None: gp(make_bitfield('tools/wcswidth', 'GraphemeSegmentationResult', *GraphemeSegmentationResult.go_fields(), add_package=False)[1]) gp(CharProps.go_extra()) gp(GraphemeSegmentationResult.go_extra()) - gen_multistage_table(c, gp, t1, t2, t3, t_shift, 'rune', sys.maxunicode) - 
gen_multistage_table(c, gp, g1, g2, g3, g_shift, 'uint16') + gen_multistage_table(c, gp, t1, t2, t3, t_shift, len(prop_array)-1) + gen_multistage_table(c, gp, g1, g2, g3, g_shift, len(gseg_results)-1) c(GraphemeSegmentationKey.code_to_convert_to_int()) c(GraphemeSegmentationState.c_declaration()) gp(GraphemeSegmentationKey.code_to_convert_to_int(for_go=True)) diff --git a/kitty/char-props.c b/kitty/char-props.c index dcf31a92e..b84e8a888 100644 --- a/kitty/char-props.c +++ b/kitty/char-props.c @@ -8,10 +8,18 @@ #include "char-props.h" #include "char-props-data.h" +static char_type +ensure_char_in_range(const char_type value) { + // Branchless: if (value > MAX_UNICODE) value = 0 + const int64_t diff = ((int64_t)value) - ((int64_t)(MAX_UNICODE + 1u)); + // The right shift gives all ones for negative diff and all zeros for non-negative diff + const char_type mask = diff >> 63; + return value & mask; +} CharProps char_props_for(char_type ch) { - if (ch > MAX_UNICODE) ch = 0; + ch = ensure_char_in_range(ch); return CharProps_t3[CharProps_t2[(CharProps_t1[ch >> CharProps_shift] << CharProps_shift) + (ch & CharProps_mask)]]; } diff --git a/tools/wcswidth/char-props-data.go b/tools/wcswidth/char-props-data.go index 310261dcc..03fdb2f51 100644 --- a/tools/wcswidth/char-props-data.go +++ b/tools/wcswidth/char-props-data.go @@ -2,6 +2,9 @@ package wcswidth import "unsafe" +const MAX_UNICODE = 1114111 +const UNICODE_LIMIT = 1114112 + type GraphemeBreakProperty uint8 const ( @@ -387,8 +390,7 @@ var charprops_t3 = [109]CharProps{ } // Array accessor function that avoids bounds checking -func CharPropsFor(x rune) CharProps { - x = max(0, min(x, 1114111)) +func charprops_for(x uint32) CharProps { t1 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&charprops_t1[0])) + uintptr(x>>charprops_shift)*1))) t1_shifted := (t1 << charprops_shift) + (uintptr(x) & charprops_mask) t2 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&charprops_t2[0])) + t1_shifted*1))) @@ 
-1038,8 +1040,7 @@ var graphemesegmentationresult_t3 = [630]GraphemeSegmentationResult{ } // Array accessor function that avoids bounds checking -func GraphemeSegmentationResultFor(x uint16) GraphemeSegmentationResult { - +func graphemesegmentationresult_for(x uint16) GraphemeSegmentationResult { t1 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&graphemesegmentationresult_t1[0])) + uintptr(x>>graphemesegmentationresult_shift)*1))) t1_shifted := (t1 << graphemesegmentationresult_shift) + (uintptr(x) & graphemesegmentationresult_mask) t2 := uintptr(*(*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(&graphemesegmentationresult_t2[0])) + t1_shifted*2))) diff --git a/tools/wcswidth/char-props.go b/tools/wcswidth/char-props.go index cf664f96b..3810c3d50 100644 --- a/tools/wcswidth/char-props.go +++ b/tools/wcswidth/char-props.go @@ -7,6 +7,19 @@ import ( var _ = fmt.Print +func ensure_char_in_range(value uint32) uint32 { + // Branchless: if (value > MAX_UNICODE) value = 0 + diff := int64(value) - UNICODE_LIMIT + // The right shift gives all ones for negative diff and all zeros for non-negative diff + mask := uint32(diff >> 63) + return value & mask +} + +func CharPropsFor(ch rune) CharProps { + q := ensure_char_in_range(uint32(ch)) + return charprops_for(q) +} + func IteratorOverGraphemes(text string) iter.Seq[string] { var s GraphemeSegmentationResult start_pos := 0 @@ -39,7 +52,7 @@ func (s *GraphemeSegmentationResult) Reset() { func (s GraphemeSegmentationResult) Step(ch CharProps) GraphemeSegmentationResult { key := grapheme_segmentation_key(s, ch) - return GraphemeSegmentationResultFor(key) + return graphemesegmentationresult_for(key) } func Runewidth(code rune) int {