mirror of
https://github.com/kovidgoyal/kitty
synced 2026-06-08 14:18:26 +02:00
Use a branchless check for unicode range
This commit is contained in:
@@ -517,8 +517,7 @@ def get_types(sz: int) -> tuple[str, str]:
|
|||||||
|
|
||||||
|
|
||||||
def gen_multistage_table(
|
def gen_multistage_table(
|
||||||
c: Callable[..., None], g: Callable[..., None], t1: Sequence[int], t2: Sequence[int], t3: Sequence[Property], shift: int,
|
c: Callable[..., None], g: Callable[..., None], t1: Sequence[int], t2: Sequence[int], t3: Sequence[Property], shift: int, input_sz: int
|
||||||
for_go_type: str, maxval: int = 0
|
|
||||||
) -> None:
|
) -> None:
|
||||||
t1_type_sz = getsize(t1)
|
t1_type_sz = getsize(t1)
|
||||||
ctype_t1, gotype_t1 = get_types(t1_type_sz)
|
ctype_t1, gotype_t1 = get_types(t1_type_sz)
|
||||||
@@ -554,11 +553,10 @@ def gen_multistage_table(
|
|||||||
g(f'\t{items}')
|
g(f'\t{items}')
|
||||||
g('}')
|
g('}')
|
||||||
|
|
||||||
check = f'x = max(0, min(x, {maxval}))' if maxval else ''
|
input_type = get_types(getsize((input_sz,)))[1]
|
||||||
g(f'''
|
g(f'''
|
||||||
// Array accessor function that avoids bounds checking
|
// Array accessor function that avoids bounds checking
|
||||||
func {name}For(x {for_go_type}) {name} {{
|
func {lname}_for(x {input_type}) {name} {{
|
||||||
{check}
|
|
||||||
t1 := uintptr(*(*{gotype_t1})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t1[0])) + uintptr(x>>{lname}_shift)*{t1_type_sz})))
|
t1 := uintptr(*(*{gotype_t1})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t1[0])) + uintptr(x>>{lname}_shift)*{t1_type_sz})))
|
||||||
t1_shifted := (t1 << {lname}_shift) + (uintptr(x) & {lname}_mask)
|
t1_shifted := (t1 << {lname}_shift) + (uintptr(x) & {lname}_mask)
|
||||||
t2 := uintptr(*(*{gotype_t2})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t2[0])) + t1_shifted*{t2_type_sz})))
|
t2 := uintptr(*(*{gotype_t2})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t2[0])) + t1_shifted*{t2_type_sz})))
|
||||||
@@ -1146,6 +1144,8 @@ def gen_char_props() -> None:
|
|||||||
gp = partial(print, file=gof)
|
gp = partial(print, file=gof)
|
||||||
gp('package wcswidth')
|
gp('package wcswidth')
|
||||||
gp('import "unsafe"')
|
gp('import "unsafe"')
|
||||||
|
gp(f'const MAX_UNICODE = {sys.maxunicode}')
|
||||||
|
gp(f'const UNICODE_LIMIT = {sys.maxunicode + 1}')
|
||||||
generate_enum(c, gp, 'GraphemeBreakProperty', *grapheme_segmentation_maps, prefix='GBP_')
|
generate_enum(c, gp, 'GraphemeBreakProperty', *grapheme_segmentation_maps, prefix='GBP_')
|
||||||
generate_enum(c, gp, 'IndicConjunctBreak', *incb_map, prefix='ICB_')
|
generate_enum(c, gp, 'IndicConjunctBreak', *incb_map, prefix='ICB_')
|
||||||
cen('// UCBDeclaration {{''{')
|
cen('// UCBDeclaration {{''{')
|
||||||
@@ -1156,8 +1156,8 @@ def gen_char_props() -> None:
|
|||||||
gp(make_bitfield('tools/wcswidth', 'GraphemeSegmentationResult', *GraphemeSegmentationResult.go_fields(), add_package=False)[1])
|
gp(make_bitfield('tools/wcswidth', 'GraphemeSegmentationResult', *GraphemeSegmentationResult.go_fields(), add_package=False)[1])
|
||||||
gp(CharProps.go_extra())
|
gp(CharProps.go_extra())
|
||||||
gp(GraphemeSegmentationResult.go_extra())
|
gp(GraphemeSegmentationResult.go_extra())
|
||||||
gen_multistage_table(c, gp, t1, t2, t3, t_shift, 'rune', sys.maxunicode)
|
gen_multistage_table(c, gp, t1, t2, t3, t_shift, len(prop_array)-1)
|
||||||
gen_multistage_table(c, gp, g1, g2, g3, g_shift, 'uint16')
|
gen_multistage_table(c, gp, g1, g2, g3, g_shift, len(gseg_results)-1)
|
||||||
c(GraphemeSegmentationKey.code_to_convert_to_int())
|
c(GraphemeSegmentationKey.code_to_convert_to_int())
|
||||||
c(GraphemeSegmentationState.c_declaration())
|
c(GraphemeSegmentationState.c_declaration())
|
||||||
gp(GraphemeSegmentationKey.code_to_convert_to_int(for_go=True))
|
gp(GraphemeSegmentationKey.code_to_convert_to_int(for_go=True))
|
||||||
|
|||||||
@@ -8,10 +8,18 @@
|
|||||||
#include "char-props.h"
|
#include "char-props.h"
|
||||||
#include "char-props-data.h"
|
#include "char-props-data.h"
|
||||||
|
|
||||||
|
// Map any value above MAX_UNICODE to 0 and return every other value unchanged,
// without using a conditional branch.
static char_type
|
||||||
|
ensure_char_in_range(const char_type value) {
|
||||||
|
// Branchless equivalent of: if (value > MAX_UNICODE) value = 0
|
||||||
|
// diff is negative exactly when value <= MAX_UNICODE
const int64_t diff = ((int64_t)value) - ((int64_t)(MAX_UNICODE + 1u));
|
||||||
|
// The right shift gives all ones for negative diff and all zeros for non-negative diff (diff == 0 when value == MAX_UNICODE + 1).
// NOTE(review): right-shifting a negative signed integer is implementation-defined in C; this relies on an arithmetic shift — confirm for all target compilers.
|
||||||
|
const char_type mask = diff >> 63;
|
||||||
|
// value & all-ones keeps value; value & 0 yields 0
return value & mask;
|
||||||
|
}
|
||||||
|
|
||||||
CharProps
|
CharProps
|
||||||
char_props_for(char_type ch) {
|
char_props_for(char_type ch) {
|
||||||
if (ch > MAX_UNICODE) ch = 0;
|
ch = ensure_char_in_range(ch);
|
||||||
return CharProps_t3[CharProps_t2[(CharProps_t1[ch >> CharProps_shift] << CharProps_shift) + (ch & CharProps_mask)]];
|
return CharProps_t3[CharProps_t2[(CharProps_t1[ch >> CharProps_shift] << CharProps_shift) + (ch & CharProps_mask)]];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
9
tools/wcswidth/char-props-data.go
generated
9
tools/wcswidth/char-props-data.go
generated
@@ -2,6 +2,9 @@ package wcswidth
|
|||||||
|
|
||||||
import "unsafe"
|
import "unsafe"
|
||||||
|
|
||||||
|
const MAX_UNICODE = 1114111
|
||||||
|
const UNICODE_LIMIT = 1114112
|
||||||
|
|
||||||
type GraphemeBreakProperty uint8
|
type GraphemeBreakProperty uint8
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -387,8 +390,7 @@ var charprops_t3 = [109]CharProps{
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Array accessor function that avoids bounds checking
|
// Array accessor function that avoids bounds checking
|
||||||
func CharPropsFor(x rune) CharProps {
|
func charprops_for(x uint32) CharProps {
|
||||||
x = max(0, min(x, 1114111))
|
|
||||||
t1 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&charprops_t1[0])) + uintptr(x>>charprops_shift)*1)))
|
t1 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&charprops_t1[0])) + uintptr(x>>charprops_shift)*1)))
|
||||||
t1_shifted := (t1 << charprops_shift) + (uintptr(x) & charprops_mask)
|
t1_shifted := (t1 << charprops_shift) + (uintptr(x) & charprops_mask)
|
||||||
t2 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&charprops_t2[0])) + t1_shifted*1)))
|
t2 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&charprops_t2[0])) + t1_shifted*1)))
|
||||||
@@ -1038,8 +1040,7 @@ var graphemesegmentationresult_t3 = [630]GraphemeSegmentationResult{
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Array accessor function that avoids bounds checking
|
// Array accessor function that avoids bounds checking
|
||||||
func GraphemeSegmentationResultFor(x uint16) GraphemeSegmentationResult {
|
func graphemesegmentationresult_for(x uint16) GraphemeSegmentationResult {
|
||||||
|
|
||||||
t1 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&graphemesegmentationresult_t1[0])) + uintptr(x>>graphemesegmentationresult_shift)*1)))
|
t1 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&graphemesegmentationresult_t1[0])) + uintptr(x>>graphemesegmentationresult_shift)*1)))
|
||||||
t1_shifted := (t1 << graphemesegmentationresult_shift) + (uintptr(x) & graphemesegmentationresult_mask)
|
t1_shifted := (t1 << graphemesegmentationresult_shift) + (uintptr(x) & graphemesegmentationresult_mask)
|
||||||
t2 := uintptr(*(*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(&graphemesegmentationresult_t2[0])) + t1_shifted*2)))
|
t2 := uintptr(*(*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(&graphemesegmentationresult_t2[0])) + t1_shifted*2)))
|
||||||
|
|||||||
@@ -7,6 +7,19 @@ import (
|
|||||||
|
|
||||||
var _ = fmt.Print
|
var _ = fmt.Print
|
||||||
|
|
||||||
|
// ensure_char_in_range returns value unchanged when it is below UNICODE_LIMIT
// (i.e. at most MAX_UNICODE) and 0 otherwise, without a conditional branch.
func ensure_char_in_range(value uint32) uint32 {
|
||||||
|
// Branchless equivalent of: if (value > MAX_UNICODE) value = 0
|
||||||
|
// diff is negative exactly when value < UNICODE_LIMIT
diff := int64(value) - UNICODE_LIMIT
|
||||||
|
// The right shift gives all ones for negative diff and all zeros for non-negative diff (diff == 0 when value == UNICODE_LIMIT).
// Go's >> on a signed integer is an arithmetic shift, and the uint32 conversion truncates -1 to 0xFFFFFFFF.
|
||||||
|
mask := uint32(diff >> 63)
|
||||||
|
// value & all-ones keeps value; value & 0 yields 0
return value & mask
|
||||||
|
}
|
||||||
|
|
||||||
|
// CharPropsFor returns the CharProps looked up by charprops_for for ch.
// The rune is converted to uint32 and passed through ensure_char_in_range
// first, so a value above MAX_UNICODE (including a negative rune, which
// converts to a large uint32) is looked up as code point 0.
func CharPropsFor(ch rune) CharProps {
|
||||||
|
// Clamp before the lookup: charprops_for itself performs no range check on its argument.
q := ensure_char_in_range(uint32(ch))
|
||||||
|
return charprops_for(q)
|
||||||
|
}
|
||||||
|
|
||||||
func IteratorOverGraphemes(text string) iter.Seq[string] {
|
func IteratorOverGraphemes(text string) iter.Seq[string] {
|
||||||
var s GraphemeSegmentationResult
|
var s GraphemeSegmentationResult
|
||||||
start_pos := 0
|
start_pos := 0
|
||||||
@@ -39,7 +52,7 @@ func (s *GraphemeSegmentationResult) Reset() {
|
|||||||
|
|
||||||
func (s GraphemeSegmentationResult) Step(ch CharProps) GraphemeSegmentationResult {
|
func (s GraphemeSegmentationResult) Step(ch CharProps) GraphemeSegmentationResult {
|
||||||
key := grapheme_segmentation_key(s, ch)
|
key := grapheme_segmentation_key(s, ch)
|
||||||
return GraphemeSegmentationResultFor(key)
|
return graphemesegmentationresult_for(key)
|
||||||
}
|
}
|
||||||
|
|
||||||
func Runewidth(code rune) int {
|
func Runewidth(code rune) int {
|
||||||
|
|||||||
Reference in New Issue
Block a user