Remove bounds checking for unicode table access in Go

This commit is contained in:
Kovid Goyal
2025-04-01 10:41:17 +05:30
parent de1adeee5e
commit 6ecd78d9db
5 changed files with 48 additions and 14 deletions

View File

@@ -506,6 +506,10 @@ class Property(Protocol):
def as_go(self) -> str:
return ''
@classmethod
def bitsize(cls) -> int:
return 0
def get_types(sz: int) -> tuple[str, str]:
sz *= 8
@@ -514,11 +518,15 @@ def get_types(sz: int) -> tuple[str, str]:
def gen_multistage_table(
c: Callable[..., None], g: Callable[..., None], t1: Sequence[int], t2: Sequence[int], t3: Sequence[Property], shift: int,
for_go_type: str, maxval: int = 0
) -> None:
ctype_t1, gotype_t1 = get_types(getsize(t1))
t1_type_sz = getsize(t1)
ctype_t1, gotype_t1 = get_types(t1_type_sz)
mask = mask_for(shift)
name = t3[0].__class__.__name__
ctype_t2, gotype_t2 = get_types(getsize(tuple(range(len(t3)))))
t2_type_sz = getsize(tuple(range(len(t3))))
ctype_t2, gotype_t2 = get_types(t2_type_sz)
t3_type_sz = t3[0].bitsize() // 8
c(f'static const char_type {name}_mask = {mask}u;')
c(f'static const char_type {name}_shift = {shift}u;')
c(f'static const {ctype_t1} {name}_t1[{len(t1)}] = ''{')
@@ -546,6 +554,17 @@ def gen_multistage_table(
g(f'\t{items}')
g('}')
check = f'x = max(0, min(x, {maxval}))' if maxval else ''
g(f'''
// Array accessor function that avoids bounds checking
func {name}For(x {for_go_type}) {name} {{
{check}
t1 := uintptr(*(*{gotype_t1})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t1[0])) + uintptr(x>>{lname}_shift)*{t1_type_sz})))
t1_shifted := (t1 << {lname}_shift) + (uintptr(x) & {lname}_mask)
t2 := uintptr(*(*{gotype_t2})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t2[0])) + t1_shifted*{t2_type_sz})))
return *(*{name})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t3[0])) + t2*{t3_type_sz}))
}}
''')
width_shift = 4
@@ -919,7 +938,6 @@ func (r GraphemeSegmentationResult) State() (ans {base_type}) {{
return bitfield_declaration_as_c('GraphemeSegmentationResult', fields, {'state': bits})
class CharProps(NamedTuple):
width: int = 3
@@ -1127,17 +1145,19 @@ def gen_char_props() -> None:
with create_header('kitty/char-props-data.h', include_data_types=False) as c, open('tools/wcswidth/char-props-data.go', 'w') as gof:
gp = partial(print, file=gof)
gp('package wcswidth')
gp('import "unsafe"')
generate_enum(c, gp, 'GraphemeBreakProperty', *grapheme_segmentation_maps, prefix='GBP_')
generate_enum(c, gp, 'IndicConjunctBreak', *incb_map, prefix='ICB_')
cen('// UCBDeclaration {{''{')
cen(f'#define MAX_UNICODE ({sys.maxunicode}u)')
generate_enum(cen, gp, 'UnicodeCategory', 'Cn', *class_maps, prefix='UC_')
cen('// EndUCBDeclaration }}''}')
gp(make_bitfield('tools/wcswidth', 'CharProps', *CharProps.go_fields(), add_package=False)[1])
gp(make_bitfield('tools/wcswidth', 'GraphemeSegmentationResult', *GraphemeSegmentationResult.go_fields(), add_package=False)[1])
gp(CharProps.go_extra())
gp(GraphemeSegmentationResult.go_extra())
gen_multistage_table(c, gp, t1, t2, t3, t_shift)
gen_multistage_table(c, gp, g1, g2, g3, g_shift)
gen_multistage_table(c, gp, t1, t2, t3, t_shift, 'rune', sys.maxunicode)
gen_multistage_table(c, gp, g1, g2, g3, g_shift, 'uint16')
c(GraphemeSegmentationKey.code_to_convert_to_int())
c(GraphemeSegmentationState.c_declaration())
gp(GraphemeSegmentationKey.code_to_convert_to_int(for_go=True))