mirror of
https://github.com/kovidgoyal/kitty
synced 2026-06-06 01:05:48 +02:00
Remove bounds checking for unicode table access in Go
This commit is contained in:
@@ -506,6 +506,10 @@ class Property(Protocol):
|
||||
# Default stub on the Property protocol: returns an empty string.
# NOTE(review): presumably concrete property classes override this to return
# their Go source representation — confirm against the implementations.
def as_go(self) -> str:
|
||||
return ''
|
||||
|
||||
# Default stub on the Property protocol: reports a size of 0.
# gen_multistage_table integer-divides this value by 8 to obtain the per-entry
# byte stride it writes into the generated Go accessor for the t3 table.
@classmethod
|
||||
def bitsize(cls) -> int:
|
||||
return 0
|
||||
|
||||
|
||||
def get_types(sz: int) -> tuple[str, str]:
|
||||
sz *= 8
|
||||
@@ -514,11 +518,15 @@ def get_types(sz: int) -> tuple[str, str]:
|
||||
|
||||
def gen_multistage_table(
|
||||
c: Callable[..., None], g: Callable[..., None], t1: Sequence[int], t2: Sequence[int], t3: Sequence[Property], shift: int,
|
||||
for_go_type: str, maxval: int = 0
|
||||
) -> None:
|
||||
ctype_t1, gotype_t1 = get_types(getsize(t1))
|
||||
t1_type_sz = getsize(t1)
|
||||
ctype_t1, gotype_t1 = get_types(t1_type_sz)
|
||||
mask = mask_for(shift)
|
||||
name = t3[0].__class__.__name__
|
||||
ctype_t2, gotype_t2 = get_types(getsize(tuple(range(len(t3)))))
|
||||
t2_type_sz = getsize(tuple(range(len(t3))))
|
||||
ctype_t2, gotype_t2 = get_types(t2_type_sz)
|
||||
t3_type_sz = t3[0].bitsize() // 8
|
||||
c(f'static const char_type {name}_mask = {mask}u;')
|
||||
c(f'static const char_type {name}_shift = {shift}u;')
|
||||
c(f'static const {ctype_t1} {name}_t1[{len(t1)}] = ''{')
|
||||
@@ -546,6 +554,17 @@ def gen_multistage_table(
|
||||
g(f'\t{items}')
|
||||
g('}')
|
||||
|
||||
check = f'x = max(0, min(x, {maxval}))' if maxval else ''
|
||||
g(f'''
|
||||
// Array accessor function that avoids bounds checking
|
||||
func {name}For(x {for_go_type}) {name} {{
|
||||
{check}
|
||||
t1 := uintptr(*(*{gotype_t1})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t1[0])) + uintptr(x>>{lname}_shift)*{t1_type_sz})))
|
||||
t1_shifted := (t1 << {lname}_shift) + (uintptr(x) & {lname}_mask)
|
||||
t2 := uintptr(*(*{gotype_t2})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t2[0])) + t1_shifted*{t2_type_sz})))
|
||||
return *(*{name})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t3[0])) + t2*{t3_type_sz}))
|
||||
}}
|
||||
''')
|
||||
|
||||
width_shift = 4
|
||||
|
||||
@@ -919,7 +938,6 @@ func (r GraphemeSegmentationResult) State() (ans {base_type}) {{
|
||||
return bitfield_declaration_as_c('GraphemeSegmentationResult', fields, {'state': bits})
|
||||
|
||||
|
||||
|
||||
class CharProps(NamedTuple):
|
||||
|
||||
width: int = 3
|
||||
@@ -1127,17 +1145,19 @@ def gen_char_props() -> None:
|
||||
with create_header('kitty/char-props-data.h', include_data_types=False) as c, open('tools/wcswidth/char-props-data.go', 'w') as gof:
|
||||
gp = partial(print, file=gof)
|
||||
gp('package wcswidth')
|
||||
gp('import "unsafe"')
|
||||
generate_enum(c, gp, 'GraphemeBreakProperty', *grapheme_segmentation_maps, prefix='GBP_')
|
||||
generate_enum(c, gp, 'IndicConjunctBreak', *incb_map, prefix='ICB_')
|
||||
cen('// UCBDeclaration {{''{')
|
||||
cen(f'#define MAX_UNICODE ({sys.maxunicode}u)')
|
||||
generate_enum(cen, gp, 'UnicodeCategory', 'Cn', *class_maps, prefix='UC_')
|
||||
cen('// EndUCBDeclaration }}''}')
|
||||
gp(make_bitfield('tools/wcswidth', 'CharProps', *CharProps.go_fields(), add_package=False)[1])
|
||||
gp(make_bitfield('tools/wcswidth', 'GraphemeSegmentationResult', *GraphemeSegmentationResult.go_fields(), add_package=False)[1])
|
||||
gp(CharProps.go_extra())
|
||||
gp(GraphemeSegmentationResult.go_extra())
|
||||
gen_multistage_table(c, gp, t1, t2, t3, t_shift)
|
||||
gen_multistage_table(c, gp, g1, g2, g3, g_shift)
|
||||
gen_multistage_table(c, gp, t1, t2, t3, t_shift, 'rune', sys.maxunicode)
|
||||
gen_multistage_table(c, gp, g1, g2, g3, g_shift, 'uint16')
|
||||
c(GraphemeSegmentationKey.code_to_convert_to_int())
|
||||
c(GraphemeSegmentationState.c_declaration())
|
||||
gp(GraphemeSegmentationKey.code_to_convert_to_int(for_go=True))
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
|
||||
// Return the CharProps entry for ch using the three-stage lookup
// CharProps_t1 -> CharProps_t2 -> CharProps_t3.
CharProps
|
||||
char_props_for(char_type ch) {
|
||||
// Values above MAX_UNICODE are looked up as code point 0 instead.
// NOTE(review): presumably this keeps the t1 index inside the generated table — confirm table size.
if (ch > MAX_UNICODE) ch = 0;
|
||||
// t1 is indexed by the high bits of ch; its entry, shifted back up and combined
// with the low (masked) bits of ch, indexes t2; the t2 entry indexes t3.
return CharProps_t3[CharProps_t2[(CharProps_t1[ch >> CharProps_shift] << CharProps_shift) + (ch & CharProps_mask)]];
|
||||
}
|
||||
|
||||
|
||||
@@ -104,6 +104,7 @@ static_assert(sizeof(GraphemeSegmentationResult) == sizeof(uint16_t), "Fix the o
|
||||
// EndGraphemeSegmentationResultDeclaration }}}
|
||||
|
||||
// UCBDeclaration {{{
|
||||
#define MAX_UNICODE (1114111u)
|
||||
typedef enum UnicodeCategory {
|
||||
UC_Cn,
|
||||
UC_Cc,
|
||||
|
||||
20
tools/wcswidth/char-props-data.go
generated
20
tools/wcswidth/char-props-data.go
generated
@@ -1,5 +1,7 @@
|
||||
package wcswidth
|
||||
|
||||
import "unsafe"
|
||||
|
||||
type GraphemeBreakProperty uint8
|
||||
|
||||
const (
|
||||
@@ -384,6 +386,15 @@ var charprops_t3 = [109]CharProps{
|
||||
((0 & 0b1) << 0) | ((CharProps(ICB_Extend) & 0b11) << 1) | ((CharProps(GBP_Extend) & 0b1111) << 3) | ((0 & 0b1) << 7) | ((0 & 0b1) << 8) | ((1 & 0b1) << 9) | ((0 & 0b1) << 10) | ((1 & 0b1) << 11) | ((0 & 0b1) << 12) | ((0 & 0b1) << 13) | ((CharProps(UC_Cf) & 0b11111) << 14) | ((0 & 0b1) << 19) | ((4 & 0b111) << 20), // 108
|
||||
}
|
||||
|
||||
// CharPropsFor returns the CharProps entry for x using a three-stage table
// lookup (charprops_t1 -> charprops_t2 -> charprops_t3). The tables are read
// through unsafe pointer arithmetic rather than index expressions, so the
// accesses are not bounds checked by Go.
|
||||
func CharPropsFor(x rune) CharProps {
|
||||
// Clamp x into [0, 1114111] before it is used to compute any offset, since
// nothing below validates the computed addresses.
x = max(0, min(x, 1114111))
|
||||
// Stage 1: read the t1 entry at index x>>charprops_shift; the *1 multiplier
// is the byte size of one t1 element (uint8).
t1 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&charprops_t1[0])) + uintptr(x>>charprops_shift)*1)))
|
||||
// Combine the t1 entry (shifted back up) with the low, masked bits of x to
// form the index into t2.
t1_shifted := (t1 << charprops_shift) + (uintptr(x) & charprops_mask)
|
||||
// Stage 2: read the t2 entry (uint8, 1 byte per element).
t2 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&charprops_t2[0])) + t1_shifted*1)))
|
||||
// Stage 3: the t2 entry indexes t3; each element is addressed with a 4-byte stride.
return *(*CharProps)(unsafe.Pointer(uintptr(unsafe.Pointer(&charprops_t3[0])) + t2*4))
|
||||
}
|
||||
|
||||
// Mask and shift used by GraphemeSegmentationResultFor to split a key into
// its stage-one index (key >> shift) and its low bits (key & mask).
// The mask equals (1 << shift) - 1.
const graphemesegmentationresult_mask = 15
|
||||
const graphemesegmentationresult_shift = 4
|
||||
|
||||
@@ -1026,6 +1037,15 @@ var graphemesegmentationresult_t3 = [630]GraphemeSegmentationResult{
|
||||
((GraphemeSegmentationResult(GBP_ZWJ) & 0b1111) << 0) | ((1 & 0b1) << 4) | ((0 & 0b1) << 5) | ((1 & 0b1) << 6) | ((1 & 0b1) << 7) | ((1 & 0b1) << 8) | ((0 & 0b1) << 9), // 629
|
||||
}
|
||||
|
||||
// GraphemeSegmentationResultFor returns the GraphemeSegmentationResult entry
// for key x using a three-stage table lookup (graphemesegmentationresult_t1 ->
// _t2 -> _t3). The tables are read through unsafe pointer arithmetic rather
// than index expressions, so the accesses are not bounds checked by Go, and
// x is not clamped here.
// NOTE(review): presumably every uint16 key that callers can produce maps
// inside the tables — confirm against the generator.
|
||||
func GraphemeSegmentationResultFor(x uint16) GraphemeSegmentationResult {
|
||||
|
||||
// Stage 1: read the t1 entry at index x>>shift (uint8, 1 byte per element).
t1 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&graphemesegmentationresult_t1[0])) + uintptr(x>>graphemesegmentationresult_shift)*1)))
|
||||
// Combine the t1 entry (shifted back up) with the low, masked bits of x to
// form the index into t2.
t1_shifted := (t1 << graphemesegmentationresult_shift) + (uintptr(x) & graphemesegmentationresult_mask)
|
||||
// Stage 2: read the t2 entry (uint16, 2 bytes per element).
t2 := uintptr(*(*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(&graphemesegmentationresult_t2[0])) + t1_shifted*2)))
|
||||
// Stage 3: the t2 entry indexes t3; each element is addressed with a 2-byte stride.
return *(*GraphemeSegmentationResult)(unsafe.Pointer(uintptr(unsafe.Pointer(&graphemesegmentationresult_t3[0])) + t2*2))
|
||||
}
|
||||
|
||||
// grapheme_segmentation_key builds the uint16 lookup key for a segmentation
// step: the state of r shifted left by 7 bits, OR-ed with the grapheme
// segmentation property of ch.
func grapheme_segmentation_key(r GraphemeSegmentationResult, ch CharProps) uint16 {
|
||||
return (r.State() << 7) | ch.GraphemeSegmentationProperty()
|
||||
}
|
||||
|
||||
@@ -7,10 +7,6 @@ import (
|
||||
|
||||
var _ = fmt.Print
|
||||
|
||||
// CharPropsFor returns the CharProps entry for ch via the three-stage table
// charprops_t1 -> charprops_t2 -> charprops_t3, using ordinary index
// expressions (which Go bounds-checks at run time).
// NOTE(review): the surrounding hunk header shrinks this region from 10 to 6
// lines, so this appears to be the definition the commit removes — confirm.
func CharPropsFor(ch rune) CharProps {
|
||||
return charprops_t3[charprops_t2[(rune(charprops_t1[ch>>charprops_shift])<<charprops_shift)+(ch&charprops_mask)]]
|
||||
}
|
||||
|
||||
func IteratorOverGraphemes(text string) iter.Seq[string] {
|
||||
var s GraphemeSegmentationResult
|
||||
start_pos := 0
|
||||
@@ -43,11 +39,7 @@ func (s *GraphemeSegmentationResult) Reset() {
|
||||
|
||||
// Step returns the segmentation result reached from s after consuming a
// character with properties ch. The lookup key is built by
// grapheme_segmentation_key from s and ch.
// NOTE(review): this span is a diff rendering that interleaves the old body
// (explicit table indexing ending in "return ans") with the new one (the
// single call to GraphemeSegmentationResultFor); as shown, the final return
// is unreachable — confirm against the real file.
func (s GraphemeSegmentationResult) Step(ch CharProps) GraphemeSegmentationResult {
|
||||
key := grapheme_segmentation_key(s, ch)
|
||||
// Stage-one entry for the high bits of key, shifted back up.
t1 := uint16(graphemesegmentationresult_t1[key>>graphemesegmentationresult_shift]) << graphemesegmentationresult_shift
|
||||
// In Go, & binds tighter than +, so the index is t1 + (key & mask).
t2 := graphemesegmentationresult_t2[t1+key&graphemesegmentationresult_mask]
|
||||
ans := graphemesegmentationresult_t3[t2]
|
||||
// fmt.Printf("state: %d gsp: %d -> key: %d t1: %d -> add_to_cell: %d\n", s.State(), ch.GraphemeSegmentationProperty(), key, t1, ans.Add_to_current_cell())
|
||||
return ans
|
||||
return GraphemeSegmentationResultFor(key)
|
||||
}
|
||||
|
||||
func Runewidth(code rune) int {
|
||||
|
||||
Reference in New Issue
Block a user