mirror of
https://github.com/kovidgoyal/kitty
synced 2026-06-08 22:28:24 +02:00
Remove bounds checking for unicode table access in Go
This commit is contained in:
@@ -506,6 +506,10 @@ class Property(Protocol):
|
|||||||
def as_go(self) -> str:
|
def as_go(self) -> str:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def bitsize(cls) -> int:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
|
||||||
def get_types(sz: int) -> tuple[str, str]:
|
def get_types(sz: int) -> tuple[str, str]:
|
||||||
sz *= 8
|
sz *= 8
|
||||||
@@ -514,11 +518,15 @@ def get_types(sz: int) -> tuple[str, str]:
|
|||||||
|
|
||||||
def gen_multistage_table(
|
def gen_multistage_table(
|
||||||
c: Callable[..., None], g: Callable[..., None], t1: Sequence[int], t2: Sequence[int], t3: Sequence[Property], shift: int,
|
c: Callable[..., None], g: Callable[..., None], t1: Sequence[int], t2: Sequence[int], t3: Sequence[Property], shift: int,
|
||||||
|
for_go_type: str, maxval: int = 0
|
||||||
) -> None:
|
) -> None:
|
||||||
ctype_t1, gotype_t1 = get_types(getsize(t1))
|
t1_type_sz = getsize(t1)
|
||||||
|
ctype_t1, gotype_t1 = get_types(t1_type_sz)
|
||||||
mask = mask_for(shift)
|
mask = mask_for(shift)
|
||||||
name = t3[0].__class__.__name__
|
name = t3[0].__class__.__name__
|
||||||
ctype_t2, gotype_t2 = get_types(getsize(tuple(range(len(t3)))))
|
t2_type_sz = getsize(tuple(range(len(t3))))
|
||||||
|
ctype_t2, gotype_t2 = get_types(t2_type_sz)
|
||||||
|
t3_type_sz = t3[0].bitsize() // 8
|
||||||
c(f'static const char_type {name}_mask = {mask}u;')
|
c(f'static const char_type {name}_mask = {mask}u;')
|
||||||
c(f'static const char_type {name}_shift = {shift}u;')
|
c(f'static const char_type {name}_shift = {shift}u;')
|
||||||
c(f'static const {ctype_t1} {name}_t1[{len(t1)}] = ''{')
|
c(f'static const {ctype_t1} {name}_t1[{len(t1)}] = ''{')
|
||||||
@@ -546,6 +554,17 @@ def gen_multistage_table(
|
|||||||
g(f'\t{items}')
|
g(f'\t{items}')
|
||||||
g('}')
|
g('}')
|
||||||
|
|
||||||
|
check = f'x = max(0, min(x, {maxval}))' if maxval else ''
|
||||||
|
g(f'''
|
||||||
|
// Array accessor function that avoids bounds checking
|
||||||
|
func {name}For(x {for_go_type}) {name} {{
|
||||||
|
{check}
|
||||||
|
t1 := uintptr(*(*{gotype_t1})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t1[0])) + uintptr(x>>{lname}_shift)*{t1_type_sz})))
|
||||||
|
t1_shifted := (t1 << {lname}_shift) + (uintptr(x) & {lname}_mask)
|
||||||
|
t2 := uintptr(*(*{gotype_t2})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t2[0])) + t1_shifted*{t2_type_sz})))
|
||||||
|
return *(*{name})(unsafe.Pointer(uintptr(unsafe.Pointer(&{lname}_t3[0])) + t2*{t3_type_sz}))
|
||||||
|
}}
|
||||||
|
''')
|
||||||
|
|
||||||
width_shift = 4
|
width_shift = 4
|
||||||
|
|
||||||
@@ -919,7 +938,6 @@ func (r GraphemeSegmentationResult) State() (ans {base_type}) {{
|
|||||||
return bitfield_declaration_as_c('GraphemeSegmentationResult', fields, {'state': bits})
|
return bitfield_declaration_as_c('GraphemeSegmentationResult', fields, {'state': bits})
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class CharProps(NamedTuple):
|
class CharProps(NamedTuple):
|
||||||
|
|
||||||
width: int = 3
|
width: int = 3
|
||||||
@@ -1127,17 +1145,19 @@ def gen_char_props() -> None:
|
|||||||
with create_header('kitty/char-props-data.h', include_data_types=False) as c, open('tools/wcswidth/char-props-data.go', 'w') as gof:
|
with create_header('kitty/char-props-data.h', include_data_types=False) as c, open('tools/wcswidth/char-props-data.go', 'w') as gof:
|
||||||
gp = partial(print, file=gof)
|
gp = partial(print, file=gof)
|
||||||
gp('package wcswidth')
|
gp('package wcswidth')
|
||||||
|
gp('import "unsafe"')
|
||||||
generate_enum(c, gp, 'GraphemeBreakProperty', *grapheme_segmentation_maps, prefix='GBP_')
|
generate_enum(c, gp, 'GraphemeBreakProperty', *grapheme_segmentation_maps, prefix='GBP_')
|
||||||
generate_enum(c, gp, 'IndicConjunctBreak', *incb_map, prefix='ICB_')
|
generate_enum(c, gp, 'IndicConjunctBreak', *incb_map, prefix='ICB_')
|
||||||
cen('// UCBDeclaration {{''{')
|
cen('// UCBDeclaration {{''{')
|
||||||
|
cen(f'#define MAX_UNICODE ({sys.maxunicode}u)')
|
||||||
generate_enum(cen, gp, 'UnicodeCategory', 'Cn', *class_maps, prefix='UC_')
|
generate_enum(cen, gp, 'UnicodeCategory', 'Cn', *class_maps, prefix='UC_')
|
||||||
cen('// EndUCBDeclaration }}''}')
|
cen('// EndUCBDeclaration }}''}')
|
||||||
gp(make_bitfield('tools/wcswidth', 'CharProps', *CharProps.go_fields(), add_package=False)[1])
|
gp(make_bitfield('tools/wcswidth', 'CharProps', *CharProps.go_fields(), add_package=False)[1])
|
||||||
gp(make_bitfield('tools/wcswidth', 'GraphemeSegmentationResult', *GraphemeSegmentationResult.go_fields(), add_package=False)[1])
|
gp(make_bitfield('tools/wcswidth', 'GraphemeSegmentationResult', *GraphemeSegmentationResult.go_fields(), add_package=False)[1])
|
||||||
gp(CharProps.go_extra())
|
gp(CharProps.go_extra())
|
||||||
gp(GraphemeSegmentationResult.go_extra())
|
gp(GraphemeSegmentationResult.go_extra())
|
||||||
gen_multistage_table(c, gp, t1, t2, t3, t_shift)
|
gen_multistage_table(c, gp, t1, t2, t3, t_shift, 'rune', sys.maxunicode)
|
||||||
gen_multistage_table(c, gp, g1, g2, g3, g_shift)
|
gen_multistage_table(c, gp, g1, g2, g3, g_shift, 'uint16')
|
||||||
c(GraphemeSegmentationKey.code_to_convert_to_int())
|
c(GraphemeSegmentationKey.code_to_convert_to_int())
|
||||||
c(GraphemeSegmentationState.c_declaration())
|
c(GraphemeSegmentationState.c_declaration())
|
||||||
gp(GraphemeSegmentationKey.code_to_convert_to_int(for_go=True))
|
gp(GraphemeSegmentationKey.code_to_convert_to_int(for_go=True))
|
||||||
|
|||||||
@@ -11,6 +11,7 @@
|
|||||||
|
|
||||||
CharProps
|
CharProps
|
||||||
char_props_for(char_type ch) {
|
char_props_for(char_type ch) {
|
||||||
|
if (ch > MAX_UNICODE) ch = 0;
|
||||||
return CharProps_t3[CharProps_t2[(CharProps_t1[ch >> CharProps_shift] << CharProps_shift) + (ch & CharProps_mask)]];
|
return CharProps_t3[CharProps_t2[(CharProps_t1[ch >> CharProps_shift] << CharProps_shift) + (ch & CharProps_mask)]];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -104,6 +104,7 @@ static_assert(sizeof(GraphemeSegmentationResult) == sizeof(uint16_t), "Fix the o
|
|||||||
// EndGraphemeSegmentationResultDeclaration }}}
|
// EndGraphemeSegmentationResultDeclaration }}}
|
||||||
|
|
||||||
// UCBDeclaration {{{
|
// UCBDeclaration {{{
|
||||||
|
#define MAX_UNICODE (1114111u)
|
||||||
typedef enum UnicodeCategory {
|
typedef enum UnicodeCategory {
|
||||||
UC_Cn,
|
UC_Cn,
|
||||||
UC_Cc,
|
UC_Cc,
|
||||||
|
|||||||
20
tools/wcswidth/char-props-data.go
generated
20
tools/wcswidth/char-props-data.go
generated
@@ -1,5 +1,7 @@
|
|||||||
package wcswidth
|
package wcswidth
|
||||||
|
|
||||||
|
import "unsafe"
|
||||||
|
|
||||||
type GraphemeBreakProperty uint8
|
type GraphemeBreakProperty uint8
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -384,6 +386,15 @@ var charprops_t3 = [109]CharProps{
|
|||||||
((0 & 0b1) << 0) | ((CharProps(ICB_Extend) & 0b11) << 1) | ((CharProps(GBP_Extend) & 0b1111) << 3) | ((0 & 0b1) << 7) | ((0 & 0b1) << 8) | ((1 & 0b1) << 9) | ((0 & 0b1) << 10) | ((1 & 0b1) << 11) | ((0 & 0b1) << 12) | ((0 & 0b1) << 13) | ((CharProps(UC_Cf) & 0b11111) << 14) | ((0 & 0b1) << 19) | ((4 & 0b111) << 20), // 108
|
((0 & 0b1) << 0) | ((CharProps(ICB_Extend) & 0b11) << 1) | ((CharProps(GBP_Extend) & 0b1111) << 3) | ((0 & 0b1) << 7) | ((0 & 0b1) << 8) | ((1 & 0b1) << 9) | ((0 & 0b1) << 10) | ((1 & 0b1) << 11) | ((0 & 0b1) << 12) | ((0 & 0b1) << 13) | ((CharProps(UC_Cf) & 0b11111) << 14) | ((0 & 0b1) << 19) | ((4 & 0b111) << 20), // 108
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Array accessor function that avoids bounds checking
|
||||||
|
func CharPropsFor(x rune) CharProps {
|
||||||
|
x = max(0, min(x, 1114111))
|
||||||
|
t1 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&charprops_t1[0])) + uintptr(x>>charprops_shift)*1)))
|
||||||
|
t1_shifted := (t1 << charprops_shift) + (uintptr(x) & charprops_mask)
|
||||||
|
t2 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&charprops_t2[0])) + t1_shifted*1)))
|
||||||
|
return *(*CharProps)(unsafe.Pointer(uintptr(unsafe.Pointer(&charprops_t3[0])) + t2*4))
|
||||||
|
}
|
||||||
|
|
||||||
const graphemesegmentationresult_mask = 15
|
const graphemesegmentationresult_mask = 15
|
||||||
const graphemesegmentationresult_shift = 4
|
const graphemesegmentationresult_shift = 4
|
||||||
|
|
||||||
@@ -1026,6 +1037,15 @@ var graphemesegmentationresult_t3 = [630]GraphemeSegmentationResult{
|
|||||||
((GraphemeSegmentationResult(GBP_ZWJ) & 0b1111) << 0) | ((1 & 0b1) << 4) | ((0 & 0b1) << 5) | ((1 & 0b1) << 6) | ((1 & 0b1) << 7) | ((1 & 0b1) << 8) | ((0 & 0b1) << 9), // 629
|
((GraphemeSegmentationResult(GBP_ZWJ) & 0b1111) << 0) | ((1 & 0b1) << 4) | ((0 & 0b1) << 5) | ((1 & 0b1) << 6) | ((1 & 0b1) << 7) | ((1 & 0b1) << 8) | ((0 & 0b1) << 9), // 629
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Array accessor function that avoids bounds checking
|
||||||
|
func GraphemeSegmentationResultFor(x uint16) GraphemeSegmentationResult {
|
||||||
|
|
||||||
|
t1 := uintptr(*(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&graphemesegmentationresult_t1[0])) + uintptr(x>>graphemesegmentationresult_shift)*1)))
|
||||||
|
t1_shifted := (t1 << graphemesegmentationresult_shift) + (uintptr(x) & graphemesegmentationresult_mask)
|
||||||
|
t2 := uintptr(*(*uint16)(unsafe.Pointer(uintptr(unsafe.Pointer(&graphemesegmentationresult_t2[0])) + t1_shifted*2)))
|
||||||
|
return *(*GraphemeSegmentationResult)(unsafe.Pointer(uintptr(unsafe.Pointer(&graphemesegmentationresult_t3[0])) + t2*2))
|
||||||
|
}
|
||||||
|
|
||||||
func grapheme_segmentation_key(r GraphemeSegmentationResult, ch CharProps) uint16 {
|
func grapheme_segmentation_key(r GraphemeSegmentationResult, ch CharProps) uint16 {
|
||||||
return (r.State() << 7) | ch.GraphemeSegmentationProperty()
|
return (r.State() << 7) | ch.GraphemeSegmentationProperty()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,10 +7,6 @@ import (
|
|||||||
|
|
||||||
var _ = fmt.Print
|
var _ = fmt.Print
|
||||||
|
|
||||||
func CharPropsFor(ch rune) CharProps {
|
|
||||||
return charprops_t3[charprops_t2[(rune(charprops_t1[ch>>charprops_shift])<<charprops_shift)+(ch&charprops_mask)]]
|
|
||||||
}
|
|
||||||
|
|
||||||
func IteratorOverGraphemes(text string) iter.Seq[string] {
|
func IteratorOverGraphemes(text string) iter.Seq[string] {
|
||||||
var s GraphemeSegmentationResult
|
var s GraphemeSegmentationResult
|
||||||
start_pos := 0
|
start_pos := 0
|
||||||
@@ -43,11 +39,7 @@ func (s *GraphemeSegmentationResult) Reset() {
|
|||||||
|
|
||||||
func (s GraphemeSegmentationResult) Step(ch CharProps) GraphemeSegmentationResult {
|
func (s GraphemeSegmentationResult) Step(ch CharProps) GraphemeSegmentationResult {
|
||||||
key := grapheme_segmentation_key(s, ch)
|
key := grapheme_segmentation_key(s, ch)
|
||||||
t1 := uint16(graphemesegmentationresult_t1[key>>graphemesegmentationresult_shift]) << graphemesegmentationresult_shift
|
return GraphemeSegmentationResultFor(key)
|
||||||
t2 := graphemesegmentationresult_t2[t1+key&graphemesegmentationresult_mask]
|
|
||||||
ans := graphemesegmentationresult_t3[t2]
|
|
||||||
// fmt.Printf("state: %d gsp: %d -> key: %d t1: %d -> add_to_cell: %d\n", s.State(), ch.GraphemeSegmentationProperty(), key, t1, ans.Add_to_current_cell())
|
|
||||||
return ans
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func Runewidth(code rune) int {
|
func Runewidth(code rune) int {
|
||||||
|
|||||||
Reference in New Issue
Block a user