mirror of
https://github.com/kovidgoyal/kitty
synced 2026-06-08 14:18:26 +02:00
More work on seg lookup tables
This commit is contained in:
@@ -447,7 +447,11 @@ def getsize(data: Iterable[int]) -> Literal[1, 2, 4]:
|
|||||||
return 4
|
return 4
|
||||||
|
|
||||||
|
|
||||||
def splitbins[T: Hashable](t: tuple[T, ...], property_size: int, use_fixed_shift: int = 0) -> tuple[list[int], list[int], list[T], int, int, int]:
|
def mask_for(bits: int) -> int:
|
||||||
|
return ~((~0) << bits)
|
||||||
|
|
||||||
|
|
||||||
|
def splitbins[T: Hashable](t: tuple[T, ...], property_size: int, use_fixed_shift: int = 0) -> tuple[list[int], list[int], list[T], int, int]:
|
||||||
if use_fixed_shift:
|
if use_fixed_shift:
|
||||||
candidates = range(use_fixed_shift, use_fixed_shift + 1)
|
candidates = range(use_fixed_shift, use_fixed_shift + 1)
|
||||||
else:
|
else:
|
||||||
@@ -487,8 +491,7 @@ def splitbins[T: Hashable](t: tuple[T, ...], property_size: int, use_fixed_shift
|
|||||||
best = t1, t2, shift
|
best = t1, t2, shift
|
||||||
bytesz = b
|
bytesz = b
|
||||||
t1, t2, shift = best
|
t1, t2, shift = best
|
||||||
mask = ~((~0) << shift)
|
return t1, t2, t3, shift, bytesz
|
||||||
return t1, t2, t3, shift, mask, bytesz
|
|
||||||
|
|
||||||
|
|
||||||
class Property(Protocol):
|
class Property(Protocol):
|
||||||
@@ -507,9 +510,10 @@ def get_types(sz: int) -> tuple[str, str]:
|
|||||||
|
|
||||||
|
|
||||||
def gen_multistage_table(
|
def gen_multistage_table(
|
||||||
c: Callable[..., None], g: Callable[..., None], t1: Sequence[int], t2: Sequence[int], t3: Sequence[Property], shift: int, mask: int
|
c: Callable[..., None], g: Callable[..., None], t1: Sequence[int], t2: Sequence[int], t3: Sequence[Property], shift: int,
|
||||||
) -> None:
|
) -> None:
|
||||||
ctype_t1, gotype_t1 = get_types(getsize(t1))
|
ctype_t1, gotype_t1 = get_types(getsize(t1))
|
||||||
|
mask = mask_for(shift)
|
||||||
name = t3[0].__class__.__name__
|
name = t3[0].__class__.__name__
|
||||||
ctype_t2, gotype_t2 = get_types(getsize(tuple(range(len(t3)))))
|
ctype_t2, gotype_t2 = get_types(getsize(tuple(range(len(t3)))))
|
||||||
c(f'static const char_type {name}_mask = {mask}u;')
|
c(f'static const char_type {name}_mask = {mask}u;')
|
||||||
@@ -565,7 +569,7 @@ def bitfield_from_int(
|
|||||||
# first field is most significant, last field is least significant
|
# first field is most significant, last field is least significant
|
||||||
args: dict[str, str | bool] = {}
|
args: dict[str, str | bool] = {}
|
||||||
for f, shift in fields.items():
|
for f, shift in fields.items():
|
||||||
mask = ~((~0) << shift)
|
mask = mask_for(shift)
|
||||||
val = x & mask
|
val = x & mask
|
||||||
if shift == 1:
|
if shift == 1:
|
||||||
args[f] = bool(val)
|
args[f] = bool(val)
|
||||||
@@ -631,24 +635,37 @@ control_grapheme_breaks = 'CR', 'LF', 'Control'
|
|||||||
linker_or_extend = 'Linker', 'Extend'
|
linker_or_extend = 'Linker', 'Extend'
|
||||||
|
|
||||||
|
|
||||||
def bitfield_declaration_as_c(name: str, fields: dict[str, int]) -> str:
|
def bitfield_declaration_as_c(name: str, fields: dict[str, int], *alternate_fields: dict[str, int]) -> str:
|
||||||
bits = sum(fields.values())
|
# empty in MSB, then top to bottom with bottom at LSB
|
||||||
base_type = f'uint{clamped_bitsize(bits)}_t'
|
base_size = clamped_bitsize(sum(fields.values()))
|
||||||
ans = [f'// {name}Declaration', f'// Uses {bits} bits', f'typedef union {name} {{', ' struct {']
|
base_type = f'uint{base_size}_t'
|
||||||
ans.append('#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__')
|
ans = [f'// {name}Declaration: uses {sum(fields.values())} bits {{''{{', f'typedef union {name} {{']
|
||||||
|
def struct(fields: dict[str, int]) -> Iterator[str]:
|
||||||
|
if not fields:
|
||||||
|
return
|
||||||
|
empty = base_size - sum(fields.values())
|
||||||
|
yield ' struct __attribute__((packed)) {'
|
||||||
|
yield '#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__'
|
||||||
for f, width in reversed(fields.items()):
|
for f, width in reversed(fields.items()):
|
||||||
ans.append(f' uint8_t {f} : {width};')
|
yield f' uint{clamped_bitsize(width)}_t {f} : {width};'
|
||||||
ans.append('#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__')
|
if empty:
|
||||||
|
yield f' uint{clamped_bitsize(empty)}_t : {empty};'
|
||||||
|
yield '#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__'
|
||||||
|
if empty:
|
||||||
|
yield f' uint{clamped_bitsize(empty)}_t : {empty};'
|
||||||
for f, width in fields.items():
|
for f, width in fields.items():
|
||||||
ans.append(f' uint8_t {f} : {width};')
|
yield f' uint{clamped_bitsize(width)}_t {f} : {width};'
|
||||||
ans.append('#else')
|
yield '#else'
|
||||||
ans.append('#error "Unsupported endianness"')
|
yield '#error "Unsupported endianness"'
|
||||||
ans.append('#endif')
|
yield '#endif'
|
||||||
ans.append(' };')
|
yield ' };'
|
||||||
|
ans.extend(struct(fields))
|
||||||
|
for fields in alternate_fields:
|
||||||
|
ans.extend(struct(fields))
|
||||||
ans.append(f' {base_type} val;')
|
ans.append(f' {base_type} val;')
|
||||||
ans.append(f'}} {name};')
|
ans.append(f'}} {name};')
|
||||||
ans.append(f'static_assert(sizeof({name}) == sizeof({base_type}), "Fix the ordering of {name}");')
|
ans.append(f'static_assert(sizeof({name}) == sizeof({base_type}), "Fix the ordering of {name}");')
|
||||||
ans.append(f'// End{name}Declaration')
|
ans.append(f'// End{name}Declaration }}''}}')
|
||||||
return '\n'.join(ans)
|
return '\n'.join(ans)
|
||||||
|
|
||||||
|
|
||||||
@@ -779,20 +796,18 @@ class GraphemeSegmentationKey(NamedTuple):
|
|||||||
@classmethod
|
@classmethod
|
||||||
def from_int(cls, x: int) -> 'GraphemeSegmentationKey':
|
def from_int(cls, x: int) -> 'GraphemeSegmentationKey':
|
||||||
shift = cls.char.used_bits()
|
shift = cls.char.used_bits()
|
||||||
mask = ~((~0) << shift)
|
mask = mask_for(shift)
|
||||||
state = GraphemeSegmentationState.from_int(x >> shift)
|
state = GraphemeSegmentationState.from_int(x >> shift)
|
||||||
char = GraphemeSegmentationProps.from_int(x & mask)
|
char = GraphemeSegmentationProps.from_int(x & mask)
|
||||||
return GraphemeSegmentationKey(state, char)
|
return GraphemeSegmentationKey(state, char)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def as_int(cls, for_go: bool = False) -> str:
|
def as_int(cls, for_go: bool = False) -> str:
|
||||||
cp = bit_field_as_int(
|
lines = []
|
||||||
'CharProps', {f: int(CharProps._field_defaults[f]) for f in cls.char._fields}, function_name_suffix='_as_key', for_go=for_go)
|
|
||||||
lines = [cp, '']
|
|
||||||
shift = cls.char.used_bits()
|
shift = cls.char.used_bits()
|
||||||
base_type = f'uint{cls.state.bitsize()}_t'
|
base_type = f'uint{cls.state.bitsize()}_t'
|
||||||
lines.append(f'static inline {base_type} {cls.__name__}(GraphemeSegmentation state, CharProps ch)' '{')
|
lines.append(f'static inline {base_type} {cls.__name__}(GraphemeSegmentation state, CharProps ch)' '{')
|
||||||
lines.append(f'\treturn (state.val << {shift}) | (CharProps_as_key(ch)));')
|
lines.append(f'\treturn (state.val << {shift}) | ch.grapheme_segmentation_property;')
|
||||||
lines.append('}')
|
lines.append('}')
|
||||||
return '\n'.join(lines)
|
return '\n'.join(lines)
|
||||||
|
|
||||||
@@ -847,11 +862,8 @@ class GraphemeSegmentationResult(NamedTuple):
|
|||||||
class CharProps(NamedTuple):
|
class CharProps(NamedTuple):
|
||||||
|
|
||||||
width: int = 3
|
width: int = 3
|
||||||
is_extended_pictographic: bool = True
|
|
||||||
grapheme_break: str = '' # set at runtime
|
|
||||||
indic_conjunct_break: str = '' # set at runtime
|
|
||||||
category: str = '' # set at runtime
|
|
||||||
is_emoji: bool = True
|
is_emoji: bool = True
|
||||||
|
category: str = '' # set at runtime
|
||||||
is_emoji_presentation_base: bool = True
|
is_emoji_presentation_base: bool = True
|
||||||
|
|
||||||
# derived properties for fast lookup
|
# derived properties for fast lookup
|
||||||
@@ -862,6 +874,11 @@ class CharProps(NamedTuple):
|
|||||||
is_word_char: bool = True
|
is_word_char: bool = True
|
||||||
is_punctuation: bool = True
|
is_punctuation: bool = True
|
||||||
|
|
||||||
|
# needed for grapheme segmentation set as LSB bits for easy conversion to GraphemeSegmentationProps
|
||||||
|
grapheme_break: str = '' # set at runtime
|
||||||
|
indic_conjunct_break: str = '' # set at runtime
|
||||||
|
is_extended_pictographic: bool = True
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def bitsize(cls) -> int:
|
def bitsize(cls) -> int:
|
||||||
ans = sum(int(cls._field_defaults[f]) for f in cls._fields)
|
ans = sum(int(cls._field_defaults[f]) for f in cls._fields)
|
||||||
@@ -921,20 +938,15 @@ class CharProps(NamedTuple):
|
|||||||
return '{' + ', '.join(parts) + '}'
|
return '{' + ', '.join(parts) + '}'
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def c_declaration(cls) -> str:
|
def fields(cls) -> dict[str, int]:
|
||||||
base_type = f'uint{cls.bitsize()}_t'
|
return {'shifted_width' if f == 'width' else f: int(cls._field_defaults[f]) for f in cls._fields}
|
||||||
bits = sum(int(cls._field_defaults[f]) for f in cls._fields)
|
|
||||||
ans = ['// CharPropsDeclaration', f'// Uses {bits} bits', 'typedef union CharProps {', ' struct {']
|
|
||||||
|
|
||||||
for f in cls._fields:
|
@classmethod
|
||||||
n = 'shifted_width' if f == 'width' else f
|
def c_declaration(cls) -> str:
|
||||||
ans.append(f' uint8_t {n} : {int(cls._field_defaults[f])};')
|
alternate = {
|
||||||
ans.append(' };')
|
'grapheme_segmentation_property': sum(int(cls._field_defaults[f]) for f in GraphemeSegmentationProps._fields)
|
||||||
ans.append(f' {base_type} val;')
|
}
|
||||||
ans.append('} CharProps;')
|
return bitfield_declaration_as_c(cls.__name__, cls.fields(), alternate)
|
||||||
ans.append(f'static_assert(sizeof(CharProps) == sizeof({base_type}), "Fix the ordering of CharProps");')
|
|
||||||
ans.append('// EndCharPropsDeclaration')
|
|
||||||
return '\n'.join(ans)
|
|
||||||
|
|
||||||
|
|
||||||
def generate_enum(p: Callable[..., None], gp: Callable[..., None], name: str, *items: str, prefix: str = '') -> None:
|
def generate_enum(p: Callable[..., None], gp: Callable[..., None], name: str, *items: str, prefix: str = '') -> None:
|
||||||
@@ -968,7 +980,7 @@ def top_level_category(q: str) -> set[int]:
|
|||||||
|
|
||||||
def patch_declaration(name: str, decl: str, raw: str) -> str:
|
def patch_declaration(name: str, decl: str, raw: str) -> str:
|
||||||
begin = f'// {name}Declaration'
|
begin = f'// {name}Declaration'
|
||||||
end = f'// End{name}Declaration'
|
end = f'// End{name}Declaration }}''}}'
|
||||||
return re.sub(rf'{begin}.+?{end}', decl.rstrip(), raw, flags=re.DOTALL)
|
return re.sub(rf'{begin}.+?{end}', decl.rstrip(), raw, flags=re.DOTALL)
|
||||||
|
|
||||||
|
|
||||||
@@ -1021,7 +1033,7 @@ def gen_char_props() -> None:
|
|||||||
grapheme_break=x.grapheme_break, indic_conjunct_break=x.indic_conjunct_break,
|
grapheme_break=x.grapheme_break, indic_conjunct_break=x.indic_conjunct_break,
|
||||||
is_extended_pictographic=x.is_extended_pictographic) for x in prop_array)
|
is_extended_pictographic=x.is_extended_pictographic) for x in prop_array)
|
||||||
test_grapheme_segmentation(gsprops)
|
test_grapheme_segmentation(gsprops)
|
||||||
t1, t2, t3, shift, mask, bytesz = splitbins(prop_array, CharProps.bitsize() // 8)
|
t1, t2, t3, shift, bytesz = splitbins(prop_array, CharProps.bitsize() // 8)
|
||||||
print(f'Size of character properties table: {bytesz/1024:.1f}KB')
|
print(f'Size of character properties table: {bytesz/1024:.1f}KB')
|
||||||
|
|
||||||
from .bitfields import make_bitfield
|
from .bitfields import make_bitfield
|
||||||
@@ -1041,7 +1053,7 @@ def gen_char_props() -> None:
|
|||||||
func (s CharProps) Width() int {{
|
func (s CharProps) Width() int {{
|
||||||
return int(s.Shifted_width()) - {width_shift}
|
return int(s.Shifted_width()) - {width_shift}
|
||||||
}}''')
|
}}''')
|
||||||
gen_multistage_table(c, gp, t1, t2, t3, shift, mask)
|
gen_multistage_table(c, gp, t1, t2, t3, shift)
|
||||||
gofmt(gof.name)
|
gofmt(gof.name)
|
||||||
with open('kitty/char-props.h', 'r+') as f:
|
with open('kitty/char-props.h', 'r+') as f:
|
||||||
raw = f.read()
|
raw = f.read()
|
||||||
|
|||||||
Reference in New Issue
Block a user