diff --git a/gen/wcwidth.py b/gen/wcwidth.py index b1bdb1b20..a91ceabd6 100755 --- a/gen/wcwidth.py +++ b/gen/wcwidth.py @@ -447,7 +447,11 @@ def getsize(data: Iterable[int]) -> Literal[1, 2, 4]: return 4 -def splitbins[T: Hashable](t: tuple[T, ...], property_size: int, use_fixed_shift: int = 0) -> tuple[list[int], list[int], list[T], int, int, int]: +def mask_for(bits: int) -> int: + return ~((~0) << bits) + + +def splitbins[T: Hashable](t: tuple[T, ...], property_size: int, use_fixed_shift: int = 0) -> tuple[list[int], list[int], list[T], int, int]: if use_fixed_shift: candidates = range(use_fixed_shift, use_fixed_shift + 1) else: @@ -487,8 +491,7 @@ def splitbins[T: Hashable](t: tuple[T, ...], property_size: int, use_fixed_shift best = t1, t2, shift bytesz = b t1, t2, shift = best - mask = ~((~0) << shift) - return t1, t2, t3, shift, mask, bytesz + return t1, t2, t3, shift, bytesz class Property(Protocol): @@ -507,9 +510,10 @@ def get_types(sz: int) -> tuple[str, str]: def gen_multistage_table( - c: Callable[..., None], g: Callable[..., None], t1: Sequence[int], t2: Sequence[int], t3: Sequence[Property], shift: int, mask: int + c: Callable[..., None], g: Callable[..., None], t1: Sequence[int], t2: Sequence[int], t3: Sequence[Property], shift: int, ) -> None: ctype_t1, gotype_t1 = get_types(getsize(t1)) + mask = mask_for(shift) name = t3[0].__class__.__name__ ctype_t2, gotype_t2 = get_types(getsize(tuple(range(len(t3))))) c(f'static const char_type {name}_mask = {mask}u;') @@ -565,7 +569,7 @@ def bitfield_from_int( # first field is most significant, last field is least significant args: dict[str, str | bool] = {} for f, shift in fields.items(): - mask = ~((~0) << shift) + mask = mask_for(shift) val = x & mask if shift == 1: args[f] = bool(val) @@ -631,24 +635,37 @@ control_grapheme_breaks = 'CR', 'LF', 'Control' linker_or_extend = 'Linker', 'Extend' -def bitfield_declaration_as_c(name: str, fields: dict[str, int]) -> str: - bits = sum(fields.values()) - base_type = f'uint{clamped_bitsize(bits)}_t' - ans = [f'// {name}Declaration', f'// Uses {bits} bits', f'typedef union {name} {{', ' struct {'] - ans.append('#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__') - for f, width in reversed(fields.items()): - ans.append(f' uint8_t {f} : {width};') - ans.append('#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__') - for f, width in fields.items(): - ans.append(f' uint8_t {f} : {width};') - ans.append('#else') - ans.append('#error "Unsupported endianness"') - ans.append('#endif') - ans.append(' };') +def bitfield_declaration_as_c(name: str, fields: dict[str, int], *alternate_fields: dict[str, int]) -> str: + # empty in MSB, then top to bottom with bottom at LSB + base_size = clamped_bitsize(sum(fields.values())) + base_type = f'uint{base_size}_t' + ans = [f'// {name}Declaration: uses {sum(fields.values())} bits {{''{{', f'typedef union {name} {{'] + def struct(fields: dict[str, int]) -> Iterator[str]: + if not fields: + return + empty = base_size - sum(fields.values()) + yield ' struct __attribute__((packed)) {' + yield '#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__' + for f, width in reversed(fields.items()): + yield f' uint{clamped_bitsize(width)}_t {f} : {width};' + if empty: + yield f' uint{clamped_bitsize(empty)}_t : {empty};' + yield '#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__' + if empty: + yield f' uint{clamped_bitsize(empty)}_t : {empty};' + for f, width in fields.items(): + yield f' uint{clamped_bitsize(width)}_t {f} : {width};' + yield '#else' + yield '#error "Unsupported endianness"' + yield '#endif' + yield ' };' + ans.extend(struct(fields)) + for fields in alternate_fields: + ans.extend(struct(fields)) ans.append(f' {base_type} val;') ans.append(f'}} {name};') ans.append(f'static_assert(sizeof({name}) == sizeof({base_type}), "Fix the ordering of {name}");') - ans.append(f'// End{name}Declaration') + ans.append(f'// End{name}Declaration }}''}}') return '\n'.join(ans) @@ -779,20 +796,18 @@ class GraphemeSegmentationKey(NamedTuple): @classmethod def from_int(cls, x: int) -> 'GraphemeSegmentationKey': shift = cls.char.used_bits() - mask = ~((~0) << shift) + mask = mask_for(shift) state = GraphemeSegmentationState.from_int(x >> shift) char = GraphemeSegmentationProps.from_int(x & mask) return GraphemeSegmentationKey(state, char) @classmethod def as_int(cls, for_go: bool = False) -> str: - cp = bit_field_as_int( - 'CharProps', {f: int(CharProps._field_defaults[f]) for f in cls.char._fields}, function_name_suffix='_as_key', for_go=for_go) - lines = [cp, ''] + lines = [] shift = cls.char.used_bits() base_type = f'uint{cls.state.bitsize()}_t' lines.append(f'static inline {base_type} {cls.__name__}(GraphemeSegmentation state, CharProps ch)' '{') - lines.append(f'\treturn (state.val << {shift}) | (CharProps_as_key(ch)));') + lines.append(f'\treturn (state.val << {shift}) | ch.grapheme_segmentation_property;') lines.append('}') return '\n'.join(lines) @@ -847,11 +862,8 @@ class GraphemeSegmentationResult(NamedTuple): class CharProps(NamedTuple): width: int = 3 - is_extended_pictographic: bool = True - grapheme_break: str = '' # set at runtime - indic_conjunct_break: str = '' # set at runtime - category: str = '' # set at runtime is_emoji: bool = True + category: str = '' # set at runtime is_emoji_presentation_base: bool = True # derived properties for fast lookup @@ -862,6 +874,11 @@ class CharProps(NamedTuple): is_word_char: bool = True is_punctuation: bool = True + # needed for grapheme segmentation set as LSB bits for easy conversion to GraphemeSegmentationProps + grapheme_break: str = '' # set at runtime + indic_conjunct_break: str = '' # set at runtime + is_extended_pictographic: bool = True + @classmethod def bitsize(cls) -> int: ans = sum(int(cls._field_defaults[f]) for f in cls._fields) @@ -921,20 +938,15 @@ class CharProps(NamedTuple): return '{' + ', '.join(parts) + '}' @classmethod - def c_declaration(cls) -> str: - base_type = f'uint{cls.bitsize()}_t' - bits = sum(int(cls._field_defaults[f]) for f in cls._fields) - ans = ['// CharPropsDeclaration', f'// Uses {bits} bits', 'typedef union CharProps {', ' struct {'] + def fields(cls) -> dict[str, int]: + return {'shifted_width' if f == 'width' else f: int(cls._field_defaults[f]) for f in cls._fields} - for f in cls._fields: - n = 'shifted_width' if f == 'width' else f - ans.append(f' uint8_t {n} : {int(cls._field_defaults[f])};') - ans.append(' };') - ans.append(f' {base_type} val;') - ans.append('} CharProps;') - ans.append(f'static_assert(sizeof(CharProps) == sizeof({base_type}), "Fix the ordering of CharProps");') - ans.append('// EndCharPropsDeclaration') - return '\n'.join(ans) + @classmethod + def c_declaration(cls) -> str: + alternate = { + 'grapheme_segmentation_property': sum(int(cls._field_defaults[f]) for f in GraphemeSegmentationProps._fields) + } + return bitfield_declaration_as_c(cls.__name__, cls.fields(), alternate) def generate_enum(p: Callable[..., None], gp: Callable[..., None], name: str, *items: str, prefix: str = '') -> None: @@ -968,7 +980,7 @@ def top_level_category(q: str) -> set[int]: def patch_declaration(name: str, decl: str, raw: str) -> str: begin = f'// {name}Declaration' - end = f'// End{name}Declaration' + end = f'// End{name}Declaration }}''}}' return re.sub(rf'{begin}.+?{end}', decl.rstrip(), raw, flags=re.DOTALL) @@ -1021,7 +1033,7 @@ def gen_char_props() -> None: grapheme_break=x.grapheme_break, indic_conjunct_break=x.indic_conjunct_break, is_extended_pictographic=x.is_extended_pictographic) for x in prop_array) test_grapheme_segmentation(gsprops) - t1, t2, t3, shift, mask, bytesz = splitbins(prop_array, CharProps.bitsize() // 8) + t1, t2, t3, shift, bytesz = splitbins(prop_array, CharProps.bitsize() // 8) print(f'Size of character properties table: {bytesz/1024:.1f}KB') from .bitfields import make_bitfield @@ -1041,7 +1053,7 @@ def gen_char_props() -> None: func (s CharProps) Width() int {{ return int(s.Shifted_width()) - {width_shift} }}''') - gen_multistage_table(c, gp, t1, t2, t3, shift, mask) + gen_multistage_table(c, gp, t1, t2, t3, shift) gofmt(gof.name) with open('kitty/char-props.h', 'r+') as f: raw = f.read()