mirror of
https://github.com/kovidgoyal/kitty
synced 2026-06-06 01:05:48 +02:00
Implement grapheme seg algo in Go
This commit is contained in:
@@ -675,8 +675,8 @@ def gen_multistage_table(
|
||||
case 4:
|
||||
ctype = 'uint32_t'
|
||||
gotype = 'uint32'
|
||||
c(f'static const unsigned {name}_mask = {mask}u;')
|
||||
c(f'static const unsigned {name}_shift = {shift}u;')
|
||||
c(f'static const char_type {name}_mask = {mask}u;')
|
||||
c(f'static const char_type {name}_shift = {shift}u;')
|
||||
c(f'static const {ctype} {name}_t1[{len(t1)}] = ''{')
|
||||
c(f'\t{", ".join(map(str, t1))}')
|
||||
c('};')
|
||||
@@ -807,6 +807,10 @@ def gen_char_props() -> None:
|
||||
generate_enum(c, gp, 'IndicConjunctBreak', 'None', *incb_map, prefix='ICB_')
|
||||
bf = make_bitfield('tools/wcswidth', 'CharProps', *CharProps().go_fields, add_package=False)[1]
|
||||
gp(bf)
|
||||
gp(f'''
|
||||
func (s CharProps) Width() int {{
|
||||
return int(s.Shifted_width()) - {width_shift}
|
||||
}}''')
|
||||
gen_multistage_table(c, gp, t1, t2, shift, mask)
|
||||
gofmt(gof.name)
|
||||
|
||||
|
||||
4
kitty/char-props-data.h
generated
4
kitty/char-props-data.h
generated
File diff suppressed because one or more lines are too long
4
tools/wcswidth/char-props-data.go
generated
4
tools/wcswidth/char-props-data.go
generated
@@ -86,6 +86,10 @@ func (s *CharProps) Set_shifted_width(val uint8) {
|
||||
*s |= CharProps(val&0b111) << 7
|
||||
}
|
||||
|
||||
func (s CharProps) Width() int {
|
||||
return int(s.Shifted_width()) - 4
|
||||
}
|
||||
|
||||
const charprops_mask = 127
|
||||
const charprops_shift = 7
|
||||
|
||||
|
||||
101
tools/wcswidth/char-props.go
Normal file
101
tools/wcswidth/char-props.go
Normal file
@@ -0,0 +1,101 @@
|
||||
package wcswidth
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
var _ = fmt.Print
|
||||
|
||||
type GraphemeSegmentationState struct {
|
||||
last_char_prop GraphemeBreakProperty
|
||||
|
||||
/* True if the last character ends a sequence of Indic_Conjunct_Break
|
||||
values: consonant {extend|linker}* */
|
||||
incb_consonant_extended bool
|
||||
/* True if the last character ends a sequence of Indic_Conjunct_Break
|
||||
values: consonant {extend|linker}* linker */
|
||||
incb_consonant_extended_linker bool
|
||||
/* True if the last character ends a sequence of Indic_Conjunct_Break
|
||||
values: consonant {extend|linker}* linker {extend|linker}* */
|
||||
incb_consonant_extended_linker_extended bool
|
||||
|
||||
/* True if the last character ends an emoji modifier sequence
|
||||
\p{Extended_Pictographic} Extend*. */
|
||||
emoji_modifier_sequence bool
|
||||
/* True if the last character was immediately preceded by an
|
||||
emoji modifier sequence \p{Extended_Pictographic} Extend*. */
|
||||
emoji_modifier_sequence_before_last_char bool
|
||||
|
||||
/* Number of consecutive regional indicator (RI) characters seen
|
||||
immediately before the current point. */
|
||||
ri_count uint
|
||||
}
|
||||
|
||||
func Char_props_for(ch rune) CharProps {
|
||||
return charprops_t2[(rune(charprops_t1[ch>>charprops_shift])<<charprops_shift)+(ch&charprops_mask)]
|
||||
}
|
||||
|
||||
func (i IndicConjunctBreak) is_linker_or_extend() bool {
|
||||
return i == ICB_Linker || i == ICB_Extend
|
||||
}
|
||||
|
||||
func (s *GraphemeSegmentationState) Step(ch CharProps) bool {
|
||||
// Grapheme segmentation as per UAX29-C1-1 as defined in https://www.unicode.org/reports/tr29/
|
||||
// Returns true iff ch should be added to the current cell based on s which
|
||||
// must reflect the state of the current cell. s is updated by ch.
|
||||
prop := GraphemeBreakProperty(ch.Grapheme_break())
|
||||
incb := IndicConjunctBreak(ch.Indic_conjunct_break())
|
||||
add_to_cell := false
|
||||
if s.last_char_prop == GBP_AtStart {
|
||||
add_to_cell = true
|
||||
} else {
|
||||
/* No break between CR and LF (GB3). */
|
||||
if s.last_char_prop == GBP_CR && prop == GBP_LF {
|
||||
add_to_cell = true
|
||||
} else if
|
||||
/* Break before and after newlines (GB4, GB5). */
|
||||
(s.last_char_prop == GBP_CR || s.last_char_prop == GBP_LF || s.last_char_prop == GBP_Control) ||
|
||||
(prop == GBP_CR || prop == GBP_LF || prop == GBP_Control) {
|
||||
} else if
|
||||
/* No break between Hangul syllable sequences (GB6, GB7, GB8). */
|
||||
(s.last_char_prop == GBP_L && (prop == GBP_L || prop == GBP_V || prop == GBP_LV || prop == GBP_LVT)) ||
|
||||
((s.last_char_prop == GBP_LV || s.last_char_prop == GBP_V) && (prop == GBP_V || prop == GBP_T)) ||
|
||||
((s.last_char_prop == GBP_LVT || s.last_char_prop == GBP_T) && prop == GBP_T) {
|
||||
add_to_cell = true
|
||||
} else if
|
||||
/* No break before: extending characters or ZWJ (GB9), SpacingMarks (GB9a), Prepend characters (GB9b) */
|
||||
prop == GBP_Extend || prop == GBP_ZWJ || prop == GBP_SpacingMark || s.last_char_prop == GBP_Prepend {
|
||||
add_to_cell = true
|
||||
} else if
|
||||
/* No break within certain combinations of Indic_Conjunct_Break values:
|
||||
* Between consonant {extend|linker}* linker {extend|linker}* and consonant (GB9c). */
|
||||
s.incb_consonant_extended_linker_extended && incb == ICB_Consonant {
|
||||
add_to_cell = true
|
||||
} else if
|
||||
/* No break within emoji modifier sequences or emoji zwj sequences (GB11). */
|
||||
s.last_char_prop == GBP_ZWJ && s.emoji_modifier_sequence_before_last_char && (ch.Is_extended_pictographic() == 1) {
|
||||
add_to_cell = true
|
||||
} else if
|
||||
/* No break between RI if there is an odd number of RI characters before (GB12, GB13). */
|
||||
prop == GBP_Regional_Indicator && (s.ri_count%2) != 0 {
|
||||
add_to_cell = true
|
||||
} else
|
||||
/* Break everywhere else */
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
s.incb_consonant_extended_linker = s.incb_consonant_extended && incb == ICB_Linker
|
||||
s.incb_consonant_extended_linker_extended = (s.incb_consonant_extended_linker || (s.incb_consonant_extended_linker_extended && incb.is_linker_or_extend()))
|
||||
s.incb_consonant_extended = (incb == ICB_Consonant || (s.incb_consonant_extended && incb.is_linker_or_extend()))
|
||||
s.emoji_modifier_sequence_before_last_char = s.emoji_modifier_sequence
|
||||
s.emoji_modifier_sequence = (s.emoji_modifier_sequence && prop == GBP_Extend) || (ch.Is_extended_pictographic() == 1)
|
||||
s.last_char_prop = prop
|
||||
|
||||
if prop == GBP_Regional_Indicator {
|
||||
s.ri_count++
|
||||
} else {
|
||||
s.ri_count = 0
|
||||
}
|
||||
return add_to_cell
|
||||
}
|
||||
Reference in New Issue
Block a user