mirror of
https://github.com/kovidgoyal/kitty
synced 2026-06-08 22:28:24 +02:00
Port wcswidth to use grapheme segmentation
This commit is contained in:
@@ -7,76 +7,56 @@
|
|||||||
|
|
||||||
#include "char-props.h"
|
#include "char-props.h"
|
||||||
#include "wcswidth.h"
|
#include "wcswidth.h"
|
||||||
#include "unicode-data.h"
|
|
||||||
|
|
||||||
void
|
void
|
||||||
initialize_wcs_state(WCSState *state) {
|
initialize_wcs_state(WCSState *state) {
|
||||||
zero_at_ptr(state);
|
zero_at_ptr(state);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool
|
|
||||||
is_flag_pair(char_type a, char_type b) {
|
|
||||||
return is_flag_codepoint(a) && is_flag_codepoint(b);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline bool
|
|
||||||
is_emoji_presentation_base(char_type ch) {
|
|
||||||
return char_props_for(ch).is_emoji_presentation_base == 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
int
|
||||||
wcswidth_step(WCSState *state, const char_type ch) {
|
wcswidth_step(WCSState *state, const char_type ch) {
|
||||||
int ans = 0;
|
int ans = 0;
|
||||||
switch (state->parser_state) {
|
switch (state->parser_state) {
|
||||||
case IN_CSI: {
|
case IN_CSI: {
|
||||||
state->prev_width = 0;
|
state->prev_width = 0;
|
||||||
if (0x40 <= ch && ch <= 0x7e) state->parser_state = NORMAL;
|
if (0x40 <= ch && ch <= 0x7e) { state->parser_state = NORMAL; state->can_combine = false; }
|
||||||
} break;
|
} break;
|
||||||
case IN_ST_TERMINATED: {
|
case IN_ST_TERMINATED: {
|
||||||
state->prev_width = 0;
|
state->prev_width = 0;
|
||||||
if (ch == 0x9c || (ch == '\\' && state->prev_ch == 0x1b)) state->parser_state = NORMAL;
|
if (ch == '\a' || (ch == '\\' && state->prev_ch == 0x1b)) { state->parser_state = NORMAL; state->can_combine = false; }
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
case FLAG_PAIR_STARTED: {
|
|
||||||
state->parser_state = NORMAL;
|
|
||||||
if (is_flag_pair(state->prev_ch, ch)) break;
|
|
||||||
} /* fallthrough */
|
|
||||||
|
|
||||||
case NORMAL: {
|
case NORMAL: {
|
||||||
switch(ch) {
|
CharProps cp = char_props_for(ch);
|
||||||
case 0x1b: {
|
state->seg = grapheme_segmentation_step(state->seg, cp);
|
||||||
|
if (state->seg.add_to_current_cell && state->can_combine) {
|
||||||
|
switch(ch) {
|
||||||
|
case 0xfe0f:
|
||||||
|
if (char_props_for(state->prev_ch).is_emoji_presentation_base && state->prev_width == 1) {
|
||||||
|
ans = 1; state->prev_width = 2;
|
||||||
|
} else state->prev_width = 0;
|
||||||
|
break;
|
||||||
|
case 0xfe0e:
|
||||||
|
if (char_props_for(state->prev_ch).is_emoji_presentation_base && state->prev_width == 2) {
|
||||||
|
ans = -1; state->prev_width = 1;
|
||||||
|
} else state->prev_width = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
int width = wcwidth_std(cp);
|
||||||
|
switch (width) {
|
||||||
|
case -1: case 0:
|
||||||
state->prev_width = 0;
|
state->prev_width = 0;
|
||||||
state->parser_state = IN_ESC;
|
if (ch == 0x1b) state->parser_state = IN_ESC;
|
||||||
} break;
|
break;
|
||||||
case 0xfe0f: {
|
case 2:
|
||||||
if (is_emoji_presentation_base(state->prev_ch) && state->prev_width == 1) {
|
state->prev_width = 2; break;
|
||||||
ans += 1;
|
default:
|
||||||
state->prev_width = 2;
|
state->prev_width = 1; break;
|
||||||
} else state->prev_width = 0;
|
}
|
||||||
} break;
|
ans = state->prev_width;
|
||||||
|
state->can_combine = true;
|
||||||
case 0xfe0e: {
|
|
||||||
if (is_emoji_presentation_base(state->prev_ch) && state->prev_width == 2) {
|
|
||||||
ans -= 1;
|
|
||||||
state->prev_width = 1;
|
|
||||||
} else state->prev_width = 0;
|
|
||||||
} break;
|
|
||||||
|
|
||||||
default: {
|
|
||||||
if (is_flag_codepoint(ch)) state->parser_state = FLAG_PAIR_STARTED;
|
|
||||||
int w = wcwidth_std(char_props_for(ch));
|
|
||||||
switch(w) {
|
|
||||||
case -1:
|
|
||||||
case 0:
|
|
||||||
state->prev_width = 0; break;
|
|
||||||
case 2:
|
|
||||||
state->prev_width = 2; break;
|
|
||||||
default:
|
|
||||||
state->prev_width = 1; break;
|
|
||||||
}
|
|
||||||
ans += state->prev_width;
|
|
||||||
} break;
|
|
||||||
} break; // switch(ch)
|
|
||||||
} break; // case NORMAL
|
} break; // case NORMAL
|
||||||
|
|
||||||
case IN_ESC:
|
case IN_ESC:
|
||||||
@@ -113,9 +93,7 @@ wcswidth_step(WCSState *state, const char_type ch) {
|
|||||||
case '~':
|
case '~':
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
state->prev_ch = 0x1b;
|
zero_at_ptr(state);
|
||||||
state->prev_width = 0;
|
|
||||||
state->parser_state = NORMAL;
|
|
||||||
return wcswidth_step(state, ch);
|
return wcswidth_step(state, ch);
|
||||||
} break;
|
} break;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,14 +6,16 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "data-types.h"
|
#include "char-props.h"
|
||||||
|
|
||||||
typedef enum {NORMAL, IN_ESC, IN_CSI, FLAG_PAIR_STARTED, IN_ST_TERMINATED} WCSParserState;
|
typedef enum {NORMAL, IN_ESC, IN_CSI, IN_ST_TERMINATED} WCSParserState;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char_type prev_ch;
|
char_type prev_ch;
|
||||||
int prev_width;
|
int prev_width;
|
||||||
WCSParserState parser_state;
|
WCSParserState parser_state;
|
||||||
|
bool can_combine;
|
||||||
|
GraphemeSegmentationResult seg;
|
||||||
} WCSState;
|
} WCSState;
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -379,6 +379,7 @@ class TestDataTypes(BaseTest):
|
|||||||
def test_utils(self):
|
def test_utils(self):
|
||||||
def w(x):
|
def w(x):
|
||||||
return wcwidth(ord(x))
|
return wcwidth(ord(x))
|
||||||
|
self.ae(wcswidth('\x9c'), 0)
|
||||||
self.ae(wcswidth('a\033[2mb'), 2)
|
self.ae(wcswidth('a\033[2mb'), 2)
|
||||||
self.ae(wcswidth('\033a\033[2mb'), 2)
|
self.ae(wcswidth('\033a\033[2mb'), 2)
|
||||||
self.ae(wcswidth('a\033]8;id=moo;https://foo\033\\a'), 2)
|
self.ae(wcswidth('a\033]8;id=moo;https://foo\033\\a'), 2)
|
||||||
|
|||||||
@@ -12,21 +12,12 @@ import (
|
|||||||
|
|
||||||
var _ = fmt.Print
|
var _ = fmt.Print
|
||||||
|
|
||||||
func IsFlagCodepoint(ch rune) bool {
|
|
||||||
return 0x1F1E6 <= ch && ch <= 0x1F1FF
|
|
||||||
}
|
|
||||||
|
|
||||||
func IsFlagPair(a rune, b rune) bool {
|
|
||||||
return IsFlagCodepoint(a) && IsFlagCodepoint(b)
|
|
||||||
}
|
|
||||||
|
|
||||||
type ecparser_state uint8
|
|
||||||
|
|
||||||
type WCWidthIterator struct {
|
type WCWidthIterator struct {
|
||||||
prev_ch rune
|
prev_ch rune
|
||||||
prev_width, current_width int
|
prev_width, current_width int
|
||||||
|
seg GraphemeSegmentationResult
|
||||||
|
can_combine bool
|
||||||
parser EscapeCodeParser
|
parser EscapeCodeParser
|
||||||
state ecparser_state
|
|
||||||
rune_count uint
|
rune_count uint
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -34,6 +25,12 @@ func CreateWCWidthIterator() *WCWidthIterator {
|
|||||||
var ans WCWidthIterator
|
var ans WCWidthIterator
|
||||||
ans.parser.HandleRune = ans.handle_rune
|
ans.parser.HandleRune = ans.handle_rune
|
||||||
ans.parser.HandleCSI = ans.handle_csi
|
ans.parser.HandleCSI = ans.handle_csi
|
||||||
|
ans.parser.HandleOSC = ans.handle_st_terminated
|
||||||
|
ans.parser.HandleDCS = ans.handle_st_terminated
|
||||||
|
ans.parser.HandlePM = ans.handle_st_terminated
|
||||||
|
ans.parser.HandleSOS = ans.handle_st_terminated
|
||||||
|
ans.parser.HandleAPC = ans.handle_st_terminated
|
||||||
|
|
||||||
return &ans
|
return &ans
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -42,6 +39,8 @@ func (self *WCWidthIterator) Reset() {
|
|||||||
self.prev_width = 0
|
self.prev_width = 0
|
||||||
self.current_width = 0
|
self.current_width = 0
|
||||||
self.rune_count = 0
|
self.rune_count = 0
|
||||||
|
self.can_combine = false
|
||||||
|
self.seg = 0
|
||||||
self.parser.Reset()
|
self.parser.Reset()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -58,54 +57,47 @@ func (self *WCWidthIterator) handle_csi(csi []byte) error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
self.can_combine = false
|
||||||
|
self.seg = 0
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self *WCWidthIterator) handle_st_terminated(data []byte) error {
|
||||||
|
self.can_combine = false
|
||||||
|
self.seg = 0
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (self *WCWidthIterator) handle_rune(ch rune) error {
|
func (self *WCWidthIterator) handle_rune(ch rune) error {
|
||||||
self.rune_count += 1
|
self.rune_count += 1
|
||||||
const (
|
cp := CharPropsFor(ch)
|
||||||
normal ecparser_state = 0
|
self.seg = self.seg.Step(cp)
|
||||||
flag_pair_started ecparser_state = 3
|
if self.can_combine && self.seg.Add_to_current_cell() == 1 {
|
||||||
)
|
|
||||||
switch self.state {
|
|
||||||
case flag_pair_started:
|
|
||||||
self.state = normal
|
|
||||||
if IsFlagPair(self.prev_ch, ch) {
|
|
||||||
break
|
|
||||||
}
|
|
||||||
fallthrough
|
|
||||||
case normal:
|
|
||||||
switch ch {
|
switch ch {
|
||||||
case 0xfe0f:
|
case 0xfe0f:
|
||||||
if IsEmojiPresentationBase(self.prev_ch) && self.prev_width == 1 {
|
if CharPropsFor(self.prev_ch).Is_emoji_presentation_base() == 1 && self.prev_width == 1 {
|
||||||
self.current_width += 1
|
self.current_width += 1
|
||||||
self.prev_width = 2
|
self.prev_width = 2
|
||||||
} else {
|
|
||||||
self.prev_width = 0
|
|
||||||
}
|
}
|
||||||
case 0xfe0e:
|
case 0xfe0e:
|
||||||
if IsEmojiPresentationBase(self.prev_ch) && self.prev_width == 2 {
|
if CharPropsFor(self.prev_ch).Is_emoji_presentation_base() == 1 && self.prev_width == 2 {
|
||||||
self.current_width -= 1
|
self.current_width -= 1
|
||||||
self.prev_width = 1
|
self.prev_width = 1
|
||||||
} else {
|
|
||||||
self.prev_width = 0
|
|
||||||
}
|
}
|
||||||
default:
|
|
||||||
if IsFlagCodepoint(ch) {
|
|
||||||
self.state = flag_pair_started
|
|
||||||
}
|
|
||||||
w := Runewidth(ch)
|
|
||||||
switch w {
|
|
||||||
case -1:
|
|
||||||
case 0:
|
|
||||||
self.prev_width = 0
|
|
||||||
case 2:
|
|
||||||
self.prev_width = 2
|
|
||||||
default:
|
|
||||||
self.prev_width = 1
|
|
||||||
}
|
|
||||||
self.current_width += self.prev_width
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
width := cp.Width()
|
||||||
|
switch width {
|
||||||
|
case -1:
|
||||||
|
case 0:
|
||||||
|
self.prev_width = 0
|
||||||
|
case 2:
|
||||||
|
self.prev_width = 2
|
||||||
|
default:
|
||||||
|
self.prev_width = 1
|
||||||
|
}
|
||||||
|
self.current_width += self.prev_width
|
||||||
|
self.can_combine = true
|
||||||
}
|
}
|
||||||
self.prev_ch = ch
|
self.prev_ch = ch
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
Reference in New Issue
Block a user