Use SIMD to replace C0 control codes in Go code

This commit is contained in:
Kovid Goyal
2025-07-21 08:54:22 +05:30
parent 12c1b0cbdf
commit fd5876b94e
10 changed files with 74 additions and 47 deletions

View File

@@ -9,10 +9,8 @@ import (
"os"
"path/filepath"
"strings"
"sync"
"unicode/utf8"
"github.com/kovidgoyal/kitty/tools/highlight"
"github.com/kovidgoyal/kitty/tools/utils"
)
@@ -125,10 +123,7 @@ func text_to_lines(text string) []string {
return lines
}
// sanitize lazily builds, exactly once, the control-code sanitizing function
// configured with conf.Replace_tab_by as the tab replacement. Call sanitize()
// to obtain the function, then apply it to the text.
var sanitize = sync.OnceValue(func() func(string) string {
	return highlight.NewSanitizeControlCodes(conf.Replace_tab_by).Sanitize
})
// sanitize returns text with control codes replaced via
// utils.ReplaceControlCodes: tabs become conf.Replace_tab_by and newlines are
// kept as newlines.
func sanitize(text string) string {
	return utils.ReplaceControlCodes(text, conf.Replace_tab_by, "\n")
}
func lines_for_path(path string) ([]string, error) {
return lines_cache.GetOrCreate(path, func(path string) ([]string, error) {
@@ -136,7 +131,7 @@ func lines_for_path(path string) ([]string, error) {
if err != nil {
return nil, err
}
return text_to_lines(sanitize()(ans)), nil
return text_to_lines(sanitize(ans)), nil
})
}

View File

@@ -26,7 +26,7 @@ func (s prefer_light_colors) SyntaxAliases() map[string]string { return c
func (s prefer_light_colors) TextForPath(path string) (string, error) { return data_for_path(path) }
var highlighter = sync.OnceValue(func() highlight.Highlighter {
return highlight.NewHighlighter(sanitize())
return highlight.NewHighlighter(sanitize)
})
func highlight_all(paths []string, light bool) {

View File

@@ -284,10 +284,10 @@ func title_lines(left_path, right_path string, columns, margin_size int, ans []*
}
sl := ScreenLine{}
if right_name != "" && right_name != left_name {
sl.left.marked_up_text = format_as_sgr.title + fit_in(sanitize()(left_name), available_cols)
sl.right.marked_up_text = format_as_sgr.title + fit_in(sanitize()(right_name), available_cols)
sl.left.marked_up_text = format_as_sgr.title + fit_in(sanitize(left_name), available_cols)
sl.right.marked_up_text = format_as_sgr.title + fit_in(sanitize(right_name), available_cols)
} else {
sl.left.marked_up_text = format_as_sgr.title + fit_in(sanitize()(left_name), columns-margin_size)
sl.left.marked_up_text = format_as_sgr.title + fit_in(sanitize(left_name), columns-margin_size)
ll.is_full_width = true
}
l2 := ll
@@ -755,7 +755,7 @@ func rename_lines(path, other_path string, columns, margin_size int, ans []*Logi
ll := LogicalLine{
left_reference: Reference{path: path}, right_reference: Reference{path: other_path},
line_type: CHANGE_LINE, is_change_start: true, is_full_width: true}
for _, line := range splitlines(fmt.Sprintf(`The file %s was renamed to %s`, sanitize()(path_name_map[path]), sanitize()(path_name_map[other_path])), columns-margin_size) {
for _, line := range splitlines(fmt.Sprintf(`The file %s was renamed to %s`, sanitize(path_name_map[path]), sanitize(path_name_map[other_path])), columns-margin_size) {
sl := ScreenLine{}
sl.right.marked_up_text = line
ll.screen_lines = append(ll.screen_lines, &sl)

View File

@@ -446,7 +446,7 @@ func (self *Handler) draw_status_line() {
if self.inputting_command {
self.rl.RedrawNonAtomic()
} else if self.statusline_message != "" {
self.lp.QueueWriteString(message_format(wcswidth.TruncateToVisualLength(sanitize()(self.statusline_message), self.screen_size.columns)))
self.lp.QueueWriteString(message_format(wcswidth.TruncateToVisualLength(sanitize(self.statusline_message), self.screen_size.columns)))
} else {
num := self.logical_lines.NumScreenLinesTo(self.scroll_pos)
den := self.logical_lines.NumScreenLinesTo(self.max_scroll_pos)

View File

@@ -48,6 +48,7 @@
#define zero_at_ptr(p) memset((p), 0, sizeof((p)[0]))
#define literal_strlen(x) (sizeof(x)-1)
#define zero_at_ptr_count(p, count) memset((p), 0, (count) * sizeof((p)[0]))
// Case-range lists meant to be written directly after a `case` keyword, e.g.
// `case C0_EXCEPT_NL_SPACE_TAB: ...`. Both expand to the byte ranges
// 0x00-0x08 and 0x0b-0x1f, so tab (0x09) and newline (0x0a) are left out and
// space (0x20) lies outside the ranges. The _DEL variant additionally leaves
// out DEL (0x7f); the plain variant includes it.
#define C0_EXCEPT_NL_SPACE_TAB_DEL 0x0 ... 0x8: case 0xb ... 0x1f
#define C0_EXCEPT_NL_SPACE_TAB 0x0 ... 0x8: case 0xb ... 0x1f: case 0x7f
void log_error(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
#define fatal(...) { log_error(__VA_ARGS__); exit(EXIT_FAILURE); }

View File

@@ -27,23 +27,26 @@ log_error(const char *fmt, ...) {
va_end(ar);
if (n < 0) return;
size_t size = 5 * (size_t)n + 8;
RAII_ALLOC(char, arena, calloc(size, sizeof(char)));
RAII_ALLOC(unsigned char, arena, calloc(size, sizeof(char)));
if (!arena) return;
va_start(ar, fmt);
n = vsnprintf(arena, size, fmt, ar);
n = vsnprintf((char*)arena, size, fmt, ar);
va_end(ar);
char *sanbuf = arena + n + 1;
unsigned char *sanbuf = arena + n + 1;
char utf8buf[4];
START_ALLOW_CASE_RANGE
size_t j = 0;
for (char *x = arena; x < arena + n; x++) {
for (unsigned char *x = arena; x < arena + n; x++) {
switch(*x) {
case C0_EXCEPT_NL_SPACE_TAB: {
case C0_EXCEPT_NL_SPACE_TAB_DEL: {
const uint32_t ch = 0x2400 + *x;
const unsigned sz = encode_utf8(ch, utf8buf);
for (unsigned c = 0; c < sz; c++, j++) sanbuf[j] = utf8buf[c];
} break;
case 0x7f:
sanbuf[j++] = 0xe2; sanbuf[j++] = 0x90; sanbuf[j++] = 0xa1; // U+2421
break;
default:
sanbuf[j++] = *x;
break;

View File

@@ -3,9 +3,9 @@ package highlight
import (
"errors"
"fmt"
"strings"
"github.com/alecthomas/chroma/v2"
"github.com/kovidgoyal/kitty/tools/utils"
)
var _ = fmt.Print
@@ -19,29 +19,6 @@ type StyleResolveData interface {
TextForPath(string) (string, error)
}
// SanitizeControlCodes rewrites control codes in text into printable
// stand-ins so that the text can be displayed without the codes being
// interpreted.
type SanitizeControlCodes struct {
	r *strings.Replacer
}

// Sanitize returns x with every control code known to the sanitizer replaced.
func (s SanitizeControlCodes) Sanitize(x string) string { return s.r.Replace(x) }

// NewSanitizeControlCodes builds a sanitizer with the following mapping:
//   - newline is left untouched,
//   - tab is replaced by replace_tab_by,
//   - every other byte in 0x00-0x1f is replaced by the code point
//     0x2400 + byte value,
//   - DEL (0x7f) is replaced by U+2421.
//
// All other input, including the space character, passes through unchanged.
func NewSanitizeControlCodes(replace_tab_by string) *SanitizeControlCodes {
	const (
		last_c0          = 0x1f
		del              = 0x7f
		control_pictures = 0x2400
	)
	// One old/new pair per C0 code except newline (31 pairs) plus one for DEL.
	repls := make([]string, 0, 2*(last_c0+1))
	for i := 0; i <= last_c0; i++ {
		switch i {
		case '\n':
			// Newlines are kept as is, so no replacement pair is needed.
			continue
		case '\t':
			repls = append(repls, "\t", replace_tab_by)
		default:
			repls = append(repls, string(rune(i)), string(rune(control_pictures+i)))
		}
	}
	// DEL sits outside the contiguous 0x00-0x1f range and has its own symbol.
	repls = append(repls, string(rune(del)), "\u2421")
	return &SanitizeControlCodes{r: strings.NewReplacer(repls...)}
}
type Highlighter interface {
HighlightFile(path string, srd StyleResolveData) (highlighted_string string, err error)
Sanitize(string) string
@@ -49,8 +26,7 @@ type Highlighter interface {
func NewHighlighter(sanitize func(string) string) Highlighter {
if sanitize == nil {
s := NewSanitizeControlCodes(" ")
sanitize = s.Sanitize
sanitize = func(text string) string { return utils.ReplaceControlCodes(text, " ", "\n") }
}
return &highlighter{sanitize: sanitize, tokens_map: make(map[string][]chroma.Token)}
}

View File

@@ -5,7 +5,6 @@ package simdstring
import (
"bytes"
"fmt"
"github.com/kovidgoyal/kitty/tools/utils"
"runtime"
"strings"
"testing"
@@ -120,13 +119,19 @@ func addressof_data(b []byte) uintptr {
return uintptr(unsafe.Pointer(&b[0]))
}
// memset sets every byte of ans to val. A nil or empty slice is a no-op.
func memset(ans []byte, val byte) {
	if len(ans) == 0 {
		return
	}
	// Seed the first byte, then repeatedly copy the already filled prefix
	// onto the remainder, doubling the filled region on each pass.
	ans[0] = val
	for filled := 1; filled < len(ans); filled *= 2 {
		copy(ans[filled:], ans[:filled])
	}
}
func aligned_slice(sz, alignment int) ([]byte, []byte) {
ans := make([]byte, sz+alignment+512)
a := addressof_data(ans)
a &= uintptr(alignment - 1)
extra := uintptr(alignment) - a
utils.Memset(ans, '<')
utils.Memset(ans[extra+uintptr(sz):], '>')
memset(ans, '<')
memset(ans[extra+uintptr(sz):], '>')
return ans[extra : extra+uintptr(sz)], ans
}

View File

@@ -14,6 +14,7 @@ import (
"strings"
"sync"
"github.com/kovidgoyal/kitty/tools/simdstring"
"golang.org/x/exp/constraints"
"golang.org/x/text/language"
)
@@ -423,3 +424,35 @@ var LanguageTag = sync.OnceValue(func() language.Tag {
return tag
})
// ReplaceControlCodes returns text with control codes replaced by unicode
// codepoints that describe the codes, making the text safe to send to a
// terminal.
//
// Each byte located by simdstring.IndexC0String is handled as follows:
// newline becomes replace_newline_by, tab becomes replace_tab_by, DEL (0x7f)
// becomes U+2421 and any other byte becomes the code point 0x2400 + byte
// value. When no such byte is found, text is returned as is without copying.
func ReplaceControlCodes(text, replace_tab_by, replace_newline_by string) string {
	var out strings.Builder
	started := false // becomes true once the first control code has been seen
	remaining := text
	for remaining != "" {
		pos := simdstring.IndexC0String(remaining)
		if pos < 0 {
			if !started {
				// Nothing was ever replaced, hand back the input untouched.
				return remaining
			}
			out.WriteString(remaining)
			break
		}
		if !started {
			started = true
			out.Grow(2 * len(remaining))
		}
		out.WriteString(remaining[:pos])
		switch c := remaining[pos]; c {
		case '\t':
			out.WriteString(replace_tab_by)
		case '\n':
			out.WriteString(replace_newline_by)
		case 0x7f:
			out.WriteString("\u2421")
		default:
			out.WriteRune(0x2400 + rune(c))
		}
		remaining = remaining[pos+1:]
	}
	return out.String()
}

View File

@@ -34,3 +34,17 @@ func TestStringScanner(t *testing.T) {
}
}
}
// TestReplaceControlCodes checks ReplaceControlCodes with a single space as
// the tab replacement and "X" as the newline replacement, covering text
// without control codes, mixed text, and inputs made only of control codes.
func TestReplaceControlCodes(t *testing.T) {
	cases := map[string]string{
		"none":                     "none",
		"a\r\x01b\x03\x7f c\n\td": "a\u240d\u2401b\u2403\u2421 cX d",
		"\x01":                     "\u2401",
		"\x00\x0b":                 "\u2400\u240b",
	}
	for text, expected := range cases {
		actual := ReplaceControlCodes(text, " ", "X")
		diff := cmp.Diff(expected, actual)
		if diff == "" {
			continue
		}
		t.Fatalf("Failed for text: %#v\n%s", text, diff)
	}
}