From fd5876b94e9db7b7a805e37a5dbfc431a98b0efd Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 21 Jul 2025 08:54:22 +0530 Subject: [PATCH] Use SIMD to replace C0 control codes in Go code --- kittens/diff/collect.go | 9 ++------ kittens/diff/highlight.go | 2 +- kittens/diff/render.go | 8 +++---- kittens/diff/ui.go | 2 +- kitty/data-types.h | 1 + kitty/logging.c | 13 +++++++----- tools/highlight/api.go | 28 ++---------------------- tools/simdstring/intrinsics_test.go | 11 +++++++--- tools/utils/misc.go | 33 +++++++++++++++++++++++++++++ tools/utils/strings_test.go | 14 ++++++++++++ 10 files changed, 74 insertions(+), 47 deletions(-) diff --git a/kittens/diff/collect.go b/kittens/diff/collect.go index 6ff75c079..eb168366f 100644 --- a/kittens/diff/collect.go +++ b/kittens/diff/collect.go @@ -9,10 +9,8 @@ import ( "os" "path/filepath" "strings" - "sync" "unicode/utf8" - "github.com/kovidgoyal/kitty/tools/highlight" "github.com/kovidgoyal/kitty/tools/utils" ) @@ -125,10 +123,7 @@ func text_to_lines(text string) []string { return lines } -var sanitize = sync.OnceValue(func() func(string) string { - s := highlight.NewSanitizeControlCodes(conf.Replace_tab_by) - return s.Sanitize -}) +func sanitize(text string) string { return utils.ReplaceControlCodes(text, conf.Replace_tab_by, "\n") } func lines_for_path(path string) ([]string, error) { return lines_cache.GetOrCreate(path, func(path string) ([]string, error) { @@ -136,7 +131,7 @@ func lines_for_path(path string) ([]string, error) { if err != nil { return nil, err } - return text_to_lines(sanitize()(ans)), nil + return text_to_lines(sanitize(ans)), nil }) } diff --git a/kittens/diff/highlight.go b/kittens/diff/highlight.go index cfa09a125..76add4d4b 100644 --- a/kittens/diff/highlight.go +++ b/kittens/diff/highlight.go @@ -26,7 +26,7 @@ func (s prefer_light_colors) SyntaxAliases() map[string]string { return c func (s prefer_light_colors) TextForPath(path string) (string, error) { return data_for_path(path) } var highlighter = sync.OnceValue(func() highlight.Highlighter { - return highlight.NewHighlighter(sanitize()) + return highlight.NewHighlighter(sanitize) }) func highlight_all(paths []string, light bool) { diff --git a/kittens/diff/render.go b/kittens/diff/render.go index 4979d9efe..c0d0dc5a5 100644 --- a/kittens/diff/render.go +++ b/kittens/diff/render.go @@ -284,10 +284,10 @@ func title_lines(left_path, right_path string, columns, margin_size int, ans []* } sl := ScreenLine{} if right_name != "" && right_name != left_name { - sl.left.marked_up_text = format_as_sgr.title + fit_in(sanitize()(left_name), available_cols) - sl.right.marked_up_text = format_as_sgr.title + fit_in(sanitize()(right_name), available_cols) + sl.left.marked_up_text = format_as_sgr.title + fit_in(sanitize(left_name), available_cols) + sl.right.marked_up_text = format_as_sgr.title + fit_in(sanitize(right_name), available_cols) } else { - sl.left.marked_up_text = format_as_sgr.title + fit_in(sanitize()(left_name), columns-margin_size) + sl.left.marked_up_text = format_as_sgr.title + fit_in(sanitize(left_name), columns-margin_size) ll.is_full_width = true } l2 := ll @@ -755,7 +755,7 @@ func rename_lines(path, other_path string, columns, margin_size int, ans []*Logi ll := LogicalLine{ left_reference: Reference{path: path}, right_reference: Reference{path: other_path}, line_type: CHANGE_LINE, is_change_start: true, is_full_width: true} - for _, line := range splitlines(fmt.Sprintf(`The file %s was renamed to %s`, sanitize()(path_name_map[path]), sanitize()(path_name_map[other_path])), columns-margin_size) { + for _, line := range splitlines(fmt.Sprintf(`The file %s was renamed to %s`, sanitize(path_name_map[path]), sanitize(path_name_map[other_path])), columns-margin_size) { sl := ScreenLine{} sl.right.marked_up_text = line ll.screen_lines = append(ll.screen_lines, &sl) diff --git a/kittens/diff/ui.go b/kittens/diff/ui.go index 800e694fe..4c0738c4e 100644 --- a/kittens/diff/ui.go +++ b/kittens/diff/ui.go @@ -446,7 +446,7 @@ func (self *Handler) draw_status_line() { if self.inputting_command { self.rl.RedrawNonAtomic() } else if self.statusline_message != "" { - self.lp.QueueWriteString(message_format(wcswidth.TruncateToVisualLength(sanitize()(self.statusline_message), self.screen_size.columns))) + self.lp.QueueWriteString(message_format(wcswidth.TruncateToVisualLength(sanitize(self.statusline_message), self.screen_size.columns))) } else { num := self.logical_lines.NumScreenLinesTo(self.scroll_pos) den := self.logical_lines.NumScreenLinesTo(self.max_scroll_pos) diff --git a/kitty/data-types.h b/kitty/data-types.h index 0db25a30f..d1c9e7a3b 100644 --- a/kitty/data-types.h +++ b/kitty/data-types.h @@ -48,6 +48,7 @@ #define zero_at_ptr(p) memset((p), 0, sizeof((p)[0])) #define literal_strlen(x) (sizeof(x)-1) #define zero_at_ptr_count(p, count) memset((p), 0, (count) * sizeof((p)[0])) +#define C0_EXCEPT_NL_SPACE_TAB_DEL 0x0 ... 0x8: case 0xb ... 0x1f #define C0_EXCEPT_NL_SPACE_TAB 0x0 ... 0x8: case 0xb ... 0x1f: case 0x7f void log_error(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); #define fatal(...) { log_error(__VA_ARGS__); exit(EXIT_FAILURE); } diff --git a/kitty/logging.c b/kitty/logging.c index ef6d60b31..1f8567fb5 100644 --- a/kitty/logging.c +++ b/kitty/logging.c @@ -27,23 +27,26 @@ log_error(const char *fmt, ...) { va_end(ar); if (n < 0) return; size_t size = 5 * (size_t)n + 8; - RAII_ALLOC(char, arena, calloc(size, sizeof(char))); + RAII_ALLOC(unsigned char, arena, calloc(size, sizeof(char))); if (!arena) return; va_start(ar, fmt); - n = vsnprintf(arena, size, fmt, ar); + n = vsnprintf((char*)arena, size, fmt, ar); va_end(ar); - char *sanbuf = arena + n + 1; + unsigned char *sanbuf = arena + n + 1; char utf8buf[4]; START_ALLOW_CASE_RANGE size_t j = 0; - for (char *x = arena; x < arena + n; x++) { + for (unsigned char *x = arena; x < arena + n; x++) { switch(*x) { - case C0_EXCEPT_NL_SPACE_TAB: { + case C0_EXCEPT_NL_SPACE_TAB_DEL: { const uint32_t ch = 0x2400 + *x; const unsigned sz = encode_utf8(ch, utf8buf); for (unsigned c = 0; c < sz; c++, j++) sanbuf[j] = utf8buf[c]; } break; + case 0x7f: + sanbuf[j++] = 0xe2; sanbuf[j++] = 0x90; sanbuf[j++] = 0xa1; // U+2421 + break; default: sanbuf[j++] = *x; break; diff --git a/tools/highlight/api.go b/tools/highlight/api.go index 281e89688..54b4ded46 100644 --- a/tools/highlight/api.go +++ b/tools/highlight/api.go @@ -3,9 +3,9 @@ package highlight import ( "errors" "fmt" - "strings" "github.com/alecthomas/chroma/v2" + "github.com/kovidgoyal/kitty/tools/utils" ) var _ = fmt.Print @@ -19,29 +19,6 @@ type StyleResolveData interface { TextForPath(string) (string, error) } -type SanitizeControlCodes struct { - r *strings.Replacer -} - -func (s SanitizeControlCodes) Sanitize(x string) string { return s.r.Replace(x) } - -func NewSanitizeControlCodes(replace_tab_by string) *SanitizeControlCodes { - repls := make([]string, 0, 2*(0x1f+2+(0x9f-0x80+1))) - for i := range 0x1f + 1 { - var repl string - switch i { - case '\n', ' ': - repl = string(rune(i)) - case '\t': - repl = replace_tab_by - default: - repl = string(rune(0x2400 + i)) - } - repls = append(repls, string(rune(i)), repl) - } - return &SanitizeControlCodes{r: strings.NewReplacer(repls...)} -} - type Highlighter interface { HighlightFile(path string, srd StyleResolveData) (highlighted_string string, err error) Sanitize(string) string @@ -49,8 +26,7 @@ type Highlighter interface { func NewHighlighter(sanitize func(string) string) Highlighter { if sanitize == nil { - s := NewSanitizeControlCodes(" ") - sanitize = s.Sanitize + sanitize = func(text string) string { return utils.ReplaceControlCodes(text, " ", "\n") } } return &highlighter{sanitize: sanitize, tokens_map: make(map[string][]chroma.Token)} } diff --git a/tools/simdstring/intrinsics_test.go b/tools/simdstring/intrinsics_test.go index f68fccb41..b0eedef49 100644 --- a/tools/simdstring/intrinsics_test.go +++ b/tools/simdstring/intrinsics_test.go @@ -5,7 +5,6 @@ package simdstring import ( "bytes" "fmt" - "github.com/kovidgoyal/kitty/tools/utils" "runtime" "strings" "testing" @@ -120,13 +119,19 @@ func addressof_data(b []byte) uintptr { return uintptr(unsafe.Pointer(&b[0])) } +func memset(ans []byte, val byte) { + for i := range ans { + ans[i] = val + } +} + func aligned_slice(sz, alignment int) ([]byte, []byte) { ans := make([]byte, sz+alignment+512) a := addressof_data(ans) a &= uintptr(alignment - 1) extra := uintptr(alignment) - a - utils.Memset(ans, '<') - utils.Memset(ans[extra+uintptr(sz):], '>') + memset(ans, '<') + memset(ans[extra+uintptr(sz):], '>') return ans[extra : extra+uintptr(sz)], ans } diff --git a/tools/utils/misc.go b/tools/utils/misc.go index 0dbc411bf..0094ddfbf 100644 --- a/tools/utils/misc.go +++ b/tools/utils/misc.go @@ -14,6 +14,7 @@ import ( "strings" "sync" + "github.com/kovidgoyal/kitty/tools/simdstring" "golang.org/x/exp/constraints" "golang.org/x/text/language" ) @@ -423,3 +424,35 @@ var LanguageTag = sync.OnceValue(func() language.Tag { return tag }) + +// Replace control codes by unicode codepoints that describe the codes +// making the text safe to send to a terminal +func ReplaceControlCodes(text, replace_tab_by, replace_newline_by string) string { + buf := strings.Builder{} + for len(text) > 0 { + idx := simdstring.IndexC0String(text) + if idx < 0 { + if buf.Cap() == 0 { + return text + } + buf.WriteString(text) + break + } + if buf.Cap() == 0 { + buf.Grow(2 * len(text)) + } + buf.WriteString(text[:idx]) + switch text[idx] { + case '\n': + buf.WriteString(replace_newline_by) + case '\t': + buf.WriteString(replace_tab_by) + case 0x7f: + buf.WriteRune(0x2421) + default: + buf.WriteRune(0x2400 + rune(text[idx])) + } + text = text[idx+1:] + } + return buf.String() +} diff --git a/tools/utils/strings_test.go b/tools/utils/strings_test.go index 0c3851947..475b05133 100644 --- a/tools/utils/strings_test.go +++ b/tools/utils/strings_test.go @@ -34,3 +34,17 @@ func TestStringScanner(t *testing.T) { } } } + +func TestReplaceControlCodes(t *testing.T) { + for text, expected := range map[string]string{ + "none": "none", + "a\r\x01b\x03\x7f c\n\td": "a\u240d\u2401b\u2403\u2421 cX d", + "\x01": "\u2401", + "\x00\x0b": "\u2400\u240b", + } { + actual := ReplaceControlCodes(text, " ", "X") + if diff := cmp.Diff(expected, actual); diff != "" { + t.Fatalf("Failed for text: %#v\n%s", text, diff) + } + } +}