From 28fce006d6c0489bb7e3c93a7351fa2d61f6674c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 20 Jul 2025 15:37:03 +0530 Subject: [PATCH] Make highlight code fully re-useable --- kittens/diff/collect.go | 23 ++-- kittens/diff/highlight.go | 240 ++------------------------------------ kittens/diff/render.go | 8 +- kittens/diff/ui.go | 2 +- tools/highlight/api.go | 55 +++++++++ tools/highlight/impl.go | 228 ++++++++++++++++++++++++++++++++++++ 6 files changed, 304 insertions(+), 252 deletions(-) create mode 100644 tools/highlight/api.go create mode 100644 tools/highlight/impl.go diff --git a/kittens/diff/collect.go b/kittens/diff/collect.go index 33a337c6a..6ff75c079 100644 --- a/kittens/diff/collect.go +++ b/kittens/diff/collect.go @@ -9,8 +9,10 @@ import ( "os" "path/filepath" "strings" + "sync" "unicode/utf8" + "github.com/kovidgoyal/kitty/tools/highlight" "github.com/kovidgoyal/kitty/tools/utils" ) @@ -117,33 +119,24 @@ func hash_for_path(path string) (string, error) { } -// Remove all control codes except newlines -func sanitize_control_codes(x string) string { - pat := utils.MustCompile("[\x00-\x09\x0b-\x1f\x7f\u0080-\u009f]") - return pat.ReplaceAllLiteralString(x, "░") -} - -func sanitize_tabs_and_carriage_returns(x string) string { - return strings.NewReplacer("\t", conf.Replace_tab_by, "\r", "⏎").Replace(x) -} - -func sanitize(x string) string { - return sanitize_control_codes(sanitize_tabs_and_carriage_returns(x)) -} - func text_to_lines(text string) []string { lines := make([]string, 0, 512) splitlines_like_git(text, false, func(line string) { lines = append(lines, line) }) return lines } +var sanitize = sync.OnceValue(func() func(string) string { + s := highlight.NewSanitizeControlCodes(conf.Replace_tab_by) + return s.Sanitize +}) + func lines_for_path(path string) ([]string, error) { return lines_cache.GetOrCreate(path, func(path string) ([]string, error) { ans, err := data_for_path(path) if err != nil { return nil, err } - return 
text_to_lines(sanitize(ans)), nil + return text_to_lines(sanitize()(ans)), nil }) } diff --git a/kittens/diff/highlight.go b/kittens/diff/highlight.go index e146d8463..cfa09a125 100644 --- a/kittens/diff/highlight.go +++ b/kittens/diff/highlight.go @@ -3,255 +3,31 @@ package diff import ( - "errors" "fmt" - "io" "os" - "path/filepath" - "strings" "sync" + "github.com/kovidgoyal/kitty/tools/highlight" "github.com/kovidgoyal/kitty/tools/utils" "github.com/kovidgoyal/kitty/tools/utils/images" - - "github.com/alecthomas/chroma/v2" - "github.com/alecthomas/chroma/v2/lexers" - "github.com/alecthomas/chroma/v2/styles" ) var _ = fmt.Print var _ = os.WriteFile -var ErrNoLexer = errors.New("No lexer available for this format") -var DefaultStyle = sync.OnceValue(func() *chroma.Style { - // Default style generated by python style.py default pygments.styles.default.DefaultStyle - // with https://raw.githubusercontent.com/alecthomas/chroma/master/_tools/style.py - return styles.Register(chroma.MustNewStyle("default", chroma.StyleEntries{ - chroma.TextWhitespace: "#bbbbbb", - chroma.Comment: "italic #3D7B7B", - chroma.CommentPreproc: "noitalic #9C6500", - chroma.Keyword: "bold #008000", - chroma.KeywordPseudo: "nobold", - chroma.KeywordType: "nobold #B00040", - chroma.Operator: "#666666", - chroma.OperatorWord: "bold #AA22FF", - chroma.NameBuiltin: "#008000", - chroma.NameFunction: "#0000FF", - chroma.NameClass: "bold #0000FF", - chroma.NameNamespace: "bold #0000FF", - chroma.NameException: "bold #CB3F38", - chroma.NameVariable: "#19177C", - chroma.NameConstant: "#880000", - chroma.NameLabel: "#767600", - chroma.NameEntity: "bold #717171", - chroma.NameAttribute: "#687822", - chroma.NameTag: "bold #008000", - chroma.NameDecorator: "#AA22FF", - chroma.LiteralString: "#BA2121", - chroma.LiteralStringDoc: "italic", - chroma.LiteralStringInterpol: "bold #A45A77", - chroma.LiteralStringEscape: "bold #AA5D1F", - chroma.LiteralStringRegex: "#A45A77", - chroma.LiteralStringSymbol: 
"#19177C", - chroma.LiteralStringOther: "#008000", - chroma.LiteralNumber: "#666666", - chroma.GenericHeading: "bold #000080", - chroma.GenericSubheading: "bold #800080", - chroma.GenericDeleted: "#A00000", - chroma.GenericInserted: "#008400", - chroma.GenericError: "#E40000", - chroma.GenericEmph: "italic", - chroma.GenericStrong: "bold", - chroma.GenericPrompt: "bold #000080", - chroma.GenericOutput: "#717171", - chroma.GenericTraceback: "#04D", - chroma.Error: "border:#FF0000", - chroma.Background: " bg:#f8f8f8", - })) -}) - -// Clear the background colour. -func clear_background(style *chroma.Style) *chroma.Style { - builder := style.Builder() - bg := builder.Get(chroma.Background) - bg.Background = 0 - bg.NoInherit = true - builder.AddEntry(chroma.Background, bg) - style, _ = builder.Build() - return style -} - -func ansi_formatter(w io.Writer, style *chroma.Style, it chroma.Iterator) (err error) { - const SGR_PREFIX = "\033[" - const SGR_SUFFIX = "m" - style = clear_background(style) - before, after := make([]byte, 0, 64), make([]byte, 0, 64) - nl := []byte{'\n'} - write_sgr := func(which []byte) (err error) { - if len(which) > 1 { - if _, err = w.Write(utils.UnsafeStringToBytes(SGR_PREFIX)); err != nil { - return err - } - if _, err = w.Write(which[:len(which)-1]); err != nil { - return err - } - if _, err = w.Write(utils.UnsafeStringToBytes(SGR_SUFFIX)); err != nil { - return err - } - } - return - } - write := func(text string) (err error) { - if err = write_sgr(before); err != nil { - return err - } - if _, err = w.Write(utils.UnsafeStringToBytes(text)); err != nil { - return err - } - if err = write_sgr(after); err != nil { - return err - } - return - } - - for token := it(); token != chroma.EOF; token = it() { - entry := style.Get(token.Type) - before, after = before[:0], after[:0] - if !entry.IsZero() { - if entry.Bold == chroma.Yes { - before = append(before, '1', ';') - after = append(after, '2', '2', '1', ';') - } - if entry.Underline == chroma.Yes 
{ - before = append(before, '4', ';') - after = append(after, '2', '4', ';') - } - if entry.Italic == chroma.Yes { - before = append(before, '3', ';') - after = append(after, '2', '3', ';') - } - if entry.Colour.IsSet() { - before = append(before, fmt.Sprintf("38:2:%d:%d:%d;", entry.Colour.Red(), entry.Colour.Green(), entry.Colour.Blue())...) - after = append(after, '3', '9', ';') - } - } - // independently format each line in a multiline token, needed for the diff kitten highlighting to work, also - // pagers like less reset SGR formatting at line boundaries - text := sanitize(token.Value) - for text != "" { - idx := strings.IndexByte(text, '\n') - if idx < 0 { - if err = write(text); err != nil { - return err - } - break - } - if err = write(text[:idx]); err != nil { - return err - } - if _, err = w.Write(nl); err != nil { - return err - } - text = text[idx+1:] - } - } - return nil -} - type prefer_light_colors bool func (s prefer_light_colors) StyleName() string { return utils.IfElse(bool(s), conf.Pygments_style, conf.Dark_pygments_style) } -func (s prefer_light_colors) UseLightColors() bool { return bool(s) } +func (s prefer_light_colors) UseLightColors() bool { return bool(s) } +func (s prefer_light_colors) SyntaxAliases() map[string]string { return conf.Syntax_aliases } +func (s prefer_light_colors) TextForPath(path string) (string, error) { return data_for_path(path) } -type StyleResolveData interface { - StyleName() string - UseLightColors() bool -} - -func resolved_chroma_style(srd StyleResolveData) *chroma.Style { - name := srd.StyleName() - var style *chroma.Style - if name == "default" { - style = DefaultStyle() - } else { - style = styles.Get(name) - } - if style == nil { - if srd.UseLightColors() { - style = DefaultStyle() - } else { - style = styles.Get("monokai") - if style == nil { - style = styles.Get("github-dark") - } - } - if style == nil { - style = styles.Fallback - } - } - return style -} - -var tokens_map map[string][]chroma.Token -var mu 
sync.Mutex - -func HighlightFile(path string, srd StyleResolveData) (highlighted string, err error) { - defer func() { - if r := recover(); r != nil { - e, ok := r.(error) - if !ok { - e = fmt.Errorf("%v", r) - } - err = e - } - }() - filename_for_detection := filepath.Base(path) - ext := filepath.Ext(filename_for_detection) - if ext != "" { - ext = strings.ToLower(ext[1:]) - r := conf.Syntax_aliases[ext] - if r != "" { - filename_for_detection = "file." + r - } - } - text, err := data_for_path(path) - if err != nil { - return "", err - } - mu.Lock() - if tokens_map == nil { - tokens_map = make(map[string][]chroma.Token) - } - tokens := tokens_map[path] - mu.Unlock() - if tokens == nil { - lexer := lexers.Match(filename_for_detection) - if lexer == nil { - lexer = lexers.Analyse(text) - } - if lexer == nil { - return "", fmt.Errorf("Cannot highlight %#v: %w", path, ErrNoLexer) - } - lexer = chroma.Coalesce(lexer) - iterator, err := lexer.Tokenise(nil, text) - if err != nil { - return "", err - } - tokens = iterator.Tokens() - mu.Lock() - tokens_map[path] = tokens - mu.Unlock() - } - formatter := chroma.FormatterFunc(ansi_formatter) - w := strings.Builder{} - w.Grow(len(text) * 2) - err = formatter.Format(&w, resolved_chroma_style(srd), chroma.Literator(tokens...)) - // os.WriteFile(filepath.Base(path+".highlighted"), []byte(w.String()), 0o600) - return w.String(), err -} +var highlighter = sync.OnceValue(func() highlight.Highlighter { + return highlight.NewHighlighter(sanitize()) +}) func highlight_all(paths []string, light bool) { ctx := images.Context{} @@ -259,7 +35,7 @@ func highlight_all(paths []string, light bool) { ctx.Parallel(0, len(paths), func(nums <-chan int) { for i := range nums { path := paths[i] - raw, err := HighlightFile(path, &srd) + raw, err := highlighter().HighlightFile(path, &srd) if err != nil { continue } diff --git a/kittens/diff/render.go b/kittens/diff/render.go index c0d0dc5a5..4979d9efe 100644 --- a/kittens/diff/render.go +++ 
b/kittens/diff/render.go @@ -284,10 +284,10 @@ func title_lines(left_path, right_path string, columns, margin_size int, ans []* } sl := ScreenLine{} if right_name != "" && right_name != left_name { - sl.left.marked_up_text = format_as_sgr.title + fit_in(sanitize(left_name), available_cols) - sl.right.marked_up_text = format_as_sgr.title + fit_in(sanitize(right_name), available_cols) + sl.left.marked_up_text = format_as_sgr.title + fit_in(sanitize()(left_name), available_cols) + sl.right.marked_up_text = format_as_sgr.title + fit_in(sanitize()(right_name), available_cols) } else { - sl.left.marked_up_text = format_as_sgr.title + fit_in(sanitize(left_name), columns-margin_size) + sl.left.marked_up_text = format_as_sgr.title + fit_in(sanitize()(left_name), columns-margin_size) ll.is_full_width = true } l2 := ll @@ -755,7 +755,7 @@ func rename_lines(path, other_path string, columns, margin_size int, ans []*Logi ll := LogicalLine{ left_reference: Reference{path: path}, right_reference: Reference{path: other_path}, line_type: CHANGE_LINE, is_change_start: true, is_full_width: true} - for _, line := range splitlines(fmt.Sprintf(`The file %s was renamed to %s`, sanitize(path_name_map[path]), sanitize(path_name_map[other_path])), columns-margin_size) { + for _, line := range splitlines(fmt.Sprintf(`The file %s was renamed to %s`, sanitize()(path_name_map[path]), sanitize()(path_name_map[other_path])), columns-margin_size) { sl := ScreenLine{} sl.right.marked_up_text = line ll.screen_lines = append(ll.screen_lines, &sl) diff --git a/kittens/diff/ui.go b/kittens/diff/ui.go index 4c0738c4e..800e694fe 100644 --- a/kittens/diff/ui.go +++ b/kittens/diff/ui.go @@ -446,7 +446,7 @@ func (self *Handler) draw_status_line() { if self.inputting_command { self.rl.RedrawNonAtomic() } else if self.statusline_message != "" { - self.lp.QueueWriteString(message_format(wcswidth.TruncateToVisualLength(sanitize(self.statusline_message), self.screen_size.columns))) + 
self.lp.QueueWriteString(message_format(wcswidth.TruncateToVisualLength(sanitize()(self.statusline_message), self.screen_size.columns))) } else { num := self.logical_lines.NumScreenLinesTo(self.scroll_pos) den := self.logical_lines.NumScreenLinesTo(self.max_scroll_pos) diff --git a/tools/highlight/api.go b/tools/highlight/api.go new file mode 100644 index 000000000..5068e91a7 --- /dev/null +++ b/tools/highlight/api.go @@ -0,0 +1,73 @@ +package highlight + +import ( + "errors" + "fmt" + "strings" + + "github.com/alecthomas/chroma/v2" +) + +var _ = fmt.Print + +// ErrNoLexer is wrapped into the error returned by HighlightFile when no lexer matches the file name or its content. +var ErrNoLexer = errors.New("No lexer available for this format") + +// StyleResolveData supplies what HighlightFile needs from its caller: the name of the style to use, whether a +// light fallback style is preferred, extension aliases used for lexer detection, and the text of a path. +type StyleResolveData interface { + StyleName() string + UseLightColors() bool + SyntaxAliases() map[string]string + TextForPath(string) (string, error) +} + +// SanitizeControlCodes replaces control characters in text using a precomputed strings.Replacer. +type SanitizeControlCodes struct { + r *strings.Replacer +} + +// Sanitize returns x with the replacements configured by NewSanitizeControlCodes applied. +func (s SanitizeControlCodes) Sanitize(x string) string { return s.r.Replace(x) } + +// NewSanitizeControlCodes builds a sanitizer that keeps newlines, replaces tabs with replace_tab_by and maps +// every other C0 control code and DEL to its Unicode control picture (U+2400 + code, U+2421 for DEL). The C1 +// control codes U+0080-U+009F have no control pictures and are replaced by U+2591, the placeholder the diff +// kitten previously used for control codes. +func NewSanitizeControlCodes(replace_tab_by string) *SanitizeControlCodes { + repls := make([]string, 0, 2*(0x1f+2+(0x9f-0x80+1))) + for i := range 0x1f + 1 { + var repl string + switch i { + case '\n': + repl = string(rune(i)) + case '\t': + repl = replace_tab_by + default: + repl = string(rune(0x2400 + i)) + } + repls = append(repls, string(rune(i)), repl) + } + // DEL and the C1 range are counted in the capacity above and must be neutralised too: terminals can + // interpret them as control functions (for example U+009B is CSI). + repls = append(repls, "\x7f", "\u2421") + for i := 0x80; i <= 0x9f; i++ { + repls = append(repls, string(rune(i)), "\u2591") + } + return &SanitizeControlCodes{r: strings.NewReplacer(repls...)} +} + +// Highlighter renders the file at path as text marked up with SGR escape codes. +type Highlighter interface { + HighlightFile(path string, srd StyleResolveData) (highlighted_string string, err error) +} + +// NewHighlighter returns a Highlighter that passes the text of every token through sanitize. If sanitize is +// nil, a sanitizer created by NewSanitizeControlCodes is used instead. +func NewHighlighter(sanitize func(string) string) Highlighter { + if sanitize == nil { + s := NewSanitizeControlCodes(" ") + sanitize = s.Sanitize + } + return &highlighter{sanitize: sanitize, tokens_map: make(map[string][]chroma.Token)} +} diff --git a/tools/highlight/impl.go b/tools/highlight/impl.go new file mode 100644 index 000000000..878d18828 --- /dev/null +++ b/tools/highlight/impl.go @@ -0,0 +1,228 @@ +package highlight + +import ( + "fmt" + "io" + "path/filepath" + "strings" +
"sync" + + "github.com/alecthomas/chroma/v2" + "github.com/alecthomas/chroma/v2/lexers" + "github.com/alecthomas/chroma/v2/styles" + "github.com/kovidgoyal/kitty/tools/utils" +) + +var _ = fmt.Print + +var default_style = sync.OnceValue(func() *chroma.Style { + // Default style generated by python style.py default pygments.styles.default.DefaultStyle + // with https://raw.githubusercontent.com/alecthomas/chroma/master/_tools/style.py + return styles.Register(chroma.MustNewStyle("default", chroma.StyleEntries{ + chroma.TextWhitespace: "#bbbbbb", + chroma.Comment: "italic #3D7B7B", + chroma.CommentPreproc: "noitalic #9C6500", + chroma.Keyword: "bold #008000", + chroma.KeywordPseudo: "nobold", + chroma.KeywordType: "nobold #B00040", + chroma.Operator: "#666666", + chroma.OperatorWord: "bold #AA22FF", + chroma.NameBuiltin: "#008000", + chroma.NameFunction: "#0000FF", + chroma.NameClass: "bold #0000FF", + chroma.NameNamespace: "bold #0000FF", + chroma.NameException: "bold #CB3F38", + chroma.NameVariable: "#19177C", + chroma.NameConstant: "#880000", + chroma.NameLabel: "#767600", + chroma.NameEntity: "bold #717171", + chroma.NameAttribute: "#687822", + chroma.NameTag: "bold #008000", + chroma.NameDecorator: "#AA22FF", + chroma.LiteralString: "#BA2121", + chroma.LiteralStringDoc: "italic", + chroma.LiteralStringInterpol: "bold #A45A77", + chroma.LiteralStringEscape: "bold #AA5D1F", + chroma.LiteralStringRegex: "#A45A77", + chroma.LiteralStringSymbol: "#19177C", + chroma.LiteralStringOther: "#008000", + chroma.LiteralNumber: "#666666", + chroma.GenericHeading: "bold #000080", + chroma.GenericSubheading: "bold #800080", + chroma.GenericDeleted: "#A00000", + chroma.GenericInserted: "#008400", + chroma.GenericError: "#E40000", + chroma.GenericEmph: "italic", + chroma.GenericStrong: "bold", + chroma.GenericPrompt: "bold #000080", + chroma.GenericOutput: "#717171", + chroma.GenericTraceback: "#04D", + chroma.Error: "border:#FF0000", + chroma.Background: " bg:#f8f8f8", + })) +}) 
+ +// Clear the background colour. +func clear_background(style *chroma.Style) *chroma.Style { + builder := style.Builder() + bg := builder.Get(chroma.Background) + bg.Background = 0 + bg.NoInherit = true + builder.AddEntry(chroma.Background, bg) + style, _ = builder.Build() + return style +} + +func ansi_formatter(w io.Writer, style *chroma.Style, sanitize func(string) string, it chroma.Iterator) (err error) { + const SGR_PREFIX = "\033[" + const SGR_SUFFIX = "m" + style = clear_background(style) + before, after := make([]byte, 0, 64), make([]byte, 0, 64) + nl := []byte{'\n'} + write_sgr := func(which []byte) (err error) { + if len(which) > 1 { + if _, err = w.Write(utils.UnsafeStringToBytes(SGR_PREFIX)); err != nil { + return err + } + if _, err = w.Write(which[:len(which)-1]); err != nil { + return err + } + if _, err = w.Write(utils.UnsafeStringToBytes(SGR_SUFFIX)); err != nil { + return err + } + } + return + } + write := func(text string) (err error) { + if err = write_sgr(before); err != nil { + return err + } + if _, err = w.Write(utils.UnsafeStringToBytes(text)); err != nil { + return err + } + if err = write_sgr(after); err != nil { + return err + } + return + } + + for token := it(); token != chroma.EOF; token = it() { + entry := style.Get(token.Type) + before, after = before[:0], after[:0] + if !entry.IsZero() { + if entry.Bold == chroma.Yes { + before = append(before, '1', ';') + after = append(after, '2', '2', '1', ';') + } + if entry.Underline == chroma.Yes { + before = append(before, '4', ';') + after = append(after, '2', '4', ';') + } + if entry.Italic == chroma.Yes { + before = append(before, '3', ';') + after = append(after, '2', '3', ';') + } + if entry.Colour.IsSet() { + before = append(before, fmt.Sprintf("38:2:%d:%d:%d;", entry.Colour.Red(), entry.Colour.Green(), entry.Colour.Blue())...) 
+ after = append(after, '3', '9', ';') + } + } + // independently format each line in a multiline token, needed for the diff kitten highlighting to work, also + // pagers like less reset SGR formatting at line boundaries + text := sanitize(token.Value) + for text != "" { + idx := strings.IndexByte(text, '\n') + if idx < 0 { + if err = write(text); err != nil { + return err + } + break + } + if err = write(text[:idx]); err != nil { + return err + } + if _, err = w.Write(nl); err != nil { + return err + } + text = text[idx+1:] + } + } + return nil +} + +func resolved_chroma_style(srd StyleResolveData) *chroma.Style { + name := srd.StyleName() + var style *chroma.Style + if name == "default" { + style = default_style() + } else { + style = styles.Get(name) + } + if style == nil { + if srd.UseLightColors() { + style = default_style() + } else { + style = styles.Get("monokai") + if style == nil { + style = styles.Get("github-dark") + } + } + if style == nil { + style = styles.Fallback + } + } + return style +} + +type highlighter struct { + tokens_map map[string][]chroma.Token + lock sync.Mutex + sanitize func(string) string +} + +func (h *highlighter) HighlightFile(path string, srd StyleResolveData) (highlighted_string string, err error) { + defer func() { + if r := recover(); r != nil { + text, _ := utils.Format_stacktrace_on_panic(r) + err = fmt.Errorf("%s", text) + } + }() + filename_for_detection := filepath.Base(path) + ext := filepath.Ext(filename_for_detection) + if ext != "" { + ext = strings.ToLower(ext[1:]) + r := srd.SyntaxAliases()[ext] + if r != "" { + filename_for_detection = "file." 
+ r + } + } + text, err := srd.TextForPath(path) + if err != nil { + return "", err + } + h.lock.Lock() + tokens := h.tokens_map[path] + h.lock.Unlock() + if tokens == nil { + lexer := lexers.Match(filename_for_detection) + if lexer == nil { + lexer = lexers.Analyse(text) + } + if lexer == nil { + return "", fmt.Errorf("Cannot highlight %#v: %w", path, ErrNoLexer) + } + lexer = chroma.Coalesce(lexer) + iterator, err := lexer.Tokenise(nil, text) + if err != nil { + return "", err + } + tokens = iterator.Tokens() + h.lock.Lock() + h.tokens_map[path] = tokens + h.lock.Unlock() + } + w := strings.Builder{} + w.Grow(len(text) * 2) + err = ansi_formatter(&w, resolved_chroma_style(srd), h.sanitize, chroma.Literator(tokens...)) + return w.String(), err +}