mirror of
https://github.com/kovidgoyal/kitty
synced 2026-06-06 01:05:48 +02:00
227 lines
5.3 KiB
Go
227 lines
5.3 KiB
Go
/*
|
|
Package shlex implements a simple lexer which splits input in to tokens using
|
|
shell-style rules for quoting.
|
|
|
|
The basic use case uses the default ASCII lexer to split a string into sub-strings:
|
|
|
|
shlex.Split("one \"two three\" four") -> []string{"one", "two three", "four"}
|
|
|
|
To process a stream of strings:
|
|
|
|
l := NewLexer(os.Stdin)
|
|
for ; token, err := l.Next(); err != nil {
|
|
// process token
|
|
}
|
|
*/
|
|
package shlex
|
|
|
|
import (
|
|
"fmt"
|
|
"kitty/tools/utils"
|
|
"strings"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
type Word struct {
|
|
Value string // The word is empty if EOF is reached
|
|
Pos int // The position in the input string of the word or the trailer
|
|
Err error // Indicates an error (unterminated string or trailing unescaped backslash)
|
|
Trailer string // Extra trailing data such as an unterminated string or an unescaped backslash. Present only if Err != nil
|
|
}
|
|
|
|
type lexer_state int
|
|
|
|
// Lexer state machine states
|
|
const (
|
|
lex_normal lexer_state = iota
|
|
word
|
|
string_without_escapes
|
|
string_with_escapes
|
|
)
|
|
|
|
// Lexer turns an input stream into a sequence of tokens. Whitespace is skipped.
|
|
type Lexer struct {
|
|
state lexer_state
|
|
src string
|
|
src_sz, src_pos, word_start int
|
|
buf strings.Builder
|
|
}
|
|
|
|
// NewLexer creates a new lexer from an input string.
|
|
func NewLexer(x string) *Lexer {
|
|
return &Lexer{src: x, src_sz: len(x)}
|
|
}
|
|
|
|
func (self *Lexer) start_word() {
|
|
self.buf.Reset()
|
|
self.word_start = self.src_pos - 1
|
|
}
|
|
|
|
func (self *Lexer) get_word() Word {
|
|
return Word{Pos: self.word_start, Value: self.buf.String()}
|
|
}
|
|
|
|
func (self *Lexer) write_ch(ch byte) {
|
|
self.buf.WriteByte(ch)
|
|
}
|
|
|
|
func (self *Lexer) write_escaped_ch() bool {
|
|
ch, count := utf8.DecodeRuneInString(self.src[self.src_pos:])
|
|
if count > 0 {
|
|
self.src_pos += count
|
|
if ch != utf8.RuneError {
|
|
self.buf.WriteRune(ch)
|
|
}
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Next returns the next word. At EOF Word.Value will be ""
|
|
func (self *Lexer) Next() (ans Word) {
|
|
const string_with_escapes_delim = '"'
|
|
const string_without_escapes_delim = '\''
|
|
const escape_char = '\\'
|
|
for self.src_pos < self.src_sz {
|
|
ch := self.src[self.src_pos]
|
|
self.src_pos++
|
|
switch self.state {
|
|
case lex_normal:
|
|
switch ch {
|
|
case ' ', '\n', '\r', '\t':
|
|
case string_with_escapes_delim:
|
|
self.state = string_with_escapes
|
|
self.start_word()
|
|
case string_without_escapes_delim:
|
|
self.state = string_without_escapes
|
|
self.start_word()
|
|
case escape_char:
|
|
self.start_word()
|
|
if !self.write_escaped_ch() {
|
|
ans.Trailer = "\\"
|
|
ans.Err = fmt.Errorf("Extra backslash at end of input")
|
|
ans.Pos = self.word_start
|
|
return
|
|
}
|
|
self.state = word
|
|
default:
|
|
self.state = word
|
|
self.start_word()
|
|
self.write_ch(ch)
|
|
}
|
|
case word:
|
|
switch ch {
|
|
case ' ', '\n', '\r', '\t':
|
|
self.state = lex_normal
|
|
if self.buf.Len() > 0 {
|
|
return self.get_word()
|
|
}
|
|
case string_with_escapes_delim:
|
|
self.state = string_with_escapes
|
|
case string_without_escapes_delim:
|
|
self.state = string_without_escapes
|
|
case escape_char:
|
|
if !self.write_escaped_ch() {
|
|
ans.Pos = self.word_start
|
|
ans.Trailer = self.buf.String() + "\\"
|
|
ans.Err = fmt.Errorf("Extra backslash at end of input")
|
|
return
|
|
}
|
|
default:
|
|
self.write_ch(ch)
|
|
}
|
|
case string_without_escapes:
|
|
switch ch {
|
|
case string_without_escapes_delim:
|
|
self.state = word
|
|
default:
|
|
self.write_ch(ch)
|
|
}
|
|
case string_with_escapes:
|
|
switch ch {
|
|
case string_with_escapes_delim:
|
|
self.state = word
|
|
case escape_char:
|
|
self.write_escaped_ch()
|
|
default:
|
|
self.write_ch(ch)
|
|
}
|
|
}
|
|
}
|
|
switch self.state {
|
|
case word:
|
|
self.state = lex_normal
|
|
if self.buf.Len() > 0 {
|
|
return self.get_word()
|
|
}
|
|
case string_with_escapes, string_without_escapes:
|
|
self.state = lex_normal
|
|
ans.Trailer = self.buf.String()
|
|
ans.Pos = self.word_start
|
|
ans.Err = fmt.Errorf("Unterminated string at end of input")
|
|
return
|
|
case lex_normal:
|
|
|
|
}
|
|
return
|
|
}
|
|
|
|
// Split partitions a string into a slice of strings.
|
|
func Split(s string) (ans []string, err error) {
|
|
l := NewLexer(s)
|
|
var word Word
|
|
for {
|
|
word = l.Next()
|
|
if word.Err != nil {
|
|
return ans, word.Err
|
|
}
|
|
if word.Value == "" {
|
|
break
|
|
}
|
|
ans = append(ans, word.Value)
|
|
}
|
|
return
|
|
}
|
|
|
|
func Quote(s string) string {
|
|
if s == "" {
|
|
return s
|
|
}
|
|
if utils.MustCompile(`[^\w@%+=:,./-]`).MatchString(s) {
|
|
return "'" + strings.ReplaceAll(s, "'", "'\"'\"'") + "'"
|
|
}
|
|
return s
|
|
}
|
|
|
|
// SplitForCompletion partitions a string into a slice of strings. It differs from Split in being
|
|
// more relaxed about errors and also adding an empty string at the end if s ends with a Space.
|
|
func SplitForCompletion(s string) (argv []string, position_of_last_arg int) {
|
|
t := NewLexer(s)
|
|
argv = make([]string, 0, len(s)/4)
|
|
for {
|
|
word := t.Next()
|
|
if word.Value == "" {
|
|
if word.Trailer == "" {
|
|
trimmed := strings.TrimRight(s, " ")
|
|
if len(trimmed) < len(s) { // trailing spaces
|
|
pos := position_of_last_arg
|
|
if len(argv) > 0 {
|
|
pos += len(argv[len(argv)-1])
|
|
}
|
|
if pos < len(s) { // trailing whitespace
|
|
argv = append(argv, "")
|
|
position_of_last_arg += len(s) - pos + 1
|
|
}
|
|
}
|
|
} else {
|
|
argv = append(argv, word.Trailer)
|
|
position_of_last_arg = word.Pos
|
|
}
|
|
break
|
|
}
|
|
position_of_last_arg = word.Pos
|
|
argv = append(argv, word.Value)
|
|
}
|
|
return
|
|
}
|