mirror of
https://github.com/kovidgoyal/kitty
synced 2026-06-08 14:18:26 +02:00
Port new shlex code to Go
This commit is contained in:
@@ -28,6 +28,9 @@ func TestParseSSHArgs(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
if len(ans) == 0 {
|
||||||
|
ans = []string{}
|
||||||
|
}
|
||||||
return ans
|
return ans
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -39,7 +42,7 @@ func TestParseSSHArgs(t *testing.T) {
|
|||||||
check := func(a, b any) {
|
check := func(a, b any) {
|
||||||
diff := cmp.Diff(a, b)
|
diff := cmp.Diff(a, b)
|
||||||
if diff != "" {
|
if diff != "" {
|
||||||
t.Fatalf("Unexpected value for args: %s\n%s", args, diff)
|
t.Fatalf("Unexpected value for args: %#v\n%s", args, diff)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
check(split(expected_ssh_args), ssh_args)
|
check(split(expected_ssh_args), ssh_args)
|
||||||
|
|||||||
@@ -109,7 +109,6 @@ next_word(Shlex *self, PyObject *args UNUSED) {
|
|||||||
switch(ch) {
|
switch(ch) {
|
||||||
case STRING_WITHOUT_ESCAPES_DELIM:
|
case STRING_WITHOUT_ESCAPES_DELIM:
|
||||||
set_state(self, WORD);
|
set_state(self, WORD);
|
||||||
if (self->buf_pos && self->state == NORMAL) return get_word(self);
|
|
||||||
break;
|
break;
|
||||||
default: write_ch(self, ch); break;
|
default: write_ch(self, ch); break;
|
||||||
} break;
|
} break;
|
||||||
@@ -117,13 +116,9 @@ next_word(Shlex *self, PyObject *args UNUSED) {
|
|||||||
switch(ch) {
|
switch(ch) {
|
||||||
case STRING_WITH_ESCAPES_DELIM:
|
case STRING_WITH_ESCAPES_DELIM:
|
||||||
set_state(self, WORD);
|
set_state(self, WORD);
|
||||||
if (self->buf_pos && self->state == NORMAL) return get_word(self);
|
|
||||||
break;
|
break;
|
||||||
case ESCAPE_CHAR:
|
case ESCAPE_CHAR:
|
||||||
if (self->src_pos < self->src_sz) {
|
write_escape_ch(self);
|
||||||
Py_UCS4 nch = PyUnicode_READ(self->kind, self->src_data, self->src_pos); self->src_pos++;
|
|
||||||
write_ch(self, nch);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
default: write_ch(self, ch); break;
|
default: write_ch(self, ch); break;
|
||||||
} break;
|
} break;
|
||||||
|
|||||||
@@ -12,419 +12,204 @@ To process a stream of strings:
|
|||||||
for ; token, err := l.Next(); err != nil {
|
for ; token, err := l.Next(); err != nil {
|
||||||
// process token
|
// process token
|
||||||
}
|
}
|
||||||
|
|
||||||
To access the raw token stream (which includes tokens for spaces):
|
|
||||||
|
|
||||||
t := NewTokenizer(os.Stdin)
|
|
||||||
for ; token, err := t.Next(); err != nil {
|
|
||||||
// process token
|
|
||||||
}
|
|
||||||
*/
|
*/
|
||||||
package shlex
|
package shlex
|
||||||
|
|
||||||
// Based on https://pkg.go.dev/github.com/google/shlex with many improvements
|
|
||||||
// Relicensed to GPLv3 since all my additions.changes are GPLv3 which makes the
|
|
||||||
// original work with was APL2 also GPLv3
|
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
"unicode/utf8"
|
||||||
)
|
)
|
||||||
|
|
||||||
// TokenType is a top-level token classification: A word, space, unknown.
|
type Word struct {
|
||||||
type TokenType int
|
Value string // The word is empty if EOF is reached
|
||||||
|
Pos int // The position in the input string of the word or the trailer
|
||||||
// runeTokenClass is the type of a UTF-8 character classification: A quote, space, escape.
|
Err error // Indicates an error (unterminated string or trailing unescaped backslash)
|
||||||
type runeTokenClass int
|
Trailer string // Extra trailing data such as an unterminated string or an unescaped backslash. Present only if Err != nil
|
||||||
|
|
||||||
// the internal state used by the lexer state machine
|
|
||||||
type lexerState int
|
|
||||||
|
|
||||||
// Token is a (type, value) pair representing a lexographical token.
|
|
||||||
type Token struct {
|
|
||||||
Type TokenType
|
|
||||||
Value string
|
|
||||||
Pos int64
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Named classes of UTF-8 runes
|
type lexer_state int
|
||||||
const (
|
|
||||||
spaceRunes = " \t\r\n"
|
|
||||||
escapingQuoteRunes = `"`
|
|
||||||
nonEscapingQuoteRunes = "'"
|
|
||||||
escapeRunes = `\`
|
|
||||||
)
|
|
||||||
|
|
||||||
// Classes of rune token
|
|
||||||
const (
|
|
||||||
unknownRuneClass runeTokenClass = iota
|
|
||||||
spaceRuneClass
|
|
||||||
escapingQuoteRuneClass
|
|
||||||
nonEscapingQuoteRuneClass
|
|
||||||
escapeRuneClass
|
|
||||||
eofRuneClass
|
|
||||||
)
|
|
||||||
|
|
||||||
// Classes of lexographic token
|
|
||||||
const (
|
|
||||||
UnknownToken TokenType = iota
|
|
||||||
WordToken
|
|
||||||
SpaceToken
|
|
||||||
)
|
|
||||||
|
|
||||||
func (t TokenType) String() string {
|
|
||||||
switch t {
|
|
||||||
default:
|
|
||||||
return "UnknownToken"
|
|
||||||
case WordToken:
|
|
||||||
return "WordToken"
|
|
||||||
case SpaceToken:
|
|
||||||
return "SpaceToken"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Lexer state machine states
|
// Lexer state machine states
|
||||||
const (
|
const (
|
||||||
startState lexerState = iota // no runes have been seen
|
lex_normal lexer_state = iota
|
||||||
inWordState // processing regular runes in a word
|
word
|
||||||
inSpaceState // processing runes in a space
|
string_without_escapes
|
||||||
escapingState // we have just consumed an escape rune; the next rune is literal
|
string_with_escapes
|
||||||
escapingQuotedState // we have just consumed an escape rune within a quoted string
|
|
||||||
quotingEscapingState // we are within a quoted string that supports escaping ("...")
|
|
||||||
quotingState // we are within a string that does not support escaping ('...')
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// tokenClassifier is used for classifying rune characters.
|
|
||||||
type tokenClassifier map[rune]runeTokenClass
|
|
||||||
|
|
||||||
func (typeMap tokenClassifier) addRuneClass(runes string, tokenType runeTokenClass) {
|
|
||||||
for _, runeChar := range runes {
|
|
||||||
typeMap[runeChar] = tokenType
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// newDefaultClassifier creates a new classifier for ASCII characters.
|
|
||||||
func newDefaultClassifier() tokenClassifier {
|
|
||||||
t := tokenClassifier{}
|
|
||||||
t.addRuneClass(spaceRunes, spaceRuneClass)
|
|
||||||
t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass)
|
|
||||||
t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass)
|
|
||||||
t.addRuneClass(escapeRunes, escapeRuneClass)
|
|
||||||
return t
|
|
||||||
}
|
|
||||||
|
|
||||||
// ClassifyRune classifiees a rune
|
|
||||||
func (t tokenClassifier) ClassifyRune(runeVal rune) runeTokenClass {
|
|
||||||
return t[runeVal]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Lexer turns an input stream into a sequence of tokens. Whitespace is skipped.
|
// Lexer turns an input stream into a sequence of tokens. Whitespace is skipped.
|
||||||
type Lexer Tokenizer
|
type Lexer struct {
|
||||||
|
state lexer_state
|
||||||
// NewLexer creates a new lexer from an input stream.
|
src string
|
||||||
func NewLexer(x io.RuneReader) *Lexer {
|
src_sz, src_pos, word_start int
|
||||||
|
buf strings.Builder
|
||||||
return (*Lexer)(NewTokenizer(x))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Next returns the next word, or an error. If there are no more words,
|
// NewLexer creates a new lexer from an input string.
|
||||||
// the error will be io.EOF.
|
func NewLexer(x string) *Lexer {
|
||||||
func (l *Lexer) Next() (string, error) {
|
return &Lexer{src: x, src_sz: len(x)}
|
||||||
for {
|
|
||||||
token, err := (*Tokenizer)(l).Next()
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
}
|
||||||
switch token.Type {
|
|
||||||
case WordToken:
|
func (self *Lexer) start_word() {
|
||||||
return token.Value, nil
|
self.buf.Reset()
|
||||||
case SpaceToken:
|
self.word_start = self.src_pos - 1
|
||||||
// skip spaces
|
}
|
||||||
|
|
||||||
|
func (self *Lexer) get_word() Word {
|
||||||
|
return Word{Pos: self.word_start, Value: self.buf.String()}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self *Lexer) write_ch(ch byte) {
|
||||||
|
self.buf.WriteByte(ch)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (self *Lexer) write_escaped_ch() bool {
|
||||||
|
ch, count := utf8.DecodeRuneInString(self.src[self.src_pos:])
|
||||||
|
if count > 0 {
|
||||||
|
self.src_pos += count
|
||||||
|
if ch != utf8.RuneError {
|
||||||
|
self.buf.WriteRune(ch)
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Next returns the next word. At EOF Word.Value will be ""
|
||||||
|
func (self *Lexer) Next() (ans Word) {
|
||||||
|
const string_with_escapes_delim = '"'
|
||||||
|
const string_without_escapes_delim = '\''
|
||||||
|
const escape_char = '\\'
|
||||||
|
for self.src_pos < self.src_sz {
|
||||||
|
ch := self.src[self.src_pos]
|
||||||
|
self.src_pos++
|
||||||
|
switch self.state {
|
||||||
|
case lex_normal:
|
||||||
|
switch ch {
|
||||||
|
case ' ', '\n', '\r', '\t':
|
||||||
|
case string_with_escapes_delim:
|
||||||
|
self.state = string_with_escapes
|
||||||
|
self.start_word()
|
||||||
|
case string_without_escapes_delim:
|
||||||
|
self.state = string_without_escapes
|
||||||
|
self.start_word()
|
||||||
|
case escape_char:
|
||||||
|
self.start_word()
|
||||||
|
if !self.write_escaped_ch() {
|
||||||
|
ans.Trailer = "\\"
|
||||||
|
ans.Err = fmt.Errorf("Extra backslash at end of input")
|
||||||
|
ans.Pos = self.word_start
|
||||||
|
return
|
||||||
|
}
|
||||||
|
self.state = word
|
||||||
default:
|
default:
|
||||||
return "", fmt.Errorf("Unknown token type: %s", token.Type)
|
self.state = word
|
||||||
|
self.start_word()
|
||||||
|
self.write_ch(ch)
|
||||||
}
|
}
|
||||||
|
case word:
|
||||||
|
switch ch {
|
||||||
|
case ' ', '\n', '\r', '\t':
|
||||||
|
self.state = lex_normal
|
||||||
|
if self.buf.Len() > 0 {
|
||||||
|
return self.get_word()
|
||||||
}
|
}
|
||||||
}
|
case string_with_escapes_delim:
|
||||||
|
self.state = string_with_escapes
|
||||||
// Tokenizer turns an input stream into a sequence of typed tokens
|
case string_without_escapes_delim:
|
||||||
type Tokenizer struct {
|
self.state = string_without_escapes
|
||||||
input io.RuneReader
|
case escape_char:
|
||||||
classifier tokenClassifier
|
if !self.write_escaped_ch() {
|
||||||
pos int64
|
ans.Pos = self.word_start
|
||||||
redo_rune struct {
|
ans.Trailer = self.buf.String() + "\\"
|
||||||
char rune
|
ans.Err = fmt.Errorf("Extra backslash at end of input")
|
||||||
sz int
|
return
|
||||||
rune_type runeTokenClass
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewTokenizer creates a new tokenizer from an input stream.
|
|
||||||
func NewTokenizer(input io.RuneReader) *Tokenizer {
|
|
||||||
classifier := newDefaultClassifier()
|
|
||||||
return &Tokenizer{
|
|
||||||
input: input,
|
|
||||||
classifier: classifier}
|
|
||||||
}
|
|
||||||
|
|
||||||
var ErrTrailingEscape error = errors.New("EOF found after escape character")
|
|
||||||
var ErrTrailingQuoteEscape error = errors.New("EOF found after escape character for double quote")
|
|
||||||
var ErrUnclosedDoubleQuote error = errors.New("EOF found when expecting closing double quote")
|
|
||||||
var ErrUnclosedSingleQuote error = errors.New("EOF found when expecting closing single quote")
|
|
||||||
|
|
||||||
// scanStream scans the stream for the next token using the internal state machine.
|
|
||||||
// It will panic if it encounters a rune which it does not know how to handle.
|
|
||||||
func (t *Tokenizer) scanStream() (*Token, error) {
|
|
||||||
state := startState
|
|
||||||
var tokenType TokenType
|
|
||||||
var nextRune rune
|
|
||||||
var nextRuneType runeTokenClass
|
|
||||||
var err error
|
|
||||||
var sz int
|
|
||||||
value := strings.Builder{}
|
|
||||||
pos_at_start := t.pos
|
|
||||||
|
|
||||||
unread_rune := func() {
|
|
||||||
t.redo_rune.sz = sz
|
|
||||||
t.redo_rune.char = nextRune
|
|
||||||
t.redo_rune.rune_type = nextRuneType
|
|
||||||
t.pos -= int64(sz)
|
|
||||||
}
|
|
||||||
|
|
||||||
token := func() *Token {
|
|
||||||
return &Token{tokenType, value.String(), pos_at_start}
|
|
||||||
}
|
|
||||||
|
|
||||||
for {
|
|
||||||
if t.redo_rune.sz > 0 {
|
|
||||||
nextRune, sz = t.redo_rune.char, t.redo_rune.sz
|
|
||||||
nextRuneType = t.redo_rune.rune_type
|
|
||||||
t.redo_rune.sz = 0
|
|
||||||
} else {
|
|
||||||
nextRune, sz, err = t.input.ReadRune()
|
|
||||||
nextRuneType = t.classifier.ClassifyRune(nextRune)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err == io.EOF {
|
|
||||||
nextRuneType = eofRuneClass
|
|
||||||
err = nil
|
|
||||||
} else if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
t.pos += int64(sz)
|
|
||||||
|
|
||||||
switch state {
|
|
||||||
case startState: // no runes read yet
|
|
||||||
{
|
|
||||||
switch nextRuneType {
|
|
||||||
case eofRuneClass:
|
|
||||||
{
|
|
||||||
return nil, io.EOF
|
|
||||||
}
|
|
||||||
case spaceRuneClass:
|
|
||||||
{
|
|
||||||
tokenType = SpaceToken
|
|
||||||
value.WriteRune(nextRune)
|
|
||||||
state = inSpaceState
|
|
||||||
}
|
|
||||||
case escapingQuoteRuneClass:
|
|
||||||
{
|
|
||||||
tokenType = WordToken
|
|
||||||
state = quotingEscapingState
|
|
||||||
}
|
|
||||||
case nonEscapingQuoteRuneClass:
|
|
||||||
{
|
|
||||||
tokenType = WordToken
|
|
||||||
state = quotingState
|
|
||||||
}
|
|
||||||
case escapeRuneClass:
|
|
||||||
{
|
|
||||||
tokenType = WordToken
|
|
||||||
state = escapingState
|
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
{
|
self.write_ch(ch)
|
||||||
tokenType = WordToken
|
|
||||||
value.WriteRune(nextRune)
|
|
||||||
state = inWordState
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case inSpaceState: // in a sequence of spaces separating words
|
|
||||||
{
|
|
||||||
switch nextRuneType {
|
|
||||||
case spaceRuneClass:
|
|
||||||
{
|
|
||||||
value.WriteRune(nextRune)
|
|
||||||
}
|
}
|
||||||
|
case string_without_escapes:
|
||||||
|
switch ch {
|
||||||
|
case string_without_escapes_delim:
|
||||||
|
self.state = word
|
||||||
default:
|
default:
|
||||||
{
|
self.write_ch(ch)
|
||||||
unread_rune()
|
|
||||||
return token(), err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case inWordState: // in a regular word
|
|
||||||
{
|
|
||||||
switch nextRuneType {
|
|
||||||
case eofRuneClass:
|
|
||||||
{
|
|
||||||
return token(), err
|
|
||||||
}
|
|
||||||
case spaceRuneClass:
|
|
||||||
{
|
|
||||||
unread_rune()
|
|
||||||
return token(), err
|
|
||||||
}
|
|
||||||
case escapingQuoteRuneClass:
|
|
||||||
{
|
|
||||||
state = quotingEscapingState
|
|
||||||
}
|
|
||||||
case nonEscapingQuoteRuneClass:
|
|
||||||
{
|
|
||||||
state = quotingState
|
|
||||||
}
|
|
||||||
case escapeRuneClass:
|
|
||||||
{
|
|
||||||
state = escapingState
|
|
||||||
}
|
}
|
||||||
|
case string_with_escapes:
|
||||||
|
switch ch {
|
||||||
|
case string_with_escapes_delim:
|
||||||
|
self.state = word
|
||||||
|
case escape_char:
|
||||||
|
self.write_escaped_ch()
|
||||||
default:
|
default:
|
||||||
{
|
self.write_ch(ch)
|
||||||
value.WriteRune(nextRune)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case escapingState: // the rune after an escape character
|
|
||||||
{
|
|
||||||
switch nextRuneType {
|
|
||||||
case eofRuneClass:
|
|
||||||
{
|
|
||||||
err = ErrTrailingEscape
|
|
||||||
return token(), err
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
state = inWordState
|
|
||||||
value.WriteRune(nextRune)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case escapingQuotedState: // the next rune after an escape character, in double quotes
|
|
||||||
{
|
|
||||||
switch nextRuneType {
|
|
||||||
case eofRuneClass:
|
|
||||||
{
|
|
||||||
err = ErrTrailingQuoteEscape
|
|
||||||
return token(), err
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
state = quotingEscapingState
|
|
||||||
value.WriteRune(nextRune)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case quotingEscapingState: // in escaping double quotes
|
|
||||||
{
|
|
||||||
switch nextRuneType {
|
|
||||||
case eofRuneClass:
|
|
||||||
{
|
|
||||||
err = ErrUnclosedDoubleQuote
|
|
||||||
return token(), err
|
|
||||||
}
|
|
||||||
case escapingQuoteRuneClass:
|
|
||||||
{
|
|
||||||
state = inWordState
|
|
||||||
}
|
|
||||||
case escapeRuneClass:
|
|
||||||
{
|
|
||||||
state = escapingQuotedState
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
value.WriteRune(nextRune)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case quotingState: // in non-escaping single quotes
|
|
||||||
{
|
|
||||||
switch nextRuneType {
|
|
||||||
case eofRuneClass:
|
|
||||||
{
|
|
||||||
err = ErrUnclosedSingleQuote
|
|
||||||
return token(), err
|
|
||||||
}
|
|
||||||
case nonEscapingQuoteRuneClass:
|
|
||||||
{
|
|
||||||
state = inWordState
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
value.WriteRune(nextRune)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
return nil, fmt.Errorf("Unexpected state: %v", state)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
switch self.state {
|
||||||
|
case word:
|
||||||
|
self.state = lex_normal
|
||||||
|
if self.buf.Len() > 0 {
|
||||||
|
return self.get_word()
|
||||||
}
|
}
|
||||||
|
case string_with_escapes, string_without_escapes:
|
||||||
|
self.state = lex_normal
|
||||||
|
ans.Trailer = self.buf.String()
|
||||||
|
ans.Pos = self.word_start
|
||||||
|
ans.Err = fmt.Errorf("Unterminated string at end of input")
|
||||||
|
return
|
||||||
|
case lex_normal:
|
||||||
|
|
||||||
// Next returns the next token in the stream.
|
|
||||||
func (t *Tokenizer) Next() (*Token, error) {
|
|
||||||
return t.scanStream()
|
|
||||||
}
|
}
|
||||||
|
return
|
||||||
// Pos returns the current position in the string as a byte offset
|
|
||||||
func (t *Tokenizer) Pos() int64 {
|
|
||||||
return t.pos
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Split partitions a string into a slice of strings.
|
// Split partitions a string into a slice of strings.
|
||||||
func Split(s string) ([]string, error) {
|
func Split(s string) (ans []string, err error) {
|
||||||
l := NewLexer(strings.NewReader(s))
|
l := NewLexer(s)
|
||||||
subStrings := make([]string, 0)
|
var word Word
|
||||||
for {
|
for {
|
||||||
word, err := l.Next()
|
word = l.Next()
|
||||||
if err != nil {
|
if word.Err != nil {
|
||||||
if err == io.EOF {
|
return ans, word.Err
|
||||||
return subStrings, nil
|
|
||||||
}
|
}
|
||||||
return subStrings, err
|
if word.Value == "" {
|
||||||
|
break
|
||||||
}
|
}
|
||||||
subStrings = append(subStrings, word)
|
ans = append(ans, word.Value)
|
||||||
}
|
}
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// SplitForCompletion partitions a string into a slice of strings. It differs from Split in being
|
// SplitForCompletion partitions a string into a slice of strings. It differs from Split in being
|
||||||
// more relaxed about errors and also adding an empty string at the end if s ends with a SpaceToken.
|
// more relaxed about errors and also adding an empty string at the end if s ends with a Space.
|
||||||
func SplitForCompletion(s string) (argv []string, position_of_last_arg int) {
|
func SplitForCompletion(s string) (argv []string, position_of_last_arg int) {
|
||||||
t := NewTokenizer(strings.NewReader(s))
|
t := NewLexer(s)
|
||||||
argv = make([]string, 0, len(s)/4)
|
argv = make([]string, 0, len(s)/4)
|
||||||
token := &Token{}
|
|
||||||
for {
|
for {
|
||||||
ntoken, err := t.Next()
|
word := t.Next()
|
||||||
if err == io.EOF {
|
if word.Value == "" {
|
||||||
if token.Type == SpaceToken {
|
if word.Trailer == "" {
|
||||||
|
trimmed := strings.TrimRight(s, " ")
|
||||||
|
if len(trimmed) < len(s) { // trailing spaces
|
||||||
|
pos := position_of_last_arg
|
||||||
|
if len(argv) > 0 {
|
||||||
|
pos += len(argv[len(argv)-1])
|
||||||
|
}
|
||||||
|
if pos < len(s) { // trailing whitespace
|
||||||
argv = append(argv, "")
|
argv = append(argv, "")
|
||||||
token.Pos += int64(len(token.Value))
|
position_of_last_arg += len(s) - pos + 1
|
||||||
}
|
|
||||||
return argv, int(token.Pos)
|
|
||||||
}
|
|
||||||
if ntoken == nil {
|
|
||||||
return []string{}, -1
|
|
||||||
}
|
|
||||||
switch ntoken.Type {
|
|
||||||
case WordToken:
|
|
||||||
argv = append(argv, ntoken.Value)
|
|
||||||
case SpaceToken:
|
|
||||||
// skip spaces
|
|
||||||
default:
|
|
||||||
return []string{}, -1
|
|
||||||
}
|
|
||||||
token = ntoken
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
argv = append(argv, word.Trailer)
|
||||||
|
position_of_last_arg = word.Pos
|
||||||
|
}
|
||||||
|
break
|
||||||
|
}
|
||||||
|
position_of_last_arg = word.Pos
|
||||||
|
argv = append(argv, word.Value)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
package shlex
|
package shlex
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"strings"
|
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/google/go-cmp/cmp"
|
"github.com/google/go-cmp/cmp"
|
||||||
@@ -13,78 +12,24 @@ var (
|
|||||||
testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten eleven 'twelve\\' thirteen=13 fourteen/14"
|
testString = "one two \"three four\" \"five \\\"six\\\"\" seven#eight # nine # ten eleven 'twelve\\' thirteen=13 fourteen/14"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestClassifier(t *testing.T) {
|
|
||||||
classifier := newDefaultClassifier()
|
|
||||||
tests := map[rune]runeTokenClass{
|
|
||||||
' ': spaceRuneClass,
|
|
||||||
'"': escapingQuoteRuneClass,
|
|
||||||
'\'': nonEscapingQuoteRuneClass}
|
|
||||||
for runeChar, want := range tests {
|
|
||||||
got := classifier.ClassifyRune(runeChar)
|
|
||||||
if got != want {
|
|
||||||
t.Errorf("ClassifyRune(%v) -> %v. Want: %v", runeChar, got, want)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestTokenizer(t *testing.T) {
|
|
||||||
testInput := testString
|
|
||||||
expectedTokens := []*Token{
|
|
||||||
{WordToken, "one", 0},
|
|
||||||
{SpaceToken, " ", 3},
|
|
||||||
{WordToken, "two", 4},
|
|
||||||
{SpaceToken, " ", 7},
|
|
||||||
{WordToken, "three four", 8},
|
|
||||||
{SpaceToken, " ", 20},
|
|
||||||
{WordToken, "five \"six\"", 21},
|
|
||||||
{SpaceToken, " ", 35},
|
|
||||||
{WordToken, "seven#eight", 36},
|
|
||||||
{SpaceToken, " ", 47},
|
|
||||||
{WordToken, "#", 48},
|
|
||||||
{SpaceToken, " ", 49},
|
|
||||||
{WordToken, "nine", 50},
|
|
||||||
{SpaceToken, " ", 54},
|
|
||||||
{WordToken, "#", 55},
|
|
||||||
{SpaceToken, " ", 56},
|
|
||||||
{WordToken, "ten", 57},
|
|
||||||
{SpaceToken, " ", 60},
|
|
||||||
{WordToken, "eleven", 61},
|
|
||||||
{SpaceToken, " ", 67},
|
|
||||||
{WordToken, "twelve\\", 68},
|
|
||||||
{SpaceToken, " ", 77},
|
|
||||||
{WordToken, "thirteen=13", 78},
|
|
||||||
{SpaceToken, " ", 89},
|
|
||||||
{WordToken, "fourteen/14", 90},
|
|
||||||
}
|
|
||||||
|
|
||||||
tokenizer := NewTokenizer(strings.NewReader(testInput))
|
|
||||||
for i, want := range expectedTokens {
|
|
||||||
got, err := tokenizer.Next()
|
|
||||||
if err != nil {
|
|
||||||
t.Error(err)
|
|
||||||
}
|
|
||||||
if diff := cmp.Diff(want, got); diff != "" {
|
|
||||||
t.Fatalf("Tokenizer.Next()[%v] of: %s:\n%s", i, testString, diff)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestLexer(t *testing.T) {
|
func TestLexer(t *testing.T) {
|
||||||
testInput := testString
|
testInput := testString
|
||||||
expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "#", "nine", "#", "ten", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
|
expectedStrings := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "#", "nine", "#", "ten", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
|
||||||
|
|
||||||
lexer := NewLexer(strings.NewReader(testInput))
|
lexer := NewLexer(testInput)
|
||||||
for i, want := range expectedStrings {
|
for i, want := range expectedStrings {
|
||||||
got, err := lexer.Next()
|
got := lexer.Next()
|
||||||
if err != nil {
|
if got.Value != want {
|
||||||
t.Error(err)
|
|
||||||
}
|
|
||||||
if got != want {
|
|
||||||
t.Errorf("Lexer.Next()[%v] of %q -> %v. Want: %v", i, testString, got, want)
|
t.Errorf("Lexer.Next()[%v] of %q -> %v. Want: %v", i, testString, got, want)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type Tok struct {
|
||||||
|
Pos int
|
||||||
|
Val string
|
||||||
|
}
|
||||||
|
|
||||||
func TestSplit(t *testing.T) {
|
func TestSplit(t *testing.T) {
|
||||||
want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "#", "nine", "#", "ten", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
|
want := []string{"one", "two", "three four", "five \"six\"", "seven#eight", "#", "nine", "#", "ten", "eleven", "twelve\\", "thirteen=13", "fourteen/14"}
|
||||||
got, err := Split(testString)
|
got, err := Split(testString)
|
||||||
@@ -99,6 +44,43 @@ func TestSplit(t *testing.T) {
|
|||||||
t.Errorf("Split(%q)[%v] -> %v. Want: %v", testString, i, got[i], want[i])
|
t.Errorf("Split(%q)[%v] -> %v. Want: %v", testString, i, got[i], want[i])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for _, x := range []string{
|
||||||
|
`abc\`, `\`, `'abc`, `'`, `"`, `asd\`,
|
||||||
|
} {
|
||||||
|
_, err := Split(x)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("Failed to get an error for: %#v", x)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
s := func(q string) (ans []Tok) {
|
||||||
|
l := NewLexer(q)
|
||||||
|
for {
|
||||||
|
w := l.Next()
|
||||||
|
if w.Err != nil {
|
||||||
|
t.Fatal(w.Err)
|
||||||
|
}
|
||||||
|
if w.Value == "" {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
ans = append(ans, Tok{w.Pos, w.Value})
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for q, expected := range map[string][]Tok{
|
||||||
|
`"ab"`: {{0, "ab"}},
|
||||||
|
`x "ab"y \m`: {{0, `x`}, {2, `aby`}, {8, `m`}},
|
||||||
|
`x'y"\z'1`: {{0, `xy"\z1`}},
|
||||||
|
`\abc\ d`: {{0, `abc d`}},
|
||||||
|
``: nil,
|
||||||
|
` `: nil,
|
||||||
|
" \tabc\n\t\r ": {{2, "abc"}},
|
||||||
|
} {
|
||||||
|
if diff := cmp.Diff(expected, s(q)); diff != "" {
|
||||||
|
t.Fatalf("Failed for string: %#v\n%s", q, diff)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestSplitForCompletion(t *testing.T) {
|
func TestSplitForCompletion(t *testing.T) {
|
||||||
@@ -108,7 +90,7 @@ func TestSplitForCompletion(t *testing.T) {
|
|||||||
t.Fatalf("Failed to split: %s\n%s", cmdline, diff)
|
t.Fatalf("Failed to split: %s\n%s", cmdline, diff)
|
||||||
}
|
}
|
||||||
if last_arg_pos != actual_pos {
|
if last_arg_pos != actual_pos {
|
||||||
t.Fatalf("Failed to split: %s\n Last arg pos: %d != %d", cmdline, last_arg_pos, actual_pos)
|
t.Fatalf("Failed to split: %#v\n Last arg pos: %d != %d", cmdline, last_arg_pos, actual_pos)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
test("a b", 2, "a", "b")
|
test("a b", 2, "a", "b")
|
||||||
|
|||||||
Reference in New Issue
Block a user