diff --git a/tools/utils/shlex/shlex.go b/tools/utils/shlex/shlex.go
index 70f908ceb..36439b3ae 100644
--- a/tools/utils/shlex/shlex.go
+++ b/tools/utils/shlex/shlex.go
@@ -101,10 +101,24 @@ const (
 	CommentToken
 )
 
+func (t TokenType) String() string { // String returns the name of the token type constant as text
+	switch t {
+	default: // any value other than the three cases listed below
+		return "UnknownToken"
+	case WordToken:
+		return "WordToken"
+	case SpaceToken:
+		return "SpaceToken"
+	case CommentToken:
+		return "CommentToken"
+	}
+}
+
 // Lexer state machine states
 const (
 	startState           lexerState = iota // no runes have been seen
 	inWordState                            // processing regular runes in a word
+	inSpaceState                           // processing a run of space runes between words
 	escapingState                          // we have just consumed an escape rune; the next rune is literal
 	escapingQuotedState                    // we have just consumed an escape rune within a quoted string
 	quotingEscapingState                   // we are within a quoted string that supports escaping ("...")
@@ -157,8 +171,8 @@ func (l *Lexer) Next() (string, error) {
 		switch token.tokenType {
 		case WordToken:
 			return token.value, nil
-		case CommentToken:
-			// skip comments
+		case CommentToken, SpaceToken:
+			// skip comments and spaces; only words are returned from Next
 		default:
 			return "", fmt.Errorf("Unknown token type: %v", token.tokenType)
 		}
@@ -170,6 +184,11 @@ type Tokenizer struct {
 	input      io.RuneReader
 	classifier tokenClassifier
 	pos        int64
+	redo_rune  struct { // one-rune pushback slot, replayed by scanStream when sz > 0
+		char      rune           // the rune that was pushed back
+		sz        int            // size recorded for that rune; 0 marks the slot as empty
+		rune_type runeTokenClass // classification saved with the rune so it is not classified again
+	}
 }
 
 // NewTokenizer creates a new tokenizer from an input stream.
@@ -196,9 +215,22 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 	var err error
 	var sz int
 
+	unread_rune := func() { // push the current rune back so the next scan sees it again
+		t.redo_rune.sz = sz
+		t.redo_rune.char = nextRune
+		t.redo_rune.rune_type = nextRuneType
+		t.pos -= int64(sz) // presumably undoes an earlier pos advance for this rune; that code is not in this hunk
+	}
+
 	for {
-		nextRune, sz, err = t.input.ReadRune()
-		nextRuneType = t.classifier.ClassifyRune(nextRune)
+		if t.redo_rune.sz > 0 { // a rune was pushed back earlier: replay it instead of reading input
+			nextRune, sz = t.redo_rune.char, t.redo_rune.sz
+			nextRuneType = t.redo_rune.rune_type
+			t.redo_rune.sz = 0 // empty the slot so the rune is replayed only once
+		} else {
+			nextRune, sz, err = t.input.ReadRune()
+			nextRuneType = t.classifier.ClassifyRune(nextRune)
+		}
 
 		if err == io.EOF {
 			nextRuneType = eofRuneClass
@@ -218,6 +250,9 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 					}
 				case spaceRuneClass:
 					{
+						tokenType = SpaceToken // spaces now start a token of their own
+						value = append(value, nextRune)
+						state = inSpaceState
 					}
 				case escapingQuoteRuneClass:
 					{
@@ -247,6 +282,23 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 					}
 				}
 			}
+		case inSpaceState: // in a sequence of spaces separating words
+			{
+				switch nextRuneType {
+				case spaceRuneClass:
+					{
+						value = append(value, nextRune) // extend the current run of spaces
+					}
+				default:
+					{
+						token := &Token{
+							tokenType: tokenType,
+							value:     string(value)}
+						unread_rune() // the rune that ended the run is not part of this token
+						return token, err
+					}
+				}
+			}
 		case inWordState: // in a regular word
 			{
 				switch nextRuneType {
@@ -262,6 +314,7 @@ func (t *Tokenizer) scanStream() (*Token, error) {
 						token := &Token{
 							tokenType: tokenType,
 							value:     string(value)}
+						unread_rune() // keep the rune that ended the word so the next scan can process it
 						return token, err
 					}
 				case escapingQuoteRuneClass:
diff --git a/tools/utils/shlex/shlex_test.go b/tools/utils/shlex/shlex_test.go
index c53cff01a..034a41988 100644
--- a/tools/utils/shlex/shlex_test.go
+++ b/tools/utils/shlex/shlex_test.go
@@ -46,14 +46,23 @@ func TestTokenizer(t *testing.T) {
 	testInput := testString
 	expectedTokens := []*Token{
 		{WordToken, "one"},
+		{SpaceToken, " "},
 		{WordToken, "two"},
+		{SpaceToken, " "},
 		{WordToken, "three four"},
+		{SpaceToken, " "},
 		{WordToken, "five \"six\""},
+		{SpaceToken, " "},
 		{WordToken, "seven#eight"},
+		{SpaceToken, " "},
 		{CommentToken, " nine # ten"},
+		{SpaceToken, " "},
 		{WordToken, "eleven"},
+		{SpaceToken, " "},
 		{WordToken, "twelve\\"},
+		{SpaceToken, " "},
 		{WordToken, "thirteen=13"},
+		{SpaceToken, " "},
 		{WordToken, "fourteen/14"}}
 
 	tokenizer := NewTokenizer(strings.NewReader(testInput))
@@ -63,7 +72,7 @@ func TestTokenizer(t *testing.T) {
 			t.Error(err)
 		}
 		if !got.Equal(want) {
-			t.Errorf("Tokenizer.Next()[%v] of %q -> %v. Want: %v", i, testString, got, want)
+			t.Errorf("Tokenizer.Next()[%v] of %q -> %#v. Want: %#v", i, testString, got, want) // %#v prints the Go-syntax form, so whitespace-only values stay visible
 		}
 	}
 }