Make shlex_split always return a token

This matches the behavior of split() and is therefore more intuitive.
This commit is contained in:
Kovid Goyal
2024-09-02 17:30:18 +05:30
parent d9b1c8c04f
commit 913ce58fe3
3 changed files with 10 additions and 3 deletions

View File

@@ -1215,14 +1215,22 @@ def key_val_matcher(items: Iterable[tuple[str, str]], key_pat: 're.Pattern[str]'
def shlex_split(text: str, allow_ansi_quoted_strings: bool = False) -> Iterator[str]:
    """Lazily split *text* into words using ``Shlex``.

    Words are pulled one at a time from ``Shlex.next_word()`` until it
    reports a negative first element, which ends the iteration. Only the
    word (the second element of each result) is yielded.

    ``allow_ansi_quoted_strings`` is forwarded unchanged to ``Shlex``.

    At least one item is always produced: if the lexer yields no words at
    all, a single empty string is yielded instead.
    """
    lexer = Shlex(text, allow_ansi_quoted_strings)
    produced_any = False
    while True:
        item = lexer.next_word()
        if item[0] <= -1:
            # A negative first element signals that there are no more words.
            break
        produced_any = True
        yield item[1]
    if not produced_any:
        # Guarantee callers using next() always receive a token.
        yield ''
def shlex_split_with_positions(text: str, allow_ansi_quoted_strings: bool = False) -> Iterator[tuple[int, str]]:
    """Lazily split *text* into ``(position, word)`` pairs using ``Shlex``.

    Each result of ``Shlex.next_word()`` is yielded as-is until one arrives
    whose first element is negative, which ends the iteration.

    ``allow_ansi_quoted_strings`` is forwarded unchanged to ``Shlex``.

    At least one item is always produced: if the lexer yields no words at
    all, the single pair ``(0, '')`` is yielded instead.
    """
    lexer = Shlex(text, allow_ansi_quoted_strings)
    produced_any = False
    while True:
        item = lexer.next_word()
        if item[0] <= -1:
            # A negative first element signals that there are no more words.
            break
        produced_any = True
        yield item
    if not produced_any:
        # Guarantee callers always receive a token, positioned at the start.
        yield 0, ''
def timed_debug_print(*a: Any, sep: str = ' ', end: str = '\n') -> None:

View File

@@ -213,8 +213,7 @@ def compile_match_query(exp: str, is_simple: bool = True) -> MatchPatternType:
def decode_cmdline(x: str) -> str:
ctype, sep, val = x.partition('=')
if ctype == 'cmdline':
with suppress(StopIteration):
return next(shlex_split(val, True))
return next(shlex_split(val, True))
elif ctype == 'cmdline_url':
from urllib.parse import unquote
return unquote(val)

View File

@@ -629,7 +629,7 @@ class TestDataTypes(BaseTest):
r'x "ab"y \m': ((0, 'x'), (2, 'aby'), (8, 'm')),
r'''x'y"\z'1''': ((0, 'xy"\\z1'),),
r'\abc\ d': ((0, 'abc d'),),
'': (), ' ': (), ' \tabc\n\t\r ': ((2, 'abc'),),
'': ((0, ''),), ' ': ((0, ''),), ' \tabc\n\t\r ': ((2, 'abc'),),
"$'ab'": ((0, '$ab'),),
}.items():
actual = tuple(shlex_split_with_positions(q))