mirror of
https://github.com/kovidgoyal/kitty
synced 2026-06-08 14:18:26 +02:00
Add support for ANSI-C quoted strings to shlex
This commit is contained in:
@@ -1527,7 +1527,7 @@ class AES256GCMDecrypt:
|
|||||||
|
|
||||||
|
|
||||||
class Shlex:
|
class Shlex:
|
||||||
def __init__(self, src: str): ...
|
def __init__(self, src: str, allow_ansi_quoted_strings: bool = False): ...
|
||||||
def next_word(self) -> Tuple[int, str]: ...
|
def next_word(self) -> Tuple[int, str]: ...
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
123
kitty/shlex.c
123
kitty/shlex.c
@@ -7,13 +7,13 @@
|
|||||||
|
|
||||||
#include "data-types.h"
|
#include "data-types.h"
|
||||||
|
|
||||||
typedef enum { NORMAL, WORD, STRING_WITHOUT_ESCAPES, STRING_WITH_ESCAPES, } State;
|
typedef enum { NORMAL, WORD, STRING_WITHOUT_ESCAPES, STRING_WITH_ESCAPES, ANSI_C_QUOTED } State;
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
|
|
||||||
PyObject *src, *buf;
|
PyObject *src, *buf;
|
||||||
Py_ssize_t src_sz, src_pos, word_start, buf_pos;
|
Py_ssize_t src_sz, src_pos, word_start, buf_pos;
|
||||||
int kind; void *src_data, *buf_data;
|
int kind, support_ansi_c_quoting, output_kind; void *src_data, *buf_data;
|
||||||
State state;
|
State state;
|
||||||
} Shlex;
|
} Shlex;
|
||||||
|
|
||||||
@@ -24,15 +24,17 @@ new_shlex_object(PyTypeObject *type, PyObject *args, PyObject UNUSED *kwds) {
|
|||||||
self = (Shlex *)type->tp_alloc(type, 0);
|
self = (Shlex *)type->tp_alloc(type, 0);
|
||||||
if (self) {
|
if (self) {
|
||||||
PyObject *src;
|
PyObject *src;
|
||||||
if (!PyArg_ParseTuple(args, "U", &src)) return NULL;
|
self->support_ansi_c_quoting = 0;
|
||||||
|
if (!PyArg_ParseTuple(args, "U|p", &src, &self->support_ansi_c_quoting)) return NULL;
|
||||||
self->src_sz = PyUnicode_GET_LENGTH(src);
|
self->src_sz = PyUnicode_GET_LENGTH(src);
|
||||||
self->buf = PyUnicode_New(self->src_sz, PyUnicode_MAX_CHAR_VALUE(src));
|
self->buf = PyUnicode_New(self->src_sz, self->support_ansi_c_quoting ? 1114111 : PyUnicode_MAX_CHAR_VALUE(src));
|
||||||
if (self->buf) {
|
if (self->buf) {
|
||||||
self->src = src;
|
self->src = src;
|
||||||
Py_INCREF(src);
|
Py_INCREF(src);
|
||||||
self->kind = PyUnicode_KIND(src);
|
self->kind = PyUnicode_KIND(src);
|
||||||
self->src_data = PyUnicode_DATA(src);
|
self->src_data = PyUnicode_DATA(src);
|
||||||
self->buf_data = PyUnicode_DATA(self->buf);
|
self->buf_data = PyUnicode_DATA(self->buf);
|
||||||
|
self->output_kind = PyUnicode_KIND(self->buf);
|
||||||
} else Py_CLEAR(self);
|
} else Py_CLEAR(self);
|
||||||
}
|
}
|
||||||
return (PyObject*) self;
|
return (PyObject*) self;
|
||||||
@@ -57,7 +59,7 @@ start_word(Shlex *self) {
|
|||||||
|
|
||||||
static void
|
static void
|
||||||
write_ch(Shlex *self, Py_UCS4 ch) {
|
write_ch(Shlex *self, Py_UCS4 ch) {
|
||||||
PyUnicode_WRITE(self->kind, self->buf_data, self->buf_pos, ch); self->buf_pos++;
|
PyUnicode_WRITE(self->output_kind, self->buf_data, self->buf_pos, ch); self->buf_pos++;
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
@@ -66,16 +68,93 @@ get_word(Shlex *self) {
|
|||||||
return Py_BuildValue("nN", self->word_start, PyUnicode_Substring(self->buf, 0, pos));
|
return Py_BuildValue("nN", self->word_start, PyUnicode_Substring(self->buf, 0, pos));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static Py_UCS4
|
||||||
|
read_ch(Shlex *self) {
|
||||||
|
Py_UCS4 nch = PyUnicode_READ(self->kind, self->src_data, self->src_pos); self->src_pos++;
|
||||||
|
return nch;
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
write_escape_ch(Shlex *self) {
|
write_escape_ch(Shlex *self) {
|
||||||
if (self->src_pos < self->src_sz) {
|
if (self->src_pos < self->src_sz) {
|
||||||
Py_UCS4 nch = PyUnicode_READ(self->kind, self->src_data, self->src_pos); self->src_pos++;
|
Py_UCS4 nch = read_ch(self);
|
||||||
write_ch(self, nch);
|
write_ch(self, nch);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
write_control_ch(Shlex *self) {
|
||||||
|
if (self->src_pos >= self->src_sz) { PyErr_SetString(PyExc_ValueError, "Trailing \\c escape at end of input data"); return false; }
|
||||||
|
Py_UCS4 ch = read_ch(self);
|
||||||
|
write_ch(self, ch & 31);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
read_valid_digits(Shlex *self, int max, char *output, bool(*is_valid)(Py_UCS4 ch)) {
|
||||||
|
for (int i = 0; i < max && self->src_pos < self->src_sz; i++) {
|
||||||
|
Py_UCS4 ch = PyUnicode_READ(self->kind, self->src_data, self->src_pos);
|
||||||
|
if (!is_valid(ch)) break;
|
||||||
|
output[0] = ch;
|
||||||
|
self->src_pos++; output++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
is_octal_digit(Py_UCS4 ch) { return '0' <= ch && ch <= '7'; }
|
||||||
|
|
||||||
|
static bool
|
||||||
|
is_hex_digit(Py_UCS4 ch) { return ('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f') || ('A' <= ch && ch <= 'F'); }
|
||||||
|
|
||||||
|
static void
|
||||||
|
write_octal_ch(Shlex *self, Py_UCS4 ch) {
|
||||||
|
char chars[4] = {ch, 0, 0, 0};
|
||||||
|
read_valid_digits(self, 2, chars + 1, is_octal_digit);
|
||||||
|
write_ch(self, strtol(chars, NULL, 8));
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
write_unicode_ch(Shlex *self, int max) {
|
||||||
|
char chars[16] = {0};
|
||||||
|
read_valid_digits(self, max, chars, is_hex_digit);
|
||||||
|
if (!chars[0]) { PyErr_SetString(PyExc_ValueError, "Trailing unicode escape at end of input data"); return false; }
|
||||||
|
write_ch(self, strtol(chars, NULL, 16));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
write_ansi_escape_ch(Shlex *self) {
|
||||||
|
if (self->src_pos >= self->src_sz) { PyErr_SetString(PyExc_ValueError, "Trailing backslash at end of input data"); return false; }
|
||||||
|
Py_UCS4 ch = read_ch(self);
|
||||||
|
switch(ch) {
|
||||||
|
case 'a': write_ch(self, '\a'); return true;
|
||||||
|
case 'b': write_ch(self, '\b'); return true;
|
||||||
|
case 'e': case 'E': write_ch(self, 0x1b); return true;
|
||||||
|
case 'f': write_ch(self, '\f'); return true;
|
||||||
|
case 'n': write_ch(self, '\n'); return true;
|
||||||
|
case 'r': write_ch(self, '\r'); return true;
|
||||||
|
case 't': write_ch(self, '\t'); return true;
|
||||||
|
case 'v': write_ch(self, '\v'); return true;
|
||||||
|
case '\\': write_ch(self, '\\'); return true;
|
||||||
|
case '\'': write_ch(self, '\''); return true;
|
||||||
|
case '\"': write_ch(self, '\"'); return true;
|
||||||
|
case '\?': write_ch(self, '\?'); return true;
|
||||||
|
|
||||||
|
case 'c': return write_control_ch(self);
|
||||||
|
case 'x': return write_unicode_ch(self, 2);
|
||||||
|
case 'u': return write_unicode_ch(self, 4);
|
||||||
|
case 'U': return write_unicode_ch(self, 8);
|
||||||
|
START_ALLOW_CASE_RANGE
|
||||||
|
case '0' ... '7': write_octal_ch(self, ch); return true;
|
||||||
|
END_ALLOW_CASE_RANGE
|
||||||
|
|
||||||
|
default:
|
||||||
|
write_ch(self, ch); return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
set_state(Shlex *self, State s) {
|
set_state(Shlex *self, State s) {
|
||||||
self->state = s;
|
self->state = s;
|
||||||
@@ -85,8 +164,9 @@ static PyObject*
|
|||||||
next_word(Shlex *self, PyObject *args UNUSED) {
|
next_word(Shlex *self, PyObject *args UNUSED) {
|
||||||
#define write_escaped_or_fail() if (!write_escape_ch(self)) { PyErr_SetString(PyExc_ValueError, "Trailing backslash at end of input data"); return NULL; }
|
#define write_escaped_or_fail() if (!write_escape_ch(self)) { PyErr_SetString(PyExc_ValueError, "Trailing backslash at end of input data"); return NULL; }
|
||||||
|
|
||||||
|
Py_UCS4 prev_word_ch = 0;
|
||||||
while (self->src_pos < self->src_sz) {
|
while (self->src_pos < self->src_sz) {
|
||||||
Py_UCS4 ch = PyUnicode_READ(self->kind, self->src_data, self->src_pos); self->src_pos++;
|
Py_UCS4 ch = read_ch(self);
|
||||||
switch(self->state) {
|
switch(self->state) {
|
||||||
case NORMAL:
|
case NORMAL:
|
||||||
switch(ch) {
|
switch(ch) {
|
||||||
@@ -94,32 +174,35 @@ next_word(Shlex *self, PyObject *args UNUSED) {
|
|||||||
case STRING_WITH_ESCAPES_DELIM: set_state(self, STRING_WITH_ESCAPES); start_word(self); break;
|
case STRING_WITH_ESCAPES_DELIM: set_state(self, STRING_WITH_ESCAPES); start_word(self); break;
|
||||||
case STRING_WITHOUT_ESCAPES_DELIM: set_state(self, STRING_WITHOUT_ESCAPES); start_word(self); break;
|
case STRING_WITHOUT_ESCAPES_DELIM: set_state(self, STRING_WITHOUT_ESCAPES); start_word(self); break;
|
||||||
case ESCAPE_CHAR: start_word(self); write_escaped_or_fail(); set_state(self, WORD); break;
|
case ESCAPE_CHAR: start_word(self); write_escaped_or_fail(); set_state(self, WORD); break;
|
||||||
default: set_state(self, WORD); start_word(self); write_ch(self, ch); break;
|
default: set_state(self, WORD); start_word(self); write_ch(self, ch); prev_word_ch = ch; break;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case WORD:
|
case WORD:
|
||||||
switch(ch) {
|
switch(ch) {
|
||||||
case WHITESPACE: set_state(self, NORMAL); if (self->buf_pos) return get_word(self); break;
|
case WHITESPACE: set_state(self, NORMAL); if (self->buf_pos) return get_word(self); break;
|
||||||
case STRING_WITH_ESCAPES_DELIM: set_state(self, STRING_WITH_ESCAPES); break;
|
case STRING_WITH_ESCAPES_DELIM: set_state(self, STRING_WITH_ESCAPES); break;
|
||||||
case STRING_WITHOUT_ESCAPES_DELIM: set_state(self, STRING_WITHOUT_ESCAPES); break;
|
case STRING_WITHOUT_ESCAPES_DELIM:
|
||||||
|
if (self->support_ansi_c_quoting && prev_word_ch == '$') { self->buf_pos--; set_state(self, ANSI_C_QUOTED); }
|
||||||
|
else set_state(self, STRING_WITHOUT_ESCAPES);
|
||||||
|
break;
|
||||||
case ESCAPE_CHAR: write_escaped_or_fail(); break;
|
case ESCAPE_CHAR: write_escaped_or_fail(); break;
|
||||||
default: write_ch(self, ch); break;
|
default: write_ch(self, ch); prev_word_ch = ch; break;
|
||||||
} break;
|
} break;
|
||||||
case STRING_WITHOUT_ESCAPES:
|
case STRING_WITHOUT_ESCAPES:
|
||||||
switch(ch) {
|
switch(ch) {
|
||||||
case STRING_WITHOUT_ESCAPES_DELIM:
|
case STRING_WITHOUT_ESCAPES_DELIM: set_state(self, WORD); break;
|
||||||
set_state(self, WORD);
|
|
||||||
break;
|
|
||||||
default: write_ch(self, ch); break;
|
default: write_ch(self, ch); break;
|
||||||
} break;
|
} break;
|
||||||
case STRING_WITH_ESCAPES:
|
case STRING_WITH_ESCAPES:
|
||||||
switch(ch) {
|
switch(ch) {
|
||||||
case STRING_WITH_ESCAPES_DELIM:
|
case STRING_WITH_ESCAPES_DELIM: set_state(self, WORD); break;
|
||||||
set_state(self, WORD);
|
case ESCAPE_CHAR: write_escaped_or_fail(); break;
|
||||||
break;
|
default: write_ch(self, ch); break;
|
||||||
case ESCAPE_CHAR:
|
} break;
|
||||||
write_escape_ch(self);
|
case ANSI_C_QUOTED:
|
||||||
break;
|
switch(ch) {
|
||||||
|
case STRING_WITHOUT_ESCAPES_DELIM: set_state(self, WORD); break;
|
||||||
|
case ESCAPE_CHAR: if (!write_ansi_escape_ch(self)) return NULL; break;
|
||||||
default: write_ch(self, ch); break;
|
default: write_ch(self, ch); break;
|
||||||
} break;
|
} break;
|
||||||
}
|
}
|
||||||
@@ -129,7 +212,7 @@ next_word(Shlex *self, PyObject *args UNUSED) {
|
|||||||
self->state = NORMAL;
|
self->state = NORMAL;
|
||||||
if (self->buf_pos) return get_word(self);
|
if (self->buf_pos) return get_word(self);
|
||||||
break;
|
break;
|
||||||
case STRING_WITH_ESCAPES: case STRING_WITHOUT_ESCAPES:
|
case STRING_WITH_ESCAPES: case STRING_WITHOUT_ESCAPES: case ANSI_C_QUOTED:
|
||||||
PyErr_SetString(PyExc_ValueError, "Unterminated string at the end of input");
|
PyErr_SetString(PyExc_ValueError, "Unterminated string at the end of input");
|
||||||
self->state = NORMAL;
|
self->state = NORMAL;
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|||||||
@@ -1227,14 +1227,14 @@ def key_val_matcher(items: Iterable[Tuple[str, str]], key_pat: 're.Pattern[str]'
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def shlex_split(text: str) -> Iterator[str]:
|
def shlex_split(text: str, allow_ansi_quoted_strings: bool = False) -> Iterator[str]:
|
||||||
s = Shlex(text)
|
s = Shlex(text, allow_ansi_quoted_strings)
|
||||||
while (q := s.next_word())[0] > -1:
|
while (q := s.next_word())[0] > -1:
|
||||||
yield q[1]
|
yield q[1]
|
||||||
|
|
||||||
|
|
||||||
def shlex_split_with_positions(text: str) -> Iterator[Tuple[int, str]]:
|
def shlex_split_with_positions(text: str, allow_ansi_quoted_strings: bool = False) -> Iterator[Tuple[int, str]]:
|
||||||
s = Shlex(text)
|
s = Shlex(text, allow_ansi_quoted_strings)
|
||||||
while (q := s.next_word())[0] > -1:
|
while (q := s.next_word())[0] > -1:
|
||||||
yield q
|
yield q
|
||||||
|
|
||||||
|
|||||||
@@ -629,7 +629,7 @@ class TestDataTypes(BaseTest):
|
|||||||
|
|
||||||
def test_shlex_split(self):
|
def test_shlex_split(self):
|
||||||
for bad in (
|
for bad in (
|
||||||
'abc\\', '\\', "'abc", "'", '"', 'asd' + '\\',
|
'abc\\', '\\', "'abc", "'", '"', 'asd' + '\\', r'"a\"', '"a\\',
|
||||||
):
|
):
|
||||||
with self.assertRaises(ValueError, msg=f'Failed to raise exception for {bad!r}'):
|
with self.assertRaises(ValueError, msg=f'Failed to raise exception for {bad!r}'):
|
||||||
tuple(shlex_split_with_positions(bad))
|
tuple(shlex_split_with_positions(bad))
|
||||||
@@ -640,6 +640,24 @@ class TestDataTypes(BaseTest):
|
|||||||
r'''x'y"\z'1''': ((0, 'xy"\\z1'),),
|
r'''x'y"\z'1''': ((0, 'xy"\\z1'),),
|
||||||
r'\abc\ d': ((0, 'abc d'),),
|
r'\abc\ d': ((0, 'abc d'),),
|
||||||
'': (), ' ': (), ' \tabc\n\t\r ': ((2, 'abc'),),
|
'': (), ' ': (), ' \tabc\n\t\r ': ((2, 'abc'),),
|
||||||
|
"$'ab'": ((0, '$ab'),),
|
||||||
}.items():
|
}.items():
|
||||||
actual = tuple(shlex_split_with_positions(q))
|
actual = tuple(shlex_split_with_positions(q))
|
||||||
self.ae(expected, actual, f'Failed for text: {q!r}')
|
self.ae(expected, actual, f'Failed for text: {q!r}')
|
||||||
|
|
||||||
|
for q, expected in {
|
||||||
|
"$'ab'": ((0, 'ab'),),
|
||||||
|
"1$'ab'": ((0, '1ab'),),
|
||||||
|
'''"1$'ab'"''': ((0, "1$'ab'"),),
|
||||||
|
r"$'a\123b'": ((0, 'a\123b'),),
|
||||||
|
r"$'a\1b'": ((0, 'a\001b'),),
|
||||||
|
r"$'a\12b'": ((0, 'a\012b'),),
|
||||||
|
r"$'a\db'": ((0, 'adb'),),
|
||||||
|
r"$'a\x1bb'": ((0, 'a\x1bb'),),
|
||||||
|
r"$'\u123z'": ((0, '\u0123z'),),
|
||||||
|
r"$'\U0001F1E8'": ((0, '\U0001F1E8'),),
|
||||||
|
r"$'\U1F1E8'": ((0, '\U0001F1E8'),),
|
||||||
|
r"$'a\U1F1E8'b": ((0, 'a\U0001F1E8b'),),
|
||||||
|
}.items():
|
||||||
|
actual = tuple(shlex_split_with_positions(q, True))
|
||||||
|
self.ae(expected, actual, f'Failed for text: {q!r}')
|
||||||
|
|||||||
Reference in New Issue
Block a user