mirror of
https://github.com/kovidgoyal/kitty
synced 2026-06-11 02:59:40 +02:00
Move function to detect URLs into C code
This commit is contained in:
81
kitty/line.c
81
kitty/line.c
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "data-types.h"
|
||||
#include "unicode-data.h"
|
||||
#include "lineops.h"
|
||||
|
||||
static PyObject *
|
||||
@@ -51,6 +52,85 @@ line_text_at(char_type ch, combining_type cc) {
|
||||
return ans;
|
||||
}
|
||||
|
||||
// URL schemes recognised by the URL detector. has_url_prefix_at() tries them
// in this order, so "https" is checked before its own prefix "http".
static const char* url_prefixes[4] = {
    "https",
    "http",
    "file",
    "ftp",
};

// Cached strlen() of every entry in url_prefixes. Starts out all zero and is
// filled in by has_url_prefix_at() the first time it runs.
static size_t url_prefix_lengths[sizeof(url_prefixes)/sizeof(url_prefixes[0])] = {0};

// States of the right-to-left "://" scanner in find_colon_slash().
typedef enum URL_PARSER_STATES {
    ANY,            // the cells seen so far do not end in a slash
    FIRST_SLASH,    // the previously scanned cell was a '/'
    SECOND_SLASH    // the two previously scanned cells were "//"
} URL_PARSER_STATE;
|
||||
|
||||
static inline index_type
|
||||
find_colon_slash(Line *self, index_type x, index_type limit) {
|
||||
// Find :// at or before x
|
||||
index_type pos = x;
|
||||
URL_PARSER_STATE state = ANY;
|
||||
limit = MAX(2, limit);
|
||||
if (pos < limit) return 0;
|
||||
do {
|
||||
char_type ch = self->cells[pos].ch & CHAR_MASK;
|
||||
if (!is_url_char(ch)) return false;
|
||||
switch(state) {
|
||||
case ANY:
|
||||
if (ch == '/') state = FIRST_SLASH;
|
||||
break;
|
||||
case FIRST_SLASH:
|
||||
state = ch == '/' ? SECOND_SLASH : ANY;
|
||||
break;
|
||||
case SECOND_SLASH:
|
||||
if (ch == ':') return pos;
|
||||
state = ANY;
|
||||
break;
|
||||
}
|
||||
pos--;
|
||||
} while(pos >= limit);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
prefix_matches(Line *self, index_type at, const char* prefix, index_type prefix_len) {
|
||||
if (prefix_len > at) return false;
|
||||
index_type p, i;
|
||||
for (p = at - prefix_len, i = 0; i < prefix_len && p < self->xnum; i++, p++) {
|
||||
if ((self->cells[p].ch & CHAR_MASK) != (unsigned char)prefix[i]) return false;
|
||||
}
|
||||
return i == prefix_len ? true : false;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
has_url_prefix_at(Line *self, index_type at, index_type min_prefix_len, index_type *ans) {
|
||||
if (UNLIKELY(!url_prefix_lengths[0])) {
|
||||
for (index_type i = 0; i < sizeof(url_prefixes)/sizeof(url_prefixes[0]); i++) url_prefix_lengths[i] = strlen(url_prefixes[i]);
|
||||
}
|
||||
for (index_type i = 0; i < sizeof(url_prefixes)/sizeof(url_prefixes[0]); i++) {
|
||||
index_type prefix_len = url_prefix_lengths[i];
|
||||
if (at < prefix_len || prefix_len < min_prefix_len) continue;
|
||||
if (prefix_matches(self, at, url_prefixes[i], prefix_len)) { *ans = at - prefix_len; return true; }
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
#define MAX_URL_SCHEME_LEN 5
|
||||
#define MIN_URL_LEN 5
|
||||
index_type
|
||||
line_url_start_at(Line *self, index_type x) {
|
||||
// Find the starting cell for a URL that contains the position x. A URL is defined as
|
||||
// known-prefix://url-chars. If no URL is found self->xnum is returned.
|
||||
if (x >= self->xnum || self->xnum <= MIN_URL_LEN + 3) return self->xnum;
|
||||
index_type ds_pos = 0, t;
|
||||
// First look for :// ahead of x
|
||||
if (self->xnum - x > MAX_URL_SCHEME_LEN + 3) ds_pos = find_colon_slash(self, x + MAX_URL_SCHEME_LEN + 3, x < 2 ? 0 : x - 2);
|
||||
if (ds_pos != 0) {
|
||||
if (has_url_prefix_at(self, ds_pos, ds_pos > x ? ds_pos - x: 0, &t)) return t;
|
||||
}
|
||||
ds_pos = find_colon_slash(self, x, 0);
|
||||
if (ds_pos == 0 || self->xnum < ds_pos + MIN_URL_LEN + 3) return self->xnum;
|
||||
if (has_url_prefix_at(self, ds_pos, 0, &t)) return t;
|
||||
return self->xnum;
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
url_start_at(Line *self, PyObject *x) {
|
||||
#define url_start_at_doc "url_start_at(x) -> Return the start cell number for a URL containing x or self->xnum if not found"
|
||||
return PyLong_FromUnsignedLong((unsigned long)line_url_start_at(self, PyLong_AsUnsignedLong(x)));
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
text_at(Line* self, Py_ssize_t xval) {
|
||||
#define text_at_doc "[x] -> Return the text in the specified cell"
|
||||
@@ -474,6 +554,7 @@ static PyMethodDef methods[] = {
|
||||
METHOD(as_ansi, METH_NOARGS)
|
||||
METHOD(is_continued, METH_NOARGS)
|
||||
METHOD(width, METH_O)
|
||||
METHOD(url_start_at, METH_O)
|
||||
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
@@ -61,6 +61,7 @@ void line_apply_cursor(Line *self, Cursor *cursor, unsigned int at, unsigned int
|
||||
void line_set_char(Line *, unsigned int , uint32_t , unsigned int , Cursor *);
|
||||
void line_right_shift(Line *, unsigned int , unsigned int );
|
||||
void line_add_combining_char(Line *, uint32_t , unsigned int );
|
||||
index_type line_url_start_at(Line *self, index_type x);
|
||||
index_type line_as_ansi(Line *self, Py_UCS4 *buf, index_type buflen);
|
||||
unsigned int line_length(Line *self);
|
||||
PyObject* unicode_in_range(Line *self, index_type start, index_type limit, bool include_cc, char leading_char);
|
||||
|
||||
@@ -215,6 +215,27 @@ class TestDataTypes(BaseTest):
|
||||
l.set_char(0, 'x', 1, q)
|
||||
self.assertEqualAttributes(l.cursor_from(0), q)
|
||||
|
||||
def test_url_at(self):
    # Exercise Line.url_start_at(): for every cell inside a URL it must
    # report the URL's first cell, and for every cell outside one it must
    # report the line length.

    def make_line(text):
        # The LineBuf is also stored on the helper, as the original test did
        # (presumably so the buffer outlives this call -- confirm).
        width = len(text)
        make_line.lb = LineBuf(1, width)
        line = make_line.lb.line(0)
        line.set_text(text, 0, width, C())
        return line

    def check_leading_spaces(pad):
        # `pad` spaces followed by a URL that runs to the end of the line.
        line = make_line(' ' * pad + 'http://acme.com')
        for pos in range(len(line)):
            expected = len(line) if pos < pad else pad
            self.ae(line.url_start_at(pos), expected)

    for pad in range(5):
        check_leading_spaces(pad)

    # A URL with non-URL text on both sides.
    line = make_line('b https://testing.me a')
    end = len(line)
    for pos in (0, 1, end - 1, end - 2):
        self.ae(line.url_start_at(pos), end, 'failed with start at: %d' % pos)
    for pos in range(2, end - 2):
        self.ae(line.url_start_at(pos), 2, 'failed with start at: %d (%s)' % (pos, str(line)[pos:]))
|
||||
|
||||
def rewrap(self, lb, lb2):
|
||||
hb = HistoryBuf(lb2.ynum, lb2.xnum)
|
||||
cy = lb.rewrap(lb2, hb)
|
||||
|
||||
Reference in New Issue
Block a user