From 1a38b6046368965ec57bb03f5f0a612f12f36ad7 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 21 Jun 2024 05:53:21 +0530 Subject: [PATCH] URL detection: Fix IPv6 hostnames breaking URL detection Fixes #7565 --- docs/changelog.rst | 2 ++ kitty/line.c | 41 +++++++++++++++++++++++++++++++---------- kitty/lineops.h | 4 ++-- kitty/screen.c | 30 +++++++++++++++++++++++------- kitty_tests/screen.py | 11 ++++++++--- 5 files changed, 66 insertions(+), 22 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 9339bd84c..12958a81d 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -65,6 +65,8 @@ Detailed list of changes - Fix :opt:`scrollback_indicator_opacity` not actually controlling the opacity (:iss:`7557`) +- URL detection: Fix IPv6 hostnames breaking URL detection (:iss:`7565`) + 0.35.1 [2024-05-31] ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/kitty/line.c b/kitty/line.c index 1ab1bb5fe..66b6b4793 100644 --- a/kitty/line.c +++ b/kitty/line.c @@ -50,6 +50,11 @@ cell_text(CPUCell *cell) { // URL detection {{{ +static bool +is_hostname_char(char_type ch) { + return ch == '[' || ch == ']' || is_url_char(ch); +} + static index_type find_colon_slash(Line *self, index_type x, index_type limit) { // Find :// at or before x @@ -60,7 +65,7 @@ find_colon_slash(Line *self, index_type x, index_type limit) { if (pos < limit) return 0; do { char_type ch = self->cpu_cells[pos].ch; - if (!is_url_char(ch)) return false; + if (!is_hostname_char(ch)) return false; if (pos == x) { if (ch == ':') { if (pos + 2 < self->xnum && self->cpu_cells[pos+1].ch == '/' && self->cpu_cells[pos + 2].ch == '/') state = SECOND_SLASH; @@ -108,9 +113,15 @@ has_url_prefix_at(Line *self, index_type at, index_type min_prefix_len, index_ty #define MIN_URL_LEN 5 static bool -has_url_beyond(Line *self, index_type x) { +has_url_beyond_colon_slash(Line *self, index_type x) { + unsigned num_of_slashes = 0; for (index_type i = x; i < MIN(x + MIN_URL_LEN + 3, self->xnum); i++) { - if (!is_url_char(self->cpu_cells[i].ch)) return false; + const char_type ch = self->cpu_cells[i].ch; + if (num_of_slashes < 3) { + if (!is_hostname_char(ch)) return false; + if (ch == '/') num_of_slashes++; + } + else { if (!is_url_char(ch)) return false; } } return true; } @@ -123,30 +134,40 @@ line_url_start_at(Line *self, index_type x) { index_type ds_pos = 0, t; // First look for :// ahead of x ds_pos = find_colon_slash(self, x + OPT(url_prefixes).max_prefix_len + 3, x < 2 ? 0 : x - 2); - if (ds_pos != 0 && has_url_beyond(self, ds_pos)) { + if (ds_pos != 0 && has_url_beyond_colon_slash(self, ds_pos)) { if (has_url_prefix_at(self, ds_pos, ds_pos > x ? ds_pos - x: 0, &t)) return t; } ds_pos = find_colon_slash(self, x, 0); - if (ds_pos == 0 || self->xnum < ds_pos + MIN_URL_LEN + 3 || !has_url_beyond(self, ds_pos)) return self->xnum; + if (ds_pos == 0 || self->xnum < ds_pos + MIN_URL_LEN + 3 || !has_url_beyond_colon_slash(self, ds_pos)) return self->xnum; if (has_url_prefix_at(self, ds_pos, 0, &t)) return t; return self->xnum; } +static bool +is_pos_ok_for_url(Line *self, index_type x, bool in_hostname, index_type last_hostname_char_pos) { + if (x >= self->xnum) return false; + if (in_hostname && x <= last_hostname_char_pos) return is_hostname_char(self->cpu_cells[x].ch); + return is_url_char(self->cpu_cells[x].ch); +} + index_type -line_url_end_at(Line *self, index_type x, bool check_short, char_type sentinel, bool next_line_starts_with_url_chars) { +line_url_end_at(Line *self, index_type x, bool check_short, char_type sentinel, bool next_line_starts_with_url_chars, bool in_hostname, index_type last_hostname_char_pos) { index_type ans = x; if (x >= self->xnum || (check_short && self->xnum <= MIN_URL_LEN + 3)) return 0; - if (sentinel) { while (ans < self->xnum && self->cpu_cells[ans].ch != sentinel && is_url_char(self->cpu_cells[ans].ch)) ans++; } - else { while (ans < self->xnum && is_url_char(self->cpu_cells[ans].ch)) ans++; } +#define pos_ok(x) is_pos_ok_for_url(self, x, in_hostname, last_hostname_char_pos) + if (sentinel) { while (ans < self->xnum && self->cpu_cells[ans].ch != sentinel && pos_ok(ans)) ans++; } + else { while (ans < self->xnum && pos_ok(ans)) ans++; } if (ans) ans--; if (ans < self->xnum - 1 || !next_line_starts_with_url_chars) { while (ans > x && can_strip_from_end_of_url(self->cpu_cells[ans].ch)) ans--; } +#undef pos_ok return ans; } bool -line_startswith_url_chars(Line *self) { +line_startswith_url_chars(Line *self, bool in_hostname) { + if (in_hostname) return is_hostname_char(self->cpu_cells[0].ch); return is_url_char(self->cpu_cells[0].ch); } @@ -163,7 +184,7 @@ url_end_at(Line *self, PyObject *args) { unsigned int x, sentinel = 0; int next_line_starts_with_url_chars = 0; if (!PyArg_ParseTuple(args, "I|Ip", &x, &sentinel, &next_line_starts_with_url_chars)) return NULL; - return PyLong_FromUnsignedLong((unsigned long)line_url_end_at(self, x, true, sentinel, next_line_starts_with_url_chars)); + return PyLong_FromUnsignedLong((unsigned long)line_url_end_at(self, x, true, sentinel, next_line_starts_with_url_chars, false, self->xnum)); } // }}} diff --git a/kitty/lineops.h b/kitty/lineops.h index 277dbee2e..c13b77898 100644 --- a/kitty/lineops.h +++ b/kitty/lineops.h @@ -89,8 +89,8 @@ void line_set_char(Line *, unsigned int , uint32_t , unsigned int , Cursor *, hy void line_right_shift(Line *, unsigned int , unsigned int ); void line_add_combining_char(CPUCell *, GPUCell *, uint32_t , unsigned int ); index_type line_url_start_at(Line *self, index_type x); -index_type line_url_end_at(Line *self, index_type x, bool, char_type, bool); -bool line_startswith_url_chars(Line*); +index_type line_url_end_at(Line *self, index_type x, bool, char_type, bool, bool, index_type); +bool line_startswith_url_chars(Line*, bool); bool line_as_ansi(Line *self, ANSIBuf *output, const GPUCell**, index_type start_at, index_type stop_before, char_type prefix_char) __attribute__((nonnull)); unsigned int line_length(Line *self); size_t cell_as_unicode(CPUCell *cell, bool include_cc, Py_UCS4 *buf, char_type); diff --git a/kitty/screen.c b/kitty/screen.c index c753b8a5f..8fc26a4a6 100644 --- a/kitty/screen.c +++ b/kitty/screen.c @@ -3189,25 +3189,35 @@ screen_open_url(Screen *self) { // URLs {{{ static void -extend_url(Screen *screen, Line *line, index_type *x, index_type *y, char_type sentinel, bool newlines_allowed) { +extend_url(Screen *screen, Line *line, index_type *x, index_type *y, char_type sentinel, bool newlines_allowed, index_type last_hostname_char_pos) { unsigned int count = 0; bool has_newline = false; index_type orig_y = *y; while(count++ < 10) { + bool in_hostname = last_hostname_char_pos >= line->xnum; has_newline = !line->gpu_cells[line->xnum-1].attrs.next_char_was_wrapped; if (*x != line->xnum - 1 || (!newlines_allowed && has_newline)) break; bool next_line_starts_with_url_chars = false; line = screen_visual_line(screen, *y + 2); if (line) { - next_line_starts_with_url_chars = line_startswith_url_chars(line); + next_line_starts_with_url_chars = line_startswith_url_chars(line, in_hostname); has_newline = !line->attrs.is_continued; if (next_line_starts_with_url_chars && has_newline && !newlines_allowed) next_line_starts_with_url_chars = false; if (sentinel && next_line_starts_with_url_chars && line->cpu_cells[0].ch == sentinel) next_line_starts_with_url_chars = false; } line = screen_visual_line(screen, *y + 1); if (!line) break; - index_type new_x = line_url_end_at(line, 0, false, sentinel, next_line_starts_with_url_chars); - if (!new_x && !line_startswith_url_chars(line)) break; + if (in_hostname) { + for (last_hostname_char_pos = 0; last_hostname_char_pos < line->xnum; last_hostname_char_pos++) { + if (line->cpu_cells[last_hostname_char_pos].ch == '/') { + if (last_hostname_char_pos > 0) last_hostname_char_pos--; + else { in_hostname = false; last_hostname_char_pos = line->xnum; } + break; + } + } + } + index_type new_x = line_url_end_at(line, 0, false, sentinel, next_line_starts_with_url_chars, in_hostname, last_hostname_char_pos); + if (!new_x && !line_startswith_url_chars(line, in_hostname)) break; *y += 1; *x = new_x; } if (sentinel && *x == 0 && *y > orig_y) { @@ -3254,24 +3264,30 @@ screen_detect_url(Screen *screen, unsigned int x, unsigned int y) { } char_type sentinel = 0; bool newlines_allowed = !is_excluded_from_url('\n'); + index_type last_hostname_char_pos = screen->columns; if (line) { url_start = line_url_start_at(line, x); if (url_start < line->xnum) { bool next_line_starts_with_url_chars = false; if (y < screen->lines - 1) { line = screen_visual_line(screen, y+1); - next_line_starts_with_url_chars = line_startswith_url_chars(line); + next_line_starts_with_url_chars = line_startswith_url_chars(line, last_hostname_char_pos >= line->xnum); if (next_line_starts_with_url_chars && !newlines_allowed && !line->attrs.is_continued) next_line_starts_with_url_chars = false; line = screen_visual_line(screen, y); } sentinel = get_url_sentinel(line, url_start); - url_end = line_url_end_at(line, x, true, sentinel, next_line_starts_with_url_chars); + index_type slash_count = 0; + for (index_type i = url_start; i < line->xnum; i++) { + const char_type ch = line->cpu_cells[i].ch; + if (ch == '/' && ++slash_count > 2) { last_hostname_char_pos = i - 1; break; } + } + url_end = line_url_end_at(line, x, true, sentinel, next_line_starts_with_url_chars, x <= last_hostname_char_pos, last_hostname_char_pos); } has_url = url_end > url_start; } if (has_url) { index_type y_extended = y; - extend_url(screen, line, &url_end, &y_extended, sentinel, newlines_allowed); + extend_url(screen, line, &url_end, &y_extended, sentinel, newlines_allowed, last_hostname_char_pos); screen_mark_url(screen, url_start, y, url_end, y_extended); } else { screen_mark_url(screen, 0, 0, 0, 0); diff --git a/kitty_tests/screen.py b/kitty_tests/screen.py index 017ce6077..754cf972b 100644 --- a/kitty_tests/screen.py +++ b/kitty_tests/screen.py @@ -1028,12 +1028,12 @@ class TestScreen(BaseTest): url = ''.join(s.text_for_marked_url()) self.assertEqual(expected, url) - def t(url, x=0, y=0, before='', after=''): + def t(url, x=0, y=0, before='', after='', expected=''): s.reset() s.cursor.x = x s.cursor.y = y s.draw(before + url + after) - ae(url, x=x + 1 + len(before), y=y) + ae(expected or url, x=x + 1 + len(before), y=y) t('http://moo.com') @@ -1043,10 +1043,15 @@ class TestScreen(BaseTest): t('http://moo.com', before=st, after=e) for trailer in ')-=': t('http://moo.com' + trailer) - for trailer in '{([]}<>': + for trailer in '{}([<>': t('http://moo.com', after=trailer) t('http://moo.com', x=s.columns - 9) t('https://wraps-by-one-char.com', before='[', after=']') + t('http://[::1]:8080') + t('https://wr[aps-by-one-ch]ar.com') + t('http://[::1]:8080/x', after='[') + t('http://[::1]:8080/x]y34', expected='http://[::1]:8080/x') + t('https://wraps-by-one-char.com[]/x', after='[') def test_prompt_marking(self): s = self.create_screen()