Get the Unicode grapheme segmentation tests passing on screen, with minimal modification

We ignore tests that include ASCII control codes, and we modify the results when there are grapheme breaks before zero-width characters.
2026-07-02 12:44:01 +02:00 · 2025-04-10 08:33:18 +05:30
parent e976cf67fd
commit c01a941fe7
2 changed files with 41 additions and 14 deletions
--- a/kitty/screen.c
+++ b/kitty/screen.c
@@ -1101,11 +1101,6 @@ draw_control_char(Screen *self, text_loop_state *s, uint32_t ch) {
    }
 }

-static bool
-is_roundtripped_zero_width_char(char_type ch) {
-    return ch == 0xad || ch == 0x200b || ch == 0x2060;
-}
-
 static void
 draw_text_loop(Screen *self, const uint32_t *chars, size_t num_chars, text_loop_state *s) {
    init_text_loop_line(self, s);
@@ -1124,13 +1119,11 @@ draw_text_loop(Screen *self, const uint32_t *chars, size_t num_chars, text_loop_
        int char_width = wcwidth_std(cp);
        if (UNLIKELY(char_width < 1)) {
            if (char_width == 0) {
-                // check for some zero width chars that we want to preserve for
-                // round tripping that are not added to prev cell by grapheme
-                // segmentation.
-                if (s->prev.cc && is_roundtripped_zero_width_char(ch)) {  // soft hyphen, zero width space, word joiner
-                    draw_combining_char(self, s, ch);
-                }
-                continue;  // we cannot represent zero width chars except as combining chars
+                // Preserve zero width chars as combining chars even though
+                // they were not added to the prev cell by grapheme segmentation.
+                // Zero width chars can only be represented as combining chars.
+                if (s->prev.cc) draw_combining_char(self, s, ch);
+                continue;
            }
            char_width = 1;
        }
@@ -1317,7 +1310,7 @@ screen_handle_multicell_command(Screen *self, const MultiCellCommand *cmd, const
            char_type ch = self->lc->chars[i];
            CharProps cp = char_props_for(ch);
            if (cp.is_invalid) continue;
-            if ((s = grapheme_segmentation_step(s, cp)).add_to_current_cell || (wcwidth_std(cp) == 0 && is_roundtripped_zero_width_char(ch) && lc.count)) lc.chars[lc.count++] = ch;
+            if ((s = grapheme_segmentation_step(s, cp)).add_to_current_cell || (wcwidth_std(cp) == 0 && lc.count)) lc.chars[lc.count++] = ch;
            else {
                if (lc.count) handle_variable_width_multicell_command(self, mcd, &lc);
                if (wcwidth_std(cp) < 1) lc.count = 0;
--- a/kitty_tests/datatypes.py
+++ b/kitty_tests/datatypes.py
@@ -640,7 +640,41 @@ class TestDataTypes(BaseTest):
    def test_split_into_graphemes(self):
        self.assertEqual(char_props_for('\ue000')['category'], 'Co')
        self.ae(split_into_graphemes('ab'), ['a', 'b'])
+        s = self.create_screen(cols=12)
+        excluded_chars = set(range(32))
+
+        def is_excluded(text):
+            return any(ord(ch) in excluded_chars for ch in text)  # True if text has any code point from excluded_chars
+
+        def adapt_cell_text(cells):
+            # Yield each cell's graphemes; a cell may hold several only if every extra one starts with a zero width char.
+            for cell in cells:
+                gp = split_into_graphemes(cell)
+                if len(gp) == 1:
+                    yield cell
+                    continue
+                for following in gp[1:]:  # must not be named `i`: the message below reports the enclosing test number `i`
+                    if wcswidth(following[0]) != 0:
+                        raise AssertionError(f'cell {cell!r} contains grapheme break point at non zero width character for Test #{i}: {test["comment"]}')
+                yield from gp
+
        for i, test in enumerate(json.loads(read_kitty_resource('GraphemeBreakTest.json', __name__.rpartition('.')[0]))):
            expected = test['data']
-            actual = split_into_graphemes(''.join(expected))
+            text = ''.join(expected)
+            actual = split_into_graphemes(text)
            self.ae(expected, actual, f'Test #{i} failed: {test["comment"]}')
+            if is_excluded(text):
+                continue
+            s.carriage_return(), s.erase_in_line()
+            s.draw(' ' + text)
+            actual = []
+            for x in range(s.cursor.x):
+                cell = s.cpu_cells(0, x)
+                if cell['x'] > 0:
+                    continue
+                ct = cell['text']
+                if x == 0:
+                    ct = ct[1:]
+                if ct:
+                    actual.append(ct)
+            self.ae(expected, list(adapt_cell_text(actual)), f'Test #{i} failed: {test["comment"]}')