diff --git a/kitty/simd-string-impl.h b/kitty/simd-string-impl.h
index 46bf7244d..8c3b13e38 100644
--- a/kitty/simd-string-impl.h
+++ b/kitty/simd-string-impl.h
@@ -541,7 +541,7 @@ FUNC(utf8_decode_to_esc)(UTF8Decoder *d, const uint8_t *src_data, size_t src_len
         src_data += d->num_consumed; src_len -= d->num_consumed;
     }
     const integer_t esc_vec = set1_epi8(0x1b);
-    const integer_t zero = create_zero_integer(), one = set1_epi8(1), two = set1_epi8(2), three = set1_epi8(3), numbered = numbered_bytes();
+    const integer_t zero = create_zero_integer(), one = set1_epi8(1), two = set1_epi8(2), three = set1_epi8(3), four = set1_epi8(4), numbered = numbered_bytes();
     const uint8_t *limit = src_data + src_len, *p = src_data, *start_of_current_chunk = src_data;
     bool sentinel_found = false;
     unsigned chunk_src_sz = 0;
@@ -593,10 +593,14 @@ start_classification:
             handle_trailing_bytes();
             continue;
         }
-        // Classify the bytes
+        // Classify the bytes by whether they may be the start of a 2-byte, 3-byte, or 4-byte sequence.
+        // This is only an initial, potential classification.
+        // 0xC0 and 0xC1 are initially classified as potential starter bytes of 2-byte sequences.
+        // And 0xF5..0xFF are initially classified as potential starter bytes of 4-byte sequences.
+        // They will be marked as actually invalid later in the chunk_is_invalid checks.
         integer_t state = set1_epi8(0x80);
         const integer_t vec_signed = add_epi8(vec, state); // needed because cmplt_epi8 works only on signed chars
-
+        // state now has 0x80 on all bytes
         const integer_t bytes_indicating_start_of_two_byte_sequence = cmplt_epi8(set1_epi8(0xc0 - 1 - 0x80), vec_signed);
         state = blendv_epi8(state, set1_epi8(0xc2), bytes_indicating_start_of_two_byte_sequence);
         // state now has 0xc2 on all bytes that start a 2 or more byte sequence and 0x80 on the rest
@@ -631,11 +635,61 @@ start_classification:
             vec = zero_last_n_bytes(vec, sizeof(integer_t) - chunk_src_sz);
             goto start_classification;
         }
+
+        // The next section performs detailed validation of the chunk's byte sequences.
+        // It accumulates validation errors into a chunk_is_invalid vector.
+        // When chunk_is_invalid has any non-zero byte, then the chunk contains invalid UTF-8.
+        // chunk_is_invalid is a vector, and not a bitmask or boolean,
+        // because the or_si SIMD operation is empirically faster than movemask_epi8 with |= or ||=.
+        integer_t chunk_is_invalid;
+
         // Only ASCII chars should have corresponding byte of counts == 0
         if (ascii_mask != movemask_epi8(cmpgt_epi8(counts, zero))) { abort_with_invalid_utf8(); }
+
         // The difference between a byte in counts and the next one should be negative,
         // zero, or one. Any other value means there is not enough continuation bytes.
-        if (!is_zero(cmpgt_epi8(subtract_epi8(shift_right_by_one_byte(counts), counts), one))) { abort_with_invalid_utf8(); }
+        chunk_is_invalid = cmpgt_epi8(subtract_epi8(shift_right_by_one_byte(counts), counts), one);
+
+        // Validate 2-byte sequence starter bytes: 0xC0..0xC1 are invalid (overlong encodings for U+0000..U+007F).
+        // Without this, "\xc0\x80" would incorrectly be decoded as a "\x00".
+        chunk_is_invalid = or_si(chunk_is_invalid, and_si(bytes_indicating_start_of_two_byte_sequence, cmplt_epi8(vec, set1_epi8(0xc2))));
+
+        // Validate 4-byte sequence starter bytes: 0xF5..0xFF are invalid (out of Unicode codespace).
+        // Without this, "\xff\x80\x80\x80" would incorrectly be decoded as an ill-formed "\U003C0000".
+        chunk_is_invalid = or_si(chunk_is_invalid, and_si(bytes_indicating_start_of_four_byte_sequence, cmpgt_epi8(vec, set1_epi8(0xf4))));
+
+        // Validate second bytes of E0-starting 3-byte sequences.
+        // 0xE0 must be followed by 0xA0..0xBF (not 0x80..0x9F) to avoid overlong encodings.
+        // Without this, "\xe0\x80\x80" would incorrectly be decoded as a "\x00".
+        const integer_t e0_starter_bytes = cmpeq_epi8(vec, set1_epi8(0xe0));
+        const integer_t e0_first_follower_bytes = shift_right_by_one_byte(e0_starter_bytes);
+        chunk_is_invalid = or_si(chunk_is_invalid, and_si(e0_first_follower_bytes, cmplt_epi8(and_si(e0_first_follower_bytes, vec), set1_epi8(0xa0))));
+
+        // Validate second bytes of ED-starting 3-byte sequences.
+        // 0xED must be followed by 0x80..0x9F (not 0xA0..0xBF) to avoid UTF-16 surrogates.
+        // Without this, "\xed\xa0\x80" would incorrectly be decoded as an isolated surrogate "\uD800".
+        const integer_t ed_starter_bytes = cmpeq_epi8(vec, set1_epi8(0xed));
+        const integer_t ed_first_follower_bytes = shift_right_by_one_byte(ed_starter_bytes);
+        chunk_is_invalid = or_si(chunk_is_invalid, and_si(ed_first_follower_bytes, cmpgt_epi8(and_si(ed_first_follower_bytes, vec), set1_epi8(0x9f))));
+
+        // Validate second bytes of F0-starting 4-byte sequences.
+        // F0 must be followed by 0x90..0xBF (not 0x80..0x8F) to avoid overlong encodings.
+        // Without this, "\xf0\x80\x80\x80" would incorrectly be decoded as a "\x0000".
+        const integer_t f0_starter_bytes = cmpeq_epi8(vec, set1_epi8(0xf0));
+        const integer_t f0_first_follower_bytes = shift_right_by_one_byte(f0_starter_bytes);
+        chunk_is_invalid = or_si(chunk_is_invalid, and_si(f0_first_follower_bytes, cmplt_epi8(and_si(f0_first_follower_bytes, vec), set1_epi8(0x90))));
+
+        // Validate second bytes of F4-starting 4-byte sequences.
+        // F4 must be followed by 0x80..0x8F (not 0x90..0xBF) to stay within the Unicode codespace.
+        // Without this, "\xf4\x90\x80\x80" would incorrectly be decoded as an ill-formed "\U00110000".
+        const integer_t f4_starter_bytes = cmpeq_epi8(vec, set1_epi8(0xf4));
+        const integer_t f4_first_follower_bytes = shift_right_by_one_byte(f4_starter_bytes);
+        chunk_is_invalid = or_si(chunk_is_invalid, and_si(f4_first_follower_bytes, cmpgt_epi8(and_si(f4_first_follower_bytes, vec), set1_epi8(0x8f))));
+
+        // Check for any accumulated validation errors and, if found,
+        // fall back to slow scalar decoding of this chunk,
+        // which handles replacement of invalid sequences with U+FFFD
+        if (!is_zero(chunk_is_invalid)) { abort_with_invalid_utf8(); }
 
         // Process the bytes storing the three resulting bytes that make up the unicode codepoint
         // mask all control bits so that we have only useful bits left
@@ -673,7 +727,7 @@ start_classification:
 
         // The last byte is made up of bits 5 and 6 from count == 3 and 3 bits from count == 4
         integer_t output3 = and_si(three, shift_right_by_bits32(vec, 4));  // bits 5 and 6 from count == 3
-        const integer_t count4_locations = cmpeq_epi8(counts, set1_epi8(4));
+        const integer_t count4_locations = cmpeq_epi8(counts, four);
         // 3 bits from count == 4 locations, placed at count == 3 locations shifted left by 2 bits
         output3 = or_si(output3,
             and_si(set1_epi8(0xfc),
diff --git a/kitty_tests/parser.py b/kitty_tests/parser.py
index b65e02b80..2e3752209 100644
--- a/kitty_tests/parser.py
+++ b/kitty_tests/parser.py
@@ -322,10 +322,6 @@ class TestParser(BaseTest):
             # Bad continuation byte (restored as ASCII)
             pb(b'"\xe1\x28\xa1"', '"\ufffd(\ufffd"')  # )
 
-            # The following all fail when using SIMD and need to be fixed in the SIMD parser
-            if which != 1:
-                continue
-
             # Overlong 2-byte sequence for U+0000 (should be `0x00`)
             pb(b'"\xc0\x80"', '"\ufffd\ufffd"')