Add Extended_Pictographic property

This commit is contained in:
Kovid Goyal
2025-03-13 10:01:41 +05:30
parent 039af78785
commit 98f9a568ce
3 changed files with 360 additions and 0 deletions

View File

@@ -73,6 +73,7 @@ not_assigned = set(range(0, sys.maxunicode))
property_maps: dict[str, set[int]] = defaultdict(set)
grapheme_segmentation_maps: dict[str, set[int]] = defaultdict(set)
incb_map: dict[str, set[int]] = defaultdict(set)
extended_pictographic: set[int] = set()
def parse_prop_list() -> None:
@@ -262,6 +263,7 @@ def parse_eaw() -> None:
def parse_grapheme_segmentation() -> None:
global extended_pictographic
for line in get_data('ucd/auxiliary/GraphemeBreakProperty.txt'):
chars, category = split_two(line)
grapheme_segmentation_maps[category] |= chars
@@ -274,6 +276,10 @@ def parse_grapheme_segmentation() -> None:
# there exist some InCB chars that do not have a GBP category
subcat = rest.strip().split(';')[1].strip().split()[0].strip()
incb_map[subcat] |= chars
for line in get_data('ucd/emoji/emoji-data.txt'):
chars, category = split_two(line)
if 'Extended_Pictographic#' == category:
extended_pictographic |= chars
def get_ranges(items: list[int]) -> Generator[Union[int, tuple[int, int]], None, None]:
@@ -509,6 +515,30 @@ def gen_grapheme_segmentation() -> None:
p('')
gp('')
get_cat('IndicConjunctBreak', 'indic_conjunct_break', 'IndicConjunctBreakFor', 'ICB_', incb_map)
p('''
static inline bool
is_extended_pictographic(char_type c) {
switch (c) {
default: return false;
''')
gp('''
func IsExtendedPictographic(c rune) bool {
switch c {
default: return false;
''')
for spec in get_ranges(list(extended_pictographic)):
write_case(spec, p)
p('\t\t\treturn true;')
write_case(spec, gp, for_go=True)
gp('\t\t\treturn true')
p('''
}
}''')
gp('''
}
}''')
gofmt(gof.name)

View File

@@ -3650,4 +3650,169 @@ indic_conjunct_break(const char_type c) {
return ICB_None;
}
/*
 * Report whether code point `c` is one of the code points enumerated below.
 *
 * Returns true for every listed single value or inclusive range and false
 * for anything else. All matching labels are stacked so that they share a
 * single `return true`.
 *
 * NOTE: the `lo ... hi` labels are case ranges, a compiler extension rather
 * than standard C.
 * NOTE(review): this table appears to be machine generated from the
 * Extended_Pictographic entries of the Unicode emoji data; confirm before
 * editing by hand, since regeneration would overwrite manual changes.
 */
static inline bool
is_extended_pictographic(char_type c) {
	switch (c) {
		/* Code points below 0x10000 */
		case 0xa9:
		case 0xae:
		case 0x203c:
		case 0x2049:
		case 0x2122:
		case 0x2139:
		case 0x2194 ... 0x2199:
		case 0x21a9 ... 0x21aa:
		case 0x231a ... 0x231b:
		case 0x2328:
		case 0x2388:
		case 0x23cf:
		case 0x23e9 ... 0x23f3:
		case 0x23f8 ... 0x23fa:
		case 0x24c2:
		case 0x25aa ... 0x25ab:
		case 0x25b6:
		case 0x25c0:
		case 0x25fb ... 0x25fe:
		case 0x2600 ... 0x2605:
		case 0x2607 ... 0x2612:
		case 0x2614 ... 0x2685:
		case 0x2690 ... 0x2705:
		case 0x2708 ... 0x2712:
		case 0x2714:
		case 0x2716:
		case 0x271d:
		case 0x2721:
		case 0x2728:
		case 0x2733 ... 0x2734:
		case 0x2744:
		case 0x2747:
		case 0x274c:
		case 0x274e:
		case 0x2753 ... 0x2755:
		case 0x2757:
		case 0x2763 ... 0x2767:
		case 0x2795 ... 0x2797:
		case 0x27a1:
		case 0x27b0:
		case 0x27bf:
		case 0x2934 ... 0x2935:
		case 0x2b05 ... 0x2b07:
		case 0x2b1b ... 0x2b1c:
		case 0x2b50:
		case 0x2b55:
		case 0x3030:
		case 0x303d:
		case 0x3297:
		case 0x3299:
		/* Code points at or above 0x10000 */
		case 0x1f000 ... 0x1f0ff:
		case 0x1f10d ... 0x1f10f:
		case 0x1f12f:
		case 0x1f16c ... 0x1f171:
		case 0x1f17e ... 0x1f17f:
		case 0x1f18e:
		case 0x1f191 ... 0x1f19a:
		case 0x1f1ad ... 0x1f1e5:
		case 0x1f201 ... 0x1f20f:
		case 0x1f21a:
		case 0x1f22f:
		case 0x1f232 ... 0x1f23a:
		case 0x1f23c ... 0x1f23f:
		case 0x1f249 ... 0x1f3fa:
		case 0x1f400 ... 0x1f53d:
		case 0x1f546 ... 0x1f64f:
		case 0x1f680 ... 0x1f6ff:
		case 0x1f774 ... 0x1f77f:
		case 0x1f7d5 ... 0x1f7ff:
		case 0x1f80c ... 0x1f80f:
		case 0x1f848 ... 0x1f84f:
		case 0x1f85a ... 0x1f85f:
		case 0x1f888 ... 0x1f88f:
		case 0x1f8ae ... 0x1f8ff:
		case 0x1f90c ... 0x1f93a:
		case 0x1f93c ... 0x1f945:
		case 0x1f947 ... 0x1faff:
		case 0x1fc00 ... 0x1fffd:
			return true;
		default:
			return false;
	}
}
END_ALLOW_CASE_RANGE

File diff suppressed because one or more lines are too long