diff options
author | Lars Ingebrigtsen <larsi@gnus.org> | 2022-01-17 15:47:37 +0100 |
---|---|---|
committer | Lars Ingebrigtsen <larsi@gnus.org> | 2022-01-17 15:47:50 +0100 |
commit | e2c8091113404971de75a893fb30cac591a82694 (patch) | |
tree | 850cbb3bbffb0fbe9a2e0a6403e418bf8c908fa7 | |
parent | 39d4e1ca21f3270d4835d5efa8862efc618c4cd9 (diff) | |
download | emacs-e2c8091113404971de75a893fb30cac591a82694.tar.gz |
Add support for functions that deal with Unicode scripts
* admin/unidata/Makefile.in (${unidir}/uni-scripts.el): Build
uni-scripts.el.
* admin/unidata/Scripts.txt:
* admin/unidata/ScriptExtensions.txt:
* admin/unidata/PropertyValueAliases.txt: New files from Unicode.
* admin/unidata/README: Update.
* admin/unidata/unidata-gen.el (unidata-gen-charprop): Allow
writing other data, too.
(unidata-gen-scripts, unidata-gen--read-script-aliases)
(unidata-gen--insert-file): New functions to parse the Script* files.
* lisp/international/textsec.el: Implement some functions that
work on scripts.
-rw-r--r-- | admin/unidata/Makefile.in | 10 | ||||
-rw-r--r-- | admin/unidata/PropertyValueAliases.txt | 1615 | ||||
-rw-r--r-- | admin/unidata/README | 12 | ||||
-rw-r--r-- | admin/unidata/ScriptExtensions.txt | 628 | ||||
-rw-r--r-- | admin/unidata/Scripts.txt | 2991 | ||||
-rw-r--r-- | admin/unidata/unidata-gen.el | 117 | ||||
-rw-r--r-- | etc/NEWS | 4 | ||||
-rw-r--r-- | lisp/international/textsec.el | 95 | ||||
-rw-r--r-- | test/lisp/international/textsec-tests.el | 72 |
9 files changed, 5536 insertions, 8 deletions
diff --git a/admin/unidata/Makefile.in b/admin/unidata/Makefile.in index e75010dc2b8..07b0702fd08 100644 --- a/admin/unidata/Makefile.in +++ b/admin/unidata/Makefile.in @@ -44,7 +44,8 @@ unifiles = $(addprefix ${unidir}/,$(sort $(shell sed -n 's/^[ \t][ \t]*${lparen} .PHONY: all all: ${top_srcdir}/src/macuvs.h ${unifiles} ${unidir}/charscript.el \ - ${unidir}/charprop.el ${unidir}/emoji-zwj.el ${unidir}/emoji-labels.el + ${unidir}/charprop.el ${unidir}/emoji-zwj.el ${unidir}/emoji-labels.el \ + ${unidir}/uni-scripts.el ## Specify .elc as an order-only prereq so as to not needlessly rebuild ## target just because the .elc is missing. @@ -82,6 +83,13 @@ ${unidir}/emoji-labels.el: ${unidir}/../international/emoji.el \ ${srcdir}/emoji-test.txt $(AM_V_GEN)${emacs} -l emoji.el -f emoji--generate-file $@ +${unidir}/uni-scripts.el: ${srcdir}/unidata-gen.el \ + ${srcdir}/Scripts.txt \ + ${srcdir}/ScriptExtensions.txt \ + ${srcdir}/PropertyValueAliases.txt + $(AM_V_GEN)${emacs} -L ${srcdir} \ + -l unidata-gen -f unidata-gen-scripts $@ + .PHONY: charscript.el charscript.el: ${unidir}/charscript.el diff --git a/admin/unidata/PropertyValueAliases.txt b/admin/unidata/PropertyValueAliases.txt new file mode 100644 index 00000000000..bdc13857dcb --- /dev/null +++ b/admin/unidata/PropertyValueAliases.txt @@ -0,0 +1,1615 @@ +# PropertyValueAliases-14.0.0.txt +# Date: 2021-05-10, 21:08:53 GMT +# © 2021 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# This file contains aliases for property values used in the UCD. +# These names can be used for XML formats of UCD data, for regular-expression +# property tests, and other programmatic textual descriptions of Unicode data. 
+# +# The names may be translated in appropriate environments, and additional +# aliases may be useful. +# +# FORMAT +# +# Each line describes a property value name. +# This consists of three or more fields, separated by semicolons. +# +# First Field: The first field describes the property for which that +# property value name is used. +# +# Second Field: The second field is the short name for the property value. +# It is typically an abbreviation, but in a number of cases it is simply +# a duplicate of the "long name" in the third field. +# +# Third Field: The third field is the long name for the property value, +# typically the formal name used in documentation about the property value. +# +# In the case of Canonical_Combining_Class (ccc), there are 4 fields: +# The second field is numeric, the third is the short name, and the fourth is the long name. +# +# The above are the preferred aliases. Other aliases may be listed in additional fields. +# +# Loose matching should be applied to all property names and property values, with +# the exception of String Property values. With loose matching of property names and +# values, the case distinctions, whitespace, hyphens, and '_' are ignored. +# For Numeric Property values, numeric equivalence is applied: thus "01.00" +# is equivalent to "1". +# +# NOTE: Property value names are NOT unique across properties. For example: +# +# AL means Arabic Letter for the Bidi_Class property, and +# AL means Above_Left for the Canonical_Combining_Class property, and +# AL means Alphabetic for the Line_Break property. +# +# In addition, some property names may be the same as some property value names. +# For example: +# +# sc means the Script property, and +# Sc means the General_Category property value Currency_Symbol (Sc) +# +# The combination of property value and property name is, however, unique. +# +# For more information, see UAX #44, Unicode Character Database, and +# UTS #18, Unicode Regular Expressions. 
+# ================================================ + + +# ASCII_Hex_Digit (AHex) + +AHex; N ; No ; F ; False +AHex; Y ; Yes ; T ; True + +# Age (age) + +age; 1.1 ; V1_1 +age; 2.0 ; V2_0 +age; 2.1 ; V2_1 +age; 3.0 ; V3_0 +age; 3.1 ; V3_1 +age; 3.2 ; V3_2 +age; 4.0 ; V4_0 +age; 4.1 ; V4_1 +age; 5.0 ; V5_0 +age; 5.1 ; V5_1 +age; 5.2 ; V5_2 +age; 6.0 ; V6_0 +age; 6.1 ; V6_1 +age; 6.2 ; V6_2 +age; 6.3 ; V6_3 +age; 7.0 ; V7_0 +age; 8.0 ; V8_0 +age; 9.0 ; V9_0 +age; 10.0 ; V10_0 +age; 11.0 ; V11_0 +age; 12.0 ; V12_0 +age; 12.1 ; V12_1 +age; 13.0 ; V13_0 +age; 14.0 ; V14_0 +age; NA ; Unassigned + +# Alphabetic (Alpha) + +Alpha; N ; No ; F ; False +Alpha; Y ; Yes ; T ; True + +# Bidi_Class (bc) + +bc ; AL ; Arabic_Letter +bc ; AN ; Arabic_Number +bc ; B ; Paragraph_Separator +bc ; BN ; Boundary_Neutral +bc ; CS ; Common_Separator +bc ; EN ; European_Number +bc ; ES ; European_Separator +bc ; ET ; European_Terminator +bc ; FSI ; First_Strong_Isolate +bc ; L ; Left_To_Right +bc ; LRE ; Left_To_Right_Embedding +bc ; LRI ; Left_To_Right_Isolate +bc ; LRO ; Left_To_Right_Override +bc ; NSM ; Nonspacing_Mark +bc ; ON ; Other_Neutral +bc ; PDF ; Pop_Directional_Format +bc ; PDI ; Pop_Directional_Isolate +bc ; R ; Right_To_Left +bc ; RLE ; Right_To_Left_Embedding +bc ; RLI ; Right_To_Left_Isolate +bc ; RLO ; Right_To_Left_Override +bc ; S ; Segment_Separator +bc ; WS ; White_Space + +# Bidi_Control (Bidi_C) + +Bidi_C; N ; No ; F ; False +Bidi_C; Y ; Yes ; T ; True + +# Bidi_Mirrored (Bidi_M) + +Bidi_M; N ; No ; F ; False +Bidi_M; Y ; Yes ; T ; True + +# Bidi_Mirroring_Glyph (bmg) + +# @missing: 0000..10FFFF; Bidi_Mirroring_Glyph; <none> + +# Bidi_Paired_Bracket (bpb) + +# @missing: 0000..10FFFF; Bidi_Paired_Bracket; <none> + +# Bidi_Paired_Bracket_Type (bpt) + +bpt; c ; Close +bpt; n ; None +bpt; o ; Open +# @missing: 0000..10FFFF; Bidi_Paired_Bracket_Type; n + +# Block (blk) + +blk; Adlam ; Adlam +blk; Aegean_Numbers ; Aegean_Numbers +blk; Ahom ; Ahom +blk; Alchemical ; 
Alchemical_Symbols +blk; Alphabetic_PF ; Alphabetic_Presentation_Forms +blk; Anatolian_Hieroglyphs ; Anatolian_Hieroglyphs +blk; Ancient_Greek_Music ; Ancient_Greek_Musical_Notation +blk; Ancient_Greek_Numbers ; Ancient_Greek_Numbers +blk; Ancient_Symbols ; Ancient_Symbols +blk; Arabic ; Arabic +blk; Arabic_Ext_A ; Arabic_Extended_A +blk; Arabic_Ext_B ; Arabic_Extended_B +blk; Arabic_Math ; Arabic_Mathematical_Alphabetic_Symbols +blk; Arabic_PF_A ; Arabic_Presentation_Forms_A ; Arabic_Presentation_Forms-A +blk; Arabic_PF_B ; Arabic_Presentation_Forms_B +blk; Arabic_Sup ; Arabic_Supplement +blk; Armenian ; Armenian +blk; Arrows ; Arrows +blk; ASCII ; Basic_Latin +blk; Avestan ; Avestan +blk; Balinese ; Balinese +blk; Bamum ; Bamum +blk; Bamum_Sup ; Bamum_Supplement +blk; Bassa_Vah ; Bassa_Vah +blk; Batak ; Batak +blk; Bengali ; Bengali +blk; Bhaiksuki ; Bhaiksuki +blk; Block_Elements ; Block_Elements +blk; Bopomofo ; Bopomofo +blk; Bopomofo_Ext ; Bopomofo_Extended +blk; Box_Drawing ; Box_Drawing +blk; Brahmi ; Brahmi +blk; Braille ; Braille_Patterns +blk; Buginese ; Buginese +blk; Buhid ; Buhid +blk; Byzantine_Music ; Byzantine_Musical_Symbols +blk; Carian ; Carian +blk; Caucasian_Albanian ; Caucasian_Albanian +blk; Chakma ; Chakma +blk; Cham ; Cham +blk; Cherokee ; Cherokee +blk; Cherokee_Sup ; Cherokee_Supplement +blk; Chess_Symbols ; Chess_Symbols +blk; Chorasmian ; Chorasmian +blk; CJK ; CJK_Unified_Ideographs +blk; CJK_Compat ; CJK_Compatibility +blk; CJK_Compat_Forms ; CJK_Compatibility_Forms +blk; CJK_Compat_Ideographs ; CJK_Compatibility_Ideographs +blk; CJK_Compat_Ideographs_Sup ; CJK_Compatibility_Ideographs_Supplement +blk; CJK_Ext_A ; CJK_Unified_Ideographs_Extension_A +blk; CJK_Ext_B ; CJK_Unified_Ideographs_Extension_B +blk; CJK_Ext_C ; CJK_Unified_Ideographs_Extension_C +blk; CJK_Ext_D ; CJK_Unified_Ideographs_Extension_D +blk; CJK_Ext_E ; CJK_Unified_Ideographs_Extension_E +blk; CJK_Ext_F ; CJK_Unified_Ideographs_Extension_F +blk; CJK_Ext_G ; 
CJK_Unified_Ideographs_Extension_G +blk; CJK_Radicals_Sup ; CJK_Radicals_Supplement +blk; CJK_Strokes ; CJK_Strokes +blk; CJK_Symbols ; CJK_Symbols_And_Punctuation +blk; Compat_Jamo ; Hangul_Compatibility_Jamo +blk; Control_Pictures ; Control_Pictures +blk; Coptic ; Coptic +blk; Coptic_Epact_Numbers ; Coptic_Epact_Numbers +blk; Counting_Rod ; Counting_Rod_Numerals +blk; Cuneiform ; Cuneiform +blk; Cuneiform_Numbers ; Cuneiform_Numbers_And_Punctuation +blk; Currency_Symbols ; Currency_Symbols +blk; Cypriot_Syllabary ; Cypriot_Syllabary +blk; Cypro_Minoan ; Cypro_Minoan +blk; Cyrillic ; Cyrillic +blk; Cyrillic_Ext_A ; Cyrillic_Extended_A +blk; Cyrillic_Ext_B ; Cyrillic_Extended_B +blk; Cyrillic_Ext_C ; Cyrillic_Extended_C +blk; Cyrillic_Sup ; Cyrillic_Supplement ; Cyrillic_Supplementary +blk; Deseret ; Deseret +blk; Devanagari ; Devanagari +blk; Devanagari_Ext ; Devanagari_Extended +blk; Diacriticals ; Combining_Diacritical_Marks +blk; Diacriticals_Ext ; Combining_Diacritical_Marks_Extended +blk; Diacriticals_For_Symbols ; Combining_Diacritical_Marks_For_Symbols; Combining_Marks_For_Symbols +blk; Diacriticals_Sup ; Combining_Diacritical_Marks_Supplement +blk; Dingbats ; Dingbats +blk; Dives_Akuru ; Dives_Akuru +blk; Dogra ; Dogra +blk; Domino ; Domino_Tiles +blk; Duployan ; Duployan +blk; Early_Dynastic_Cuneiform ; Early_Dynastic_Cuneiform +blk; Egyptian_Hieroglyph_Format_Controls; Egyptian_Hieroglyph_Format_Controls +blk; Egyptian_Hieroglyphs ; Egyptian_Hieroglyphs +blk; Elbasan ; Elbasan +blk; Elymaic ; Elymaic +blk; Emoticons ; Emoticons +blk; Enclosed_Alphanum ; Enclosed_Alphanumerics +blk; Enclosed_Alphanum_Sup ; Enclosed_Alphanumeric_Supplement +blk; Enclosed_CJK ; Enclosed_CJK_Letters_And_Months +blk; Enclosed_Ideographic_Sup ; Enclosed_Ideographic_Supplement +blk; Ethiopic ; Ethiopic +blk; Ethiopic_Ext ; Ethiopic_Extended +blk; Ethiopic_Ext_A ; Ethiopic_Extended_A +blk; Ethiopic_Ext_B ; Ethiopic_Extended_B +blk; Ethiopic_Sup ; Ethiopic_Supplement +blk; 
Geometric_Shapes ; Geometric_Shapes +blk; Geometric_Shapes_Ext ; Geometric_Shapes_Extended +blk; Georgian ; Georgian +blk; Georgian_Ext ; Georgian_Extended +blk; Georgian_Sup ; Georgian_Supplement +blk; Glagolitic ; Glagolitic +blk; Glagolitic_Sup ; Glagolitic_Supplement +blk; Gothic ; Gothic +blk; Grantha ; Grantha +blk; Greek ; Greek_And_Coptic +blk; Greek_Ext ; Greek_Extended +blk; Gujarati ; Gujarati +blk; Gunjala_Gondi ; Gunjala_Gondi +blk; Gurmukhi ; Gurmukhi +blk; Half_And_Full_Forms ; Halfwidth_And_Fullwidth_Forms +blk; Half_Marks ; Combining_Half_Marks +blk; Hangul ; Hangul_Syllables +blk; Hanifi_Rohingya ; Hanifi_Rohingya +blk; Hanunoo ; Hanunoo +blk; Hatran ; Hatran +blk; Hebrew ; Hebrew +blk; High_PU_Surrogates ; High_Private_Use_Surrogates +blk; High_Surrogates ; High_Surrogates +blk; Hiragana ; Hiragana +blk; IDC ; Ideographic_Description_Characters +blk; Ideographic_Symbols ; Ideographic_Symbols_And_Punctuation +blk; Imperial_Aramaic ; Imperial_Aramaic +blk; Indic_Number_Forms ; Common_Indic_Number_Forms +blk; Indic_Siyaq_Numbers ; Indic_Siyaq_Numbers +blk; Inscriptional_Pahlavi ; Inscriptional_Pahlavi +blk; Inscriptional_Parthian ; Inscriptional_Parthian +blk; IPA_Ext ; IPA_Extensions +blk; Jamo ; Hangul_Jamo +blk; Jamo_Ext_A ; Hangul_Jamo_Extended_A +blk; Jamo_Ext_B ; Hangul_Jamo_Extended_B +blk; Javanese ; Javanese +blk; Kaithi ; Kaithi +blk; Kana_Ext_A ; Kana_Extended_A +blk; Kana_Ext_B ; Kana_Extended_B +blk; Kana_Sup ; Kana_Supplement +blk; Kanbun ; Kanbun +blk; Kangxi ; Kangxi_Radicals +blk; Kannada ; Kannada +blk; Katakana ; Katakana +blk; Katakana_Ext ; Katakana_Phonetic_Extensions +blk; Kayah_Li ; Kayah_Li +blk; Kharoshthi ; Kharoshthi +blk; Khitan_Small_Script ; Khitan_Small_Script +blk; Khmer ; Khmer +blk; Khmer_Symbols ; Khmer_Symbols +blk; Khojki ; Khojki +blk; Khudawadi ; Khudawadi +blk; Lao ; Lao +blk; Latin_1_Sup ; Latin_1_Supplement ; Latin_1 +blk; Latin_Ext_A ; Latin_Extended_A +blk; Latin_Ext_Additional ; Latin_Extended_Additional 
+blk; Latin_Ext_B ; Latin_Extended_B +blk; Latin_Ext_C ; Latin_Extended_C +blk; Latin_Ext_D ; Latin_Extended_D +blk; Latin_Ext_E ; Latin_Extended_E +blk; Latin_Ext_F ; Latin_Extended_F +blk; Latin_Ext_G ; Latin_Extended_G +blk; Lepcha ; Lepcha +blk; Letterlike_Symbols ; Letterlike_Symbols +blk; Limbu ; Limbu +blk; Linear_A ; Linear_A +blk; Linear_B_Ideograms ; Linear_B_Ideograms +blk; Linear_B_Syllabary ; Linear_B_Syllabary +blk; Lisu ; Lisu +blk; Lisu_Sup ; Lisu_Supplement +blk; Low_Surrogates ; Low_Surrogates +blk; Lycian ; Lycian +blk; Lydian ; Lydian +blk; Mahajani ; Mahajani +blk; Mahjong ; Mahjong_Tiles +blk; Makasar ; Makasar +blk; Malayalam ; Malayalam +blk; Mandaic ; Mandaic +blk; Manichaean ; Manichaean +blk; Marchen ; Marchen +blk; Masaram_Gondi ; Masaram_Gondi +blk; Math_Alphanum ; Mathematical_Alphanumeric_Symbols +blk; Math_Operators ; Mathematical_Operators +blk; Mayan_Numerals ; Mayan_Numerals +blk; Medefaidrin ; Medefaidrin +blk; Meetei_Mayek ; Meetei_Mayek +blk; Meetei_Mayek_Ext ; Meetei_Mayek_Extensions +blk; Mende_Kikakui ; Mende_Kikakui +blk; Meroitic_Cursive ; Meroitic_Cursive +blk; Meroitic_Hieroglyphs ; Meroitic_Hieroglyphs +blk; Miao ; Miao +blk; Misc_Arrows ; Miscellaneous_Symbols_And_Arrows +blk; Misc_Math_Symbols_A ; Miscellaneous_Mathematical_Symbols_A +blk; Misc_Math_Symbols_B ; Miscellaneous_Mathematical_Symbols_B +blk; Misc_Pictographs ; Miscellaneous_Symbols_And_Pictographs +blk; Misc_Symbols ; Miscellaneous_Symbols +blk; Misc_Technical ; Miscellaneous_Technical +blk; Modi ; Modi +blk; Modifier_Letters ; Spacing_Modifier_Letters +blk; Modifier_Tone_Letters ; Modifier_Tone_Letters +blk; Mongolian ; Mongolian +blk; Mongolian_Sup ; Mongolian_Supplement +blk; Mro ; Mro +blk; Multani ; Multani +blk; Music ; Musical_Symbols +blk; Myanmar ; Myanmar +blk; Myanmar_Ext_A ; Myanmar_Extended_A +blk; Myanmar_Ext_B ; Myanmar_Extended_B +blk; Nabataean ; Nabataean +blk; Nandinagari ; Nandinagari +blk; NB ; No_Block +blk; New_Tai_Lue ; New_Tai_Lue 
+blk; Newa ; Newa +blk; NKo ; NKo +blk; Number_Forms ; Number_Forms +blk; Nushu ; Nushu +blk; Nyiakeng_Puachue_Hmong ; Nyiakeng_Puachue_Hmong +blk; OCR ; Optical_Character_Recognition +blk; Ogham ; Ogham +blk; Ol_Chiki ; Ol_Chiki +blk; Old_Hungarian ; Old_Hungarian +blk; Old_Italic ; Old_Italic +blk; Old_North_Arabian ; Old_North_Arabian +blk; Old_Permic ; Old_Permic +blk; Old_Persian ; Old_Persian +blk; Old_Sogdian ; Old_Sogdian +blk; Old_South_Arabian ; Old_South_Arabian +blk; Old_Turkic ; Old_Turkic +blk; Old_Uyghur ; Old_Uyghur +blk; Oriya ; Oriya +blk; Ornamental_Dingbats ; Ornamental_Dingbats +blk; Osage ; Osage +blk; Osmanya ; Osmanya +blk; Ottoman_Siyaq_Numbers ; Ottoman_Siyaq_Numbers +blk; Pahawh_Hmong ; Pahawh_Hmong +blk; Palmyrene ; Palmyrene +blk; Pau_Cin_Hau ; Pau_Cin_Hau +blk; Phags_Pa ; Phags_Pa +blk; Phaistos ; Phaistos_Disc +blk; Phoenician ; Phoenician +blk; Phonetic_Ext ; Phonetic_Extensions +blk; Phonetic_Ext_Sup ; Phonetic_Extensions_Supplement +blk; Playing_Cards ; Playing_Cards +blk; Psalter_Pahlavi ; Psalter_Pahlavi +blk; PUA ; Private_Use_Area ; Private_Use +blk; Punctuation ; General_Punctuation +blk; Rejang ; Rejang +blk; Rumi ; Rumi_Numeral_Symbols +blk; Runic ; Runic +blk; Samaritan ; Samaritan +blk; Saurashtra ; Saurashtra +blk; Sharada ; Sharada +blk; Shavian ; Shavian +blk; Shorthand_Format_Controls ; Shorthand_Format_Controls +blk; Siddham ; Siddham +blk; Sinhala ; Sinhala +blk; Sinhala_Archaic_Numbers ; Sinhala_Archaic_Numbers +blk; Small_Forms ; Small_Form_Variants +blk; Small_Kana_Ext ; Small_Kana_Extension +blk; Sogdian ; Sogdian +blk; Sora_Sompeng ; Sora_Sompeng +blk; Soyombo ; Soyombo +blk; Specials ; Specials +blk; Sundanese ; Sundanese +blk; Sundanese_Sup ; Sundanese_Supplement +blk; Sup_Arrows_A ; Supplemental_Arrows_A +blk; Sup_Arrows_B ; Supplemental_Arrows_B +blk; Sup_Arrows_C ; Supplemental_Arrows_C +blk; Sup_Math_Operators ; Supplemental_Mathematical_Operators +blk; Sup_PUA_A ; Supplementary_Private_Use_Area_A +blk; 
Sup_PUA_B ; Supplementary_Private_Use_Area_B +blk; Sup_Punctuation ; Supplemental_Punctuation +blk; Sup_Symbols_And_Pictographs ; Supplemental_Symbols_And_Pictographs +blk; Super_And_Sub ; Superscripts_And_Subscripts +blk; Sutton_SignWriting ; Sutton_SignWriting +blk; Syloti_Nagri ; Syloti_Nagri +blk; Symbols_And_Pictographs_Ext_A ; Symbols_And_Pictographs_Extended_A +blk; Symbols_For_Legacy_Computing ; Symbols_For_Legacy_Computing +blk; Syriac ; Syriac +blk; Syriac_Sup ; Syriac_Supplement +blk; Tagalog ; Tagalog +blk; Tagbanwa ; Tagbanwa +blk; Tags ; Tags +blk; Tai_Le ; Tai_Le +blk; Tai_Tham ; Tai_Tham +blk; Tai_Viet ; Tai_Viet +blk; Tai_Xuan_Jing ; Tai_Xuan_Jing_Symbols +blk; Takri ; Takri +blk; Tamil ; Tamil +blk; Tamil_Sup ; Tamil_Supplement +blk; Tangsa ; Tangsa +blk; Tangut ; Tangut +blk; Tangut_Components ; Tangut_Components +blk; Tangut_Sup ; Tangut_Supplement +blk; Telugu ; Telugu +blk; Thaana ; Thaana +blk; Thai ; Thai +blk; Tibetan ; Tibetan +blk; Tifinagh ; Tifinagh +blk; Tirhuta ; Tirhuta +blk; Toto ; Toto +blk; Transport_And_Map ; Transport_And_Map_Symbols +blk; UCAS ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics +blk; UCAS_Ext ; Unified_Canadian_Aboriginal_Syllabics_Extended +blk; UCAS_Ext_A ; Unified_Canadian_Aboriginal_Syllabics_Extended_A +blk; Ugaritic ; Ugaritic +blk; Vai ; Vai +blk; Vedic_Ext ; Vedic_Extensions +blk; Vertical_Forms ; Vertical_Forms +blk; Vithkuqi ; Vithkuqi +blk; VS ; Variation_Selectors +blk; VS_Sup ; Variation_Selectors_Supplement +blk; Wancho ; Wancho +blk; Warang_Citi ; Warang_Citi +blk; Yezidi ; Yezidi +blk; Yi_Radicals ; Yi_Radicals +blk; Yi_Syllables ; Yi_Syllables +blk; Yijing ; Yijing_Hexagram_Symbols +blk; Zanabazar_Square ; Zanabazar_Square +blk; Znamenny_Music ; Znamenny_Musical_Notation + +# Canonical_Combining_Class (ccc) + +ccc; 0; NR ; Not_Reordered +ccc; 1; OV ; Overlay +ccc; 6; HANR ; Han_Reading +ccc; 7; NK ; Nukta +ccc; 8; KV ; Kana_Voicing +ccc; 9; VR ; Virama +ccc; 10; CCC10 ; CCC10 +ccc; 11; 
CCC11 ; CCC11 +ccc; 12; CCC12 ; CCC12 +ccc; 13; CCC13 ; CCC13 +ccc; 14; CCC14 ; CCC14 +ccc; 15; CCC15 ; CCC15 +ccc; 16; CCC16 ; CCC16 +ccc; 17; CCC17 ; CCC17 +ccc; 18; CCC18 ; CCC18 +ccc; 19; CCC19 ; CCC19 +ccc; 20; CCC20 ; CCC20 +ccc; 21; CCC21 ; CCC21 +ccc; 22; CCC22 ; CCC22 +ccc; 23; CCC23 ; CCC23 +ccc; 24; CCC24 ; CCC24 +ccc; 25; CCC25 ; CCC25 +ccc; 26; CCC26 ; CCC26 +ccc; 27; CCC27 ; CCC27 +ccc; 28; CCC28 ; CCC28 +ccc; 29; CCC29 ; CCC29 +ccc; 30; CCC30 ; CCC30 +ccc; 31; CCC31 ; CCC31 +ccc; 32; CCC32 ; CCC32 +ccc; 33; CCC33 ; CCC33 +ccc; 34; CCC34 ; CCC34 +ccc; 35; CCC35 ; CCC35 +ccc; 36; CCC36 ; CCC36 +ccc; 84; CCC84 ; CCC84 +ccc; 91; CCC91 ; CCC91 +ccc; 103; CCC103 ; CCC103 +ccc; 107; CCC107 ; CCC107 +ccc; 118; CCC118 ; CCC118 +ccc; 122; CCC122 ; CCC122 +ccc; 129; CCC129 ; CCC129 +ccc; 130; CCC130 ; CCC130 +ccc; 132; CCC132 ; CCC132 +ccc; 133; CCC133 ; CCC133 # RESERVED +ccc; 200; ATBL ; Attached_Below_Left +ccc; 202; ATB ; Attached_Below +ccc; 214; ATA ; Attached_Above +ccc; 216; ATAR ; Attached_Above_Right +ccc; 218; BL ; Below_Left +ccc; 220; B ; Below +ccc; 222; BR ; Below_Right +ccc; 224; L ; Left +ccc; 226; R ; Right +ccc; 228; AL ; Above_Left +ccc; 230; A ; Above +ccc; 232; AR ; Above_Right +ccc; 233; DB ; Double_Below +ccc; 234; DA ; Double_Above +ccc; 240; IS ; Iota_Subscript + +# Case_Folding (cf) + +# @missing: 0000..10FFFF; Case_Folding; <code point> + +# Case_Ignorable (CI) + +CI ; N ; No ; F ; False +CI ; Y ; Yes ; T ; True + +# Cased (Cased) + +Cased; N ; No ; F ; False +Cased; Y ; Yes ; T ; True + +# Changes_When_Casefolded (CWCF) + +CWCF; N ; No ; F ; False +CWCF; Y ; Yes ; T ; True + +# Changes_When_Casemapped (CWCM) + +CWCM; N ; No ; F ; False +CWCM; Y ; Yes ; T ; True + +# Changes_When_Lowercased (CWL) + +CWL; N ; No ; F ; False +CWL; Y ; Yes ; T ; True + +# Changes_When_NFKC_Casefolded (CWKCF) + +CWKCF; N ; No ; F ; False +CWKCF; Y ; Yes ; T ; True + +# Changes_When_Titlecased (CWT) + +CWT; N ; No ; F ; False +CWT; Y ; Yes ; T ; True + +# 
Changes_When_Uppercased (CWU) + +CWU; N ; No ; F ; False +CWU; Y ; Yes ; T ; True + +# Composition_Exclusion (CE) + +CE ; N ; No ; F ; False +CE ; Y ; Yes ; T ; True + +# Dash (Dash) + +Dash; N ; No ; F ; False +Dash; Y ; Yes ; T ; True + +# Decomposition_Mapping (dm) + +# @missing: 0000..10FFFF; Decomposition_Mapping; <code point> + +# Decomposition_Type (dt) + +dt ; Can ; Canonical ; can +dt ; Com ; Compat ; com +dt ; Enc ; Circle ; enc +dt ; Fin ; Final ; fin +dt ; Font ; Font ; font +dt ; Fra ; Fraction ; fra +dt ; Init ; Initial ; init +dt ; Iso ; Isolated ; iso +dt ; Med ; Medial ; med +dt ; Nar ; Narrow ; nar +dt ; Nb ; Nobreak ; nb +dt ; None ; None ; none +dt ; Sml ; Small ; sml +dt ; Sqr ; Square ; sqr +dt ; Sub ; Sub ; sub +dt ; Sup ; Super ; sup +dt ; Vert ; Vertical ; vert +dt ; Wide ; Wide ; wide + +# Default_Ignorable_Code_Point (DI) + +DI ; N ; No ; F ; False +DI ; Y ; Yes ; T ; True + +# Deprecated (Dep) + +Dep; N ; No ; F ; False +Dep; Y ; Yes ; T ; True + +# Diacritic (Dia) + +Dia; N ; No ; F ; False +Dia; Y ; Yes ; T ; True + +# East_Asian_Width (ea) + +ea ; A ; Ambiguous +ea ; F ; Fullwidth +ea ; H ; Halfwidth +ea ; N ; Neutral +ea ; Na ; Narrow +ea ; W ; Wide + +# Emoji (Emoji) + +Emoji; N ; No ; F ; False +Emoji; Y ; Yes ; T ; True + +# Emoji_Component (EComp) + +EComp; N ; No ; F ; False +EComp; Y ; Yes ; T ; True + +# Emoji_Modifier (EMod) + +EMod; N ; No ; F ; False +EMod; Y ; Yes ; T ; True + +# Emoji_Modifier_Base (EBase) + +EBase; N ; No ; F ; False +EBase; Y ; Yes ; T ; True + +# Emoji_Presentation (EPres) + +EPres; N ; No ; F ; False +EPres; Y ; Yes ; T ; True + +# Equivalent_Unified_Ideograph (EqUIdeo) + +# @missing: 0000..10FFFF; Equivalent_Unified_Ideograph; <none> + +# Expands_On_NFC (XO_NFC) + +XO_NFC; N ; No ; F ; False +XO_NFC; Y ; Yes ; T ; True + +# Expands_On_NFD (XO_NFD) + +XO_NFD; N ; No ; F ; False +XO_NFD; Y ; Yes ; T ; True + +# Expands_On_NFKC (XO_NFKC) + +XO_NFKC; N ; No ; F ; False +XO_NFKC; Y ; Yes ; T ; True + +# 
Expands_On_NFKD (XO_NFKD) + +XO_NFKD; N ; No ; F ; False +XO_NFKD; Y ; Yes ; T ; True + +# Extended_Pictographic (ExtPict) + +ExtPict; N ; No ; F ; False +ExtPict; Y ; Yes ; T ; True + +# Extender (Ext) + +Ext; N ; No ; F ; False +Ext; Y ; Yes ; T ; True + +# FC_NFKC_Closure (FC_NFKC) + +# @missing: 0000..10FFFF; FC_NFKC_Closure; <code point> + +# Full_Composition_Exclusion (Comp_Ex) + +Comp_Ex; N ; No ; F ; False +Comp_Ex; Y ; Yes ; T ; True + +# General_Category (gc) + +gc ; C ; Other # Cc | Cf | Cn | Co | Cs +gc ; Cc ; Control ; cntrl +gc ; Cf ; Format +gc ; Cn ; Unassigned +gc ; Co ; Private_Use +gc ; Cs ; Surrogate +gc ; L ; Letter # Ll | Lm | Lo | Lt | Lu +gc ; LC ; Cased_Letter # Ll | Lt | Lu +gc ; Ll ; Lowercase_Letter +gc ; Lm ; Modifier_Letter +gc ; Lo ; Other_Letter +gc ; Lt ; Titlecase_Letter +gc ; Lu ; Uppercase_Letter +gc ; M ; Mark ; Combining_Mark # Mc | Me | Mn +gc ; Mc ; Spacing_Mark +gc ; Me ; Enclosing_Mark +gc ; Mn ; Nonspacing_Mark +gc ; N ; Number # Nd | Nl | No +gc ; Nd ; Decimal_Number ; digit +gc ; Nl ; Letter_Number +gc ; No ; Other_Number +gc ; P ; Punctuation ; punct # Pc | Pd | Pe | Pf | Pi | Po | Ps +gc ; Pc ; Connector_Punctuation +gc ; Pd ; Dash_Punctuation +gc ; Pe ; Close_Punctuation +gc ; Pf ; Final_Punctuation +gc ; Pi ; Initial_Punctuation +gc ; Po ; Other_Punctuation +gc ; Ps ; Open_Punctuation +gc ; S ; Symbol # Sc | Sk | Sm | So +gc ; Sc ; Currency_Symbol +gc ; Sk ; Modifier_Symbol +gc ; Sm ; Math_Symbol +gc ; So ; Other_Symbol +gc ; Z ; Separator # Zl | Zp | Zs +gc ; Zl ; Line_Separator +gc ; Zp ; Paragraph_Separator +gc ; Zs ; Space_Separator +# @missing: 0000..10FFFF; General_Category; Unassigned + +# Grapheme_Base (Gr_Base) + +Gr_Base; N ; No ; F ; False +Gr_Base; Y ; Yes ; T ; True + +# Grapheme_Cluster_Break (GCB) + +GCB; CN ; Control +GCB; CR ; CR +GCB; EB ; E_Base +GCB; EBG ; E_Base_GAZ +GCB; EM ; E_Modifier +GCB; EX ; Extend +GCB; GAZ ; Glue_After_Zwj +GCB; L ; L +GCB; LF ; LF +GCB; LV ; LV +GCB; LVT ; LVT +GCB; PP 
; Prepend +GCB; RI ; Regional_Indicator +GCB; SM ; SpacingMark +GCB; T ; T +GCB; V ; V +GCB; XX ; Other +GCB; ZWJ ; ZWJ + +# Grapheme_Extend (Gr_Ext) + +Gr_Ext; N ; No ; F ; False +Gr_Ext; Y ; Yes ; T ; True + +# Grapheme_Link (Gr_Link) + +Gr_Link; N ; No ; F ; False +Gr_Link; Y ; Yes ; T ; True + +# Hangul_Syllable_Type (hst) + +hst; L ; Leading_Jamo +hst; LV ; LV_Syllable +hst; LVT ; LVT_Syllable +hst; NA ; Not_Applicable +hst; T ; Trailing_Jamo +hst; V ; Vowel_Jamo + +# Hex_Digit (Hex) + +Hex; N ; No ; F ; False +Hex; Y ; Yes ; T ; True + +# Hyphen (Hyphen) + +Hyphen; N ; No ; F ; False +Hyphen; Y ; Yes ; T ; True + +# IDS_Binary_Operator (IDSB) + +IDSB; N ; No ; F ; False +IDSB; Y ; Yes ; T ; True + +# IDS_Trinary_Operator (IDST) + +IDST; N ; No ; F ; False +IDST; Y ; Yes ; T ; True + +# ID_Continue (IDC) + +IDC; N ; No ; F ; False +IDC; Y ; Yes ; T ; True + +# ID_Start (IDS) + +IDS; N ; No ; F ; False +IDS; Y ; Yes ; T ; True + +# ISO_Comment (isc) + +# @missing: 0000..10FFFF; ISO_Comment; <none> + +# Ideographic (Ideo) + +Ideo; N ; No ; F ; False +Ideo; Y ; Yes ; T ; True + +# Indic_Positional_Category (InPC) + +InPC; Bottom ; Bottom +InPC; Bottom_And_Left ; Bottom_And_Left +InPC; Bottom_And_Right ; Bottom_And_Right +InPC; Left ; Left +InPC; Left_And_Right ; Left_And_Right +InPC; NA ; NA +InPC; Overstruck ; Overstruck +InPC; Right ; Right +InPC; Top ; Top +InPC; Top_And_Bottom ; Top_And_Bottom +InPC; Top_And_Bottom_And_Left ; Top_And_Bottom_And_Left +InPC; Top_And_Bottom_And_Right ; Top_And_Bottom_And_Right +InPC; Top_And_Left ; Top_And_Left +InPC; Top_And_Left_And_Right ; Top_And_Left_And_Right +InPC; Top_And_Right ; Top_And_Right +InPC; Visual_Order_Left ; Visual_Order_Left + +# Indic_Syllabic_Category (InSC) + +InSC; Avagraha ; Avagraha +InSC; Bindu ; Bindu +InSC; Brahmi_Joining_Number ; Brahmi_Joining_Number +InSC; Cantillation_Mark ; Cantillation_Mark +InSC; Consonant ; Consonant +InSC; Consonant_Dead ; Consonant_Dead +InSC; Consonant_Final ; 
Consonant_Final +InSC; Consonant_Head_Letter ; Consonant_Head_Letter +InSC; Consonant_Initial_Postfixed ; Consonant_Initial_Postfixed +InSC; Consonant_Killer ; Consonant_Killer +InSC; Consonant_Medial ; Consonant_Medial +InSC; Consonant_Placeholder ; Consonant_Placeholder +InSC; Consonant_Preceding_Repha ; Consonant_Preceding_Repha +InSC; Consonant_Prefixed ; Consonant_Prefixed +InSC; Consonant_Subjoined ; Consonant_Subjoined +InSC; Consonant_Succeeding_Repha ; Consonant_Succeeding_Repha +InSC; Consonant_With_Stacker ; Consonant_With_Stacker +InSC; Gemination_Mark ; Gemination_Mark +InSC; Invisible_Stacker ; Invisible_Stacker +InSC; Joiner ; Joiner +InSC; Modifying_Letter ; Modifying_Letter +InSC; Non_Joiner ; Non_Joiner +InSC; Nukta ; Nukta +InSC; Number ; Number +InSC; Number_Joiner ; Number_Joiner +InSC; Other ; Other +InSC; Pure_Killer ; Pure_Killer +InSC; Register_Shifter ; Register_Shifter +InSC; Syllable_Modifier ; Syllable_Modifier +InSC; Tone_Letter ; Tone_Letter +InSC; Tone_Mark ; Tone_Mark +InSC; Virama ; Virama +InSC; Visarga ; Visarga +InSC; Vowel ; Vowel +InSC; Vowel_Dependent ; Vowel_Dependent +InSC; Vowel_Independent ; Vowel_Independent + +# Jamo_Short_Name (JSN) + +JSN; A ; A +JSN; AE ; AE +JSN; B ; B +JSN; BB ; BB +JSN; BS ; BS +JSN; C ; C +JSN; D ; D +JSN; DD ; DD +JSN; E ; E +JSN; EO ; EO +JSN; EU ; EU +JSN; G ; G +JSN; GG ; GG +JSN; GS ; GS +JSN; H ; H +JSN; I ; I +JSN; J ; J +JSN; JJ ; JJ +JSN; K ; K +JSN; L ; L +JSN; LB ; LB +JSN; LG ; LG +JSN; LH ; LH +JSN; LM ; LM +JSN; LP ; LP +JSN; LS ; LS +JSN; LT ; LT +JSN; M ; M +JSN; N ; N +JSN; NG ; NG +JSN; NH ; NH +JSN; NJ ; NJ +JSN; O ; O +JSN; OE ; OE +JSN; P ; P +JSN; R ; R +JSN; S ; S +JSN; SS ; SS +JSN; T ; T +JSN; U ; U +JSN; WA ; WA +JSN; WAE ; WAE +JSN; WE ; WE +JSN; WEO ; WEO +JSN; WI ; WI +JSN; YA ; YA +JSN; YAE ; YAE +JSN; YE ; YE +JSN; YEO ; YEO +JSN; YI ; YI +JSN; YO ; YO +JSN; YU ; YU +# @missing: 0000..10FFFF; Jamo_Short_Name; <none> + +# Join_Control (Join_C) + +Join_C; N ; No ; F ; 
False +Join_C; Y ; Yes ; T ; True + +# Joining_Group (jg) + +jg ; African_Feh ; African_Feh +jg ; African_Noon ; African_Noon +jg ; African_Qaf ; African_Qaf +jg ; Ain ; Ain +jg ; Alaph ; Alaph +jg ; Alef ; Alef +jg ; Beh ; Beh +jg ; Beth ; Beth +jg ; Burushaski_Yeh_Barree ; Burushaski_Yeh_Barree +jg ; Dal ; Dal +jg ; Dalath_Rish ; Dalath_Rish +jg ; E ; E +jg ; Farsi_Yeh ; Farsi_Yeh +jg ; Fe ; Fe +jg ; Feh ; Feh +jg ; Final_Semkath ; Final_Semkath +jg ; Gaf ; Gaf +jg ; Gamal ; Gamal +jg ; Hah ; Hah +jg ; Hanifi_Rohingya_Kinna_Ya ; Hanifi_Rohingya_Kinna_Ya +jg ; Hanifi_Rohingya_Pa ; Hanifi_Rohingya_Pa +jg ; He ; He +jg ; Heh ; Heh +jg ; Heh_Goal ; Heh_Goal +jg ; Heth ; Heth +jg ; Kaf ; Kaf +jg ; Kaph ; Kaph +jg ; Khaph ; Khaph +jg ; Knotted_Heh ; Knotted_Heh +jg ; Lam ; Lam +jg ; Lamadh ; Lamadh +jg ; Malayalam_Bha ; Malayalam_Bha +jg ; Malayalam_Ja ; Malayalam_Ja +jg ; Malayalam_Lla ; Malayalam_Lla +jg ; Malayalam_Llla ; Malayalam_Llla +jg ; Malayalam_Nga ; Malayalam_Nga +jg ; Malayalam_Nna ; Malayalam_Nna +jg ; Malayalam_Nnna ; Malayalam_Nnna +jg ; Malayalam_Nya ; Malayalam_Nya +jg ; Malayalam_Ra ; Malayalam_Ra +jg ; Malayalam_Ssa ; Malayalam_Ssa +jg ; Malayalam_Tta ; Malayalam_Tta +jg ; Manichaean_Aleph ; Manichaean_Aleph +jg ; Manichaean_Ayin ; Manichaean_Ayin +jg ; Manichaean_Beth ; Manichaean_Beth +jg ; Manichaean_Daleth ; Manichaean_Daleth +jg ; Manichaean_Dhamedh ; Manichaean_Dhamedh +jg ; Manichaean_Five ; Manichaean_Five +jg ; Manichaean_Gimel ; Manichaean_Gimel +jg ; Manichaean_Heth ; Manichaean_Heth +jg ; Manichaean_Hundred ; Manichaean_Hundred +jg ; Manichaean_Kaph ; Manichaean_Kaph +jg ; Manichaean_Lamedh ; Manichaean_Lamedh +jg ; Manichaean_Mem ; Manichaean_Mem +jg ; Manichaean_Nun ; Manichaean_Nun +jg ; Manichaean_One ; Manichaean_One +jg ; Manichaean_Pe ; Manichaean_Pe +jg ; Manichaean_Qoph ; Manichaean_Qoph +jg ; Manichaean_Resh ; Manichaean_Resh +jg ; Manichaean_Sadhe ; Manichaean_Sadhe +jg ; Manichaean_Samekh ; Manichaean_Samekh +jg ; 
Manichaean_Taw ; Manichaean_Taw +jg ; Manichaean_Ten ; Manichaean_Ten +jg ; Manichaean_Teth ; Manichaean_Teth +jg ; Manichaean_Thamedh ; Manichaean_Thamedh +jg ; Manichaean_Twenty ; Manichaean_Twenty +jg ; Manichaean_Waw ; Manichaean_Waw +jg ; Manichaean_Yodh ; Manichaean_Yodh +jg ; Manichaean_Zayin ; Manichaean_Zayin +jg ; Meem ; Meem +jg ; Mim ; Mim +jg ; No_Joining_Group ; No_Joining_Group +jg ; Noon ; Noon +jg ; Nun ; Nun +jg ; Nya ; Nya +jg ; Pe ; Pe +jg ; Qaf ; Qaf +jg ; Qaph ; Qaph +jg ; Reh ; Reh +jg ; Reversed_Pe ; Reversed_Pe +jg ; Rohingya_Yeh ; Rohingya_Yeh +jg ; Sad ; Sad +jg ; Sadhe ; Sadhe +jg ; Seen ; Seen +jg ; Semkath ; Semkath +jg ; Shin ; Shin +jg ; Straight_Waw ; Straight_Waw +jg ; Swash_Kaf ; Swash_Kaf +jg ; Syriac_Waw ; Syriac_Waw +jg ; Tah ; Tah +jg ; Taw ; Taw +jg ; Teh_Marbuta ; Teh_Marbuta +jg ; Teh_Marbuta_Goal ; Hamza_On_Heh_Goal +jg ; Teth ; Teth +jg ; Thin_Yeh ; Thin_Yeh +jg ; Vertical_Tail ; Vertical_Tail +jg ; Waw ; Waw +jg ; Yeh ; Yeh +jg ; Yeh_Barree ; Yeh_Barree +jg ; Yeh_With_Tail ; Yeh_With_Tail +jg ; Yudh ; Yudh +jg ; Yudh_He ; Yudh_He +jg ; Zain ; Zain +jg ; Zhain ; Zhain + +# Joining_Type (jt) + +jt ; C ; Join_Causing +jt ; D ; Dual_Joining +jt ; L ; Left_Joining +jt ; R ; Right_Joining +jt ; T ; Transparent +jt ; U ; Non_Joining + +# Line_Break (lb) + +lb ; AI ; Ambiguous +lb ; AL ; Alphabetic +lb ; B2 ; Break_Both +lb ; BA ; Break_After +lb ; BB ; Break_Before +lb ; BK ; Mandatory_Break +lb ; CB ; Contingent_Break +lb ; CJ ; Conditional_Japanese_Starter +lb ; CL ; Close_Punctuation +lb ; CM ; Combining_Mark +lb ; CP ; Close_Parenthesis +lb ; CR ; Carriage_Return +lb ; EB ; E_Base +lb ; EM ; E_Modifier +lb ; EX ; Exclamation +lb ; GL ; Glue +lb ; H2 ; H2 +lb ; H3 ; H3 +lb ; HL ; Hebrew_Letter +lb ; HY ; Hyphen +lb ; ID ; Ideographic +lb ; IN ; Inseparable ; Inseperable +lb ; IS ; Infix_Numeric +lb ; JL ; JL +lb ; JT ; JT +lb ; JV ; JV +lb ; LF ; Line_Feed +lb ; NL ; Next_Line +lb ; NS ; Nonstarter +lb ; NU ; Numeric +lb ; 
OP ; Open_Punctuation +lb ; PO ; Postfix_Numeric +lb ; PR ; Prefix_Numeric +lb ; QU ; Quotation +lb ; RI ; Regional_Indicator +lb ; SA ; Complex_Context +lb ; SG ; Surrogate +lb ; SP ; Space +lb ; SY ; Break_Symbols +lb ; WJ ; Word_Joiner +lb ; XX ; Unknown +lb ; ZW ; ZWSpace +lb ; ZWJ ; ZWJ + +# Logical_Order_Exception (LOE) + +LOE; N ; No ; F ; False +LOE; Y ; Yes ; T ; True + +# Lowercase (Lower) + +Lower; N ; No ; F ; False +Lower; Y ; Yes ; T ; True + +# Lowercase_Mapping (lc) + +# @missing: 0000..10FFFF; Lowercase_Mapping; <code point> + +# Math (Math) + +Math; N ; No ; F ; False +Math; Y ; Yes ; T ; True + +# NFC_Quick_Check (NFC_QC) + +NFC_QC; M ; Maybe +NFC_QC; N ; No +NFC_QC; Y ; Yes + +# NFD_Quick_Check (NFD_QC) + +NFD_QC; N ; No +NFD_QC; Y ; Yes + +# NFKC_Casefold (NFKC_CF) + +# @missing: 0000..10FFFF; NFKC_Casefold; <code point> + +# NFKC_Quick_Check (NFKC_QC) + +NFKC_QC; M ; Maybe +NFKC_QC; N ; No +NFKC_QC; Y ; Yes + +# NFKD_Quick_Check (NFKD_QC) + +NFKD_QC; N ; No +NFKD_QC; Y ; Yes + +# Name (na) + +# @missing: 0000..10FFFF; Name; <none> + +# Name_Alias (Name_Alias) + +# @missing: 0000..10FFFF; Name_Alias; <none> + +# Noncharacter_Code_Point (NChar) + +NChar; N ; No ; F ; False +NChar; Y ; Yes ; T ; True + +# Numeric_Type (nt) + +nt ; De ; Decimal +nt ; Di ; Digit +nt ; None ; None +nt ; Nu ; Numeric + +# Numeric_Value (nv) + +# @missing: 0000..10FFFF; Numeric_Value; NaN + +# Other_Alphabetic (OAlpha) + +OAlpha; N ; No ; F ; False +OAlpha; Y ; Yes ; T ; True + +# Other_Default_Ignorable_Code_Point (ODI) + +ODI; N ; No ; F ; False +ODI; Y ; Yes ; T ; True + +# Other_Grapheme_Extend (OGr_Ext) + +OGr_Ext; N ; No ; F ; False +OGr_Ext; Y ; Yes ; T ; True + +# Other_ID_Continue (OIDC) + +OIDC; N ; No ; F ; False +OIDC; Y ; Yes ; T ; True + +# Other_ID_Start (OIDS) + +OIDS; N ; No ; F ; False +OIDS; Y ; Yes ; T ; True + +# Other_Lowercase (OLower) + +OLower; N ; No ; F ; False +OLower; Y ; Yes ; T ; True + +# Other_Math (OMath) + +OMath; N ; No ; F ; False 
+OMath; Y ; Yes ; T ; True + +# Other_Uppercase (OUpper) + +OUpper; N ; No ; F ; False +OUpper; Y ; Yes ; T ; True + +# Pattern_Syntax (Pat_Syn) + +Pat_Syn; N ; No ; F ; False +Pat_Syn; Y ; Yes ; T ; True + +# Pattern_White_Space (Pat_WS) + +Pat_WS; N ; No ; F ; False +Pat_WS; Y ; Yes ; T ; True + +# Prepended_Concatenation_Mark (PCM) + +PCM; N ; No ; F ; False +PCM; Y ; Yes ; T ; True + +# Quotation_Mark (QMark) + +QMark; N ; No ; F ; False +QMark; Y ; Yes ; T ; True + +# Radical (Radical) + +Radical; N ; No ; F ; False +Radical; Y ; Yes ; T ; True + +# Regional_Indicator (RI) + +RI ; N ; No ; F ; False +RI ; Y ; Yes ; T ; True + +# Script (sc) + +sc ; Adlm ; Adlam +sc ; Aghb ; Caucasian_Albanian +sc ; Ahom ; Ahom +sc ; Arab ; Arabic +sc ; Armi ; Imperial_Aramaic +sc ; Armn ; Armenian +sc ; Avst ; Avestan +sc ; Bali ; Balinese +sc ; Bamu ; Bamum +sc ; Bass ; Bassa_Vah +sc ; Batk ; Batak +sc ; Beng ; Bengali +sc ; Bhks ; Bhaiksuki +sc ; Bopo ; Bopomofo +sc ; Brah ; Brahmi +sc ; Brai ; Braille +sc ; Bugi ; Buginese +sc ; Buhd ; Buhid +sc ; Cakm ; Chakma +sc ; Cans ; Canadian_Aboriginal +sc ; Cari ; Carian +sc ; Cham ; Cham +sc ; Cher ; Cherokee +sc ; Chrs ; Chorasmian +sc ; Copt ; Coptic ; Qaac +sc ; Cpmn ; Cypro_Minoan +sc ; Cprt ; Cypriot +sc ; Cyrl ; Cyrillic +sc ; Deva ; Devanagari +sc ; Diak ; Dives_Akuru +sc ; Dogr ; Dogra +sc ; Dsrt ; Deseret +sc ; Dupl ; Duployan +sc ; Egyp ; Egyptian_Hieroglyphs +sc ; Elba ; Elbasan +sc ; Elym ; Elymaic +sc ; Ethi ; Ethiopic +sc ; Geor ; Georgian +sc ; Glag ; Glagolitic +sc ; Gong ; Gunjala_Gondi +sc ; Gonm ; Masaram_Gondi +sc ; Goth ; Gothic +sc ; Gran ; Grantha +sc ; Grek ; Greek +sc ; Gujr ; Gujarati +sc ; Guru ; Gurmukhi +sc ; Hang ; Hangul +sc ; Hani ; Han +sc ; Hano ; Hanunoo +sc ; Hatr ; Hatran +sc ; Hebr ; Hebrew +sc ; Hira ; Hiragana +sc ; Hluw ; Anatolian_Hieroglyphs +sc ; Hmng ; Pahawh_Hmong +sc ; Hmnp ; Nyiakeng_Puachue_Hmong +sc ; Hrkt ; Katakana_Or_Hiragana +sc ; Hung ; Old_Hungarian +sc ; Ital ; Old_Italic 
+sc ; Java ; Javanese +sc ; Kali ; Kayah_Li +sc ; Kana ; Katakana +sc ; Khar ; Kharoshthi +sc ; Khmr ; Khmer +sc ; Khoj ; Khojki +sc ; Kits ; Khitan_Small_Script +sc ; Knda ; Kannada +sc ; Kthi ; Kaithi +sc ; Lana ; Tai_Tham +sc ; Laoo ; Lao +sc ; Latn ; Latin +sc ; Lepc ; Lepcha +sc ; Limb ; Limbu +sc ; Lina ; Linear_A +sc ; Linb ; Linear_B +sc ; Lisu ; Lisu +sc ; Lyci ; Lycian +sc ; Lydi ; Lydian +sc ; Mahj ; Mahajani +sc ; Maka ; Makasar +sc ; Mand ; Mandaic +sc ; Mani ; Manichaean +sc ; Marc ; Marchen +sc ; Medf ; Medefaidrin +sc ; Mend ; Mende_Kikakui +sc ; Merc ; Meroitic_Cursive +sc ; Mero ; Meroitic_Hieroglyphs +sc ; Mlym ; Malayalam +sc ; Modi ; Modi +sc ; Mong ; Mongolian +sc ; Mroo ; Mro +sc ; Mtei ; Meetei_Mayek +sc ; Mult ; Multani +sc ; Mymr ; Myanmar +sc ; Nand ; Nandinagari +sc ; Narb ; Old_North_Arabian +sc ; Nbat ; Nabataean +sc ; Newa ; Newa +sc ; Nkoo ; Nko +sc ; Nshu ; Nushu +sc ; Ogam ; Ogham +sc ; Olck ; Ol_Chiki +sc ; Orkh ; Old_Turkic +sc ; Orya ; Oriya +sc ; Osge ; Osage +sc ; Osma ; Osmanya +sc ; Ougr ; Old_Uyghur +sc ; Palm ; Palmyrene +sc ; Pauc ; Pau_Cin_Hau +sc ; Perm ; Old_Permic +sc ; Phag ; Phags_Pa +sc ; Phli ; Inscriptional_Pahlavi +sc ; Phlp ; Psalter_Pahlavi +sc ; Phnx ; Phoenician +sc ; Plrd ; Miao +sc ; Prti ; Inscriptional_Parthian +sc ; Rjng ; Rejang +sc ; Rohg ; Hanifi_Rohingya +sc ; Runr ; Runic +sc ; Samr ; Samaritan +sc ; Sarb ; Old_South_Arabian +sc ; Saur ; Saurashtra +sc ; Sgnw ; SignWriting +sc ; Shaw ; Shavian +sc ; Shrd ; Sharada +sc ; Sidd ; Siddham +sc ; Sind ; Khudawadi +sc ; Sinh ; Sinhala +sc ; Sogd ; Sogdian +sc ; Sogo ; Old_Sogdian +sc ; Sora ; Sora_Sompeng +sc ; Soyo ; Soyombo +sc ; Sund ; Sundanese +sc ; Sylo ; Syloti_Nagri +sc ; Syrc ; Syriac +sc ; Tagb ; Tagbanwa +sc ; Takr ; Takri +sc ; Tale ; Tai_Le +sc ; Talu ; New_Tai_Lue +sc ; Taml ; Tamil +sc ; Tang ; Tangut +sc ; Tavt ; Tai_Viet +sc ; Telu ; Telugu +sc ; Tfng ; Tifinagh +sc ; Tglg ; Tagalog +sc ; Thaa ; Thaana +sc ; Thai ; Thai +sc ; Tibt ; 
Tibetan +sc ; Tirh ; Tirhuta +sc ; Tnsa ; Tangsa +sc ; Toto ; Toto +sc ; Ugar ; Ugaritic +sc ; Vaii ; Vai +sc ; Vith ; Vithkuqi +sc ; Wara ; Warang_Citi +sc ; Wcho ; Wancho +sc ; Xpeo ; Old_Persian +sc ; Xsux ; Cuneiform +sc ; Yezi ; Yezidi +sc ; Yiii ; Yi +sc ; Zanb ; Zanabazar_Square +sc ; Zinh ; Inherited ; Qaai +sc ; Zyyy ; Common +sc ; Zzzz ; Unknown + +# Script_Extensions (scx) + +# @missing: 0000..10FFFF; Script_Extensions; <script> + +# Sentence_Break (SB) + +SB ; AT ; ATerm +SB ; CL ; Close +SB ; CR ; CR +SB ; EX ; Extend +SB ; FO ; Format +SB ; LE ; OLetter +SB ; LF ; LF +SB ; LO ; Lower +SB ; NU ; Numeric +SB ; SC ; SContinue +SB ; SE ; Sep +SB ; SP ; Sp +SB ; ST ; STerm +SB ; UP ; Upper +SB ; XX ; Other + +# Sentence_Terminal (STerm) + +STerm; N ; No ; F ; False +STerm; Y ; Yes ; T ; True + +# Simple_Case_Folding (scf) + +# @missing: 0000..10FFFF; Simple_Case_Folding; <code point> + +# Simple_Lowercase_Mapping (slc) + +# @missing: 0000..10FFFF; Simple_Lowercase_Mapping; <code point> + +# Simple_Titlecase_Mapping (stc) + +# @missing: 0000..10FFFF; Simple_Titlecase_Mapping; <code point> + +# Simple_Uppercase_Mapping (suc) + +# @missing: 0000..10FFFF; Simple_Uppercase_Mapping; <code point> + +# Soft_Dotted (SD) + +SD ; N ; No ; F ; False +SD ; Y ; Yes ; T ; True + +# Terminal_Punctuation (Term) + +Term; N ; No ; F ; False +Term; Y ; Yes ; T ; True + +# Titlecase_Mapping (tc) + +# @missing: 0000..10FFFF; Titlecase_Mapping; <code point> + +# Unicode_1_Name (na1) + +# @missing: 0000..10FFFF; Unicode_1_Name; <none> + +# Unified_Ideograph (UIdeo) + +UIdeo; N ; No ; F ; False +UIdeo; Y ; Yes ; T ; True + +# Uppercase (Upper) + +Upper; N ; No ; F ; False +Upper; Y ; Yes ; T ; True + +# Uppercase_Mapping (uc) + +# @missing: 0000..10FFFF; Uppercase_Mapping; <code point> + +# Variation_Selector (VS) + +VS ; N ; No ; F ; False +VS ; Y ; Yes ; T ; True + +# Vertical_Orientation (vo) + +vo ; R ; Rotated +vo ; Tr ; Transformed_Rotated +vo ; Tu ; Transformed_Upright +vo 
; U ; Upright + +# White_Space (WSpace) + +WSpace; N ; No ; F ; False +WSpace; Y ; Yes ; T ; True + +# Word_Break (WB) + +WB ; CR ; CR +WB ; DQ ; Double_Quote +WB ; EB ; E_Base +WB ; EBG ; E_Base_GAZ +WB ; EM ; E_Modifier +WB ; EX ; ExtendNumLet +WB ; Extend ; Extend +WB ; FO ; Format +WB ; GAZ ; Glue_After_Zwj +WB ; HL ; Hebrew_Letter +WB ; KA ; Katakana +WB ; LE ; ALetter +WB ; LF ; LF +WB ; MB ; MidNumLet +WB ; ML ; MidLetter +WB ; MN ; MidNum +WB ; NL ; Newline +WB ; NU ; Numeric +WB ; RI ; Regional_Indicator +WB ; SQ ; Single_Quote +WB ; WSegSpace ; WSegSpace +WB ; XX ; Other +WB ; ZWJ ; ZWJ + +# XID_Continue (XIDC) + +XIDC; N ; No ; F ; False +XIDC; Y ; Yes ; T ; True + +# XID_Start (XIDS) + +XIDS; N ; No ; F ; False +XIDS; Y ; Yes ; T ; True + +# cjkAccountingNumeric (cjkAccountingNumeric) + +# @missing: 0000..10FFFF; cjkAccountingNumeric; NaN + +# cjkCompatibilityVariant (cjkCompatibilityVariant) + +# @missing: 0000..10FFFF; cjkCompatibilityVariant; <code point> + +# cjkIICore (cjkIICore) + +# @missing: 0000..10FFFF; cjkIICore; <none> + +# cjkIRG_GSource (cjkIRG_GSource) + +# @missing: 0000..10FFFF; cjkIRG_GSource; <none> + +# cjkIRG_HSource (cjkIRG_HSource) + +# @missing: 0000..10FFFF; cjkIRG_HSource; <none> + +# cjkIRG_JSource (cjkIRG_JSource) + +# @missing: 0000..10FFFF; cjkIRG_JSource; <none> + +# cjkIRG_KPSource (cjkIRG_KPSource) + +# @missing: 0000..10FFFF; cjkIRG_KPSource; <none> + +# cjkIRG_KSource (cjkIRG_KSource) + +# @missing: 0000..10FFFF; cjkIRG_KSource; <none> + +# cjkIRG_MSource (cjkIRG_MSource) + +# @missing: 0000..10FFFF; cjkIRG_MSource; <none> + +# cjkIRG_SSource (cjkIRG_SSource) + +# @missing: 0000..10FFFF; cjkIRG_SSource; <none> + +# cjkIRG_TSource (cjkIRG_TSource) + +# @missing: 0000..10FFFF; cjkIRG_TSource; <none> + +# cjkIRG_UKSource (cjkIRG_UKSource) + +# @missing: 0000..10FFFF; cjkIRG_UKSource; <none> + +# cjkIRG_USource (cjkIRG_USource) + +# @missing: 0000..10FFFF; cjkIRG_USource; <none> + +# cjkIRG_VSource (cjkIRG_VSource) + +# 
@missing: 0000..10FFFF; cjkIRG_VSource; <none> + +# cjkOtherNumeric (cjkOtherNumeric) + +# @missing: 0000..10FFFF; cjkOtherNumeric; NaN + +# cjkPrimaryNumeric (cjkPrimaryNumeric) + +# @missing: 0000..10FFFF; cjkPrimaryNumeric; NaN + +# cjkRSUnicode (cjkRSUnicode) + +# @missing: 0000..10FFFF; cjkRSUnicode; <none> + +# EOF diff --git a/admin/unidata/README b/admin/unidata/README index 4b8444b0fec..d7bb3feb985 100644 --- a/admin/unidata/README +++ b/admin/unidata/README @@ -48,3 +48,15 @@ https://www.unicode.org/Public/emoji/14.0/emoji-sequences.txt emoji-test.txt https://unicode.org/Public/emoji/14.0/emoji-test.txt 2021-10-28 + +ScriptExtensions.txt +https://www.unicode.org/Public/UCD/latest/ucd/ScriptExtensions.txt +2022-01-17 + +Scripts.txt +https://www.unicode.org/Public/UCD/latest/ucd/Scripts.txt +2022-01-17 + +PropertyValueAliases.txt +https://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt +2022-01-17 diff --git a/admin/unidata/ScriptExtensions.txt b/admin/unidata/ScriptExtensions.txt new file mode 100644 index 00000000000..3f5cd1c0dbb --- /dev/null +++ b/admin/unidata/ScriptExtensions.txt @@ -0,0 +1,628 @@ +# ScriptExtensions-14.0.0.txt +# Date: 2021-06-04, 02:19:38 GMT +# © 2021 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ +# +# The Script_Extensions property indicates which characters are commonly used +# with more than one script, but with a limited number of scripts. +# For each code point, there is one or more property values. Each such value is a Script property value. 
+# For more information, see: +# UAX #24, Unicode Script Property: https://www.unicode.org/reports/tr24/ +# Especially the sections: +# https://www.unicode.org/reports/tr24/#Assignment_Script_Values +# https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values +# +# Each Script_Extensions value in this file consists of a set +# of one or more abbreviated Script property values. The ordering of the +# values in that set is not material, but for stability in presentation +# it is given here as alphabetical. +# +# The Script_Extensions values are presented in sorted order in the file. +# They are sorted first by the number of Script property values in their sets, +# and then alphabetically by first differing Script property value. +# +# Following each distinct Script_Extensions value is the list of code +# points associated with that value, listed in code point order. +# +# All code points not explicitly listed for Script_Extensions +# have as their value the corresponding Script property value +# +# @missing: 0000..10FFFF; <script> + +# ================================================ + +# Property: Script_Extensions + +# ================================================ + +# Script_Extensions=Beng + +1CF7 ; Beng # Mc VEDIC SIGN ATIKRAMA + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Deva + +1CD1 ; Deva # Mn VEDIC TONE SHARA +1CD4 ; Deva # Mn VEDIC SIGN YAJURVEDIC MIDLINE SVARITA +1CDB ; Deva # Mn VEDIC TONE TRIPLE SVARITA +1CDE..1CDF ; Deva # Mn [2] VEDIC TONE TWO DOTS BELOW..VEDIC TONE THREE DOTS BELOW +1CE2..1CE8 ; Deva # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CEB..1CEC ; Deva # Lo [2] VEDIC SIGN ANUSVARA VAMAGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF1 ; Deva # Lo [4] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ANUSVARA UBHAYATO MUKHA + +# Total code points: 18 + +# ================================================ + +# Script_Extensions=Dupl + 
+1BCA0..1BCA3 ; Dupl # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP + +# Total code points: 4 + +# ================================================ + +# Script_Extensions=Grek + +0342 ; Grek # Mn COMBINING GREEK PERISPOMENI +0345 ; Grek # Mn COMBINING GREEK YPOGEGRAMMENI +1DC0..1DC1 ; Grek # Mn [2] COMBINING DOTTED GRAVE ACCENT..COMBINING DOTTED ACUTE ACCENT + +# Total code points: 4 + +# ================================================ + +# Script_Extensions=Hani + +3006 ; Hani # Lo IDEOGRAPHIC CLOSING MARK +303E..303F ; Hani # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE +3190..3191 ; Hani # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK +3192..3195 ; Hani # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK +3196..319F ; Hani # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK +31C0..31E3 ; Hani # So [36] CJK STROKE T..CJK STROKE Q +3220..3229 ; Hani # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +322A..3247 ; Hani # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3280..3289 ; Hani # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN +328A..32B0 ; Hani # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT +32C0..32CB ; Hani # So [12] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DECEMBER +32FF ; Hani # So SQUARE ERA NAME REIWA +3358..3370 ; Hani # So [25] IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO..IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR TWENTY-FOUR +337B..337F ; Hani # So [5] SQUARE ERA NAME HEISEI..SQUARE CORPORATION +33E0..33FE ; Hani # So [31] IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY ONE..IDEOGRAPHIC TELEGRAPH SYMBOL FOR DAY THIRTY-ONE +1D360..1D371 ; Hani # No [18] COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE +1F250..1F251 ; Hani # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT + +# Total code points: 238 + +# 
================================================ + +# Script_Extensions=Latn + +0363..036F ; Latn # Mn [13] COMBINING LATIN SMALL LETTER A..COMBINING LATIN SMALL LETTER X + +# Total code points: 13 + +# ================================================ + +# Script_Extensions=Nand + +1CFA ; Nand # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Syrc + +1DFA ; Syrc # Mn COMBINING DOT BELOW LEFT + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Arab Copt + +102E0 ; Arab Copt # Mn COPTIC EPACT THOUSANDS MARK +102E1..102FB ; Arab Copt # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED + +# Total code points: 28 + +# ================================================ + +# Script_Extensions=Arab Rohg + +06D4 ; Arab Rohg # Po ARABIC FULL STOP + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Arab Nkoo + +FD3E ; Arab Nkoo # Pe ORNATE LEFT PARENTHESIS +FD3F ; Arab Nkoo # Ps ORNATE RIGHT PARENTHESIS + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Arab Syrc + +064B..0655 ; Arab Syrc # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW +0670 ; Arab Syrc # Mn ARABIC LETTER SUPERSCRIPT ALEF + +# Total code points: 12 + +# ================================================ + +# Script_Extensions=Arab Thaa + +FDF2 ; Arab Thaa # Lo ARABIC LIGATURE ALLAH ISOLATED FORM +FDFD ; Arab Thaa # So ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Beng Deva + +1CD5..1CD6 ; Beng Deva # Mn [2] VEDIC TONE YAJURVEDIC AGGRAVATED INDEPENDENT SVARITA..VEDIC TONE YAJURVEDIC INDEPENDENT SVARITA +1CD8 ; Beng Deva # Mn VEDIC TONE CANDRA BELOW +1CE1 ; Beng Deva # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CEA ; Beng Deva # Lo VEDIC SIGN 
ANUSVARA BAHIRGOMUKHA +1CED ; Beng Deva # Mn VEDIC SIGN TIRYAK +1CF5..1CF6 ; Beng Deva # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +A8F1 ; Beng Deva # Mn COMBINING DEVANAGARI SIGN AVAGRAHA + +# Total code points: 9 + +# ================================================ + +# Script_Extensions=Bopo Hani + +302A..302D ; Bopo Hani # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK + +# Total code points: 4 + +# ================================================ + +# Script_Extensions=Bugi Java + +A9CF ; Bugi Java # Lm JAVANESE PANGRANGKEP + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Cprt Linb + +10102 ; Cprt Linb # Po AEGEAN CHECK MARK +10137..1013F ; Cprt Linb # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT + +# Total code points: 10 + +# ================================================ + +# Script_Extensions=Cyrl Glag + +0484 ; Cyrl Glag # Mn COMBINING CYRILLIC PALATALIZATION +0487 ; Cyrl Glag # Mn COMBINING CYRILLIC POKRYTIE +2E43 ; Cyrl Glag # Po DASH WITH LEFT UPTURN +A66F ; Cyrl Glag # Mn COMBINING CYRILLIC VZMET + +# Total code points: 4 + +# ================================================ + +# Script_Extensions=Cyrl Latn + +0485..0486 ; Cyrl Latn # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Cyrl Perm + +0483 ; Cyrl Perm # Mn COMBINING CYRILLIC TITLO + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Cyrl Syrc + +1DF8 ; Cyrl Syrc # Mn COMBINING DOT ABOVE LEFT + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Deva Gran + +1CD3 ; Deva Gran # Po VEDIC SIGN NIHSHVASA +1CF3 ; Deva Gran # Lo VEDIC SIGN ROTATED ARDHAVISARGA +1CF8..1CF9 ; Deva Gran # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE + +# Total code points: 4 
+ +# ================================================ + +# Script_Extensions=Deva Nand + +1CE9 ; Deva Nand # Lo VEDIC SIGN ANUSVARA ANTARGOMUKHA + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Deva Shrd + +1CD7 ; Deva Shrd # Mn VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA +1CD9 ; Deva Shrd # Mn VEDIC TONE YAJURVEDIC KATHAKA INDEPENDENT SVARITA SCHROEDER +1CDC..1CDD ; Deva Shrd # Mn [2] VEDIC TONE KATHAKA ANUDATTA..VEDIC TONE DOT BELOW +1CE0 ; Deva Shrd # Mn VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA + +# Total code points: 5 + +# ================================================ + +# Script_Extensions=Deva Taml + +A8F3 ; Deva Taml # Lo DEVANAGARI SIGN CANDRABINDU VIRAMA + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Geor Latn + +10FB ; Geor Latn # Po GEORGIAN PARAGRAPH SEPARATOR + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Gran Taml + +0BE6..0BEF ; Gran Taml # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0BF0..0BF2 ; Gran Taml # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND +0BF3 ; Gran Taml # So TAMIL DAY SIGN +11301 ; Gran Taml # Mn GRANTHA SIGN CANDRABINDU +11303 ; Gran Taml # Mc GRANTHA SIGN VISARGA +1133B..1133C ; Gran Taml # Mn [2] COMBINING BINDU BELOW..GRANTHA SIGN NUKTA +11FD0..11FD1 ; Gran Taml # No [2] TAMIL FRACTION ONE QUARTER..TAMIL FRACTION ONE HALF-1 +11FD3 ; Gran Taml # No TAMIL FRACTION THREE QUARTERS + +# Total code points: 21 + +# ================================================ + +# Script_Extensions=Gujr Khoj + +0AE6..0AEF ; Gujr Khoj # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE + +# Total code points: 10 + +# ================================================ + +# Script_Extensions=Guru Mult + +0A66..0A6F ; Guru Mult # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE + +# Total code points: 10 + +# ================================================ + +# 
Script_Extensions=Hani Latn + +A700..A707 ; Hani Latn # Sk [8] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER CHINESE TONE YANG RU + +# Total code points: 8 + +# ================================================ + +# Script_Extensions=Hira Kana + +3031..3035 ; Hira Kana # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3099..309A ; Hira Kana # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +309B..309C ; Hira Kana # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30A0 ; Hira Kana # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +30FC ; Hira Kana # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF70 ; Hira Kana # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF9E..FF9F ; Hira Kana # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK + +# Total code points: 14 + +# ================================================ + +# Script_Extensions=Knda Nand + +0CE6..0CEF ; Knda Nand # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE + +# Total code points: 10 + +# ================================================ + +# Script_Extensions=Latn Mong + +202F ; Latn Mong # Zs NARROW NO-BREAK SPACE + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Mani Ougr + +10AF2 ; Mani Ougr # Po MANICHAEAN PUNCTUATION DOUBLE DOT WITHIN DOT + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Mong Phag + +1802..1803 ; Mong Phag # Po [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP +1805 ; Mong Phag # Po MONGOLIAN FOUR DOTS + +# Total code points: 3 + +# ================================================ + +# Script_Extensions=Arab Syrc Thaa + +061C ; Arab Syrc Thaa # Cf ARABIC LETTER MARK + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Arab Thaa Yezi + +0660..0669 ; Arab Thaa Yezi # Nd [10] ARABIC-INDIC 
DIGIT ZERO..ARABIC-INDIC DIGIT NINE + +# Total code points: 10 + +# ================================================ + +# Script_Extensions=Beng Cakm Sylo + +09E6..09EF ; Beng Cakm Sylo # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE + +# Total code points: 10 + +# ================================================ + +# Script_Extensions=Cakm Mymr Tale + +1040..1049 ; Cakm Mymr Tale # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE + +# Total code points: 10 + +# ================================================ + +# Script_Extensions=Cpmn Cprt Linb + +10100..10101 ; Cpmn Cprt Linb # Po [2] AEGEAN WORD SEPARATOR LINE..AEGEAN WORD SEPARATOR DOT + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Cprt Lina Linb + +10107..10133 ; Cprt Lina Linb # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND + +# Total code points: 45 + +# ================================================ + +# Script_Extensions=Deva Gran Knda + +1CF4 ; Deva Gran Knda # Mn VEDIC TONE CANDRA ABOVE + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Deva Gran Latn + +20F0 ; Deva Gran Latn # Mn COMBINING ASTERISK ABOVE + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Hani Hira Kana + +303C ; Hani Hira Kana # Lo MASU MARK +303D ; Hani Hira Kana # Po PART ALTERNATION MARK + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Kali Latn Mymr + +A92E ; Kali Latn Mymr # Po KAYAH LI SIGN CWI + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Beng Deva Gran Knda + +1CD0 ; Beng Deva Gran Knda # Mn VEDIC TONE KARSHANA +1CD2 ; Beng Deva Gran Knda # Mn VEDIC TONE PRENKHA + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Buhd Hano Tagb Tglg + +1735..1736 ; Buhd Hano Tagb Tglg # Po [2] PHILIPPINE SINGLE 
PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Deva Dogr Kthi Mahj + +0966..096F ; Deva Dogr Kthi Mahj # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE + +# Total code points: 10 + +# ================================================ + +# Script_Extensions=Bopo Hang Hani Hira Kana + +3003 ; Bopo Hang Hani Hira Kana # Po DITTO MARK +3013 ; Bopo Hang Hani Hira Kana # So GETA MARK +301C ; Bopo Hang Hani Hira Kana # Pd WAVE DASH +301D ; Bopo Hang Hani Hira Kana # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Bopo Hang Hani Hira Kana # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3030 ; Bopo Hang Hani Hira Kana # Pd WAVY DASH +3037 ; Bopo Hang Hani Hira Kana # So IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL +FE45..FE46 ; Bopo Hang Hani Hira Kana # Po [2] SESAME DOT..WHITE SESAME DOT + +# Total code points: 10 + +# ================================================ + +# Script_Extensions=Arab Nkoo Rohg Syrc Thaa Yezi + +060C ; Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC COMMA +061B ; Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC SEMICOLON + +# Total code points: 2 + +# ================================================ + +# Script_Extensions=Bopo Hang Hani Hira Kana Yiii + +3001..3002 ; Bopo Hang Hani Hira Kana Yiii # Po [2] IDEOGRAPHIC COMMA..IDEOGRAPHIC FULL STOP +3008 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT ANGLE BRACKET +3009 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT ANGLE BRACKET +300A ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT DOUBLE ANGLE BRACKET +300B ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT CORNER BRACKET +300D ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT CORNER BRACKET +300E ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE CORNER BRACKET +300F ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE CORNER BRACKET +3010 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT BLACK 
LENTICULAR BRACKET +3011 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT BLACK LENTICULAR BRACKET +3014 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT TORTOISE SHELL BRACKET +3015 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE LENTICULAR BRACKET +3017 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; Bopo Hang Hani Hira Kana Yiii # Ps LEFT WHITE SQUARE BRACKET +301B ; Bopo Hang Hani Hira Kana Yiii # Pe RIGHT WHITE SQUARE BRACKET +30FB ; Bopo Hang Hani Hira Kana Yiii # Po KATAKANA MIDDLE DOT +FF61 ; Bopo Hang Hani Hira Kana Yiii # Po HALFWIDTH IDEOGRAPHIC FULL STOP +FF62 ; Bopo Hang Hani Hira Kana Yiii # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; Bopo Hang Hani Hira Kana Yiii # Pe HALFWIDTH RIGHT CORNER BRACKET +FF64..FF65 ; Bopo Hang Hani Hira Kana Yiii # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT + +# Total code points: 26 + +# ================================================ + +# Script_Extensions=Deva Knda Mlym Orya Taml Telu + +1CDA ; Deva Knda Mlym Orya Taml Telu # Mn VEDIC TONE DOUBLE SVARITA + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Adlm Arab Nkoo Rohg Syrc Thaa Yezi + +061F ; Adlm Arab Nkoo Rohg Syrc Thaa Yezi # Po ARABIC QUESTION MARK + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Beng Deva Gran Knda Nand Orya Telu Tirh + +1CF2 ; Beng Deva Gran Knda Nand Orya Telu Tirh # Lo VEDIC SIGN ARDHAVISARGA + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc + +0640 ; Adlm Arab Mand Mani Ougr Phlp Rohg Sogd Syrc # Lm ARABIC TATWEEL + +# Total code points: 1 + +# 
================================================ + +# Script_Extensions=Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh + +A836..A837 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK +A838 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # Sc NORTH INDIC RUPEE MARK +A839 ; Deva Dogr Gujr Guru Khoj Kthi Mahj Modi Sind Takr Tirh # So NORTH INDIC QUANTITY MARK + +# Total code points: 4 + +# ================================================ + +# Script_Extensions=Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Taml Telu Tirh + +0952 ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Taml Telu Tirh # Mn DEVANAGARI STRESS SIGN ANUDATTA + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh + +0951 ; Beng Deva Gran Gujr Guru Knda Latn Mlym Orya Shrd Taml Telu Tirh # Mn DEVANAGARI STRESS SIGN UDATTA + +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Sind Takr Tirh + +A833..A835 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Modi Nand Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE SIXTEENTH..NORTH INDIC FRACTION THREE SIXTEENTHS + +# Total code points: 3 + +# ================================================ + +# Script_Extensions=Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Sind Takr Tirh + +A830..A832 ; Deva Dogr Gujr Guru Khoj Knda Kthi Mahj Mlym Modi Nand Sind Takr Tirh # No [3] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE QUARTERS + +# Total code points: 3 + +# ================================================ + +# Script_Extensions=Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh + +0964 ; Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh # Po DEVANAGARI DANDA 
+ +# Total code points: 1 + +# ================================================ + +# Script_Extensions=Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Limb Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh + +0965 ; Beng Deva Dogr Gong Gonm Gran Gujr Guru Knda Limb Mahj Mlym Nand Orya Sind Sinh Sylo Takr Taml Telu Tirh # Po DEVANAGARI DOUBLE DANDA + +# Total code points: 1 + +# EOF diff --git a/admin/unidata/Scripts.txt b/admin/unidata/Scripts.txt new file mode 100644 index 00000000000..a1383730119 --- /dev/null +++ b/admin/unidata/Scripts.txt @@ -0,0 +1,2991 @@ +# Scripts-14.0.0.txt +# Date: 2021-07-10, 00:35:31 GMT +# © 2021 Unicode®, Inc. +# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. +# For terms of use, see http://www.unicode.org/terms_of_use.html +# +# Unicode Character Database +# For documentation, see http://www.unicode.org/reports/tr44/ +# For more information, see: +# UAX #24, Unicode Script Property: https://www.unicode.org/reports/tr24/ +# Especially the sections: +# https://www.unicode.org/reports/tr24/#Assignment_Script_Values +# https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values +# + +# ================================================ + +# Property: Script + +# All code points not explicitly listed for Script +# have the value Unknown (Zzzz). 
+ +# @missing: 0000..10FFFF; Unknown + +# ================================================ + +0000..001F ; Common # Cc [32] <control-0000>..<control-001F> +0020 ; Common # Zs SPACE +0021..0023 ; Common # Po [3] EXCLAMATION MARK..NUMBER SIGN +0024 ; Common # Sc DOLLAR SIGN +0025..0027 ; Common # Po [3] PERCENT SIGN..APOSTROPHE +0028 ; Common # Ps LEFT PARENTHESIS +0029 ; Common # Pe RIGHT PARENTHESIS +002A ; Common # Po ASTERISK +002B ; Common # Sm PLUS SIGN +002C ; Common # Po COMMA +002D ; Common # Pd HYPHEN-MINUS +002E..002F ; Common # Po [2] FULL STOP..SOLIDUS +0030..0039 ; Common # Nd [10] DIGIT ZERO..DIGIT NINE +003A..003B ; Common # Po [2] COLON..SEMICOLON +003C..003E ; Common # Sm [3] LESS-THAN SIGN..GREATER-THAN SIGN +003F..0040 ; Common # Po [2] QUESTION MARK..COMMERCIAL AT +005B ; Common # Ps LEFT SQUARE BRACKET +005C ; Common # Po REVERSE SOLIDUS +005D ; Common # Pe RIGHT SQUARE BRACKET +005E ; Common # Sk CIRCUMFLEX ACCENT +005F ; Common # Pc LOW LINE +0060 ; Common # Sk GRAVE ACCENT +007B ; Common # Ps LEFT CURLY BRACKET +007C ; Common # Sm VERTICAL LINE +007D ; Common # Pe RIGHT CURLY BRACKET +007E ; Common # Sm TILDE +007F..009F ; Common # Cc [33] <control-007F>..<control-009F> +00A0 ; Common # Zs NO-BREAK SPACE +00A1 ; Common # Po INVERTED EXCLAMATION MARK +00A2..00A5 ; Common # Sc [4] CENT SIGN..YEN SIGN +00A6 ; Common # So BROKEN BAR +00A7 ; Common # Po SECTION SIGN +00A8 ; Common # Sk DIAERESIS +00A9 ; Common # So COPYRIGHT SIGN +00AB ; Common # Pi LEFT-POINTING DOUBLE ANGLE QUOTATION MARK +00AC ; Common # Sm NOT SIGN +00AD ; Common # Cf SOFT HYPHEN +00AE ; Common # So REGISTERED SIGN +00AF ; Common # Sk MACRON +00B0 ; Common # So DEGREE SIGN +00B1 ; Common # Sm PLUS-MINUS SIGN +00B2..00B3 ; Common # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE +00B4 ; Common # Sk ACUTE ACCENT +00B5 ; Common # L& MICRO SIGN +00B6..00B7 ; Common # Po [2] PILCROW SIGN..MIDDLE DOT +00B8 ; Common # Sk CEDILLA +00B9 ; Common # No SUPERSCRIPT ONE +00BB ; Common # Pf 
RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK +00BC..00BE ; Common # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS +00BF ; Common # Po INVERTED QUESTION MARK +00D7 ; Common # Sm MULTIPLICATION SIGN +00F7 ; Common # Sm DIVISION SIGN +02B9..02C1 ; Common # Lm [9] MODIFIER LETTER PRIME..MODIFIER LETTER REVERSED GLOTTAL STOP +02C2..02C5 ; Common # Sk [4] MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD +02C6..02D1 ; Common # Lm [12] MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON +02D2..02DF ; Common # Sk [14] MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER CROSS ACCENT +02E5..02E9 ; Common # Sk [5] MODIFIER LETTER EXTRA-HIGH TONE BAR..MODIFIER LETTER EXTRA-LOW TONE BAR +02EC ; Common # Lm MODIFIER LETTER VOICING +02ED ; Common # Sk MODIFIER LETTER UNASPIRATED +02EE ; Common # Lm MODIFIER LETTER DOUBLE APOSTROPHE +02EF..02FF ; Common # Sk [17] MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW +0374 ; Common # Lm GREEK NUMERAL SIGN +037E ; Common # Po GREEK QUESTION MARK +0385 ; Common # Sk GREEK DIALYTIKA TONOS +0387 ; Common # Po GREEK ANO TELEIA +0605 ; Common # Cf ARABIC NUMBER MARK ABOVE +060C ; Common # Po ARABIC COMMA +061B ; Common # Po ARABIC SEMICOLON +061F ; Common # Po ARABIC QUESTION MARK +0640 ; Common # Lm ARABIC TATWEEL +06DD ; Common # Cf ARABIC END OF AYAH +08E2 ; Common # Cf ARABIC DISPUTED END OF AYAH +0964..0965 ; Common # Po [2] DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA +0E3F ; Common # Sc THAI CURRENCY SYMBOL BAHT +0FD5..0FD8 ; Common # So [4] RIGHT-FACING SVASTI SIGN..LEFT-FACING SVASTI SIGN WITH DOTS +10FB ; Common # Po GEORGIAN PARAGRAPH SEPARATOR +16EB..16ED ; Common # Po [3] RUNIC SINGLE PUNCTUATION..RUNIC CROSS PUNCTUATION +1735..1736 ; Common # Po [2] PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION +1802..1803 ; Common # Po [2] MONGOLIAN COMMA..MONGOLIAN FULL STOP +1805 ; Common # Po MONGOLIAN FOUR DOTS +1CD3 ; Common # Po VEDIC SIGN NIHSHVASA 
+1CE1 ; Common # Mc VEDIC TONE ATHARVAVEDIC INDEPENDENT SVARITA +1CE9..1CEC ; Common # Lo [4] VEDIC SIGN ANUSVARA ANTARGOMUKHA..VEDIC SIGN ANUSVARA VAMAGOMUKHA WITH TAIL +1CEE..1CF3 ; Common # Lo [6] VEDIC SIGN HEXIFORM LONG ANUSVARA..VEDIC SIGN ROTATED ARDHAVISARGA +1CF5..1CF6 ; Common # Lo [2] VEDIC SIGN JIHVAMULIYA..VEDIC SIGN UPADHMANIYA +1CF7 ; Common # Mc VEDIC SIGN ATIKRAMA +1CFA ; Common # Lo VEDIC SIGN DOUBLE ANUSVARA ANTARGOMUKHA +2000..200A ; Common # Zs [11] EN QUAD..HAIR SPACE +200B ; Common # Cf ZERO WIDTH SPACE +200E..200F ; Common # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK +2010..2015 ; Common # Pd [6] HYPHEN..HORIZONTAL BAR +2016..2017 ; Common # Po [2] DOUBLE VERTICAL LINE..DOUBLE LOW LINE +2018 ; Common # Pi LEFT SINGLE QUOTATION MARK +2019 ; Common # Pf RIGHT SINGLE QUOTATION MARK +201A ; Common # Ps SINGLE LOW-9 QUOTATION MARK +201B..201C ; Common # Pi [2] SINGLE HIGH-REVERSED-9 QUOTATION MARK..LEFT DOUBLE QUOTATION MARK +201D ; Common # Pf RIGHT DOUBLE QUOTATION MARK +201E ; Common # Ps DOUBLE LOW-9 QUOTATION MARK +201F ; Common # Pi DOUBLE HIGH-REVERSED-9 QUOTATION MARK +2020..2027 ; Common # Po [8] DAGGER..HYPHENATION POINT +2028 ; Common # Zl LINE SEPARATOR +2029 ; Common # Zp PARAGRAPH SEPARATOR +202A..202E ; Common # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE +202F ; Common # Zs NARROW NO-BREAK SPACE +2030..2038 ; Common # Po [9] PER MILLE SIGN..CARET +2039 ; Common # Pi SINGLE LEFT-POINTING ANGLE QUOTATION MARK +203A ; Common # Pf SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +203B..203E ; Common # Po [4] REFERENCE MARK..OVERLINE +203F..2040 ; Common # Pc [2] UNDERTIE..CHARACTER TIE +2041..2043 ; Common # Po [3] CARET INSERTION POINT..HYPHEN BULLET +2044 ; Common # Sm FRACTION SLASH +2045 ; Common # Ps LEFT SQUARE BRACKET WITH QUILL +2046 ; Common # Pe RIGHT SQUARE BRACKET WITH QUILL +2047..2051 ; Common # Po [11] DOUBLE QUESTION MARK..TWO ASTERISKS ALIGNED VERTICALLY +2052 ; Common # Sm COMMERCIAL MINUS SIGN +2053 ; Common 
# Po SWUNG DASH +2054 ; Common # Pc INVERTED UNDERTIE +2055..205E ; Common # Po [10] FLOWER PUNCTUATION MARK..VERTICAL FOUR DOTS +205F ; Common # Zs MEDIUM MATHEMATICAL SPACE +2060..2064 ; Common # Cf [5] WORD JOINER..INVISIBLE PLUS +2066..206F ; Common # Cf [10] LEFT-TO-RIGHT ISOLATE..NOMINAL DIGIT SHAPES +2070 ; Common # No SUPERSCRIPT ZERO +2074..2079 ; Common # No [6] SUPERSCRIPT FOUR..SUPERSCRIPT NINE +207A..207C ; Common # Sm [3] SUPERSCRIPT PLUS SIGN..SUPERSCRIPT EQUALS SIGN +207D ; Common # Ps SUPERSCRIPT LEFT PARENTHESIS +207E ; Common # Pe SUPERSCRIPT RIGHT PARENTHESIS +2080..2089 ; Common # No [10] SUBSCRIPT ZERO..SUBSCRIPT NINE +208A..208C ; Common # Sm [3] SUBSCRIPT PLUS SIGN..SUBSCRIPT EQUALS SIGN +208D ; Common # Ps SUBSCRIPT LEFT PARENTHESIS +208E ; Common # Pe SUBSCRIPT RIGHT PARENTHESIS +20A0..20C0 ; Common # Sc [33] EURO-CURRENCY SIGN..SOM SIGN +2100..2101 ; Common # So [2] ACCOUNT OF..ADDRESSED TO THE SUBJECT +2102 ; Common # L& DOUBLE-STRUCK CAPITAL C +2103..2106 ; Common # So [4] DEGREE CELSIUS..CADA UNA +2107 ; Common # L& EULER CONSTANT +2108..2109 ; Common # So [2] SCRUPLE..DEGREE FAHRENHEIT +210A..2113 ; Common # L& [10] SCRIPT SMALL G..SCRIPT SMALL L +2114 ; Common # So L B BAR SYMBOL +2115 ; Common # L& DOUBLE-STRUCK CAPITAL N +2116..2117 ; Common # So [2] NUMERO SIGN..SOUND RECORDING COPYRIGHT +2118 ; Common # Sm SCRIPT CAPITAL P +2119..211D ; Common # L& [5] DOUBLE-STRUCK CAPITAL P..DOUBLE-STRUCK CAPITAL R +211E..2123 ; Common # So [6] PRESCRIPTION TAKE..VERSICLE +2124 ; Common # L& DOUBLE-STRUCK CAPITAL Z +2125 ; Common # So OUNCE SIGN +2127 ; Common # So INVERTED OHM SIGN +2128 ; Common # L& BLACK-LETTER CAPITAL Z +2129 ; Common # So TURNED GREEK SMALL LETTER IOTA +212C..212D ; Common # L& [2] SCRIPT CAPITAL B..BLACK-LETTER CAPITAL C +212E ; Common # So ESTIMATED SYMBOL +212F..2131 ; Common # L& [3] SCRIPT SMALL E..SCRIPT CAPITAL F +2133..2134 ; Common # L& [2] SCRIPT CAPITAL M..SCRIPT SMALL O +2135..2138 ; Common # Lo [4] ALEF 
SYMBOL..DALET SYMBOL +2139 ; Common # L& INFORMATION SOURCE +213A..213B ; Common # So [2] ROTATED CAPITAL Q..FACSIMILE SIGN +213C..213F ; Common # L& [4] DOUBLE-STRUCK SMALL PI..DOUBLE-STRUCK CAPITAL PI +2140..2144 ; Common # Sm [5] DOUBLE-STRUCK N-ARY SUMMATION..TURNED SANS-SERIF CAPITAL Y +2145..2149 ; Common # L& [5] DOUBLE-STRUCK ITALIC CAPITAL D..DOUBLE-STRUCK ITALIC SMALL J +214A ; Common # So PROPERTY LINE +214B ; Common # Sm TURNED AMPERSAND +214C..214D ; Common # So [2] PER SIGN..AKTIESELSKAB +214F ; Common # So SYMBOL FOR SAMARITAN SOURCE +2150..215F ; Common # No [16] VULGAR FRACTION ONE SEVENTH..FRACTION NUMERATOR ONE +2189 ; Common # No VULGAR FRACTION ZERO THIRDS +218A..218B ; Common # So [2] TURNED DIGIT TWO..TURNED DIGIT THREE +2190..2194 ; Common # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW +2195..2199 ; Common # So [5] UP DOWN ARROW..SOUTH WEST ARROW +219A..219B ; Common # Sm [2] LEFTWARDS ARROW WITH STROKE..RIGHTWARDS ARROW WITH STROKE +219C..219F ; Common # So [4] LEFTWARDS WAVE ARROW..UPWARDS TWO HEADED ARROW +21A0 ; Common # Sm RIGHTWARDS TWO HEADED ARROW +21A1..21A2 ; Common # So [2] DOWNWARDS TWO HEADED ARROW..LEFTWARDS ARROW WITH TAIL +21A3 ; Common # Sm RIGHTWARDS ARROW WITH TAIL +21A4..21A5 ; Common # So [2] LEFTWARDS ARROW FROM BAR..UPWARDS ARROW FROM BAR +21A6 ; Common # Sm RIGHTWARDS ARROW FROM BAR +21A7..21AD ; Common # So [7] DOWNWARDS ARROW FROM BAR..LEFT RIGHT WAVE ARROW +21AE ; Common # Sm LEFT RIGHT ARROW WITH STROKE +21AF..21CD ; Common # So [31] DOWNWARDS ZIGZAG ARROW..LEFTWARDS DOUBLE ARROW WITH STROKE +21CE..21CF ; Common # Sm [2] LEFT RIGHT DOUBLE ARROW WITH STROKE..RIGHTWARDS DOUBLE ARROW WITH STROKE +21D0..21D1 ; Common # So [2] LEFTWARDS DOUBLE ARROW..UPWARDS DOUBLE ARROW +21D2 ; Common # Sm RIGHTWARDS DOUBLE ARROW +21D3 ; Common # So DOWNWARDS DOUBLE ARROW +21D4 ; Common # Sm LEFT RIGHT DOUBLE ARROW +21D5..21F3 ; Common # So [31] UP DOWN DOUBLE ARROW..UP DOWN WHITE ARROW +21F4..22FF ; Common # Sm [268] RIGHT ARROW WITH 
SMALL CIRCLE..Z NOTATION BAG MEMBERSHIP +2300..2307 ; Common # So [8] DIAMETER SIGN..WAVY LINE +2308 ; Common # Ps LEFT CEILING +2309 ; Common # Pe RIGHT CEILING +230A ; Common # Ps LEFT FLOOR +230B ; Common # Pe RIGHT FLOOR +230C..231F ; Common # So [20] BOTTOM RIGHT CROP..BOTTOM RIGHT CORNER +2320..2321 ; Common # Sm [2] TOP HALF INTEGRAL..BOTTOM HALF INTEGRAL +2322..2328 ; Common # So [7] FROWN..KEYBOARD +2329 ; Common # Ps LEFT-POINTING ANGLE BRACKET +232A ; Common # Pe RIGHT-POINTING ANGLE BRACKET +232B..237B ; Common # So [81] ERASE TO THE LEFT..NOT CHECK MARK +237C ; Common # Sm RIGHT ANGLE WITH DOWNWARDS ZIGZAG ARROW +237D..239A ; Common # So [30] SHOULDERED OPEN BOX..CLEAR SCREEN SYMBOL +239B..23B3 ; Common # Sm [25] LEFT PARENTHESIS UPPER HOOK..SUMMATION BOTTOM +23B4..23DB ; Common # So [40] TOP SQUARE BRACKET..FUSE +23DC..23E1 ; Common # Sm [6] TOP PARENTHESIS..BOTTOM TORTOISE SHELL BRACKET +23E2..2426 ; Common # So [69] WHITE TRAPEZIUM..SYMBOL FOR SUBSTITUTE FORM TWO +2440..244A ; Common # So [11] OCR HOOK..OCR DOUBLE BACKSLASH +2460..249B ; Common # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP +249C..24E9 ; Common # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z +24EA..24FF ; Common # No [22] CIRCLED DIGIT ZERO..NEGATIVE CIRCLED DIGIT ZERO +2500..25B6 ; Common # So [183] BOX DRAWINGS LIGHT HORIZONTAL..BLACK RIGHT-POINTING TRIANGLE +25B7 ; Common # Sm WHITE RIGHT-POINTING TRIANGLE +25B8..25C0 ; Common # So [9] BLACK RIGHT-POINTING SMALL TRIANGLE..BLACK LEFT-POINTING TRIANGLE +25C1 ; Common # Sm WHITE LEFT-POINTING TRIANGLE +25C2..25F7 ; Common # So [54] BLACK LEFT-POINTING SMALL TRIANGLE..WHITE CIRCLE WITH UPPER RIGHT QUADRANT +25F8..25FF ; Common # Sm [8] UPPER LEFT TRIANGLE..LOWER RIGHT TRIANGLE +2600..266E ; Common # So [111] BLACK SUN WITH RAYS..MUSIC NATURAL SIGN +266F ; Common # Sm MUSIC SHARP SIGN +2670..2767 ; Common # So [248] WEST SYRIAC CROSS..ROTATED FLORAL HEART BULLET +2768 ; Common # Ps MEDIUM LEFT 
PARENTHESIS ORNAMENT +2769 ; Common # Pe MEDIUM RIGHT PARENTHESIS ORNAMENT +276A ; Common # Ps MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT +276B ; Common # Pe MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT +276C ; Common # Ps MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT +276D ; Common # Pe MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT +276E ; Common # Ps HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT +276F ; Common # Pe HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT +2770 ; Common # Ps HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT +2771 ; Common # Pe HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT +2772 ; Common # Ps LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT +2773 ; Common # Pe LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT +2774 ; Common # Ps MEDIUM LEFT CURLY BRACKET ORNAMENT +2775 ; Common # Pe MEDIUM RIGHT CURLY BRACKET ORNAMENT +2776..2793 ; Common # No [30] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN +2794..27BF ; Common # So [44] HEAVY WIDE-HEADED RIGHTWARDS ARROW..DOUBLE CURLY LOOP +27C0..27C4 ; Common # Sm [5] THREE DIMENSIONAL ANGLE..OPEN SUPERSET +27C5 ; Common # Ps LEFT S-SHAPED BAG DELIMITER +27C6 ; Common # Pe RIGHT S-SHAPED BAG DELIMITER +27C7..27E5 ; Common # Sm [31] OR WITH DOT INSIDE..WHITE SQUARE WITH RIGHTWARDS TICK +27E6 ; Common # Ps MATHEMATICAL LEFT WHITE SQUARE BRACKET +27E7 ; Common # Pe MATHEMATICAL RIGHT WHITE SQUARE BRACKET +27E8 ; Common # Ps MATHEMATICAL LEFT ANGLE BRACKET +27E9 ; Common # Pe MATHEMATICAL RIGHT ANGLE BRACKET +27EA ; Common # Ps MATHEMATICAL LEFT DOUBLE ANGLE BRACKET +27EB ; Common # Pe MATHEMATICAL RIGHT DOUBLE ANGLE BRACKET +27EC ; Common # Ps MATHEMATICAL LEFT WHITE TORTOISE SHELL BRACKET +27ED ; Common # Pe MATHEMATICAL RIGHT WHITE TORTOISE SHELL BRACKET +27EE ; Common # Ps MATHEMATICAL LEFT FLATTENED PARENTHESIS +27EF ; Common # Pe MATHEMATICAL RIGHT FLATTENED PARENTHESIS +27F0..27FF ; Common # Sm [16] UPWARDS QUADRUPLE ARROW..LONG RIGHTWARDS SQUIGGLE ARROW +2900..2982 ; Common # Sm 
[131] RIGHTWARDS TWO-HEADED ARROW WITH VERTICAL STROKE..Z NOTATION TYPE COLON +2983 ; Common # Ps LEFT WHITE CURLY BRACKET +2984 ; Common # Pe RIGHT WHITE CURLY BRACKET +2985 ; Common # Ps LEFT WHITE PARENTHESIS +2986 ; Common # Pe RIGHT WHITE PARENTHESIS +2987 ; Common # Ps Z NOTATION LEFT IMAGE BRACKET +2988 ; Common # Pe Z NOTATION RIGHT IMAGE BRACKET +2989 ; Common # Ps Z NOTATION LEFT BINDING BRACKET +298A ; Common # Pe Z NOTATION RIGHT BINDING BRACKET +298B ; Common # Ps LEFT SQUARE BRACKET WITH UNDERBAR +298C ; Common # Pe RIGHT SQUARE BRACKET WITH UNDERBAR +298D ; Common # Ps LEFT SQUARE BRACKET WITH TICK IN TOP CORNER +298E ; Common # Pe RIGHT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +298F ; Common # Ps LEFT SQUARE BRACKET WITH TICK IN BOTTOM CORNER +2990 ; Common # Pe RIGHT SQUARE BRACKET WITH TICK IN TOP CORNER +2991 ; Common # Ps LEFT ANGLE BRACKET WITH DOT +2992 ; Common # Pe RIGHT ANGLE BRACKET WITH DOT +2993 ; Common # Ps LEFT ARC LESS-THAN BRACKET +2994 ; Common # Pe RIGHT ARC GREATER-THAN BRACKET +2995 ; Common # Ps DOUBLE LEFT ARC GREATER-THAN BRACKET +2996 ; Common # Pe DOUBLE RIGHT ARC LESS-THAN BRACKET +2997 ; Common # Ps LEFT BLACK TORTOISE SHELL BRACKET +2998 ; Common # Pe RIGHT BLACK TORTOISE SHELL BRACKET +2999..29D7 ; Common # Sm [63] DOTTED FENCE..BLACK HOURGLASS +29D8 ; Common # Ps LEFT WIGGLY FENCE +29D9 ; Common # Pe RIGHT WIGGLY FENCE +29DA ; Common # Ps LEFT DOUBLE WIGGLY FENCE +29DB ; Common # Pe RIGHT DOUBLE WIGGLY FENCE +29DC..29FB ; Common # Sm [32] INCOMPLETE INFINITY..TRIPLE PLUS +29FC ; Common # Ps LEFT-POINTING CURVED ANGLE BRACKET +29FD ; Common # Pe RIGHT-POINTING CURVED ANGLE BRACKET +29FE..2AFF ; Common # Sm [258] TINY..N-ARY WHITE VERTICAL BAR +2B00..2B2F ; Common # So [48] NORTH EAST WHITE ARROW..WHITE VERTICAL ELLIPSE +2B30..2B44 ; Common # Sm [21] LEFT ARROW WITH SMALL CIRCLE..RIGHTWARDS ARROW THROUGH SUPERSET +2B45..2B46 ; Common # So [2] LEFTWARDS QUADRUPLE ARROW..RIGHTWARDS QUADRUPLE ARROW +2B47..2B4C ; Common # 
Sm [6] REVERSE TILDE OPERATOR ABOVE RIGHTWARDS ARROW..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR +2B4D..2B73 ; Common # So [39] DOWNWARDS TRIANGLE-HEADED ZIGZAG ARROW..DOWNWARDS TRIANGLE-HEADED ARROW TO BAR +2B76..2B95 ; Common # So [32] NORTH WEST TRIANGLE-HEADED ARROW TO BAR..RIGHTWARDS BLACK ARROW +2B97..2BFF ; Common # So [105] SYMBOL FOR TYPE A ELECTRONICS..HELLSCHREIBER PAUSE SYMBOL +2E00..2E01 ; Common # Po [2] RIGHT ANGLE SUBSTITUTION MARKER..RIGHT ANGLE DOTTED SUBSTITUTION MARKER +2E02 ; Common # Pi LEFT SUBSTITUTION BRACKET +2E03 ; Common # Pf RIGHT SUBSTITUTION BRACKET +2E04 ; Common # Pi LEFT DOTTED SUBSTITUTION BRACKET +2E05 ; Common # Pf RIGHT DOTTED SUBSTITUTION BRACKET +2E06..2E08 ; Common # Po [3] RAISED INTERPOLATION MARKER..DOTTED TRANSPOSITION MARKER +2E09 ; Common # Pi LEFT TRANSPOSITION BRACKET +2E0A ; Common # Pf RIGHT TRANSPOSITION BRACKET +2E0B ; Common # Po RAISED SQUARE +2E0C ; Common # Pi LEFT RAISED OMISSION BRACKET +2E0D ; Common # Pf RIGHT RAISED OMISSION BRACKET +2E0E..2E16 ; Common # Po [9] EDITORIAL CORONIS..DOTTED RIGHT-POINTING ANGLE +2E17 ; Common # Pd DOUBLE OBLIQUE HYPHEN +2E18..2E19 ; Common # Po [2] INVERTED INTERROBANG..PALM BRANCH +2E1A ; Common # Pd HYPHEN WITH DIAERESIS +2E1B ; Common # Po TILDE WITH RING ABOVE +2E1C ; Common # Pi LEFT LOW PARAPHRASE BRACKET +2E1D ; Common # Pf RIGHT LOW PARAPHRASE BRACKET +2E1E..2E1F ; Common # Po [2] TILDE WITH DOT ABOVE..TILDE WITH DOT BELOW +2E20 ; Common # Pi LEFT VERTICAL BAR WITH QUILL +2E21 ; Common # Pf RIGHT VERTICAL BAR WITH QUILL +2E22 ; Common # Ps TOP LEFT HALF BRACKET +2E23 ; Common # Pe TOP RIGHT HALF BRACKET +2E24 ; Common # Ps BOTTOM LEFT HALF BRACKET +2E25 ; Common # Pe BOTTOM RIGHT HALF BRACKET +2E26 ; Common # Ps LEFT SIDEWAYS U BRACKET +2E27 ; Common # Pe RIGHT SIDEWAYS U BRACKET +2E28 ; Common # Ps LEFT DOUBLE PARENTHESIS +2E29 ; Common # Pe RIGHT DOUBLE PARENTHESIS +2E2A..2E2E ; Common # Po [5] TWO DOTS OVER ONE DOT PUNCTUATION..REVERSED QUESTION MARK +2E2F ; 
Common # Lm VERTICAL TILDE +2E30..2E39 ; Common # Po [10] RING POINT..TOP HALF SECTION SIGN +2E3A..2E3B ; Common # Pd [2] TWO-EM DASH..THREE-EM DASH +2E3C..2E3F ; Common # Po [4] STENOGRAPHIC FULL STOP..CAPITULUM +2E40 ; Common # Pd DOUBLE HYPHEN +2E41 ; Common # Po REVERSED COMMA +2E42 ; Common # Ps DOUBLE LOW-REVERSED-9 QUOTATION MARK +2E43..2E4F ; Common # Po [13] DASH WITH LEFT UPTURN..CORNISH VERSE DIVIDER +2E50..2E51 ; Common # So [2] CROSS PATTY WITH RIGHT CROSSBAR..CROSS PATTY WITH LEFT CROSSBAR +2E52..2E54 ; Common # Po [3] TIRONIAN SIGN CAPITAL ET..MEDIEVAL QUESTION MARK +2E55 ; Common # Ps LEFT SQUARE BRACKET WITH STROKE +2E56 ; Common # Pe RIGHT SQUARE BRACKET WITH STROKE +2E57 ; Common # Ps LEFT SQUARE BRACKET WITH DOUBLE STROKE +2E58 ; Common # Pe RIGHT SQUARE BRACKET WITH DOUBLE STROKE +2E59 ; Common # Ps TOP HALF LEFT PARENTHESIS +2E5A ; Common # Pe TOP HALF RIGHT PARENTHESIS +2E5B ; Common # Ps BOTTOM HALF LEFT PARENTHESIS +2E5C ; Common # Pe BOTTOM HALF RIGHT PARENTHESIS +2E5D ; Common # Pd OBLIQUE HYPHEN +2FF0..2FFB ; Common # So [12] IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID +3000 ; Common # Zs IDEOGRAPHIC SPACE +3001..3003 ; Common # Po [3] IDEOGRAPHIC COMMA..DITTO MARK +3004 ; Common # So JAPANESE INDUSTRIAL STANDARD SYMBOL +3006 ; Common # Lo IDEOGRAPHIC CLOSING MARK +3008 ; Common # Ps LEFT ANGLE BRACKET +3009 ; Common # Pe RIGHT ANGLE BRACKET +300A ; Common # Ps LEFT DOUBLE ANGLE BRACKET +300B ; Common # Pe RIGHT DOUBLE ANGLE BRACKET +300C ; Common # Ps LEFT CORNER BRACKET +300D ; Common # Pe RIGHT CORNER BRACKET +300E ; Common # Ps LEFT WHITE CORNER BRACKET +300F ; Common # Pe RIGHT WHITE CORNER BRACKET +3010 ; Common # Ps LEFT BLACK LENTICULAR BRACKET +3011 ; Common # Pe RIGHT BLACK LENTICULAR BRACKET +3012..3013 ; Common # So [2] POSTAL MARK..GETA MARK +3014 ; Common # Ps LEFT TORTOISE SHELL BRACKET +3015 ; Common # Pe RIGHT TORTOISE SHELL BRACKET +3016 ; Common # Ps LEFT WHITE LENTICULAR 
BRACKET +3017 ; Common # Pe RIGHT WHITE LENTICULAR BRACKET +3018 ; Common # Ps LEFT WHITE TORTOISE SHELL BRACKET +3019 ; Common # Pe RIGHT WHITE TORTOISE SHELL BRACKET +301A ; Common # Ps LEFT WHITE SQUARE BRACKET +301B ; Common # Pe RIGHT WHITE SQUARE BRACKET +301C ; Common # Pd WAVE DASH +301D ; Common # Ps REVERSED DOUBLE PRIME QUOTATION MARK +301E..301F ; Common # Pe [2] DOUBLE PRIME QUOTATION MARK..LOW DOUBLE PRIME QUOTATION MARK +3020 ; Common # So POSTAL MARK FACE +3030 ; Common # Pd WAVY DASH +3031..3035 ; Common # Lm [5] VERTICAL KANA REPEAT MARK..VERTICAL KANA REPEAT MARK LOWER HALF +3036..3037 ; Common # So [2] CIRCLED POSTAL MARK..IDEOGRAPHIC TELEGRAPH LINE FEED SEPARATOR SYMBOL +303C ; Common # Lo MASU MARK +303D ; Common # Po PART ALTERNATION MARK +303E..303F ; Common # So [2] IDEOGRAPHIC VARIATION INDICATOR..IDEOGRAPHIC HALF FILL SPACE +309B..309C ; Common # Sk [2] KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +30A0 ; Common # Pd KATAKANA-HIRAGANA DOUBLE HYPHEN +30FB ; Common # Po KATAKANA MIDDLE DOT +30FC ; Common # Lm KATAKANA-HIRAGANA PROLONGED SOUND MARK +3190..3191 ; Common # So [2] IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION REVERSE MARK +3192..3195 ; Common # No [4] IDEOGRAPHIC ANNOTATION ONE MARK..IDEOGRAPHIC ANNOTATION FOUR MARK +3196..319F ; Common # So [10] IDEOGRAPHIC ANNOTATION TOP MARK..IDEOGRAPHIC ANNOTATION MAN MARK +31C0..31E3 ; Common # So [36] CJK STROKE T..CJK STROKE Q +3220..3229 ; Common # No [10] PARENTHESIZED IDEOGRAPH ONE..PARENTHESIZED IDEOGRAPH TEN +322A..3247 ; Common # So [30] PARENTHESIZED IDEOGRAPH MOON..CIRCLED IDEOGRAPH KOTO +3248..324F ; Common # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE +3250 ; Common # So PARTNERSHIP SIGN +3251..325F ; Common # No [15] CIRCLED NUMBER TWENTY ONE..CIRCLED NUMBER THIRTY FIVE +327F ; Common # So KOREAN STANDARD SYMBOL +3280..3289 ; Common # No [10] CIRCLED IDEOGRAPH ONE..CIRCLED IDEOGRAPH TEN 
+328A..32B0 ; Common # So [39] CIRCLED IDEOGRAPH MOON..CIRCLED IDEOGRAPH NIGHT +32B1..32BF ; Common # No [15] CIRCLED NUMBER THIRTY SIX..CIRCLED NUMBER FIFTY +32C0..32CF ; Common # So [16] IDEOGRAPHIC TELEGRAPH SYMBOL FOR JANUARY..LIMITED LIABILITY SIGN +32FF ; Common # So SQUARE ERA NAME REIWA +3358..33FF ; Common # So [168] IDEOGRAPHIC TELEGRAPH SYMBOL FOR HOUR ZERO..SQUARE GAL +4DC0..4DFF ; Common # So [64] HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION +A700..A716 ; Common # Sk [23] MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR +A717..A71F ; Common # Lm [9] MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK +A720..A721 ; Common # Sk [2] MODIFIER LETTER STRESS AND HIGH TONE..MODIFIER LETTER STRESS AND LOW TONE +A788 ; Common # Lm MODIFIER LETTER LOW CIRCUMFLEX ACCENT +A789..A78A ; Common # Sk [2] MODIFIER LETTER COLON..MODIFIER LETTER SHORT EQUALS SIGN +A830..A835 ; Common # No [6] NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC FRACTION THREE SIXTEENTHS +A836..A837 ; Common # So [2] NORTH INDIC QUARTER MARK..NORTH INDIC PLACEHOLDER MARK +A838 ; Common # Sc NORTH INDIC RUPEE MARK +A839 ; Common # So NORTH INDIC QUANTITY MARK +A92E ; Common # Po KAYAH LI SIGN CWI +A9CF ; Common # Lm JAVANESE PANGRANGKEP +AB5B ; Common # Sk MODIFIER BREVE WITH INVERTED BREVE +AB6A..AB6B ; Common # Sk [2] MODIFIER LETTER LEFT TACK..MODIFIER LETTER RIGHT TACK +FD3E ; Common # Pe ORNATE LEFT PARENTHESIS +FD3F ; Common # Ps ORNATE RIGHT PARENTHESIS +FE10..FE16 ; Common # Po [7] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL QUESTION MARK +FE17 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE LENTICULAR BRACKET +FE18 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRAKCET +FE19 ; Common # Po PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS +FE30 ; Common # Po PRESENTATION FORM FOR VERTICAL TWO DOT LEADER +FE31..FE32 ; Common # Pd [2] PRESENTATION FORM 
FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH +FE33..FE34 ; Common # Pc [2] PRESENTATION FORM FOR VERTICAL LOW LINE..PRESENTATION FORM FOR VERTICAL WAVY LOW LINE +FE35 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS +FE36 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT PARENTHESIS +FE37 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT CURLY BRACKET +FE38 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT CURLY BRACKET +FE39 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT TORTOISE SHELL BRACKET +FE3A ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT TORTOISE SHELL BRACKET +FE3B ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT BLACK LENTICULAR BRACKET +FE3C ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT BLACK LENTICULAR BRACKET +FE3D ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT DOUBLE ANGLE BRACKET +FE3E ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT DOUBLE ANGLE BRACKET +FE3F ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET +FE40 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT ANGLE BRACKET +FE41 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET +FE42 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET +FE43 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET +FE44 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET +FE45..FE46 ; Common # Po [2] SESAME DOT..WHITE SESAME DOT +FE47 ; Common # Ps PRESENTATION FORM FOR VERTICAL LEFT SQUARE BRACKET +FE48 ; Common # Pe PRESENTATION FORM FOR VERTICAL RIGHT SQUARE BRACKET +FE49..FE4C ; Common # Po [4] DASHED OVERLINE..DOUBLE WAVY OVERLINE +FE4D..FE4F ; Common # Pc [3] DASHED LOW LINE..WAVY LOW LINE +FE50..FE52 ; Common # Po [3] SMALL COMMA..SMALL FULL STOP +FE54..FE57 ; Common # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK +FE58 ; Common # Pd SMALL EM DASH +FE59 ; Common # Ps SMALL LEFT PARENTHESIS +FE5A ; Common # Pe SMALL RIGHT PARENTHESIS +FE5B ; Common # Ps SMALL LEFT CURLY BRACKET 
+FE5C ; Common # Pe SMALL RIGHT CURLY BRACKET +FE5D ; Common # Ps SMALL LEFT TORTOISE SHELL BRACKET +FE5E ; Common # Pe SMALL RIGHT TORTOISE SHELL BRACKET +FE5F..FE61 ; Common # Po [3] SMALL NUMBER SIGN..SMALL ASTERISK +FE62 ; Common # Sm SMALL PLUS SIGN +FE63 ; Common # Pd SMALL HYPHEN-MINUS +FE64..FE66 ; Common # Sm [3] SMALL LESS-THAN SIGN..SMALL EQUALS SIGN +FE68 ; Common # Po SMALL REVERSE SOLIDUS +FE69 ; Common # Sc SMALL DOLLAR SIGN +FE6A..FE6B ; Common # Po [2] SMALL PERCENT SIGN..SMALL COMMERCIAL AT +FEFF ; Common # Cf ZERO WIDTH NO-BREAK SPACE +FF01..FF03 ; Common # Po [3] FULLWIDTH EXCLAMATION MARK..FULLWIDTH NUMBER SIGN +FF04 ; Common # Sc FULLWIDTH DOLLAR SIGN +FF05..FF07 ; Common # Po [3] FULLWIDTH PERCENT SIGN..FULLWIDTH APOSTROPHE +FF08 ; Common # Ps FULLWIDTH LEFT PARENTHESIS +FF09 ; Common # Pe FULLWIDTH RIGHT PARENTHESIS +FF0A ; Common # Po FULLWIDTH ASTERISK +FF0B ; Common # Sm FULLWIDTH PLUS SIGN +FF0C ; Common # Po FULLWIDTH COMMA +FF0D ; Common # Pd FULLWIDTH HYPHEN-MINUS +FF0E..FF0F ; Common # Po [2] FULLWIDTH FULL STOP..FULLWIDTH SOLIDUS +FF10..FF19 ; Common # Nd [10] FULLWIDTH DIGIT ZERO..FULLWIDTH DIGIT NINE +FF1A..FF1B ; Common # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON +FF1C..FF1E ; Common # Sm [3] FULLWIDTH LESS-THAN SIGN..FULLWIDTH GREATER-THAN SIGN +FF1F..FF20 ; Common # Po [2] FULLWIDTH QUESTION MARK..FULLWIDTH COMMERCIAL AT +FF3B ; Common # Ps FULLWIDTH LEFT SQUARE BRACKET +FF3C ; Common # Po FULLWIDTH REVERSE SOLIDUS +FF3D ; Common # Pe FULLWIDTH RIGHT SQUARE BRACKET +FF3E ; Common # Sk FULLWIDTH CIRCUMFLEX ACCENT +FF3F ; Common # Pc FULLWIDTH LOW LINE +FF40 ; Common # Sk FULLWIDTH GRAVE ACCENT +FF5B ; Common # Ps FULLWIDTH LEFT CURLY BRACKET +FF5C ; Common # Sm FULLWIDTH VERTICAL LINE +FF5D ; Common # Pe FULLWIDTH RIGHT CURLY BRACKET +FF5E ; Common # Sm FULLWIDTH TILDE +FF5F ; Common # Ps FULLWIDTH LEFT WHITE PARENTHESIS +FF60 ; Common # Pe FULLWIDTH RIGHT WHITE PARENTHESIS +FF61 ; Common # Po HALFWIDTH IDEOGRAPHIC FULL STOP 
+FF62 ; Common # Ps HALFWIDTH LEFT CORNER BRACKET +FF63 ; Common # Pe HALFWIDTH RIGHT CORNER BRACKET +FF64..FF65 ; Common # Po [2] HALFWIDTH IDEOGRAPHIC COMMA..HALFWIDTH KATAKANA MIDDLE DOT +FF70 ; Common # Lm HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK +FF9E..FF9F ; Common # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK +FFE0..FFE1 ; Common # Sc [2] FULLWIDTH CENT SIGN..FULLWIDTH POUND SIGN +FFE2 ; Common # Sm FULLWIDTH NOT SIGN +FFE3 ; Common # Sk FULLWIDTH MACRON +FFE4 ; Common # So FULLWIDTH BROKEN BAR +FFE5..FFE6 ; Common # Sc [2] FULLWIDTH YEN SIGN..FULLWIDTH WON SIGN +FFE8 ; Common # So HALFWIDTH FORMS LIGHT VERTICAL +FFE9..FFEC ; Common # Sm [4] HALFWIDTH LEFTWARDS ARROW..HALFWIDTH DOWNWARDS ARROW +FFED..FFEE ; Common # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE +FFF9..FFFB ; Common # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR +FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER +10100..10102 ; Common # Po [3] AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK +10107..10133 ; Common # No [45] AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND +10137..1013F ; Common # So [9] AEGEAN WEIGHT BASE UNIT..AEGEAN MEASURE THIRD SUBUNIT +10190..1019C ; Common # So [13] ROMAN SEXTANS SIGN..ASCIA SYMBOL +101D0..101FC ; Common # So [45] PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND +102E1..102FB ; Common # No [27] COPTIC EPACT DIGIT ONE..COPTIC EPACT NUMBER NINE HUNDRED +1BCA0..1BCA3 ; Common # Cf [4] SHORTHAND FORMAT LETTER OVERLAP..SHORTHAND FORMAT UP STEP +1CF50..1CFC3 ; Common # So [116] ZNAMENNY NEUME KRYUK..ZNAMENNY NEUME PAUK +1D000..1D0F5 ; Common # So [246] BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO +1D100..1D126 ; Common # So [39] MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2 +1D129..1D164 ; Common # So [60] MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE 
+1D165..1D166 ; Common # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM +1D16A..1D16C ; Common # So [3] MUSICAL SYMBOL FINGERED TREMOLO-1..MUSICAL SYMBOL FINGERED TREMOLO-3 +1D16D..1D172 ; Common # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D173..1D17A ; Common # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE +1D183..1D184 ; Common # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN +1D18C..1D1A9 ; Common # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH +1D1AE..1D1EA ; Common # So [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON +1D2E0..1D2F3 ; Common # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN +1D300..1D356 ; Common # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING +1D360..1D378 ; Common # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE +1D400..1D454 ; Common # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G +1D456..1D49C ; Common # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A +1D49E..1D49F ; Common # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D +1D4A2 ; Common # L& MATHEMATICAL SCRIPT CAPITAL G +1D4A5..1D4A6 ; Common # L& [2] MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K +1D4A9..1D4AC ; Common # L& [4] MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q +1D4AE..1D4B9 ; Common # L& [12] MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D +1D4BB ; Common # L& MATHEMATICAL SCRIPT SMALL F +1D4BD..1D4C3 ; Common # L& [7] MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N +1D4C5..1D505 ; Common # L& [65] MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B +1D507..1D50A ; Common # L& [4] MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G +1D50D..1D514 ; Common # L& [8] MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q +1D516..1D51C ; Common # L& [7] MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL 
FRAKTUR CAPITAL Y +1D51E..1D539 ; Common # L& [28] MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B +1D53B..1D53E ; Common # L& [4] MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G +1D540..1D544 ; Common # L& [5] MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M +1D546 ; Common # L& MATHEMATICAL DOUBLE-STRUCK CAPITAL O +1D54A..1D550 ; Common # L& [7] MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y +1D552..1D6A5 ; Common # L& [340] MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J +1D6A8..1D6C0 ; Common # L& [25] MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD CAPITAL OMEGA +1D6C1 ; Common # Sm MATHEMATICAL BOLD NABLA +1D6C2..1D6DA ; Common # L& [25] MATHEMATICAL BOLD SMALL ALPHA..MATHEMATICAL BOLD SMALL OMEGA +1D6DB ; Common # Sm MATHEMATICAL BOLD PARTIAL DIFFERENTIAL +1D6DC..1D6FA ; Common # L& [31] MATHEMATICAL BOLD EPSILON SYMBOL..MATHEMATICAL ITALIC CAPITAL OMEGA +1D6FB ; Common # Sm MATHEMATICAL ITALIC NABLA +1D6FC..1D714 ; Common # L& [25] MATHEMATICAL ITALIC SMALL ALPHA..MATHEMATICAL ITALIC SMALL OMEGA +1D715 ; Common # Sm MATHEMATICAL ITALIC PARTIAL DIFFERENTIAL +1D716..1D734 ; Common # L& [31] MATHEMATICAL ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD ITALIC CAPITAL OMEGA +1D735 ; Common # Sm MATHEMATICAL BOLD ITALIC NABLA +1D736..1D74E ; Common # L& [25] MATHEMATICAL BOLD ITALIC SMALL ALPHA..MATHEMATICAL BOLD ITALIC SMALL OMEGA +1D74F ; Common # Sm MATHEMATICAL BOLD ITALIC PARTIAL DIFFERENTIAL +1D750..1D76E ; Common # L& [31] MATHEMATICAL BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL SANS-SERIF BOLD CAPITAL OMEGA +1D76F ; Common # Sm MATHEMATICAL SANS-SERIF BOLD NABLA +1D770..1D788 ; Common # L& [25] MATHEMATICAL SANS-SERIF BOLD SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD SMALL OMEGA +1D789 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD PARTIAL DIFFERENTIAL +1D78A..1D7A8 ; Common # L& [31] MATHEMATICAL SANS-SERIF BOLD EPSILON SYMBOL..MATHEMATICAL 
SANS-SERIF BOLD ITALIC CAPITAL OMEGA +1D7A9 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC NABLA +1D7AA..1D7C2 ; Common # L& [25] MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL ALPHA..MATHEMATICAL SANS-SERIF BOLD ITALIC SMALL OMEGA +1D7C3 ; Common # Sm MATHEMATICAL SANS-SERIF BOLD ITALIC PARTIAL DIFFERENTIAL +1D7C4..1D7CB ; Common # L& [8] MATHEMATICAL SANS-SERIF BOLD ITALIC EPSILON SYMBOL..MATHEMATICAL BOLD SMALL DIGAMMA +1D7CE..1D7FF ; Common # Nd [50] MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE +1EC71..1ECAB ; Common # No [59] INDIC SIYAQ NUMBER ONE..INDIC SIYAQ NUMBER PREFIXED NINE +1ECAC ; Common # So INDIC SIYAQ PLACEHOLDER +1ECAD..1ECAF ; Common # No [3] INDIC SIYAQ FRACTION ONE QUARTER..INDIC SIYAQ FRACTION THREE QUARTERS +1ECB0 ; Common # Sc INDIC SIYAQ RUPEE MARK +1ECB1..1ECB4 ; Common # No [4] INDIC SIYAQ NUMBER ALTERNATE ONE..INDIC SIYAQ ALTERNATE LAKH MARK +1ED01..1ED2D ; Common # No [45] OTTOMAN SIYAQ NUMBER ONE..OTTOMAN SIYAQ NUMBER NINETY THOUSAND +1ED2E ; Common # So OTTOMAN SIYAQ MARRATAN +1ED2F..1ED3D ; Common # No [15] OTTOMAN SIYAQ ALTERNATE NUMBER TWO..OTTOMAN SIYAQ FRACTION ONE SIXTH +1F000..1F02B ; Common # So [44] MAHJONG TILE EAST WIND..MAHJONG TILE BACK +1F030..1F093 ; Common # So [100] DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06 +1F0A0..1F0AE ; Common # So [15] PLAYING CARD BACK..PLAYING CARD KING OF SPADES +1F0B1..1F0BF ; Common # So [15] PLAYING CARD ACE OF HEARTS..PLAYING CARD RED JOKER +1F0C1..1F0CF ; Common # So [15] PLAYING CARD ACE OF DIAMONDS..PLAYING CARD BLACK JOKER +1F0D1..1F0F5 ; Common # So [37] PLAYING CARD ACE OF CLUBS..PLAYING CARD TRUMP-21 +1F100..1F10C ; Common # No [13] DIGIT ZERO FULL STOP..DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ZERO +1F10D..1F1AD ; Common # So [161] CIRCLED ZERO WITH SLASH..MASK WORK SYMBOL +1F1E6..1F1FF ; Common # So [26] REGIONAL INDICATOR SYMBOL LETTER A..REGIONAL INDICATOR SYMBOL LETTER Z +1F201..1F202 ; Common # So [2] SQUARED KATAKANA KOKO..SQUARED KATAKANA SA 
+1F210..1F23B ; Common # So [44] SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-914D +1F240..1F248 ; Common # So [9] TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557 +1F250..1F251 ; Common # So [2] CIRCLED IDEOGRAPH ADVANTAGE..CIRCLED IDEOGRAPH ACCEPT +1F260..1F265 ; Common # So [6] ROUNDED SYMBOL FOR FU..ROUNDED SYMBOL FOR CAI +1F300..1F3FA ; Common # So [251] CYCLONE..AMPHORA +1F3FB..1F3FF ; Common # Sk [5] EMOJI MODIFIER FITZPATRICK TYPE-1-2..EMOJI MODIFIER FITZPATRICK TYPE-6 +1F400..1F6D7 ; Common # So [728] RAT..ELEVATOR +1F6DD..1F6EC ; Common # So [16] PLAYGROUND SLIDE..AIRPLANE ARRIVING +1F6F0..1F6FC ; Common # So [13] SATELLITE..ROLLER SKATE +1F700..1F773 ; Common # So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE +1F780..1F7D8 ; Common # So [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE +1F7E0..1F7EB ; Common # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE +1F7F0 ; Common # So HEAVY EQUALS SIGN +1F800..1F80B ; Common # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD +1F810..1F847 ; Common # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW +1F850..1F859 ; Common # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW +1F860..1F887 ; Common # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW +1F890..1F8AD ; Common # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS +1F8B0..1F8B1 ; Common # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST +1F900..1FA53 ; Common # So [340] CIRCLED CROSS FORMEE WITH FOUR DOTS..BLACK CHESS KNIGHT-BISHOP +1FA60..1FA6D ; Common # So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER +1FA70..1FA74 ; Common # So [5] BALLET SHOES..THONG SANDAL +1FA78..1FA7C ; Common # So [5] DROP OF BLOOD..CRUTCH 
+1FA80..1FA86 ; Common # So [7] YO-YO..NESTING DOLLS +1FA90..1FAAC ; Common # So [29] RINGED PLANET..HAMSA +1FAB0..1FABA ; Common # So [11] FLY..NEST WITH EGGS +1FAC0..1FAC5 ; Common # So [6] ANATOMICAL HEART..PERSON WITH CROWN +1FAD0..1FAD9 ; Common # So [10] BLUEBERRIES..JAR +1FAE0..1FAE7 ; Common # So [8] MELTING FACE..BUBBLES +1FAF0..1FAF6 ; Common # So [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS +1FB00..1FB92 ; Common # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK +1FB94..1FBCA ; Common # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON +1FBF0..1FBF9 ; Common # Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE +E0001 ; Common # Cf LANGUAGE TAG +E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG + +# Total code points: 8252 + +# ================================================ + +0041..005A ; Latin # L& [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z +0061..007A ; Latin # L& [26] LATIN SMALL LETTER A..LATIN SMALL LETTER Z +00AA ; Latin # Lo FEMININE ORDINAL INDICATOR +00BA ; Latin # Lo MASCULINE ORDINAL INDICATOR +00C0..00D6 ; Latin # L& [23] LATIN CAPITAL LETTER A WITH GRAVE..LATIN CAPITAL LETTER O WITH DIAERESIS +00D8..00F6 ; Latin # L& [31] LATIN CAPITAL LETTER O WITH STROKE..LATIN SMALL LETTER O WITH DIAERESIS +00F8..01BA ; Latin # L& [195] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER EZH WITH TAIL +01BB ; Latin # Lo LATIN LETTER TWO WITH STROKE +01BC..01BF ; Latin # L& [4] LATIN CAPITAL LETTER TONE FIVE..LATIN LETTER WYNN +01C0..01C3 ; Latin # Lo [4] LATIN LETTER DENTAL CLICK..LATIN LETTER RETROFLEX CLICK +01C4..0293 ; Latin # L& [208] LATIN CAPITAL LETTER DZ WITH CARON..LATIN SMALL LETTER EZH WITH CURL +0294 ; Latin # Lo LATIN LETTER GLOTTAL STOP +0295..02AF ; Latin # L& [27] LATIN LETTER PHARYNGEAL VOICED FRICATIVE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL +02B0..02B8 ; Latin # Lm [9] MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y +02E0..02E4 
; Latin # Lm [5] MODIFIER LETTER SMALL GAMMA..MODIFIER LETTER SMALL REVERSED GLOTTAL STOP +1D00..1D25 ; Latin # L& [38] LATIN LETTER SMALL CAPITAL A..LATIN LETTER AIN +1D2C..1D5C ; Latin # Lm [49] MODIFIER LETTER CAPITAL A..MODIFIER LETTER SMALL AIN +1D62..1D65 ; Latin # Lm [4] LATIN SUBSCRIPT SMALL LETTER I..LATIN SUBSCRIPT SMALL LETTER V +1D6B..1D77 ; Latin # L& [13] LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G +1D79..1D9A ; Latin # L& [34] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK +1D9B..1DBE ; Latin # Lm [36] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL EZH +1E00..1EFF ; Latin # L& [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP +2071 ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER I +207F ; Latin # Lm SUPERSCRIPT LATIN SMALL LETTER N +2090..209C ; Latin # Lm [13] LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER T +212A..212B ; Latin # L& [2] KELVIN SIGN..ANGSTROM SIGN +2132 ; Latin # L& TURNED CAPITAL F +214E ; Latin # L& TURNED SMALL F +2160..2182 ; Latin # Nl [35] ROMAN NUMERAL ONE..ROMAN NUMERAL TEN THOUSAND +2183..2184 ; Latin # L& [2] ROMAN NUMERAL REVERSED ONE HUNDRED..LATIN SMALL LETTER REVERSED C +2185..2188 ; Latin # Nl [4] ROMAN NUMERAL SIX LATE FORM..ROMAN NUMERAL ONE HUNDRED THOUSAND +2C60..2C7B ; Latin # L& [28] LATIN CAPITAL LETTER L WITH DOUBLE BAR..LATIN LETTER SMALL CAPITAL TURNED E +2C7C..2C7D ; Latin # Lm [2] LATIN SUBSCRIPT SMALL LETTER J..MODIFIER LETTER CAPITAL V +2C7E..2C7F ; Latin # L& [2] LATIN CAPITAL LETTER S WITH SWASH TAIL..LATIN CAPITAL LETTER Z WITH SWASH TAIL +A722..A76F ; Latin # L& [78] LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF..LATIN SMALL LETTER CON +A770 ; Latin # Lm MODIFIER LETTER US +A771..A787 ; Latin # L& [23] LATIN SMALL LETTER DUM..LATIN SMALL LETTER INSULAR T +A78B..A78E ; Latin # L& [4] LATIN CAPITAL LETTER SALTILLO..LATIN SMALL LETTER L WITH RETROFLEX HOOK AND BELT +A78F ; Latin # Lo LATIN LETTER SINOLOGICAL DOT +A790..A7CA ; 
Latin # L& [59] LATIN CAPITAL LETTER N WITH DESCENDER..LATIN SMALL LETTER S WITH SHORT STROKE OVERLAY +A7D0..A7D1 ; Latin # L& [2] LATIN CAPITAL LETTER CLOSED INSULAR G..LATIN SMALL LETTER CLOSED INSULAR G +A7D3 ; Latin # L& LATIN SMALL LETTER DOUBLE THORN +A7D5..A7D9 ; Latin # L& [5] LATIN SMALL LETTER DOUBLE WYNN..LATIN SMALL LETTER SIGMOID S +A7F2..A7F4 ; Latin # Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q +A7F5..A7F6 ; Latin # L& [2] LATIN CAPITAL LETTER REVERSED HALF H..LATIN SMALL LETTER REVERSED HALF H +A7F7 ; Latin # Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I +A7F8..A7F9 ; Latin # Lm [2] MODIFIER LETTER CAPITAL H WITH STROKE..MODIFIER LETTER SMALL LIGATURE OE +A7FA ; Latin # L& LATIN LETTER SMALL CAPITAL TURNED M +A7FB..A7FF ; Latin # Lo [5] LATIN EPIGRAPHIC LETTER REVERSED F..LATIN EPIGRAPHIC LETTER ARCHAIC M +AB30..AB5A ; Latin # L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG +AB5C..AB5F ; Latin # Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK +AB60..AB64 ; Latin # L& [5] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER INVERTED ALPHA +AB66..AB68 ; Latin # L& [3] LATIN SMALL LETTER DZ DIGRAPH WITH RETROFLEX HOOK..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE +AB69 ; Latin # Lm MODIFIER LETTER SMALL TURNED W +FB00..FB06 ; Latin # L& [7] LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST +FF21..FF3A ; Latin # L& [26] FULLWIDTH LATIN CAPITAL LETTER A..FULLWIDTH LATIN CAPITAL LETTER Z +FF41..FF5A ; Latin # L& [26] FULLWIDTH LATIN SMALL LETTER A..FULLWIDTH LATIN SMALL LETTER Z +10780..10785 ; Latin # Lm [6] MODIFIER LETTER SMALL CAPITAL AA..MODIFIER LETTER SMALL B WITH HOOK +10787..107B0 ; Latin # Lm [42] MODIFIER LETTER SMALL DZ DIGRAPH..MODIFIER LETTER SMALL V WITH RIGHT HOOK +107B2..107BA ; Latin # Lm [9] MODIFIER LETTER SMALL CAPITAL Y..MODIFIER LETTER SMALL S WITH CURL +1DF00..1DF09 ; Latin # L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND 
RETROFLEX HOOK +1DF0A ; Latin # Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK +1DF0B..1DF1E ; Latin # L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL + +# Total code points: 1475 + +# ================================================ + +0370..0373 ; Greek # L& [4] GREEK CAPITAL LETTER HETA..GREEK SMALL LETTER ARCHAIC SAMPI +0375 ; Greek # Sk GREEK LOWER NUMERAL SIGN +0376..0377 ; Greek # L& [2] GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA..GREEK SMALL LETTER PAMPHYLIAN DIGAMMA +037A ; Greek # Lm GREEK YPOGEGRAMMENI +037B..037D ; Greek # L& [3] GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL +037F ; Greek # L& GREEK CAPITAL LETTER YOT +0384 ; Greek # Sk GREEK TONOS +0386 ; Greek # L& GREEK CAPITAL LETTER ALPHA WITH TONOS +0388..038A ; Greek # L& [3] GREEK CAPITAL LETTER EPSILON WITH TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS +038C ; Greek # L& GREEK CAPITAL LETTER OMICRON WITH TONOS +038E..03A1 ; Greek # L& [20] GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER RHO +03A3..03E1 ; Greek # L& [63] GREEK CAPITAL LETTER SIGMA..GREEK SMALL LETTER SAMPI +03F0..03F5 ; Greek # L& [6] GREEK KAPPA SYMBOL..GREEK LUNATE EPSILON SYMBOL +03F6 ; Greek # Sm GREEK REVERSED LUNATE EPSILON SYMBOL +03F7..03FF ; Greek # L& [9] GREEK CAPITAL LETTER SHO..GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL +1D26..1D2A ; Greek # L& [5] GREEK LETTER SMALL CAPITAL GAMMA..GREEK LETTER SMALL CAPITAL PSI +1D5D..1D61 ; Greek # Lm [5] MODIFIER LETTER SMALL BETA..MODIFIER LETTER SMALL CHI +1D66..1D6A ; Greek # Lm [5] GREEK SUBSCRIPT SMALL LETTER BETA..GREEK SUBSCRIPT SMALL LETTER CHI +1DBF ; Greek # Lm MODIFIER LETTER SMALL THETA +1F00..1F15 ; Greek # L& [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA +1F18..1F1D ; Greek # L& [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA +1F20..1F45 ; Greek # L& [38] GREEK SMALL LETTER ETA WITH 
PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA +1F48..1F4D ; Greek # L& [6] GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA +1F50..1F57 ; Greek # L& [8] GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI +1F59 ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA +1F5B ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA +1F5D ; Greek # L& GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA +1F5F..1F7D ; Greek # L& [31] GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI..GREEK SMALL LETTER OMEGA WITH OXIA +1F80..1FB4 ; Greek # L& [53] GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI +1FB6..1FBC ; Greek # L& [7] GREEK SMALL LETTER ALPHA WITH PERISPOMENI..GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI +1FBD ; Greek # Sk GREEK KORONIS +1FBE ; Greek # L& GREEK PROSGEGRAMMENI +1FBF..1FC1 ; Greek # Sk [3] GREEK PSILI..GREEK DIALYTIKA AND PERISPOMENI +1FC2..1FC4 ; Greek # L& [3] GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI +1FC6..1FCC ; Greek # L& [7] GREEK SMALL LETTER ETA WITH PERISPOMENI..GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI +1FCD..1FCF ; Greek # Sk [3] GREEK PSILI AND VARIA..GREEK PSILI AND PERISPOMENI +1FD0..1FD3 ; Greek # L& [4] GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA +1FD6..1FDB ; Greek # L& [6] GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK CAPITAL LETTER IOTA WITH OXIA +1FDD..1FDF ; Greek # Sk [3] GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI +1FE0..1FEC ; Greek # L& [13] GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK CAPITAL LETTER RHO WITH DASIA +1FED..1FEF ; Greek # Sk [3] GREEK DIALYTIKA AND VARIA..GREEK VARIA +1FF2..1FF4 ; Greek # L& [3] GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI +1FF6..1FFC ; Greek # L& [7] GREEK SMALL 
LETTER OMEGA WITH PERISPOMENI..GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI +1FFD..1FFE ; Greek # Sk [2] GREEK OXIA..GREEK DASIA +2126 ; Greek # L& OHM SIGN +AB65 ; Greek # L& GREEK LETTER SMALL CAPITAL OMEGA +10140..10174 ; Greek # Nl [53] GREEK ACROPHONIC ATTIC ONE QUARTER..GREEK ACROPHONIC STRATIAN FIFTY MNAS +10175..10178 ; Greek # No [4] GREEK ONE HALF SIGN..GREEK THREE QUARTERS SIGN +10179..10189 ; Greek # So [17] GREEK YEAR SIGN..GREEK TRYBLION BASE SIGN +1018A..1018B ; Greek # No [2] GREEK ZERO SIGN..GREEK ONE QUARTER SIGN +1018C..1018E ; Greek # So [3] GREEK SINUSOID SIGN..NOMISMA SIGN +101A0 ; Greek # So GREEK SYMBOL TAU RHO +1D200..1D241 ; Greek # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 +1D242..1D244 ; Greek # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D245 ; Greek # So GREEK MUSICAL LEIMMA + +# Total code points: 518 + +# ================================================ + +0400..0481 ; Cyrillic # L& [130] CYRILLIC CAPITAL LETTER IE WITH GRAVE..CYRILLIC SMALL LETTER KOPPA +0482 ; Cyrillic # So CYRILLIC THOUSANDS SIGN +0483..0484 ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC PALATALIZATION +0487 ; Cyrillic # Mn COMBINING CYRILLIC POKRYTIE +0488..0489 ; Cyrillic # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN +048A..052F ; Cyrillic # L& [166] CYRILLIC CAPITAL LETTER SHORT I WITH TAIL..CYRILLIC SMALL LETTER EL WITH DESCENDER +1C80..1C88 ; Cyrillic # L& [9] CYRILLIC SMALL LETTER ROUNDED VE..CYRILLIC SMALL LETTER UNBLENDED UK +1D2B ; Cyrillic # L& CYRILLIC LETTER SMALL CAPITAL EL +1D78 ; Cyrillic # Lm MODIFIER LETTER CYRILLIC EN +2DE0..2DFF ; Cyrillic # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS +A640..A66D ; Cyrillic # L& [46] CYRILLIC CAPITAL LETTER ZEMLYA..CYRILLIC SMALL LETTER DOUBLE MONOCULAR O +A66E ; Cyrillic # Lo CYRILLIC LETTER MULTIOCULAR O +A66F ; Cyrillic # Mn COMBINING CYRILLIC VZMET 
+A670..A672 ; Cyrillic # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN +A673 ; Cyrillic # Po SLAVONIC ASTERISK +A674..A67D ; Cyrillic # Mn [10] COMBINING CYRILLIC LETTER UKRAINIAN IE..COMBINING CYRILLIC PAYEROK +A67E ; Cyrillic # Po CYRILLIC KAVYKA +A67F ; Cyrillic # Lm CYRILLIC PAYEROK +A680..A69B ; Cyrillic # L& [28] CYRILLIC CAPITAL LETTER DWE..CYRILLIC SMALL LETTER CROSSED O +A69C..A69D ; Cyrillic # Lm [2] MODIFIER LETTER CYRILLIC HARD SIGN..MODIFIER LETTER CYRILLIC SOFT SIGN +A69E..A69F ; Cyrillic # Mn [2] COMBINING CYRILLIC LETTER EF..COMBINING CYRILLIC LETTER IOTIFIED E +FE2E..FE2F ; Cyrillic # Mn [2] COMBINING CYRILLIC TITLO LEFT HALF..COMBINING CYRILLIC TITLO RIGHT HALF + +# Total code points: 443 + +# ================================================ + +0531..0556 ; Armenian # L& [38] ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH +0559 ; Armenian # Lm ARMENIAN MODIFIER LETTER LEFT HALF RING +055A..055F ; Armenian # Po [6] ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK +0560..0588 ; Armenian # L& [41] ARMENIAN SMALL LETTER TURNED AYB..ARMENIAN SMALL LETTER YI WITH STROKE +0589 ; Armenian # Po ARMENIAN FULL STOP +058A ; Armenian # Pd ARMENIAN HYPHEN +058D..058E ; Armenian # So [2] RIGHT-FACING ARMENIAN ETERNITY SIGN..LEFT-FACING ARMENIAN ETERNITY SIGN +058F ; Armenian # Sc ARMENIAN DRAM SIGN +FB13..FB17 ; Armenian # L& [5] ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH + +# Total code points: 96 + +# ================================================ + +0591..05BD ; Hebrew # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG +05BE ; Hebrew # Pd HEBREW PUNCTUATION MAQAF +05BF ; Hebrew # Mn HEBREW POINT RAFE +05C0 ; Hebrew # Po HEBREW PUNCTUATION PASEQ +05C1..05C2 ; Hebrew # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT +05C3 ; Hebrew # Po HEBREW PUNCTUATION SOF PASUQ +05C4..05C5 ; Hebrew # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT +05C6 ; Hebrew # Po HEBREW PUNCTUATION NUN 
HAFUKHA +05C7 ; Hebrew # Mn HEBREW POINT QAMATS QATAN +05D0..05EA ; Hebrew # Lo [27] HEBREW LETTER ALEF..HEBREW LETTER TAV +05EF..05F2 ; Hebrew # Lo [4] HEBREW YOD TRIANGLE..HEBREW LIGATURE YIDDISH DOUBLE YOD +05F3..05F4 ; Hebrew # Po [2] HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM +FB1D ; Hebrew # Lo HEBREW LETTER YOD WITH HIRIQ +FB1E ; Hebrew # Mn HEBREW POINT JUDEO-SPANISH VARIKA +FB1F..FB28 ; Hebrew # Lo [10] HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER WIDE TAV +FB29 ; Hebrew # Sm HEBREW LETTER ALTERNATIVE PLUS SIGN +FB2A..FB36 ; Hebrew # Lo [13] HEBREW LETTER SHIN WITH SHIN DOT..HEBREW LETTER ZAYIN WITH DAGESH +FB38..FB3C ; Hebrew # Lo [5] HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH +FB3E ; Hebrew # Lo HEBREW LETTER MEM WITH DAGESH +FB40..FB41 ; Hebrew # Lo [2] HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH +FB43..FB44 ; Hebrew # Lo [2] HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH +FB46..FB4F ; Hebrew # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE ALEF LAMED + +# Total code points: 134 + +# ================================================ + +0600..0604 ; Arabic # Cf [5] ARABIC NUMBER SIGN..ARABIC SIGN SAMVAT +0606..0608 ; Arabic # Sm [3] ARABIC-INDIC CUBE ROOT..ARABIC RAY +0609..060A ; Arabic # Po [2] ARABIC-INDIC PER MILLE SIGN..ARABIC-INDIC PER TEN THOUSAND SIGN +060B ; Arabic # Sc AFGHANI SIGN +060D ; Arabic # Po ARABIC DATE SEPARATOR +060E..060F ; Arabic # So [2] ARABIC POETIC VERSE SIGN..ARABIC SIGN MISRA +0610..061A ; Arabic # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA +061C ; Arabic # Cf ARABIC LETTER MARK +061D..061E ; Arabic # Po [2] ARABIC END OF TEXT MARK..ARABIC TRIPLE DOT PUNCTUATION MARK +0620..063F ; Arabic # Lo [32] ARABIC LETTER KASHMIRI YEH..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE +0641..064A ; Arabic # Lo [10] ARABIC LETTER FEH..ARABIC LETTER YEH +0656..065F ; Arabic # Mn [10] ARABIC SUBSCRIPT ALEF..ARABIC WAVY HAMZA BELOW 
+0660..0669 ; Arabic # Nd [10] ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE +066A..066D ; Arabic # Po [4] ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR +066E..066F ; Arabic # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF +0671..06D3 ; Arabic # Lo [99] ARABIC LETTER ALEF WASLA..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE +06D4 ; Arabic # Po ARABIC FULL STOP +06D5 ; Arabic # Lo ARABIC LETTER AE +06D6..06DC ; Arabic # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN +06DE ; Arabic # So ARABIC START OF RUB EL HIZB +06DF..06E4 ; Arabic # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA +06E5..06E6 ; Arabic # Lm [2] ARABIC SMALL WAW..ARABIC SMALL YEH +06E7..06E8 ; Arabic # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON +06E9 ; Arabic # So ARABIC PLACE OF SAJDAH +06EA..06ED ; Arabic # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM +06EE..06EF ; Arabic # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V +06F0..06F9 ; Arabic # Nd [10] EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE +06FA..06FC ; Arabic # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW +06FD..06FE ; Arabic # So [2] ARABIC SIGN SINDHI AMPERSAND..ARABIC SIGN SINDHI POSTPOSITION MEN +06FF ; Arabic # Lo ARABIC LETTER HEH WITH INVERTED V +0750..077F ; Arabic # Lo [48] ARABIC LETTER BEH WITH THREE DOTS HORIZONTALLY BELOW..ARABIC LETTER KAF WITH TWO DOTS ABOVE +0870..0887 ; Arabic # Lo [24] ARABIC LETTER ALEF WITH ATTACHED FATHA..ARABIC BASELINE ROUND DOT +0888 ; Arabic # Sk ARABIC RAISED ROUND DOT +0889..088E ; Arabic # Lo [6] ARABIC LETTER NOON WITH INVERTED SMALL V..ARABIC VERTICAL TAIL +0890..0891 ; Arabic # Cf [2] ARABIC POUND MARK ABOVE..ARABIC PIASTRE MARK ABOVE +0898..089F ; Arabic # Mn [8] ARABIC SMALL HIGH WORD AL-JUZ..ARABIC HALF MADDA OVER MADDA +08A0..08C8 ; Arabic # Lo [41] ARABIC LETTER BEH WITH SMALL V BELOW..ARABIC LETTER GRAF +08C9 ; 
Arabic # Lm ARABIC SMALL FARSI YEH +08CA..08E1 ; Arabic # Mn [24] ARABIC SMALL HIGH FARSI YEH..ARABIC SMALL HIGH SIGN SAFHA +08E3..08FF ; Arabic # Mn [29] ARABIC TURNED DAMMA BELOW..ARABIC MARK SIDEWAYS NOON GHUNNA +FB50..FBB1 ; Arabic # Lo [98] ARABIC LETTER ALEF WASLA ISOLATED FORM..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM +FBB2..FBC2 ; Arabic # Sk [17] ARABIC SYMBOL DOT ABOVE..ARABIC SYMBOL WASLA ABOVE +FBD3..FD3D ; Arabic # Lo [363] ARABIC LETTER NG ISOLATED FORM..ARABIC LIGATURE ALEF WITH FATHATAN ISOLATED FORM +FD40..FD4F ; Arabic # So [16] ARABIC LIGATURE RAHIMAHU ALLAAH..ARABIC LIGATURE RAHIMAHUM ALLAAH +FD50..FD8F ; Arabic # Lo [64] ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM +FD92..FDC7 ; Arabic # Lo [54] ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM +FDCF ; Arabic # So ARABIC LIGATURE SALAAMUHU ALAYNAA +FDF0..FDFB ; Arabic # Lo [12] ARABIC LIGATURE SALLA USED AS KORANIC STOP SIGN ISOLATED FORM..ARABIC LIGATURE JALLAJALALOUHOU +FDFC ; Arabic # Sc RIAL SIGN +FDFD..FDFF ; Arabic # So [3] ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM..ARABIC LIGATURE AZZA WA JALL +FE70..FE74 ; Arabic # Lo [5] ARABIC FATHATAN ISOLATED FORM..ARABIC KASRATAN ISOLATED FORM +FE76..FEFC ; Arabic # Lo [135] ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM +10E60..10E7E ; Arabic # No [31] RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS +1EE00..1EE03 ; Arabic # Lo [4] ARABIC MATHEMATICAL ALEF..ARABIC MATHEMATICAL DAL +1EE05..1EE1F ; Arabic # Lo [27] ARABIC MATHEMATICAL WAW..ARABIC MATHEMATICAL DOTLESS QAF +1EE21..1EE22 ; Arabic # Lo [2] ARABIC MATHEMATICAL INITIAL BEH..ARABIC MATHEMATICAL INITIAL JEEM +1EE24 ; Arabic # Lo ARABIC MATHEMATICAL INITIAL HEH +1EE27 ; Arabic # Lo ARABIC MATHEMATICAL INITIAL HAH +1EE29..1EE32 ; Arabic # Lo [10] ARABIC MATHEMATICAL INITIAL YEH..ARABIC MATHEMATICAL INITIAL QAF +1EE34..1EE37 ; Arabic # Lo [4] 
ARABIC MATHEMATICAL INITIAL SHEEN..ARABIC MATHEMATICAL INITIAL KHAH +1EE39 ; Arabic # Lo ARABIC MATHEMATICAL INITIAL DAD +1EE3B ; Arabic # Lo ARABIC MATHEMATICAL INITIAL GHAIN +1EE42 ; Arabic # Lo ARABIC MATHEMATICAL TAILED JEEM +1EE47 ; Arabic # Lo ARABIC MATHEMATICAL TAILED HAH +1EE49 ; Arabic # Lo ARABIC MATHEMATICAL TAILED YEH +1EE4B ; Arabic # Lo ARABIC MATHEMATICAL TAILED LAM +1EE4D..1EE4F ; Arabic # Lo [3] ARABIC MATHEMATICAL TAILED NOON..ARABIC MATHEMATICAL TAILED AIN +1EE51..1EE52 ; Arabic # Lo [2] ARABIC MATHEMATICAL TAILED SAD..ARABIC MATHEMATICAL TAILED QAF +1EE54 ; Arabic # Lo ARABIC MATHEMATICAL TAILED SHEEN +1EE57 ; Arabic # Lo ARABIC MATHEMATICAL TAILED KHAH +1EE59 ; Arabic # Lo ARABIC MATHEMATICAL TAILED DAD +1EE5B ; Arabic # Lo ARABIC MATHEMATICAL TAILED GHAIN +1EE5D ; Arabic # Lo ARABIC MATHEMATICAL TAILED DOTLESS NOON +1EE5F ; Arabic # Lo ARABIC MATHEMATICAL TAILED DOTLESS QAF +1EE61..1EE62 ; Arabic # Lo [2] ARABIC MATHEMATICAL STRETCHED BEH..ARABIC MATHEMATICAL STRETCHED JEEM +1EE64 ; Arabic # Lo ARABIC MATHEMATICAL STRETCHED HEH +1EE67..1EE6A ; Arabic # Lo [4] ARABIC MATHEMATICAL STRETCHED HAH..ARABIC MATHEMATICAL STRETCHED KAF +1EE6C..1EE72 ; Arabic # Lo [7] ARABIC MATHEMATICAL STRETCHED MEEM..ARABIC MATHEMATICAL STRETCHED QAF +1EE74..1EE77 ; Arabic # Lo [4] ARABIC MATHEMATICAL STRETCHED SHEEN..ARABIC MATHEMATICAL STRETCHED KHAH +1EE79..1EE7C ; Arabic # Lo [4] ARABIC MATHEMATICAL STRETCHED DAD..ARABIC MATHEMATICAL STRETCHED DOTLESS BEH +1EE7E ; Arabic # Lo ARABIC MATHEMATICAL STRETCHED DOTLESS FEH +1EE80..1EE89 ; Arabic # Lo [10] ARABIC MATHEMATICAL LOOPED ALEF..ARABIC MATHEMATICAL LOOPED YEH +1EE8B..1EE9B ; Arabic # Lo [17] ARABIC MATHEMATICAL LOOPED LAM..ARABIC MATHEMATICAL LOOPED GHAIN +1EEA1..1EEA3 ; Arabic # Lo [3] ARABIC MATHEMATICAL DOUBLE-STRUCK BEH..ARABIC MATHEMATICAL DOUBLE-STRUCK DAL +1EEA5..1EEA9 ; Arabic # Lo [5] ARABIC MATHEMATICAL DOUBLE-STRUCK WAW..ARABIC MATHEMATICAL DOUBLE-STRUCK YEH +1EEAB..1EEBB ; Arabic # Lo [17] ARABIC 
MATHEMATICAL DOUBLE-STRUCK LAM..ARABIC MATHEMATICAL DOUBLE-STRUCK GHAIN +1EEF0..1EEF1 ; Arabic # Sm [2] ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL..ARABIC MATHEMATICAL OPERATOR HAH WITH DAL + +# Total code points: 1365 + +# ================================================ + +0700..070D ; Syriac # Po [14] SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS +070F ; Syriac # Cf SYRIAC ABBREVIATION MARK +0710 ; Syriac # Lo SYRIAC LETTER ALAPH +0711 ; Syriac # Mn SYRIAC LETTER SUPERSCRIPT ALAPH +0712..072F ; Syriac # Lo [30] SYRIAC LETTER BETH..SYRIAC LETTER PERSIAN DHALATH +0730..074A ; Syriac # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH +074D..074F ; Syriac # Lo [3] SYRIAC LETTER SOGDIAN ZHAIN..SYRIAC LETTER SOGDIAN FE +0860..086A ; Syriac # Lo [11] SYRIAC LETTER MALAYALAM NGA..SYRIAC LETTER MALAYALAM SSA + +# Total code points: 88 + +# ================================================ + +0780..07A5 ; Thaana # Lo [38] THAANA LETTER HAA..THAANA LETTER WAAVU +07A6..07B0 ; Thaana # Mn [11] THAANA ABAFILI..THAANA SUKUN +07B1 ; Thaana # Lo THAANA LETTER NAA + +# Total code points: 50 + +# ================================================ + +0900..0902 ; Devanagari # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA +0903 ; Devanagari # Mc DEVANAGARI SIGN VISARGA +0904..0939 ; Devanagari # Lo [54] DEVANAGARI LETTER SHORT A..DEVANAGARI LETTER HA +093A ; Devanagari # Mn DEVANAGARI VOWEL SIGN OE +093B ; Devanagari # Mc DEVANAGARI VOWEL SIGN OOE +093C ; Devanagari # Mn DEVANAGARI SIGN NUKTA +093D ; Devanagari # Lo DEVANAGARI SIGN AVAGRAHA +093E..0940 ; Devanagari # Mc [3] DEVANAGARI VOWEL SIGN AA..DEVANAGARI VOWEL SIGN II +0941..0948 ; Devanagari # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI +0949..094C ; Devanagari # Mc [4] DEVANAGARI VOWEL SIGN CANDRA O..DEVANAGARI VOWEL SIGN AU +094D ; Devanagari # Mn DEVANAGARI SIGN VIRAMA +094E..094F ; Devanagari # Mc [2] DEVANAGARI VOWEL SIGN PRISHTHAMATRA E..DEVANAGARI VOWEL SIGN AW +0950 ; 
Devanagari # Lo DEVANAGARI OM +0955..0957 ; Devanagari # Mn [3] DEVANAGARI VOWEL SIGN CANDRA LONG E..DEVANAGARI VOWEL SIGN UUE +0958..0961 ; Devanagari # Lo [10] DEVANAGARI LETTER QA..DEVANAGARI LETTER VOCALIC LL +0962..0963 ; Devanagari # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL +0966..096F ; Devanagari # Nd [10] DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE +0970 ; Devanagari # Po DEVANAGARI ABBREVIATION SIGN +0971 ; Devanagari # Lm DEVANAGARI SIGN HIGH SPACING DOT +0972..097F ; Devanagari # Lo [14] DEVANAGARI LETTER CANDRA A..DEVANAGARI LETTER BBA +A8E0..A8F1 ; Devanagari # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA +A8F2..A8F7 ; Devanagari # Lo [6] DEVANAGARI SIGN SPACING CANDRABINDU..DEVANAGARI SIGN CANDRABINDU AVAGRAHA +A8F8..A8FA ; Devanagari # Po [3] DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET +A8FB ; Devanagari # Lo DEVANAGARI HEADSTROKE +A8FC ; Devanagari # Po DEVANAGARI SIGN SIDDHAM +A8FD..A8FE ; Devanagari # Lo [2] DEVANAGARI JAIN OM..DEVANAGARI LETTER AY +A8FF ; Devanagari # Mn DEVANAGARI VOWEL SIGN AY + +# Total code points: 154 + +# ================================================ + +0980 ; Bengali # Lo BENGALI ANJI +0981 ; Bengali # Mn BENGALI SIGN CANDRABINDU +0982..0983 ; Bengali # Mc [2] BENGALI SIGN ANUSVARA..BENGALI SIGN VISARGA +0985..098C ; Bengali # Lo [8] BENGALI LETTER A..BENGALI LETTER VOCALIC L +098F..0990 ; Bengali # Lo [2] BENGALI LETTER E..BENGALI LETTER AI +0993..09A8 ; Bengali # Lo [22] BENGALI LETTER O..BENGALI LETTER NA +09AA..09B0 ; Bengali # Lo [7] BENGALI LETTER PA..BENGALI LETTER RA +09B2 ; Bengali # Lo BENGALI LETTER LA +09B6..09B9 ; Bengali # Lo [4] BENGALI LETTER SHA..BENGALI LETTER HA +09BC ; Bengali # Mn BENGALI SIGN NUKTA +09BD ; Bengali # Lo BENGALI SIGN AVAGRAHA +09BE..09C0 ; Bengali # Mc [3] BENGALI VOWEL SIGN AA..BENGALI VOWEL SIGN II +09C1..09C4 ; Bengali # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR +09C7..09C8 ; Bengali # Mc [2] BENGALI 
VOWEL SIGN E..BENGALI VOWEL SIGN AI +09CB..09CC ; Bengali # Mc [2] BENGALI VOWEL SIGN O..BENGALI VOWEL SIGN AU +09CD ; Bengali # Mn BENGALI SIGN VIRAMA +09CE ; Bengali # Lo BENGALI LETTER KHANDA TA +09D7 ; Bengali # Mc BENGALI AU LENGTH MARK +09DC..09DD ; Bengali # Lo [2] BENGALI LETTER RRA..BENGALI LETTER RHA +09DF..09E1 ; Bengali # Lo [3] BENGALI LETTER YYA..BENGALI LETTER VOCALIC LL +09E2..09E3 ; Bengali # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL +09E6..09EF ; Bengali # Nd [10] BENGALI DIGIT ZERO..BENGALI DIGIT NINE +09F0..09F1 ; Bengali # Lo [2] BENGALI LETTER RA WITH MIDDLE DIAGONAL..BENGALI LETTER RA WITH LOWER DIAGONAL +09F2..09F3 ; Bengali # Sc [2] BENGALI RUPEE MARK..BENGALI RUPEE SIGN +09F4..09F9 ; Bengali # No [6] BENGALI CURRENCY NUMERATOR ONE..BENGALI CURRENCY DENOMINATOR SIXTEEN +09FA ; Bengali # So BENGALI ISSHAR +09FB ; Bengali # Sc BENGALI GANDA MARK +09FC ; Bengali # Lo BENGALI LETTER VEDIC ANUSVARA +09FD ; Bengali # Po BENGALI ABBREVIATION SIGN +09FE ; Bengali # Mn BENGALI SANDHI MARK + +# Total code points: 96 + +# ================================================ + +0A01..0A02 ; Gurmukhi # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI +0A03 ; Gurmukhi # Mc GURMUKHI SIGN VISARGA +0A05..0A0A ; Gurmukhi # Lo [6] GURMUKHI LETTER A..GURMUKHI LETTER UU +0A0F..0A10 ; Gurmukhi # Lo [2] GURMUKHI LETTER EE..GURMUKHI LETTER AI +0A13..0A28 ; Gurmukhi # Lo [22] GURMUKHI LETTER OO..GURMUKHI LETTER NA +0A2A..0A30 ; Gurmukhi # Lo [7] GURMUKHI LETTER PA..GURMUKHI LETTER RA +0A32..0A33 ; Gurmukhi # Lo [2] GURMUKHI LETTER LA..GURMUKHI LETTER LLA +0A35..0A36 ; Gurmukhi # Lo [2] GURMUKHI LETTER VA..GURMUKHI LETTER SHA +0A38..0A39 ; Gurmukhi # Lo [2] GURMUKHI LETTER SA..GURMUKHI LETTER HA +0A3C ; Gurmukhi # Mn GURMUKHI SIGN NUKTA +0A3E..0A40 ; Gurmukhi # Mc [3] GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN II +0A41..0A42 ; Gurmukhi # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU +0A47..0A48 ; Gurmukhi # Mn [2] GURMUKHI VOWEL SIGN 
EE..GURMUKHI VOWEL SIGN AI +0A4B..0A4D ; Gurmukhi # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA +0A51 ; Gurmukhi # Mn GURMUKHI SIGN UDAAT +0A59..0A5C ; Gurmukhi # Lo [4] GURMUKHI LETTER KHHA..GURMUKHI LETTER RRA +0A5E ; Gurmukhi # Lo GURMUKHI LETTER FA +0A66..0A6F ; Gurmukhi # Nd [10] GURMUKHI DIGIT ZERO..GURMUKHI DIGIT NINE +0A70..0A71 ; Gurmukhi # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK +0A72..0A74 ; Gurmukhi # Lo [3] GURMUKHI IRI..GURMUKHI EK ONKAR +0A75 ; Gurmukhi # Mn GURMUKHI SIGN YAKASH +0A76 ; Gurmukhi # Po GURMUKHI ABBREVIATION SIGN + +# Total code points: 80 + +# ================================================ + +0A81..0A82 ; Gujarati # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA +0A83 ; Gujarati # Mc GUJARATI SIGN VISARGA +0A85..0A8D ; Gujarati # Lo [9] GUJARATI LETTER A..GUJARATI VOWEL CANDRA E +0A8F..0A91 ; Gujarati # Lo [3] GUJARATI LETTER E..GUJARATI VOWEL CANDRA O +0A93..0AA8 ; Gujarati # Lo [22] GUJARATI LETTER O..GUJARATI LETTER NA +0AAA..0AB0 ; Gujarati # Lo [7] GUJARATI LETTER PA..GUJARATI LETTER RA +0AB2..0AB3 ; Gujarati # Lo [2] GUJARATI LETTER LA..GUJARATI LETTER LLA +0AB5..0AB9 ; Gujarati # Lo [5] GUJARATI LETTER VA..GUJARATI LETTER HA +0ABC ; Gujarati # Mn GUJARATI SIGN NUKTA +0ABD ; Gujarati # Lo GUJARATI SIGN AVAGRAHA +0ABE..0AC0 ; Gujarati # Mc [3] GUJARATI VOWEL SIGN AA..GUJARATI VOWEL SIGN II +0AC1..0AC5 ; Gujarati # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E +0AC7..0AC8 ; Gujarati # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI +0AC9 ; Gujarati # Mc GUJARATI VOWEL SIGN CANDRA O +0ACB..0ACC ; Gujarati # Mc [2] GUJARATI VOWEL SIGN O..GUJARATI VOWEL SIGN AU +0ACD ; Gujarati # Mn GUJARATI SIGN VIRAMA +0AD0 ; Gujarati # Lo GUJARATI OM +0AE0..0AE1 ; Gujarati # Lo [2] GUJARATI LETTER VOCALIC RR..GUJARATI LETTER VOCALIC LL +0AE2..0AE3 ; Gujarati # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL +0AE6..0AEF ; Gujarati # Nd [10] GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE +0AF0 ; 
Gujarati # Po GUJARATI ABBREVIATION SIGN +0AF1 ; Gujarati # Sc GUJARATI RUPEE SIGN +0AF9 ; Gujarati # Lo GUJARATI LETTER ZHA +0AFA..0AFF ; Gujarati # Mn [6] GUJARATI SIGN SUKUN..GUJARATI SIGN TWO-CIRCLE NUKTA ABOVE + +# Total code points: 91 + +# ================================================ + +0B01 ; Oriya # Mn ORIYA SIGN CANDRABINDU +0B02..0B03 ; Oriya # Mc [2] ORIYA SIGN ANUSVARA..ORIYA SIGN VISARGA +0B05..0B0C ; Oriya # Lo [8] ORIYA LETTER A..ORIYA LETTER VOCALIC L +0B0F..0B10 ; Oriya # Lo [2] ORIYA LETTER E..ORIYA LETTER AI +0B13..0B28 ; Oriya # Lo [22] ORIYA LETTER O..ORIYA LETTER NA +0B2A..0B30 ; Oriya # Lo [7] ORIYA LETTER PA..ORIYA LETTER RA +0B32..0B33 ; Oriya # Lo [2] ORIYA LETTER LA..ORIYA LETTER LLA +0B35..0B39 ; Oriya # Lo [5] ORIYA LETTER VA..ORIYA LETTER HA +0B3C ; Oriya # Mn ORIYA SIGN NUKTA +0B3D ; Oriya # Lo ORIYA SIGN AVAGRAHA +0B3E ; Oriya # Mc ORIYA VOWEL SIGN AA +0B3F ; Oriya # Mn ORIYA VOWEL SIGN I +0B40 ; Oriya # Mc ORIYA VOWEL SIGN II +0B41..0B44 ; Oriya # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR +0B47..0B48 ; Oriya # Mc [2] ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI +0B4B..0B4C ; Oriya # Mc [2] ORIYA VOWEL SIGN O..ORIYA VOWEL SIGN AU +0B4D ; Oriya # Mn ORIYA SIGN VIRAMA +0B55..0B56 ; Oriya # Mn [2] ORIYA SIGN OVERLINE..ORIYA AI LENGTH MARK +0B57 ; Oriya # Mc ORIYA AU LENGTH MARK +0B5C..0B5D ; Oriya # Lo [2] ORIYA LETTER RRA..ORIYA LETTER RHA +0B5F..0B61 ; Oriya # Lo [3] ORIYA LETTER YYA..ORIYA LETTER VOCALIC LL +0B62..0B63 ; Oriya # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL +0B66..0B6F ; Oriya # Nd [10] ORIYA DIGIT ZERO..ORIYA DIGIT NINE +0B70 ; Oriya # So ORIYA ISSHAR +0B71 ; Oriya # Lo ORIYA LETTER WA +0B72..0B77 ; Oriya # No [6] ORIYA FRACTION ONE QUARTER..ORIYA FRACTION THREE SIXTEENTHS + +# Total code points: 91 + +# ================================================ + +0B82 ; Tamil # Mn TAMIL SIGN ANUSVARA +0B83 ; Tamil # Lo TAMIL SIGN VISARGA +0B85..0B8A ; Tamil # Lo [6] TAMIL LETTER A..TAMIL LETTER 
UU +0B8E..0B90 ; Tamil # Lo [3] TAMIL LETTER E..TAMIL LETTER AI +0B92..0B95 ; Tamil # Lo [4] TAMIL LETTER O..TAMIL LETTER KA +0B99..0B9A ; Tamil # Lo [2] TAMIL LETTER NGA..TAMIL LETTER CA +0B9C ; Tamil # Lo TAMIL LETTER JA +0B9E..0B9F ; Tamil # Lo [2] TAMIL LETTER NYA..TAMIL LETTER TTA +0BA3..0BA4 ; Tamil # Lo [2] TAMIL LETTER NNA..TAMIL LETTER TA +0BA8..0BAA ; Tamil # Lo [3] TAMIL LETTER NA..TAMIL LETTER PA +0BAE..0BB9 ; Tamil # Lo [12] TAMIL LETTER MA..TAMIL LETTER HA +0BBE..0BBF ; Tamil # Mc [2] TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN I +0BC0 ; Tamil # Mn TAMIL VOWEL SIGN II +0BC1..0BC2 ; Tamil # Mc [2] TAMIL VOWEL SIGN U..TAMIL VOWEL SIGN UU +0BC6..0BC8 ; Tamil # Mc [3] TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI +0BCA..0BCC ; Tamil # Mc [3] TAMIL VOWEL SIGN O..TAMIL VOWEL SIGN AU +0BCD ; Tamil # Mn TAMIL SIGN VIRAMA +0BD0 ; Tamil # Lo TAMIL OM +0BD7 ; Tamil # Mc TAMIL AU LENGTH MARK +0BE6..0BEF ; Tamil # Nd [10] TAMIL DIGIT ZERO..TAMIL DIGIT NINE +0BF0..0BF2 ; Tamil # No [3] TAMIL NUMBER TEN..TAMIL NUMBER ONE THOUSAND +0BF3..0BF8 ; Tamil # So [6] TAMIL DAY SIGN..TAMIL AS ABOVE SIGN +0BF9 ; Tamil # Sc TAMIL RUPEE SIGN +0BFA ; Tamil # So TAMIL NUMBER SIGN +11FC0..11FD4 ; Tamil # No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH +11FD5..11FDC ; Tamil # So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI +11FDD..11FE0 ; Tamil # Sc [4] TAMIL SIGN KAACU..TAMIL SIGN VARAAKAN +11FE1..11FF1 ; Tamil # So [17] TAMIL SIGN PAARAM..TAMIL SIGN VAKAIYARAA +11FFF ; Tamil # Po TAMIL PUNCTUATION END OF TEXT + +# Total code points: 123 + +# ================================================ + +0C00 ; Telugu # Mn TELUGU SIGN COMBINING CANDRABINDU ABOVE +0C01..0C03 ; Telugu # Mc [3] TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA +0C04 ; Telugu # Mn TELUGU SIGN COMBINING ANUSVARA ABOVE +0C05..0C0C ; Telugu # Lo [8] TELUGU LETTER A..TELUGU LETTER VOCALIC L +0C0E..0C10 ; Telugu # Lo [3] TELUGU LETTER E..TELUGU LETTER AI +0C12..0C28 ; Telugu # Lo [23] TELUGU 
LETTER O..TELUGU LETTER NA +0C2A..0C39 ; Telugu # Lo [16] TELUGU LETTER PA..TELUGU LETTER HA +0C3C ; Telugu # Mn TELUGU SIGN NUKTA +0C3D ; Telugu # Lo TELUGU SIGN AVAGRAHA +0C3E..0C40 ; Telugu # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II +0C41..0C44 ; Telugu # Mc [4] TELUGU VOWEL SIGN U..TELUGU VOWEL SIGN VOCALIC RR +0C46..0C48 ; Telugu # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI +0C4A..0C4D ; Telugu # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA +0C55..0C56 ; Telugu # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK +0C58..0C5A ; Telugu # Lo [3] TELUGU LETTER TSA..TELUGU LETTER RRRA +0C5D ; Telugu # Lo TELUGU LETTER NAKAARA POLLU +0C60..0C61 ; Telugu # Lo [2] TELUGU LETTER VOCALIC RR..TELUGU LETTER VOCALIC LL +0C62..0C63 ; Telugu # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL +0C66..0C6F ; Telugu # Nd [10] TELUGU DIGIT ZERO..TELUGU DIGIT NINE +0C77 ; Telugu # Po TELUGU SIGN SIDDHAM +0C78..0C7E ; Telugu # No [7] TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU FRACTION DIGIT THREE FOR EVEN POWERS OF FOUR +0C7F ; Telugu # So TELUGU SIGN TUUMU + +# Total code points: 100 + +# ================================================ + +0C80 ; Kannada # Lo KANNADA SIGN SPACING CANDRABINDU +0C81 ; Kannada # Mn KANNADA SIGN CANDRABINDU +0C82..0C83 ; Kannada # Mc [2] KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA +0C84 ; Kannada # Po KANNADA SIGN SIDDHAM +0C85..0C8C ; Kannada # Lo [8] KANNADA LETTER A..KANNADA LETTER VOCALIC L +0C8E..0C90 ; Kannada # Lo [3] KANNADA LETTER E..KANNADA LETTER AI +0C92..0CA8 ; Kannada # Lo [23] KANNADA LETTER O..KANNADA LETTER NA +0CAA..0CB3 ; Kannada # Lo [10] KANNADA LETTER PA..KANNADA LETTER LLA +0CB5..0CB9 ; Kannada # Lo [5] KANNADA LETTER VA..KANNADA LETTER HA +0CBC ; Kannada # Mn KANNADA SIGN NUKTA +0CBD ; Kannada # Lo KANNADA SIGN AVAGRAHA +0CBE ; Kannada # Mc KANNADA VOWEL SIGN AA +0CBF ; Kannada # Mn KANNADA VOWEL SIGN I +0CC0..0CC4 ; Kannada # Mc [5] KANNADA VOWEL SIGN II..KANNADA VOWEL SIGN VOCALIC 
RR +0CC6 ; Kannada # Mn KANNADA VOWEL SIGN E +0CC7..0CC8 ; Kannada # Mc [2] KANNADA VOWEL SIGN EE..KANNADA VOWEL SIGN AI +0CCA..0CCB ; Kannada # Mc [2] KANNADA VOWEL SIGN O..KANNADA VOWEL SIGN OO +0CCC..0CCD ; Kannada # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA +0CD5..0CD6 ; Kannada # Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK +0CDD..0CDE ; Kannada # Lo [2] KANNADA LETTER NAKAARA POLLU..KANNADA LETTER FA +0CE0..0CE1 ; Kannada # Lo [2] KANNADA LETTER VOCALIC RR..KANNADA LETTER VOCALIC LL +0CE2..0CE3 ; Kannada # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL +0CE6..0CEF ; Kannada # Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE +0CF1..0CF2 ; Kannada # Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA + +# Total code points: 90 + +# ================================================ + +0D00..0D01 ; Malayalam # Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU +0D02..0D03 ; Malayalam # Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA +0D04..0D0C ; Malayalam # Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L +0D0E..0D10 ; Malayalam # Lo [3] MALAYALAM LETTER E..MALAYALAM LETTER AI +0D12..0D3A ; Malayalam # Lo [41] MALAYALAM LETTER O..MALAYALAM LETTER TTTA +0D3B..0D3C ; Malayalam # Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA +0D3D ; Malayalam # Lo MALAYALAM SIGN AVAGRAHA +0D3E..0D40 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN AA..MALAYALAM VOWEL SIGN II +0D41..0D44 ; Malayalam # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR +0D46..0D48 ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI +0D4A..0D4C ; Malayalam # Mc [3] MALAYALAM VOWEL SIGN O..MALAYALAM VOWEL SIGN AU +0D4D ; Malayalam # Mn MALAYALAM SIGN VIRAMA +0D4E ; Malayalam # Lo MALAYALAM LETTER DOT REPH +0D4F ; Malayalam # So MALAYALAM SIGN PARA +0D54..0D56 ; Malayalam # Lo [3] MALAYALAM LETTER CHILLU M..MALAYALAM LETTER CHILLU LLL +0D57 ; Malayalam # Mc MALAYALAM AU 
LENGTH MARK +0D58..0D5E ; Malayalam # No [7] MALAYALAM FRACTION ONE ONE-HUNDRED-AND-SIXTIETH..MALAYALAM FRACTION ONE FIFTH +0D5F..0D61 ; Malayalam # Lo [3] MALAYALAM LETTER ARCHAIC II..MALAYALAM LETTER VOCALIC LL +0D62..0D63 ; Malayalam # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL +0D66..0D6F ; Malayalam # Nd [10] MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE +0D70..0D78 ; Malayalam # No [9] MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE SIXTEENTHS +0D79 ; Malayalam # So MALAYALAM DATE MARK +0D7A..0D7F ; Malayalam # Lo [6] MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K + +# Total code points: 118 + +# ================================================ + +0D81 ; Sinhala # Mn SINHALA SIGN CANDRABINDU +0D82..0D83 ; Sinhala # Mc [2] SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA +0D85..0D96 ; Sinhala # Lo [18] SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA +0D9A..0DB1 ; Sinhala # Lo [24] SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA +0DB3..0DBB ; Sinhala # Lo [9] SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA +0DBD ; Sinhala # Lo SINHALA LETTER DANTAJA LAYANNA +0DC0..0DC6 ; Sinhala # Lo [7] SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA +0DCA ; Sinhala # Mn SINHALA SIGN AL-LAKUNA +0DCF..0DD1 ; Sinhala # Mc [3] SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN DIGA AEDA-PILLA +0DD2..0DD4 ; Sinhala # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA +0DD6 ; Sinhala # Mn SINHALA VOWEL SIGN DIGA PAA-PILLA +0DD8..0DDF ; Sinhala # Mc [8] SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA +0DE6..0DEF ; Sinhala # Nd [10] SINHALA LITH DIGIT ZERO..SINHALA LITH DIGIT NINE +0DF2..0DF3 ; Sinhala # Mc [2] SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA +0DF4 ; Sinhala # Po SINHALA PUNCTUATION KUNDDALIYA +111E1..111F4 ; Sinhala # No [20] SINHALA ARCHAIC DIGIT ONE..SINHALA ARCHAIC NUMBER ONE THOUSAND + +# Total code points: 111 + +# 
================================================ + +0E01..0E30 ; Thai # Lo [48] THAI CHARACTER KO KAI..THAI CHARACTER SARA A +0E31 ; Thai # Mn THAI CHARACTER MAI HAN-AKAT +0E32..0E33 ; Thai # Lo [2] THAI CHARACTER SARA AA..THAI CHARACTER SARA AM +0E34..0E3A ; Thai # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU +0E40..0E45 ; Thai # Lo [6] THAI CHARACTER SARA E..THAI CHARACTER LAKKHANGYAO +0E46 ; Thai # Lm THAI CHARACTER MAIYAMOK +0E47..0E4E ; Thai # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN +0E4F ; Thai # Po THAI CHARACTER FONGMAN +0E50..0E59 ; Thai # Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE +0E5A..0E5B ; Thai # Po [2] THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT + +# Total code points: 86 + +# ================================================ + +0E81..0E82 ; Lao # Lo [2] LAO LETTER KO..LAO LETTER KHO SUNG +0E84 ; Lao # Lo LAO LETTER KHO TAM +0E86..0E8A ; Lao # Lo [5] LAO LETTER PALI GHA..LAO LETTER SO TAM +0E8C..0EA3 ; Lao # Lo [24] LAO LETTER PALI JHA..LAO LETTER LO LING +0EA5 ; Lao # Lo LAO LETTER LO LOOT +0EA7..0EB0 ; Lao # Lo [10] LAO LETTER WO..LAO VOWEL SIGN A +0EB1 ; Lao # Mn LAO VOWEL SIGN MAI KAN +0EB2..0EB3 ; Lao # Lo [2] LAO VOWEL SIGN AA..LAO VOWEL SIGN AM +0EB4..0EBC ; Lao # Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO +0EBD ; Lao # Lo LAO SEMIVOWEL SIGN NYO +0EC0..0EC4 ; Lao # Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI +0EC6 ; Lao # Lm LAO KO LA +0EC8..0ECD ; Lao # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA +0ED0..0ED9 ; Lao # Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE +0EDC..0EDF ; Lao # Lo [4] LAO HO NO..LAO LETTER KHMU NYO + +# Total code points: 82 + +# ================================================ + +0F00 ; Tibetan # Lo TIBETAN SYLLABLE OM +0F01..0F03 ; Tibetan # So [3] TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK GTER YIG MGO -UM GTER TSHEG MA +0F04..0F12 ; Tibetan # Po [15] TIBETAN MARK INITIAL YIG MGO MDUN MA..TIBETAN MARK RGYA GRAM SHAD +0F13 ; Tibetan # So TIBETAN MARK CARET -DZUD RTAGS ME LONG CAN +0F14 ; 
Tibetan # Po TIBETAN MARK GTER TSHEG +0F15..0F17 ; Tibetan # So [3] TIBETAN LOGOTYPE SIGN CHAD RTAGS..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS +0F18..0F19 ; Tibetan # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS +0F1A..0F1F ; Tibetan # So [6] TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG +0F20..0F29 ; Tibetan # Nd [10] TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE +0F2A..0F33 ; Tibetan # No [10] TIBETAN DIGIT HALF ONE..TIBETAN DIGIT HALF ZERO +0F34 ; Tibetan # So TIBETAN MARK BSDUS RTAGS +0F35 ; Tibetan # Mn TIBETAN MARK NGAS BZUNG NYI ZLA +0F36 ; Tibetan # So TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN +0F37 ; Tibetan # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS +0F38 ; Tibetan # So TIBETAN MARK CHE MGO +0F39 ; Tibetan # Mn TIBETAN MARK TSA -PHRU +0F3A ; Tibetan # Ps TIBETAN MARK GUG RTAGS GYON +0F3B ; Tibetan # Pe TIBETAN MARK GUG RTAGS GYAS +0F3C ; Tibetan # Ps TIBETAN MARK ANG KHANG GYON +0F3D ; Tibetan # Pe TIBETAN MARK ANG KHANG GYAS +0F3E..0F3F ; Tibetan # Mc [2] TIBETAN SIGN YAR TSHES..TIBETAN SIGN MAR TSHES +0F40..0F47 ; Tibetan # Lo [8] TIBETAN LETTER KA..TIBETAN LETTER JA +0F49..0F6C ; Tibetan # Lo [36] TIBETAN LETTER NYA..TIBETAN LETTER RRA +0F71..0F7E ; Tibetan # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO +0F7F ; Tibetan # Mc TIBETAN SIGN RNAM BCAD +0F80..0F84 ; Tibetan # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA +0F85 ; Tibetan # Po TIBETAN MARK PALUTA +0F86..0F87 ; Tibetan # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS +0F88..0F8C ; Tibetan # Lo [5] TIBETAN SIGN LCE TSA CAN..TIBETAN SIGN INVERTED MCHU CAN +0F8D..0F97 ; Tibetan # Mn [11] TIBETAN SUBJOINED SIGN LCE TSA CAN..TIBETAN SUBJOINED LETTER JA +0F99..0FBC ; Tibetan # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA +0FBE..0FC5 ; Tibetan # So [8] TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE +0FC6 ; Tibetan # Mn TIBETAN SYMBOL PADMA GDAN +0FC7..0FCC ; Tibetan # So [6] TIBETAN SYMBOL 
RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL +0FCE..0FCF ; Tibetan # So [2] TIBETAN SIGN RDEL NAG RDEL DKAR..TIBETAN SIGN RDEL NAG GSUM +0FD0..0FD4 ; Tibetan # Po [5] TIBETAN MARK BSKA- SHOG GI MGO RGYAN..TIBETAN MARK CLOSING BRDA RNYING YIG MGO SGAB MA +0FD9..0FDA ; Tibetan # Po [2] TIBETAN MARK LEADING MCHAN RTAGS..TIBETAN MARK TRAILING MCHAN RTAGS + +# Total code points: 207 + +# ================================================ + +1000..102A ; Myanmar # Lo [43] MYANMAR LETTER KA..MYANMAR LETTER AU +102B..102C ; Myanmar # Mc [2] MYANMAR VOWEL SIGN TALL AA..MYANMAR VOWEL SIGN AA +102D..1030 ; Myanmar # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU +1031 ; Myanmar # Mc MYANMAR VOWEL SIGN E +1032..1037 ; Myanmar # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW +1038 ; Myanmar # Mc MYANMAR SIGN VISARGA +1039..103A ; Myanmar # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT +103B..103C ; Myanmar # Mc [2] MYANMAR CONSONANT SIGN MEDIAL YA..MYANMAR CONSONANT SIGN MEDIAL RA +103D..103E ; Myanmar # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA +103F ; Myanmar # Lo MYANMAR LETTER GREAT SA +1040..1049 ; Myanmar # Nd [10] MYANMAR DIGIT ZERO..MYANMAR DIGIT NINE +104A..104F ; Myanmar # Po [6] MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE +1050..1055 ; Myanmar # Lo [6] MYANMAR LETTER SHA..MYANMAR LETTER VOCALIC LL +1056..1057 ; Myanmar # Mc [2] MYANMAR VOWEL SIGN VOCALIC R..MYANMAR VOWEL SIGN VOCALIC RR +1058..1059 ; Myanmar # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL +105A..105D ; Myanmar # Lo [4] MYANMAR LETTER MON NGA..MYANMAR LETTER MON BBE +105E..1060 ; Myanmar # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA +1061 ; Myanmar # Lo MYANMAR LETTER SGAW KAREN SHA +1062..1064 ; Myanmar # Mc [3] MYANMAR VOWEL SIGN SGAW KAREN EU..MYANMAR TONE MARK SGAW KAREN KE PHO +1065..1066 ; Myanmar # Lo [2] MYANMAR LETTER WESTERN PWO KAREN THA..MYANMAR LETTER WESTERN PWO KAREN PWA 
+1067..106D ; Myanmar # Mc [7] MYANMAR VOWEL SIGN WESTERN PWO KAREN EU..MYANMAR SIGN WESTERN PWO KAREN TONE-5 +106E..1070 ; Myanmar # Lo [3] MYANMAR LETTER EASTERN PWO KAREN NNA..MYANMAR LETTER EASTERN PWO KAREN GHWA +1071..1074 ; Myanmar # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE +1075..1081 ; Myanmar # Lo [13] MYANMAR LETTER SHAN KA..MYANMAR LETTER SHAN HA +1082 ; Myanmar # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA +1083..1084 ; Myanmar # Mc [2] MYANMAR VOWEL SIGN SHAN AA..MYANMAR VOWEL SIGN SHAN E +1085..1086 ; Myanmar # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y +1087..108C ; Myanmar # Mc [6] MYANMAR SIGN SHAN TONE-2..MYANMAR SIGN SHAN COUNCIL TONE-3 +108D ; Myanmar # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE +108E ; Myanmar # Lo MYANMAR LETTER RUMAI PALAUNG FA +108F ; Myanmar # Mc MYANMAR SIGN RUMAI PALAUNG TONE-5 +1090..1099 ; Myanmar # Nd [10] MYANMAR SHAN DIGIT ZERO..MYANMAR SHAN DIGIT NINE +109A..109C ; Myanmar # Mc [3] MYANMAR SIGN KHAMTI TONE-1..MYANMAR VOWEL SIGN AITON A +109D ; Myanmar # Mn MYANMAR VOWEL SIGN AITON AI +109E..109F ; Myanmar # So [2] MYANMAR SYMBOL SHAN ONE..MYANMAR SYMBOL SHAN EXCLAMATION +A9E0..A9E4 ; Myanmar # Lo [5] MYANMAR LETTER SHAN GHA..MYANMAR LETTER SHAN BHA +A9E5 ; Myanmar # Mn MYANMAR SIGN SHAN SAW +A9E6 ; Myanmar # Lm MYANMAR MODIFIER LETTER SHAN REDUPLICATION +A9E7..A9EF ; Myanmar # Lo [9] MYANMAR LETTER TAI LAING NYA..MYANMAR LETTER TAI LAING NNA +A9F0..A9F9 ; Myanmar # Nd [10] MYANMAR TAI LAING DIGIT ZERO..MYANMAR TAI LAING DIGIT NINE +A9FA..A9FE ; Myanmar # Lo [5] MYANMAR LETTER TAI LAING LLA..MYANMAR LETTER TAI LAING BHA +AA60..AA6F ; Myanmar # Lo [16] MYANMAR LETTER KHAMTI GA..MYANMAR LETTER KHAMTI FA +AA70 ; Myanmar # Lm MYANMAR MODIFIER LETTER KHAMTI REDUPLICATION +AA71..AA76 ; Myanmar # Lo [6] MYANMAR LETTER KHAMTI XA..MYANMAR LOGOGRAM KHAMTI HM +AA77..AA79 ; Myanmar # So [3] MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO +AA7A ; Myanmar # Lo 
MYANMAR LETTER AITON RA +AA7B ; Myanmar # Mc MYANMAR SIGN PAO KAREN TONE +AA7C ; Myanmar # Mn MYANMAR SIGN TAI LAING TONE-2 +AA7D ; Myanmar # Mc MYANMAR SIGN TAI LAING TONE-5 +AA7E..AA7F ; Myanmar # Lo [2] MYANMAR LETTER SHWE PALAUNG CHA..MYANMAR LETTER SHWE PALAUNG SHA + +# Total code points: 223 + +# ================================================ + +10A0..10C5 ; Georgian # L& [38] GEORGIAN CAPITAL LETTER AN..GEORGIAN CAPITAL LETTER HOE +10C7 ; Georgian # L& GEORGIAN CAPITAL LETTER YN +10CD ; Georgian # L& GEORGIAN CAPITAL LETTER AEN +10D0..10FA ; Georgian # L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN +10FC ; Georgian # Lm MODIFIER LETTER GEORGIAN NAR +10FD..10FF ; Georgian # L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN +1C90..1CBA ; Georgian # L& [43] GEORGIAN MTAVRULI CAPITAL LETTER AN..GEORGIAN MTAVRULI CAPITAL LETTER AIN +1CBD..1CBF ; Georgian # L& [3] GEORGIAN MTAVRULI CAPITAL LETTER AEN..GEORGIAN MTAVRULI CAPITAL LETTER LABIAL SIGN +2D00..2D25 ; Georgian # L& [38] GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE +2D27 ; Georgian # L& GEORGIAN SMALL LETTER YN +2D2D ; Georgian # L& GEORGIAN SMALL LETTER AEN + +# Total code points: 173 + +# ================================================ + +1100..11FF ; Hangul # Lo [256] HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN +302E..302F ; Hangul # Mc [2] HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK +3131..318E ; Hangul # Lo [94] HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE +3200..321E ; Hangul # So [31] PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU +3260..327E ; Hangul # So [31] CIRCLED HANGUL KIYEOK..CIRCLED HANGUL IEUNG U +A960..A97C ; Hangul # Lo [29] HANGUL CHOSEONG TIKEUT-MIEUM..HANGUL CHOSEONG SSANGYEORINHIEUH +AC00..D7A3 ; Hangul # Lo [11172] HANGUL SYLLABLE GA..HANGUL SYLLABLE HIH +D7B0..D7C6 ; Hangul # Lo [23] HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E +D7CB..D7FB ; Hangul # Lo [49] HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH 
+FFA0..FFBE ; Hangul # Lo [31] HALFWIDTH HANGUL FILLER..HALFWIDTH HANGUL LETTER HIEUH +FFC2..FFC7 ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E +FFCA..FFCF ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE +FFD2..FFD7 ; Hangul # Lo [6] HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU +FFDA..FFDC ; Hangul # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I + +# Total code points: 11739 + +# ================================================ + +1200..1248 ; Ethiopic # Lo [73] ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA +124A..124D ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE +1250..1256 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO +1258 ; Ethiopic # Lo ETHIOPIC SYLLABLE QHWA +125A..125D ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE +1260..1288 ; Ethiopic # Lo [41] ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA +128A..128D ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE +1290..12B0 ; Ethiopic # Lo [33] ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA +12B2..12B5 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE +12B8..12BE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO +12C0 ; Ethiopic # Lo ETHIOPIC SYLLABLE KXWA +12C2..12C5 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE +12C8..12D6 ; Ethiopic # Lo [15] ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O +12D8..1310 ; Ethiopic # Lo [57] ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA +1312..1315 ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE +1318..135A ; Ethiopic # Lo [67] ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA +135D..135F ; Ethiopic # Mn [3] ETHIOPIC COMBINING GEMINATION AND VOWEL LENGTH MARK..ETHIOPIC COMBINING GEMINATION MARK +1360..1368 ; Ethiopic # Po [9] ETHIOPIC SECTION MARK..ETHIOPIC PARAGRAPH SEPARATOR +1369..137C ; Ethiopic # No [20] ETHIOPIC DIGIT ONE..ETHIOPIC NUMBER TEN THOUSAND +1380..138F ; 
Ethiopic # Lo [16] ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE +1390..1399 ; Ethiopic # So [10] ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT +2D80..2D96 ; Ethiopic # Lo [23] ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE +2DA0..2DA6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO +2DA8..2DAE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO +2DB0..2DB6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO +2DB8..2DBE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO +2DC0..2DC6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO +2DC8..2DCE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO +2DD0..2DD6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO +2DD8..2DDE ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO +AB01..AB06 ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE TTHU..ETHIOPIC SYLLABLE TTHO +AB09..AB0E ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE DDHU..ETHIOPIC SYLLABLE DDHO +AB11..AB16 ; Ethiopic # Lo [6] ETHIOPIC SYLLABLE DZU..ETHIOPIC SYLLABLE DZO +AB20..AB26 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE CCHHA..ETHIOPIC SYLLABLE CCHHO +AB28..AB2E ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE BBA..ETHIOPIC SYLLABLE BBO +1E7E0..1E7E6 ; Ethiopic # Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO +1E7E8..1E7EB ; Ethiopic # Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE +1E7ED..1E7EE ; Ethiopic # Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE +1E7F0..1E7FE ; Ethiopic # Lo [15] ETHIOPIC SYLLABLE GURAGE QWI..ETHIOPIC SYLLABLE GURAGE PWEE + +# Total code points: 523 + +# ================================================ + +13A0..13F5 ; Cherokee # L& [86] CHEROKEE LETTER A..CHEROKEE LETTER MV +13F8..13FD ; Cherokee # L& [6] CHEROKEE SMALL LETTER YE..CHEROKEE SMALL LETTER MV +AB70..ABBF ; Cherokee # L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA + +# Total code points: 172 + +# 
================================================ + +1400 ; Canadian_Aboriginal # Pd CANADIAN SYLLABICS HYPHEN +1401..166C ; Canadian_Aboriginal # Lo [620] CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA +166D ; Canadian_Aboriginal # So CANADIAN SYLLABICS CHI SIGN +166E ; Canadian_Aboriginal # Po CANADIAN SYLLABICS FULL STOP +166F..167F ; Canadian_Aboriginal # Lo [17] CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W +18B0..18F5 ; Canadian_Aboriginal # Lo [70] CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S +11AB0..11ABF ; Canadian_Aboriginal # Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA + +# Total code points: 726 + +# ================================================ + +1680 ; Ogham # Zs OGHAM SPACE MARK +1681..169A ; Ogham # Lo [26] OGHAM LETTER BEITH..OGHAM LETTER PEITH +169B ; Ogham # Ps OGHAM FEATHER MARK +169C ; Ogham # Pe OGHAM REVERSED FEATHER MARK + +# Total code points: 29 + +# ================================================ + +16A0..16EA ; Runic # Lo [75] RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X +16EE..16F0 ; Runic # Nl [3] RUNIC ARLAUG SYMBOL..RUNIC BELGTHOR SYMBOL +16F1..16F8 ; Runic # Lo [8] RUNIC LETTER K..RUNIC LETTER FRANKS CASKET AESC + +# Total code points: 86 + +# ================================================ + +1780..17B3 ; Khmer # Lo [52] KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU +17B4..17B5 ; Khmer # Mn [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA +17B6 ; Khmer # Mc KHMER VOWEL SIGN AA +17B7..17BD ; Khmer # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA +17BE..17C5 ; Khmer # Mc [8] KHMER VOWEL SIGN OE..KHMER VOWEL SIGN AU +17C6 ; Khmer # Mn KHMER SIGN NIKAHIT +17C7..17C8 ; Khmer # Mc [2] KHMER SIGN REAHMUK..KHMER SIGN YUUKALEAPINTU +17C9..17D3 ; Khmer # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT +17D4..17D6 ; Khmer # Po [3] KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH +17D7 ; Khmer # Lm KHMER SIGN LEK TOO +17D8..17DA ; Khmer # Po [3] KHMER SIGN BEYYAL..KHMER SIGN KOOMUUT 
+17DB ; Khmer # Sc KHMER CURRENCY SYMBOL RIEL +17DC ; Khmer # Lo KHMER SIGN AVAKRAHASANYA +17DD ; Khmer # Mn KHMER SIGN ATTHACAN +17E0..17E9 ; Khmer # Nd [10] KHMER DIGIT ZERO..KHMER DIGIT NINE +17F0..17F9 ; Khmer # No [10] KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON +19E0..19FF ; Khmer # So [32] KHMER SYMBOL PATHAMASAT..KHMER SYMBOL DAP-PRAM ROC + +# Total code points: 146 + +# ================================================ + +1800..1801 ; Mongolian # Po [2] MONGOLIAN BIRGA..MONGOLIAN ELLIPSIS +1804 ; Mongolian # Po MONGOLIAN COLON +1806 ; Mongolian # Pd MONGOLIAN TODO SOFT HYPHEN +1807..180A ; Mongolian # Po [4] MONGOLIAN SIBE SYLLABLE BOUNDARY MARKER..MONGOLIAN NIRUGU +180B..180D ; Mongolian # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE +180E ; Mongolian # Cf MONGOLIAN VOWEL SEPARATOR +180F ; Mongolian # Mn MONGOLIAN FREE VARIATION SELECTOR FOUR +1810..1819 ; Mongolian # Nd [10] MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE +1820..1842 ; Mongolian # Lo [35] MONGOLIAN LETTER A..MONGOLIAN LETTER CHI +1843 ; Mongolian # Lm MONGOLIAN LETTER TODO LONG VOWEL SIGN +1844..1878 ; Mongolian # Lo [53] MONGOLIAN LETTER TODO E..MONGOLIAN LETTER CHA WITH TWO DOTS +1880..1884 ; Mongolian # Lo [5] MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER ALI GALI INVERTED UBADAMA +1885..1886 ; Mongolian # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA +1887..18A8 ; Mongolian # Lo [34] MONGOLIAN LETTER ALI GALI A..MONGOLIAN LETTER MANCHU ALI GALI BHA +18A9 ; Mongolian # Mn MONGOLIAN LETTER ALI GALI DAGALGA +18AA ; Mongolian # Lo MONGOLIAN LETTER MANCHU ALI GALI LHA +11660..1166C ; Mongolian # Po [13] MONGOLIAN BIRGA WITH ORNAMENT..MONGOLIAN TURNED SWIRL BIRGA WITH DOUBLE ORNAMENT + +# Total code points: 168 + +# ================================================ + +3041..3096 ; Hiragana # Lo [86] HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE +309D..309E ; Hiragana # Lm [2] HIRAGANA 
ITERATION MARK..HIRAGANA VOICED ITERATION MARK +309F ; Hiragana # Lo HIRAGANA DIGRAPH YORI +1B001..1B11F ; Hiragana # Lo [287] HIRAGANA LETTER ARCHAIC YE..HIRAGANA LETTER ARCHAIC WU +1B150..1B152 ; Hiragana # Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO +1F200 ; Hiragana # So SQUARE HIRAGANA HOKA + +# Total code points: 380 + +# ================================================ + +30A1..30FA ; Katakana # Lo [90] KATAKANA LETTER SMALL A..KATAKANA LETTER VO +30FD..30FE ; Katakana # Lm [2] KATAKANA ITERATION MARK..KATAKANA VOICED ITERATION MARK +30FF ; Katakana # Lo KATAKANA DIGRAPH KOTO +31F0..31FF ; Katakana # Lo [16] KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO +32D0..32FE ; Katakana # So [47] CIRCLED KATAKANA A..CIRCLED KATAKANA WO +3300..3357 ; Katakana # So [88] SQUARE APAATO..SQUARE WATTO +FF66..FF6F ; Katakana # Lo [10] HALFWIDTH KATAKANA LETTER WO..HALFWIDTH KATAKANA LETTER SMALL TU +FF71..FF9D ; Katakana # Lo [45] HALFWIDTH KATAKANA LETTER A..HALFWIDTH KATAKANA LETTER N +1AFF0..1AFF3 ; Katakana # Lm [4] KATAKANA LETTER MINNAN TONE-2..KATAKANA LETTER MINNAN TONE-5 +1AFF5..1AFFB ; Katakana # Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 +1AFFD..1AFFE ; Katakana # Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 +1B000 ; Katakana # Lo KATAKANA LETTER ARCHAIC E +1B120..1B122 ; Katakana # Lo [3] KATAKANA LETTER ARCHAIC YI..KATAKANA LETTER ARCHAIC WU +1B164..1B167 ; Katakana # Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N + +# Total code points: 320 + +# ================================================ + +02EA..02EB ; Bopomofo # Sk [2] MODIFIER LETTER YIN DEPARTING TONE MARK..MODIFIER LETTER YANG DEPARTING TONE MARK +3105..312F ; Bopomofo # Lo [43] BOPOMOFO LETTER B..BOPOMOFO LETTER NN +31A0..31BF ; Bopomofo # Lo [32] BOPOMOFO LETTER BU..BOPOMOFO LETTER AH + +# Total code points: 77 + +# ================================================ + +2E80..2E99 ; Han # So [26] 
CJK RADICAL REPEAT..CJK RADICAL RAP +2E9B..2EF3 ; Han # So [89] CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE +2F00..2FD5 ; Han # So [214] KANGXI RADICAL ONE..KANGXI RADICAL FLUTE +3005 ; Han # Lm IDEOGRAPHIC ITERATION MARK +3007 ; Han # Nl IDEOGRAPHIC NUMBER ZERO +3021..3029 ; Han # Nl [9] HANGZHOU NUMERAL ONE..HANGZHOU NUMERAL NINE +3038..303A ; Han # Nl [3] HANGZHOU NUMERAL TEN..HANGZHOU NUMERAL THIRTY +303B ; Han # Lm VERTICAL IDEOGRAPHIC ITERATION MARK +3400..4DBF ; Han # Lo [6592] CJK UNIFIED IDEOGRAPH-3400..CJK UNIFIED IDEOGRAPH-4DBF +4E00..9FFF ; Han # Lo [20992] CJK UNIFIED IDEOGRAPH-4E00..CJK UNIFIED IDEOGRAPH-9FFF +F900..FA6D ; Han # Lo [366] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA6D +FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9 +16FE2 ; Han # Po OLD CHINESE HOOK MARK +16FE3 ; Han # Lm OLD CHINESE ITERATION MARK +16FF0..16FF1 ; Han # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY +20000..2A6DF ; Han # Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF +2A700..2B738 ; Han # Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 +2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Han # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 +2CEB0..2EBE0 ; Han # Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 +2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D +30000..3134A ; Han # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A + +# Total code points: 94215 + +# ================================================ + +A000..A014 ; Yi # Lo [21] YI SYLLABLE IT..YI SYLLABLE E +A015 ; Yi # Lm YI SYLLABLE WU +A016..A48C ; Yi # Lo [1143] YI SYLLABLE BIT..YI SYLLABLE YYR +A490..A4C6 ; Yi # So [55] YI RADICAL QOT..YI RADICAL KE + +# Total code points: 1220 + +# 
================================================ + +10300..1031F ; Old_Italic # Lo [32] OLD ITALIC LETTER A..OLD ITALIC LETTER ESS +10320..10323 ; Old_Italic # No [4] OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY +1032D..1032F ; Old_Italic # Lo [3] OLD ITALIC LETTER YE..OLD ITALIC LETTER SOUTHERN TSE + +# Total code points: 39 + +# ================================================ + +10330..10340 ; Gothic # Lo [17] GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA +10341 ; Gothic # Nl GOTHIC LETTER NINETY +10342..10349 ; Gothic # Lo [8] GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL +1034A ; Gothic # Nl GOTHIC LETTER NINE HUNDRED + +# Total code points: 27 + +# ================================================ + +10400..1044F ; Deseret # L& [80] DESERET CAPITAL LETTER LONG I..DESERET SMALL LETTER EW + +# Total code points: 80 + +# ================================================ + +0300..036F ; Inherited # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X +0485..0486 ; Inherited # Mn [2] COMBINING CYRILLIC DASIA PNEUMATA..COMBINING CYRILLIC PSILI PNEUMATA +064B..0655 ; Inherited # Mn [11] ARABIC FATHATAN..ARABIC HAMZA BELOW +0670 ; Inherited # Mn ARABIC LETTER SUPERSCRIPT ALEF +0951..0954 ; Inherited # Mn [4] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI ACUTE ACCENT +1AB0..1ABD ; Inherited # Mn [14] COMBINING DOUBLED CIRCUMFLEX ACCENT..COMBINING PARENTHESES BELOW +1ABE ; Inherited # Me COMBINING PARENTHESES OVERLAY +1ABF..1ACE ; Inherited # Mn [16] COMBINING LATIN SMALL LETTER W BELOW..COMBINING LATIN SMALL LETTER INSULAR T +1CD0..1CD2 ; Inherited # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA +1CD4..1CE0 ; Inherited # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA +1CE2..1CE8 ; Inherited # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL +1CED ; Inherited # Mn VEDIC SIGN TIRYAK +1CF4 ; Inherited # Mn VEDIC TONE CANDRA ABOVE +1CF8..1CF9 ; Inherited # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE 
DOUBLE RING ABOVE +1DC0..1DFF ; Inherited # Mn [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW +200C..200D ; Inherited # Cf [2] ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER +20D0..20DC ; Inherited # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE +20DD..20E0 ; Inherited # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH +20E1 ; Inherited # Mn COMBINING LEFT RIGHT ARROW ABOVE +20E2..20E4 ; Inherited # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE +20E5..20F0 ; Inherited # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE +302A..302D ; Inherited # Mn [4] IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK +3099..309A ; Inherited # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK +FE00..FE0F ; Inherited # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 +FE20..FE2D ; Inherited # Mn [14] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON BELOW +101FD ; Inherited # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE +102E0 ; Inherited # Mn COPTIC EPACT THOUSANDS MARK +1133B ; Inherited # Mn COMBINING BINDU BELOW +1CF00..1CF2D ; Inherited # Mn [46] ZNAMENNY COMBINING MARK GORAZDO NIZKO S KRYZHEM ON LEFT..ZNAMENNY COMBINING MARK KRYZH ON LEFT +1CF30..1CF46 ; Inherited # Mn [23] ZNAMENNY COMBINING TONAL RANGE MARK MRACHNO..ZNAMENNY PRIZNAK MODIFIER ROG +1D167..1D169 ; Inherited # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 +1D17B..1D182 ; Inherited # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE +1D185..1D18B ; Inherited # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE +1D1AA..1D1AD ; Inherited # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +E0100..E01EF ; Inherited # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + +# Total code points: 657 + +# 
================================================ + +1700..1711 ; Tagalog # Lo [18] TAGALOG LETTER A..TAGALOG LETTER HA +1712..1714 ; Tagalog # Mn [3] TAGALOG VOWEL SIGN I..TAGALOG SIGN VIRAMA +1715 ; Tagalog # Mc TAGALOG SIGN PAMUDPOD +171F ; Tagalog # Lo TAGALOG LETTER ARCHAIC RA + +# Total code points: 23 + +# ================================================ + +1720..1731 ; Hanunoo # Lo [18] HANUNOO LETTER A..HANUNOO LETTER HA +1732..1733 ; Hanunoo # Mn [2] HANUNOO VOWEL SIGN I..HANUNOO VOWEL SIGN U +1734 ; Hanunoo # Mc HANUNOO SIGN PAMUDPOD + +# Total code points: 21 + +# ================================================ + +1740..1751 ; Buhid # Lo [18] BUHID LETTER A..BUHID LETTER HA +1752..1753 ; Buhid # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U + +# Total code points: 20 + +# ================================================ + +1760..176C ; Tagbanwa # Lo [13] TAGBANWA LETTER A..TAGBANWA LETTER YA +176E..1770 ; Tagbanwa # Lo [3] TAGBANWA LETTER LA..TAGBANWA LETTER SA +1772..1773 ; Tagbanwa # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U + +# Total code points: 18 + +# ================================================ + +1900..191E ; Limbu # Lo [31] LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER TRA +1920..1922 ; Limbu # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U +1923..1926 ; Limbu # Mc [4] LIMBU VOWEL SIGN EE..LIMBU VOWEL SIGN AU +1927..1928 ; Limbu # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O +1929..192B ; Limbu # Mc [3] LIMBU SUBJOINED LETTER YA..LIMBU SUBJOINED LETTER WA +1930..1931 ; Limbu # Mc [2] LIMBU SMALL LETTER KA..LIMBU SMALL LETTER NGA +1932 ; Limbu # Mn LIMBU SMALL LETTER ANUSVARA +1933..1938 ; Limbu # Mc [6] LIMBU SMALL LETTER TA..LIMBU SMALL LETTER LA +1939..193B ; Limbu # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I +1940 ; Limbu # So LIMBU SIGN LOO +1944..1945 ; Limbu # Po [2] LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK +1946..194F ; Limbu # Nd [10] LIMBU DIGIT ZERO..LIMBU DIGIT NINE + +# Total code points: 68 + +# 
================================================ + +1950..196D ; Tai_Le # Lo [30] TAI LE LETTER KA..TAI LE LETTER AI +1970..1974 ; Tai_Le # Lo [5] TAI LE LETTER TONE-2..TAI LE LETTER TONE-6 + +# Total code points: 35 + +# ================================================ + +10000..1000B ; Linear_B # Lo [12] LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE +1000D..10026 ; Linear_B # Lo [26] LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO +10028..1003A ; Linear_B # Lo [19] LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO +1003C..1003D ; Linear_B # Lo [2] LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE +1003F..1004D ; Linear_B # Lo [15] LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO +10050..1005D ; Linear_B # Lo [14] LINEAR B SYMBOL B018..LINEAR B SYMBOL B089 +10080..100FA ; Linear_B # Lo [123] LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305 + +# Total code points: 211 + +# ================================================ + +10380..1039D ; Ugaritic # Lo [30] UGARITIC LETTER ALPA..UGARITIC LETTER SSU +1039F ; Ugaritic # Po UGARITIC WORD DIVIDER + +# Total code points: 31 + +# ================================================ + +10450..1047F ; Shavian # Lo [48] SHAVIAN LETTER PEEP..SHAVIAN LETTER YEW + +# Total code points: 48 + +# ================================================ + +10480..1049D ; Osmanya # Lo [30] OSMANYA LETTER ALEF..OSMANYA LETTER OO +104A0..104A9 ; Osmanya # Nd [10] OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE + +# Total code points: 40 + +# ================================================ + +10800..10805 ; Cypriot # Lo [6] CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA +10808 ; Cypriot # Lo CYPRIOT SYLLABLE JO +1080A..10835 ; Cypriot # Lo [44] CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO +10837..10838 ; Cypriot # Lo [2] CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE +1083C ; Cypriot # Lo CYPRIOT SYLLABLE ZA +1083F ; Cypriot # Lo CYPRIOT SYLLABLE ZO + +# Total code points: 55 + +# ================================================ + 
+2800..28FF ; Braille # So [256] BRAILLE PATTERN BLANK..BRAILLE PATTERN DOTS-12345678 + +# Total code points: 256 + +# ================================================ + +1A00..1A16 ; Buginese # Lo [23] BUGINESE LETTER KA..BUGINESE LETTER HA +1A17..1A18 ; Buginese # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U +1A19..1A1A ; Buginese # Mc [2] BUGINESE VOWEL SIGN E..BUGINESE VOWEL SIGN O +1A1B ; Buginese # Mn BUGINESE VOWEL SIGN AE +1A1E..1A1F ; Buginese # Po [2] BUGINESE PALLAWA..BUGINESE END OF SECTION + +# Total code points: 30 + +# ================================================ + +03E2..03EF ; Coptic # L& [14] COPTIC CAPITAL LETTER SHEI..COPTIC SMALL LETTER DEI +2C80..2CE4 ; Coptic # L& [101] COPTIC CAPITAL LETTER ALFA..COPTIC SYMBOL KAI +2CE5..2CEA ; Coptic # So [6] COPTIC SYMBOL MI RO..COPTIC SYMBOL SHIMA SIMA +2CEB..2CEE ; Coptic # L& [4] COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI..COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA +2CEF..2CF1 ; Coptic # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS +2CF2..2CF3 ; Coptic # L& [2] COPTIC CAPITAL LETTER BOHAIRIC KHEI..COPTIC SMALL LETTER BOHAIRIC KHEI +2CF9..2CFC ; Coptic # Po [4] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN VERSE DIVIDER +2CFD ; Coptic # No COPTIC FRACTION ONE HALF +2CFE..2CFF ; Coptic # Po [2] COPTIC FULL STOP..COPTIC MORPHOLOGICAL DIVIDER + +# Total code points: 137 + +# ================================================ + +1980..19AB ; New_Tai_Lue # Lo [44] NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA +19B0..19C9 ; New_Tai_Lue # Lo [26] NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2 +19D0..19D9 ; New_Tai_Lue # Nd [10] NEW TAI LUE DIGIT ZERO..NEW TAI LUE DIGIT NINE +19DA ; New_Tai_Lue # No NEW TAI LUE THAM DIGIT ONE +19DE..19DF ; New_Tai_Lue # So [2] NEW TAI LUE SIGN LAE..NEW TAI LUE SIGN LAEV + +# Total code points: 83 + +# ================================================ + +2C00..2C5F ; Glagolitic # L& [96] GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC 
SMALL LETTER CAUDATE CHRIVI +1E000..1E006 ; Glagolitic # Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE +1E008..1E018 ; Glagolitic # Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU +1E01B..1E021 ; Glagolitic # Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI +1E023..1E024 ; Glagolitic # Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS +1E026..1E02A ; Glagolitic # Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA + +# Total code points: 134 + +# ================================================ + +2D30..2D67 ; Tifinagh # Lo [56] TIFINAGH LETTER YA..TIFINAGH LETTER YO +2D6F ; Tifinagh # Lm TIFINAGH MODIFIER LETTER LABIALIZATION MARK +2D70 ; Tifinagh # Po TIFINAGH SEPARATOR MARK +2D7F ; Tifinagh # Mn TIFINAGH CONSONANT JOINER + +# Total code points: 59 + +# ================================================ + +A800..A801 ; Syloti_Nagri # Lo [2] SYLOTI NAGRI LETTER A..SYLOTI NAGRI LETTER I +A802 ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN DVISVARA +A803..A805 ; Syloti_Nagri # Lo [3] SYLOTI NAGRI LETTER U..SYLOTI NAGRI LETTER O +A806 ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN HASANTA +A807..A80A ; Syloti_Nagri # Lo [4] SYLOTI NAGRI LETTER KO..SYLOTI NAGRI LETTER GHO +A80B ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN ANUSVARA +A80C..A822 ; Syloti_Nagri # Lo [23] SYLOTI NAGRI LETTER CO..SYLOTI NAGRI LETTER HO +A823..A824 ; Syloti_Nagri # Mc [2] SYLOTI NAGRI VOWEL SIGN A..SYLOTI NAGRI VOWEL SIGN I +A825..A826 ; Syloti_Nagri # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E +A827 ; Syloti_Nagri # Mc SYLOTI NAGRI VOWEL SIGN OO +A828..A82B ; Syloti_Nagri # So [4] SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4 +A82C ; Syloti_Nagri # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA + +# Total code points: 45 + +# ================================================ + +103A0..103C3 ; Old_Persian # Lo [36] OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA +103C8..103CF ; 
Old_Persian # Lo [8] OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH +103D0 ; Old_Persian # Po OLD PERSIAN WORD DIVIDER +103D1..103D5 ; Old_Persian # Nl [5] OLD PERSIAN NUMBER ONE..OLD PERSIAN NUMBER HUNDRED + +# Total code points: 50 + +# ================================================ + +10A00 ; Kharoshthi # Lo KHAROSHTHI LETTER A +10A01..10A03 ; Kharoshthi # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R +10A05..10A06 ; Kharoshthi # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O +10A0C..10A0F ; Kharoshthi # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA +10A10..10A13 ; Kharoshthi # Lo [4] KHAROSHTHI LETTER KA..KHAROSHTHI LETTER GHA +10A15..10A17 ; Kharoshthi # Lo [3] KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA +10A19..10A35 ; Kharoshthi # Lo [29] KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER VHA +10A38..10A3A ; Kharoshthi # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW +10A3F ; Kharoshthi # Mn KHAROSHTHI VIRAMA +10A40..10A48 ; Kharoshthi # No [9] KHAROSHTHI DIGIT ONE..KHAROSHTHI FRACTION ONE HALF +10A50..10A58 ; Kharoshthi # Po [9] KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES + +# Total code points: 68 + +# ================================================ + +1B00..1B03 ; Balinese # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG +1B04 ; Balinese # Mc BALINESE SIGN BISAH +1B05..1B33 ; Balinese # Lo [47] BALINESE LETTER AKARA..BALINESE LETTER HA +1B34 ; Balinese # Mn BALINESE SIGN REREKAN +1B35 ; Balinese # Mc BALINESE VOWEL SIGN TEDUNG +1B36..1B3A ; Balinese # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA +1B3B ; Balinese # Mc BALINESE VOWEL SIGN RA REPA TEDUNG +1B3C ; Balinese # Mn BALINESE VOWEL SIGN LA LENGA +1B3D..1B41 ; Balinese # Mc [5] BALINESE VOWEL SIGN LA LENGA TEDUNG..BALINESE VOWEL SIGN TALING REPA TEDUNG +1B42 ; Balinese # Mn BALINESE VOWEL SIGN PEPET +1B43..1B44 ; Balinese # Mc [2] BALINESE VOWEL SIGN PEPET TEDUNG..BALINESE ADEG ADEG +1B45..1B4C ; Balinese # Lo [8] 
BALINESE LETTER KAF SASAK..BALINESE LETTER ARCHAIC JNYA +1B50..1B59 ; Balinese # Nd [10] BALINESE DIGIT ZERO..BALINESE DIGIT NINE +1B5A..1B60 ; Balinese # Po [7] BALINESE PANTI..BALINESE PAMENENG +1B61..1B6A ; Balinese # So [10] BALINESE MUSICAL SYMBOL DONG..BALINESE MUSICAL SYMBOL DANG GEDE +1B6B..1B73 ; Balinese # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG +1B74..1B7C ; Balinese # So [9] BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING +1B7D..1B7E ; Balinese # Po [2] BALINESE PANTI LANTANG..BALINESE PAMADA LANTANG + +# Total code points: 124 + +# ================================================ + +12000..12399 ; Cuneiform # Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U +12400..1246E ; Cuneiform # Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM +12470..12474 ; Cuneiform # Po [5] CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL QUADCOLON +12480..12543 ; Cuneiform # Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU + +# Total code points: 1234 + +# ================================================ + +10900..10915 ; Phoenician # Lo [22] PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU +10916..1091B ; Phoenician # No [6] PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE +1091F ; Phoenician # Po PHOENICIAN WORD SEPARATOR + +# Total code points: 29 + +# ================================================ + +A840..A873 ; Phags_Pa # Lo [52] PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU +A874..A877 ; Phags_Pa # Po [4] PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD + +# Total code points: 56 + +# ================================================ + +07C0..07C9 ; Nko # Nd [10] NKO DIGIT ZERO..NKO DIGIT NINE +07CA..07EA ; Nko # Lo [33] NKO LETTER A..NKO LETTER JONA RA +07EB..07F3 ; Nko # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE +07F4..07F5 ; Nko # Lm [2] NKO 
HIGH TONE APOSTROPHE..NKO LOW TONE APOSTROPHE +07F6 ; Nko # So NKO SYMBOL OO DENNEN +07F7..07F9 ; Nko # Po [3] NKO SYMBOL GBAKURUNEN..NKO EXCLAMATION MARK +07FA ; Nko # Lm NKO LAJANYALAN +07FD ; Nko # Mn NKO DANTAYALAN +07FE..07FF ; Nko # Sc [2] NKO DOROME SIGN..NKO TAMAN SIGN + +# Total code points: 62 + +# ================================================ + +1B80..1B81 ; Sundanese # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR +1B82 ; Sundanese # Mc SUNDANESE SIGN PANGWISAD +1B83..1BA0 ; Sundanese # Lo [30] SUNDANESE LETTER A..SUNDANESE LETTER HA +1BA1 ; Sundanese # Mc SUNDANESE CONSONANT SIGN PAMINGKAL +1BA2..1BA5 ; Sundanese # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU +1BA6..1BA7 ; Sundanese # Mc [2] SUNDANESE VOWEL SIGN PANAELAENG..SUNDANESE VOWEL SIGN PANOLONG +1BA8..1BA9 ; Sundanese # Mn [2] SUNDANESE VOWEL SIGN PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG +1BAA ; Sundanese # Mc SUNDANESE SIGN PAMAAEH +1BAB..1BAD ; Sundanese # Mn [3] SUNDANESE SIGN VIRAMA..SUNDANESE CONSONANT SIGN PASANGAN WA +1BAE..1BAF ; Sundanese # Lo [2] SUNDANESE LETTER KHA..SUNDANESE LETTER SYA +1BB0..1BB9 ; Sundanese # Nd [10] SUNDANESE DIGIT ZERO..SUNDANESE DIGIT NINE +1BBA..1BBF ; Sundanese # Lo [6] SUNDANESE AVAGRAHA..SUNDANESE LETTER FINAL M +1CC0..1CC7 ; Sundanese # Po [8] SUNDANESE PUNCTUATION BINDU SURYA..SUNDANESE PUNCTUATION BINDU BA SATANGA + +# Total code points: 72 + +# ================================================ + +1C00..1C23 ; Lepcha # Lo [36] LEPCHA LETTER KA..LEPCHA LETTER A +1C24..1C2B ; Lepcha # Mc [8] LEPCHA SUBJOINED LETTER YA..LEPCHA VOWEL SIGN UU +1C2C..1C33 ; Lepcha # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T +1C34..1C35 ; Lepcha # Mc [2] LEPCHA CONSONANT SIGN NYIN-DO..LEPCHA CONSONANT SIGN KANG +1C36..1C37 ; Lepcha # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA +1C3B..1C3F ; Lepcha # Po [5] LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK +1C40..1C49 ; Lepcha # Nd [10] LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE 
+1C4D..1C4F ; Lepcha # Lo [3] LEPCHA LETTER TTA..LEPCHA LETTER DDA + +# Total code points: 74 + +# ================================================ + +1C50..1C59 ; Ol_Chiki # Nd [10] OL CHIKI DIGIT ZERO..OL CHIKI DIGIT NINE +1C5A..1C77 ; Ol_Chiki # Lo [30] OL CHIKI LETTER LA..OL CHIKI LETTER OH +1C78..1C7D ; Ol_Chiki # Lm [6] OL CHIKI MU TTUDDAG..OL CHIKI AHAD +1C7E..1C7F ; Ol_Chiki # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD + +# Total code points: 48 + +# ================================================ + +A500..A60B ; Vai # Lo [268] VAI SYLLABLE EE..VAI SYLLABLE NG +A60C ; Vai # Lm VAI SYLLABLE LENGTHENER +A60D..A60F ; Vai # Po [3] VAI COMMA..VAI QUESTION MARK +A610..A61F ; Vai # Lo [16] VAI SYLLABLE NDOLE FA..VAI SYMBOL JONG +A620..A629 ; Vai # Nd [10] VAI DIGIT ZERO..VAI DIGIT NINE +A62A..A62B ; Vai # Lo [2] VAI SYLLABLE NDOLE MA..VAI SYLLABLE NDOLE DO + +# Total code points: 300 + +# ================================================ + +A880..A881 ; Saurashtra # Mc [2] SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VISARGA +A882..A8B3 ; Saurashtra # Lo [50] SAURASHTRA LETTER A..SAURASHTRA LETTER LLA +A8B4..A8C3 ; Saurashtra # Mc [16] SAURASHTRA CONSONANT SIGN HAARU..SAURASHTRA VOWEL SIGN AU +A8C4..A8C5 ; Saurashtra # Mn [2] SAURASHTRA SIGN VIRAMA..SAURASHTRA SIGN CANDRABINDU +A8CE..A8CF ; Saurashtra # Po [2] SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA +A8D0..A8D9 ; Saurashtra # Nd [10] SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE + +# Total code points: 82 + +# ================================================ + +A900..A909 ; Kayah_Li # Nd [10] KAYAH LI DIGIT ZERO..KAYAH LI DIGIT NINE +A90A..A925 ; Kayah_Li # Lo [28] KAYAH LI LETTER KA..KAYAH LI LETTER OO +A926..A92D ; Kayah_Li # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU +A92F ; Kayah_Li # Po KAYAH LI SIGN SHYA + +# Total code points: 47 + +# ================================================ + +A930..A946 ; Rejang # Lo [23] REJANG LETTER KA..REJANG LETTER A +A947..A951 ; Rejang # 
Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R +A952..A953 ; Rejang # Mc [2] REJANG CONSONANT SIGN H..REJANG VIRAMA +A95F ; Rejang # Po REJANG SECTION MARK + +# Total code points: 37 + +# ================================================ + +10280..1029C ; Lycian # Lo [29] LYCIAN LETTER A..LYCIAN LETTER X + +# Total code points: 29 + +# ================================================ + +102A0..102D0 ; Carian # Lo [49] CARIAN LETTER A..CARIAN LETTER UUU3 + +# Total code points: 49 + +# ================================================ + +10920..10939 ; Lydian # Lo [26] LYDIAN LETTER A..LYDIAN LETTER C +1093F ; Lydian # Po LYDIAN TRIANGULAR MARK + +# Total code points: 27 + +# ================================================ + +AA00..AA28 ; Cham # Lo [41] CHAM LETTER A..CHAM LETTER HA +AA29..AA2E ; Cham # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE +AA2F..AA30 ; Cham # Mc [2] CHAM VOWEL SIGN O..CHAM VOWEL SIGN AI +AA31..AA32 ; Cham # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE +AA33..AA34 ; Cham # Mc [2] CHAM CONSONANT SIGN YA..CHAM CONSONANT SIGN RA +AA35..AA36 ; Cham # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA +AA40..AA42 ; Cham # Lo [3] CHAM LETTER FINAL K..CHAM LETTER FINAL NG +AA43 ; Cham # Mn CHAM CONSONANT SIGN FINAL NG +AA44..AA4B ; Cham # Lo [8] CHAM LETTER FINAL CH..CHAM LETTER FINAL SS +AA4C ; Cham # Mn CHAM CONSONANT SIGN FINAL M +AA4D ; Cham # Mc CHAM CONSONANT SIGN FINAL H +AA50..AA59 ; Cham # Nd [10] CHAM DIGIT ZERO..CHAM DIGIT NINE +AA5C..AA5F ; Cham # Po [4] CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA + +# Total code points: 83 + +# ================================================ + +1A20..1A54 ; Tai_Tham # Lo [53] TAI THAM LETTER HIGH KA..TAI THAM LETTER GREAT SA +1A55 ; Tai_Tham # Mc TAI THAM CONSONANT SIGN MEDIAL RA +1A56 ; Tai_Tham # Mn TAI THAM CONSONANT SIGN MEDIAL LA +1A57 ; Tai_Tham # Mc TAI THAM CONSONANT SIGN LA TANG LAI +1A58..1A5E ; Tai_Tham # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA 
+1A60 ; Tai_Tham # Mn TAI THAM SIGN SAKOT +1A61 ; Tai_Tham # Mc TAI THAM VOWEL SIGN A +1A62 ; Tai_Tham # Mn TAI THAM VOWEL SIGN MAI SAT +1A63..1A64 ; Tai_Tham # Mc [2] TAI THAM VOWEL SIGN AA..TAI THAM VOWEL SIGN TALL AA +1A65..1A6C ; Tai_Tham # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW +1A6D..1A72 ; Tai_Tham # Mc [6] TAI THAM VOWEL SIGN OY..TAI THAM VOWEL SIGN THAM AI +1A73..1A7C ; Tai_Tham # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN +1A7F ; Tai_Tham # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT +1A80..1A89 ; Tai_Tham # Nd [10] TAI THAM HORA DIGIT ZERO..TAI THAM HORA DIGIT NINE +1A90..1A99 ; Tai_Tham # Nd [10] TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE +1AA0..1AA6 ; Tai_Tham # Po [7] TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA +1AA7 ; Tai_Tham # Lm TAI THAM SIGN MAI YAMOK +1AA8..1AAD ; Tai_Tham # Po [6] TAI THAM SIGN KAAN..TAI THAM SIGN CAANG + +# Total code points: 127 + +# ================================================ + +AA80..AAAF ; Tai_Viet # Lo [48] TAI VIET LETTER LOW KO..TAI VIET LETTER HIGH O +AAB0 ; Tai_Viet # Mn TAI VIET MAI KANG +AAB1 ; Tai_Viet # Lo TAI VIET VOWEL AA +AAB2..AAB4 ; Tai_Viet # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U +AAB5..AAB6 ; Tai_Viet # Lo [2] TAI VIET VOWEL E..TAI VIET VOWEL O +AAB7..AAB8 ; Tai_Viet # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA +AAB9..AABD ; Tai_Viet # Lo [5] TAI VIET VOWEL UEA..TAI VIET VOWEL AN +AABE..AABF ; Tai_Viet # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK +AAC0 ; Tai_Viet # Lo TAI VIET TONE MAI NUENG +AAC1 ; Tai_Viet # Mn TAI VIET TONE MAI THO +AAC2 ; Tai_Viet # Lo TAI VIET TONE MAI SONG +AADB..AADC ; Tai_Viet # Lo [2] TAI VIET SYMBOL KON..TAI VIET SYMBOL NUENG +AADD ; Tai_Viet # Lm TAI VIET SYMBOL SAM +AADE..AADF ; Tai_Viet # Po [2] TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI + +# Total code points: 72 + +# ================================================ + +10B00..10B35 ; Avestan # Lo [54] AVESTAN LETTER A..AVESTAN LETTER HE 
+10B39..10B3F ; Avestan # Po [7] AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION + +# Total code points: 61 + +# ================================================ + +13000..1342E ; Egyptian_Hieroglyphs # Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 +13430..13438 ; Egyptian_Hieroglyphs # Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT + +# Total code points: 1080 + +# ================================================ + +0800..0815 ; Samaritan # Lo [22] SAMARITAN LETTER ALAF..SAMARITAN LETTER TAAF +0816..0819 ; Samaritan # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH +081A ; Samaritan # Lm SAMARITAN MODIFIER LETTER EPENTHETIC YUT +081B..0823 ; Samaritan # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A +0824 ; Samaritan # Lm SAMARITAN MODIFIER LETTER SHORT A +0825..0827 ; Samaritan # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U +0828 ; Samaritan # Lm SAMARITAN MODIFIER LETTER I +0829..082D ; Samaritan # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA +0830..083E ; Samaritan # Po [15] SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU + +# Total code points: 61 + +# ================================================ + +A4D0..A4F7 ; Lisu # Lo [40] LISU LETTER BA..LISU LETTER OE +A4F8..A4FD ; Lisu # Lm [6] LISU LETTER TONE MYA TI..LISU LETTER TONE MYA JEU +A4FE..A4FF ; Lisu # Po [2] LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP +11FB0 ; Lisu # Lo LISU LETTER YHA + +# Total code points: 49 + +# ================================================ + +A6A0..A6E5 ; Bamum # Lo [70] BAMUM LETTER A..BAMUM LETTER KI +A6E6..A6EF ; Bamum # Nl [10] BAMUM LETTER MO..BAMUM LETTER KOGHOM +A6F0..A6F1 ; Bamum # Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS +A6F2..A6F7 ; Bamum # Po [6] BAMUM NJAEMLI..BAMUM QUESTION MARK +16800..16A38 ; Bamum # Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ + +# Total code points: 657 + +# 
================================================ + +A980..A982 ; Javanese # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR +A983 ; Javanese # Mc JAVANESE SIGN WIGNYAN +A984..A9B2 ; Javanese # Lo [47] JAVANESE LETTER A..JAVANESE LETTER HA +A9B3 ; Javanese # Mn JAVANESE SIGN CECAK TELU +A9B4..A9B5 ; Javanese # Mc [2] JAVANESE VOWEL SIGN TARUNG..JAVANESE VOWEL SIGN TOLONG +A9B6..A9B9 ; Javanese # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT +A9BA..A9BB ; Javanese # Mc [2] JAVANESE VOWEL SIGN TALING..JAVANESE VOWEL SIGN DIRGA MURE +A9BC..A9BD ; Javanese # Mn [2] JAVANESE VOWEL SIGN PEPET..JAVANESE CONSONANT SIGN KERET +A9BE..A9C0 ; Javanese # Mc [3] JAVANESE CONSONANT SIGN PENGKAL..JAVANESE PANGKON +A9C1..A9CD ; Javanese # Po [13] JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH +A9D0..A9D9 ; Javanese # Nd [10] JAVANESE DIGIT ZERO..JAVANESE DIGIT NINE +A9DE..A9DF ; Javanese # Po [2] JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN + +# Total code points: 90 + +# ================================================ + +AAE0..AAEA ; Meetei_Mayek # Lo [11] MEETEI MAYEK LETTER E..MEETEI MAYEK LETTER SSA +AAEB ; Meetei_Mayek # Mc MEETEI MAYEK VOWEL SIGN II +AAEC..AAED ; Meetei_Mayek # Mn [2] MEETEI MAYEK VOWEL SIGN UU..MEETEI MAYEK VOWEL SIGN AAI +AAEE..AAEF ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN AU..MEETEI MAYEK VOWEL SIGN AAU +AAF0..AAF1 ; Meetei_Mayek # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM +AAF2 ; Meetei_Mayek # Lo MEETEI MAYEK ANJI +AAF3..AAF4 ; Meetei_Mayek # Lm [2] MEETEI MAYEK SYLLABLE REPETITION MARK..MEETEI MAYEK WORD REPETITION MARK +AAF5 ; Meetei_Mayek # Mc MEETEI MAYEK VOWEL SIGN VISARGA +AAF6 ; Meetei_Mayek # Mn MEETEI MAYEK VIRAMA +ABC0..ABE2 ; Meetei_Mayek # Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM +ABE3..ABE4 ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP +ABE5 ; Meetei_Mayek # Mn MEETEI MAYEK VOWEL SIGN ANAP +ABE6..ABE7 ; Meetei_Mayek # Mc [2] 
MEETEI MAYEK VOWEL SIGN YENAP..MEETEI MAYEK VOWEL SIGN SOUNAP +ABE8 ; Meetei_Mayek # Mn MEETEI MAYEK VOWEL SIGN UNAP +ABE9..ABEA ; Meetei_Mayek # Mc [2] MEETEI MAYEK VOWEL SIGN CHEINAP..MEETEI MAYEK VOWEL SIGN NUNG +ABEB ; Meetei_Mayek # Po MEETEI MAYEK CHEIKHEI +ABEC ; Meetei_Mayek # Mc MEETEI MAYEK LUM IYEK +ABED ; Meetei_Mayek # Mn MEETEI MAYEK APUN IYEK +ABF0..ABF9 ; Meetei_Mayek # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE + +# Total code points: 79 + +# ================================================ + +10840..10855 ; Imperial_Aramaic # Lo [22] IMPERIAL ARAMAIC LETTER ALEPH..IMPERIAL ARAMAIC LETTER TAW +10857 ; Imperial_Aramaic # Po IMPERIAL ARAMAIC SECTION SIGN +10858..1085F ; Imperial_Aramaic # No [8] IMPERIAL ARAMAIC NUMBER ONE..IMPERIAL ARAMAIC NUMBER TEN THOUSAND + +# Total code points: 31 + +# ================================================ + +10A60..10A7C ; Old_South_Arabian # Lo [29] OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH +10A7D..10A7E ; Old_South_Arabian # No [2] OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMBER FIFTY +10A7F ; Old_South_Arabian # Po OLD SOUTH ARABIAN NUMERIC INDICATOR + +# Total code points: 32 + +# ================================================ + +10B40..10B55 ; Inscriptional_Parthian # Lo [22] INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW +10B58..10B5F ; Inscriptional_Parthian # No [8] INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND + +# Total code points: 30 + +# ================================================ + +10B60..10B72 ; Inscriptional_Pahlavi # Lo [19] INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW +10B78..10B7F ; Inscriptional_Pahlavi # No [8] INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND + +# Total code points: 27 + +# ================================================ + +10C00..10C48 ; Old_Turkic # Lo [73] OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH + +# Total 
code points: 73 + +# ================================================ + +11080..11081 ; Kaithi # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA +11082 ; Kaithi # Mc KAITHI SIGN VISARGA +11083..110AF ; Kaithi # Lo [45] KAITHI LETTER A..KAITHI LETTER HA +110B0..110B2 ; Kaithi # Mc [3] KAITHI VOWEL SIGN AA..KAITHI VOWEL SIGN II +110B3..110B6 ; Kaithi # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI +110B7..110B8 ; Kaithi # Mc [2] KAITHI VOWEL SIGN O..KAITHI VOWEL SIGN AU +110B9..110BA ; Kaithi # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA +110BB..110BC ; Kaithi # Po [2] KAITHI ABBREVIATION SIGN..KAITHI ENUMERATION SIGN +110BD ; Kaithi # Cf KAITHI NUMBER SIGN +110BE..110C1 ; Kaithi # Po [4] KAITHI SECTION MARK..KAITHI DOUBLE DANDA +110C2 ; Kaithi # Mn KAITHI VOWEL SIGN VOCALIC R +110CD ; Kaithi # Cf KAITHI NUMBER SIGN ABOVE + +# Total code points: 68 + +# ================================================ + +1BC0..1BE5 ; Batak # Lo [38] BATAK LETTER A..BATAK LETTER U +1BE6 ; Batak # Mn BATAK SIGN TOMPI +1BE7 ; Batak # Mc BATAK VOWEL SIGN E +1BE8..1BE9 ; Batak # Mn [2] BATAK VOWEL SIGN PAKPAK E..BATAK VOWEL SIGN EE +1BEA..1BEC ; Batak # Mc [3] BATAK VOWEL SIGN I..BATAK VOWEL SIGN O +1BED ; Batak # Mn BATAK VOWEL SIGN KARO O +1BEE ; Batak # Mc BATAK VOWEL SIGN U +1BEF..1BF1 ; Batak # Mn [3] BATAK VOWEL SIGN U FOR SIMALUNGUN SA..BATAK CONSONANT SIGN H +1BF2..1BF3 ; Batak # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN +1BFC..1BFF ; Batak # Po [4] BATAK SYMBOL BINDU NA METEK..BATAK SYMBOL BINDU PANGOLAT + +# Total code points: 56 + +# ================================================ + +11000 ; Brahmi # Mc BRAHMI SIGN CANDRABINDU +11001 ; Brahmi # Mn BRAHMI SIGN ANUSVARA +11002 ; Brahmi # Mc BRAHMI SIGN VISARGA +11003..11037 ; Brahmi # Lo [53] BRAHMI SIGN JIHVAMULIYA..BRAHMI LETTER OLD TAMIL NNNA +11038..11046 ; Brahmi # Mn [15] BRAHMI VOWEL SIGN AA..BRAHMI VIRAMA +11047..1104D ; Brahmi # Po [7] BRAHMI DANDA..BRAHMI PUNCTUATION LOTUS +11052..11065 ; Brahmi # No [20] BRAHMI 
NUMBER ONE..BRAHMI NUMBER ONE THOUSAND +11066..1106F ; Brahmi # Nd [10] BRAHMI DIGIT ZERO..BRAHMI DIGIT NINE +11070 ; Brahmi # Mn BRAHMI SIGN OLD TAMIL VIRAMA +11071..11072 ; Brahmi # Lo [2] BRAHMI LETTER OLD TAMIL SHORT E..BRAHMI LETTER OLD TAMIL SHORT O +11073..11074 ; Brahmi # Mn [2] BRAHMI VOWEL SIGN OLD TAMIL SHORT E..BRAHMI VOWEL SIGN OLD TAMIL SHORT O +11075 ; Brahmi # Lo BRAHMI LETTER OLD TAMIL LLA +1107F ; Brahmi # Mn BRAHMI NUMBER JOINER + +# Total code points: 115 + +# ================================================ + +0840..0858 ; Mandaic # Lo [25] MANDAIC LETTER HALQA..MANDAIC LETTER AIN +0859..085B ; Mandaic # Mn [3] MANDAIC AFFRICATION MARK..MANDAIC GEMINATION MARK +085E ; Mandaic # Po MANDAIC PUNCTUATION + +# Total code points: 29 + +# ================================================ + +11100..11102 ; Chakma # Mn [3] CHAKMA SIGN CANDRABINDU..CHAKMA SIGN VISARGA +11103..11126 ; Chakma # Lo [36] CHAKMA LETTER AA..CHAKMA LETTER HAA +11127..1112B ; Chakma # Mn [5] CHAKMA VOWEL SIGN A..CHAKMA VOWEL SIGN UU +1112C ; Chakma # Mc CHAKMA VOWEL SIGN E +1112D..11134 ; Chakma # Mn [8] CHAKMA VOWEL SIGN AI..CHAKMA MAAYYAA +11136..1113F ; Chakma # Nd [10] CHAKMA DIGIT ZERO..CHAKMA DIGIT NINE +11140..11143 ; Chakma # Po [4] CHAKMA SECTION MARK..CHAKMA QUESTION MARK +11144 ; Chakma # Lo CHAKMA LETTER LHAA +11145..11146 ; Chakma # Mc [2] CHAKMA VOWEL SIGN AA..CHAKMA VOWEL SIGN EI +11147 ; Chakma # Lo CHAKMA LETTER VAA + +# Total code points: 71 + +# ================================================ + +109A0..109B7 ; Meroitic_Cursive # Lo [24] MEROITIC CURSIVE LETTER A..MEROITIC CURSIVE LETTER DA +109BC..109BD ; Meroitic_Cursive # No [2] MEROITIC CURSIVE FRACTION ELEVEN TWELFTHS..MEROITIC CURSIVE FRACTION ONE HALF +109BE..109BF ; Meroitic_Cursive # Lo [2] MEROITIC CURSIVE LOGOGRAM RMT..MEROITIC CURSIVE LOGOGRAM IMN +109C0..109CF ; Meroitic_Cursive # No [16] MEROITIC CURSIVE NUMBER ONE..MEROITIC CURSIVE NUMBER SEVENTY +109D2..109FF ; Meroitic_Cursive # No [46] 
MEROITIC CURSIVE NUMBER ONE HUNDRED..MEROITIC CURSIVE FRACTION TEN TWELFTHS + +# Total code points: 90 + +# ================================================ + +10980..1099F ; Meroitic_Hieroglyphs # Lo [32] MEROITIC HIEROGLYPHIC LETTER A..MEROITIC HIEROGLYPHIC SYMBOL VIDJ-2 + +# Total code points: 32 + +# ================================================ + +16F00..16F4A ; Miao # Lo [75] MIAO LETTER PA..MIAO LETTER RTE +16F4F ; Miao # Mn MIAO SIGN CONSONANT MODIFIER BAR +16F50 ; Miao # Lo MIAO LETTER NASALIZATION +16F51..16F87 ; Miao # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +16F8F..16F92 ; Miao # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW +16F93..16F9F ; Miao # Lm [13] MIAO LETTER TONE-2..MIAO LETTER REFORMED TONE-8 + +# Total code points: 149 + +# ================================================ + +11180..11181 ; Sharada # Mn [2] SHARADA SIGN CANDRABINDU..SHARADA SIGN ANUSVARA +11182 ; Sharada # Mc SHARADA SIGN VISARGA +11183..111B2 ; Sharada # Lo [48] SHARADA LETTER A..SHARADA LETTER HA +111B3..111B5 ; Sharada # Mc [3] SHARADA VOWEL SIGN AA..SHARADA VOWEL SIGN II +111B6..111BE ; Sharada # Mn [9] SHARADA VOWEL SIGN U..SHARADA VOWEL SIGN O +111BF..111C0 ; Sharada # Mc [2] SHARADA VOWEL SIGN AU..SHARADA SIGN VIRAMA +111C1..111C4 ; Sharada # Lo [4] SHARADA SIGN AVAGRAHA..SHARADA OM +111C5..111C8 ; Sharada # Po [4] SHARADA DANDA..SHARADA SEPARATOR +111C9..111CC ; Sharada # Mn [4] SHARADA SANDHI MARK..SHARADA EXTRA SHORT VOWEL MARK +111CD ; Sharada # Po SHARADA SUTRA MARK +111CE ; Sharada # Mc SHARADA VOWEL SIGN PRISHTHAMATRA E +111CF ; Sharada # Mn SHARADA SIGN INVERTED CANDRABINDU +111D0..111D9 ; Sharada # Nd [10] SHARADA DIGIT ZERO..SHARADA DIGIT NINE +111DA ; Sharada # Lo SHARADA EKAM +111DB ; Sharada # Po SHARADA SIGN SIDDHAM +111DC ; Sharada # Lo SHARADA HEADSTROKE +111DD..111DF ; Sharada # Po [3] SHARADA CONTINUATION SIGN..SHARADA SECTION MARK-2 + +# Total code points: 96 + +# ================================================ + +110D0..110E8 ; Sora_Sompeng # 
Lo [25] SORA SOMPENG LETTER SAH..SORA SOMPENG LETTER MAE +110F0..110F9 ; Sora_Sompeng # Nd [10] SORA SOMPENG DIGIT ZERO..SORA SOMPENG DIGIT NINE + +# Total code points: 35 + +# ================================================ + +11680..116AA ; Takri # Lo [43] TAKRI LETTER A..TAKRI LETTER RRA +116AB ; Takri # Mn TAKRI SIGN ANUSVARA +116AC ; Takri # Mc TAKRI SIGN VISARGA +116AD ; Takri # Mn TAKRI VOWEL SIGN AA +116AE..116AF ; Takri # Mc [2] TAKRI VOWEL SIGN I..TAKRI VOWEL SIGN II +116B0..116B5 ; Takri # Mn [6] TAKRI VOWEL SIGN U..TAKRI VOWEL SIGN AU +116B6 ; Takri # Mc TAKRI SIGN VIRAMA +116B7 ; Takri # Mn TAKRI SIGN NUKTA +116B8 ; Takri # Lo TAKRI LETTER ARCHAIC KHA +116B9 ; Takri # Po TAKRI ABBREVIATION SIGN +116C0..116C9 ; Takri # Nd [10] TAKRI DIGIT ZERO..TAKRI DIGIT NINE + +# Total code points: 68 + +# ================================================ + +10530..10563 ; Caucasian_Albanian # Lo [52] CAUCASIAN ALBANIAN LETTER ALT..CAUCASIAN ALBANIAN LETTER KIW +1056F ; Caucasian_Albanian # Po CAUCASIAN ALBANIAN CITATION MARK + +# Total code points: 53 + +# ================================================ + +16AD0..16AED ; Bassa_Vah # Lo [30] BASSA VAH LETTER ENNI..BASSA VAH LETTER I +16AF0..16AF4 ; Bassa_Vah # Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE +16AF5 ; Bassa_Vah # Po BASSA VAH FULL STOP + +# Total code points: 36 + +# ================================================ + +1BC00..1BC6A ; Duployan # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M +1BC70..1BC7C ; Duployan # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK +1BC80..1BC88 ; Duployan # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL +1BC90..1BC99 ; Duployan # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW +1BC9C ; Duployan # So DUPLOYAN SIGN O WITH CROSS +1BC9D..1BC9E ; Duployan # Mn [2] DUPLOYAN THICK LETTER SELECTOR..DUPLOYAN DOUBLE MARK +1BC9F ; Duployan # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP + +# 
Total code points: 143 + +# ================================================ + +10500..10527 ; Elbasan # Lo [40] ELBASAN LETTER A..ELBASAN LETTER KHE + +# Total code points: 40 + +# ================================================ + +11300..11301 ; Grantha # Mn [2] GRANTHA SIGN COMBINING ANUSVARA ABOVE..GRANTHA SIGN CANDRABINDU +11302..11303 ; Grantha # Mc [2] GRANTHA SIGN ANUSVARA..GRANTHA SIGN VISARGA +11305..1130C ; Grantha # Lo [8] GRANTHA LETTER A..GRANTHA LETTER VOCALIC L +1130F..11310 ; Grantha # Lo [2] GRANTHA LETTER EE..GRANTHA LETTER AI +11313..11328 ; Grantha # Lo [22] GRANTHA LETTER OO..GRANTHA LETTER NA +1132A..11330 ; Grantha # Lo [7] GRANTHA LETTER PA..GRANTHA LETTER RA +11332..11333 ; Grantha # Lo [2] GRANTHA LETTER LA..GRANTHA LETTER LLA +11335..11339 ; Grantha # Lo [5] GRANTHA LETTER VA..GRANTHA LETTER HA +1133C ; Grantha # Mn GRANTHA SIGN NUKTA +1133D ; Grantha # Lo GRANTHA SIGN AVAGRAHA +1133E..1133F ; Grantha # Mc [2] GRANTHA VOWEL SIGN AA..GRANTHA VOWEL SIGN I +11340 ; Grantha # Mn GRANTHA VOWEL SIGN II +11341..11344 ; Grantha # Mc [4] GRANTHA VOWEL SIGN U..GRANTHA VOWEL SIGN VOCALIC RR +11347..11348 ; Grantha # Mc [2] GRANTHA VOWEL SIGN EE..GRANTHA VOWEL SIGN AI +1134B..1134D ; Grantha # Mc [3] GRANTHA VOWEL SIGN OO..GRANTHA SIGN VIRAMA +11350 ; Grantha # Lo GRANTHA OM +11357 ; Grantha # Mc GRANTHA AU LENGTH MARK +1135D..11361 ; Grantha # Lo [5] GRANTHA SIGN PLUTA..GRANTHA LETTER VOCALIC LL +11362..11363 ; Grantha # Mc [2] GRANTHA VOWEL SIGN VOCALIC L..GRANTHA VOWEL SIGN VOCALIC LL +11366..1136C ; Grantha # Mn [7] COMBINING GRANTHA DIGIT ZERO..COMBINING GRANTHA DIGIT SIX +11370..11374 ; Grantha # Mn [5] COMBINING GRANTHA LETTER A..COMBINING GRANTHA LETTER PA + +# Total code points: 85 + +# ================================================ + +16B00..16B2F ; Pahawh_Hmong # Lo [48] PAHAWH HMONG VOWEL KEEB..PAHAWH HMONG CONSONANT CAU +16B30..16B36 ; Pahawh_Hmong # Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM +16B37..16B3B ; 
Pahawh_Hmong # Po [5] PAHAWH HMONG SIGN VOS THOM..PAHAWH HMONG SIGN VOS FEEM +16B3C..16B3F ; Pahawh_Hmong # So [4] PAHAWH HMONG SIGN XYEEM NTXIV..PAHAWH HMONG SIGN XYEEM FAIB +16B40..16B43 ; Pahawh_Hmong # Lm [4] PAHAWH HMONG SIGN VOS SEEV..PAHAWH HMONG SIGN IB YAM +16B44 ; Pahawh_Hmong # Po PAHAWH HMONG SIGN XAUS +16B45 ; Pahawh_Hmong # So PAHAWH HMONG SIGN CIM TSOV ROG +16B50..16B59 ; Pahawh_Hmong # Nd [10] PAHAWH HMONG DIGIT ZERO..PAHAWH HMONG DIGIT NINE +16B5B..16B61 ; Pahawh_Hmong # No [7] PAHAWH HMONG NUMBER TENS..PAHAWH HMONG NUMBER TRILLIONS +16B63..16B77 ; Pahawh_Hmong # Lo [21] PAHAWH HMONG SIGN VOS LUB..PAHAWH HMONG SIGN CIM NRES TOS +16B7D..16B8F ; Pahawh_Hmong # Lo [19] PAHAWH HMONG CLAN SIGN TSHEEJ..PAHAWH HMONG CLAN SIGN VWJ + +# Total code points: 127 + +# ================================================ + +11200..11211 ; Khojki # Lo [18] KHOJKI LETTER A..KHOJKI LETTER JJA +11213..1122B ; Khojki # Lo [25] KHOJKI LETTER NYA..KHOJKI LETTER LLA +1122C..1122E ; Khojki # Mc [3] KHOJKI VOWEL SIGN AA..KHOJKI VOWEL SIGN II +1122F..11231 ; Khojki # Mn [3] KHOJKI VOWEL SIGN U..KHOJKI VOWEL SIGN AI +11232..11233 ; Khojki # Mc [2] KHOJKI VOWEL SIGN O..KHOJKI VOWEL SIGN AU +11234 ; Khojki # Mn KHOJKI SIGN ANUSVARA +11235 ; Khojki # Mc KHOJKI SIGN VIRAMA +11236..11237 ; Khojki # Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA +11238..1123D ; Khojki # Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN +1123E ; Khojki # Mn KHOJKI SIGN SUKUN + +# Total code points: 62 + +# ================================================ + +10600..10736 ; Linear_A # Lo [311] LINEAR A SIGN AB001..LINEAR A SIGN A664 +10740..10755 ; Linear_A # Lo [22] LINEAR A SIGN A701 A..LINEAR A SIGN A732 JE +10760..10767 ; Linear_A # Lo [8] LINEAR A SIGN A800..LINEAR A SIGN A807 + +# Total code points: 341 + +# ================================================ + +11150..11172 ; Mahajani # Lo [35] MAHAJANI LETTER A..MAHAJANI LETTER RRA +11173 ; Mahajani # Mn MAHAJANI SIGN NUKTA +11174..11175 ; Mahajani # Po 
[2] MAHAJANI ABBREVIATION SIGN..MAHAJANI SECTION MARK +11176 ; Mahajani # Lo MAHAJANI LIGATURE SHRI + +# Total code points: 39 + +# ================================================ + +10AC0..10AC7 ; Manichaean # Lo [8] MANICHAEAN LETTER ALEPH..MANICHAEAN LETTER WAW +10AC8 ; Manichaean # So MANICHAEAN SIGN UD +10AC9..10AE4 ; Manichaean # Lo [28] MANICHAEAN LETTER ZAYIN..MANICHAEAN LETTER TAW +10AE5..10AE6 ; Manichaean # Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW +10AEB..10AEF ; Manichaean # No [5] MANICHAEAN NUMBER ONE..MANICHAEAN NUMBER ONE HUNDRED +10AF0..10AF6 ; Manichaean # Po [7] MANICHAEAN PUNCTUATION STAR..MANICHAEAN PUNCTUATION LINE FILLER + +# Total code points: 51 + +# ================================================ + +1E800..1E8C4 ; Mende_Kikakui # Lo [197] MENDE KIKAKUI SYLLABLE M001 KI..MENDE KIKAKUI SYLLABLE M060 NYON +1E8C7..1E8CF ; Mende_Kikakui # No [9] MENDE KIKAKUI DIGIT ONE..MENDE KIKAKUI DIGIT NINE +1E8D0..1E8D6 ; Mende_Kikakui # Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS + +# Total code points: 213 + +# ================================================ + +11600..1162F ; Modi # Lo [48] MODI LETTER A..MODI LETTER LLA +11630..11632 ; Modi # Mc [3] MODI VOWEL SIGN AA..MODI VOWEL SIGN II +11633..1163A ; Modi # Mn [8] MODI VOWEL SIGN U..MODI VOWEL SIGN AI +1163B..1163C ; Modi # Mc [2] MODI VOWEL SIGN O..MODI VOWEL SIGN AU +1163D ; Modi # Mn MODI SIGN ANUSVARA +1163E ; Modi # Mc MODI SIGN VISARGA +1163F..11640 ; Modi # Mn [2] MODI SIGN VIRAMA..MODI SIGN ARDHACANDRA +11641..11643 ; Modi # Po [3] MODI DANDA..MODI ABBREVIATION SIGN +11644 ; Modi # Lo MODI SIGN HUVA +11650..11659 ; Modi # Nd [10] MODI DIGIT ZERO..MODI DIGIT NINE + +# Total code points: 79 + +# ================================================ + +16A40..16A5E ; Mro # Lo [31] MRO LETTER TA..MRO LETTER TEK +16A60..16A69 ; Mro # Nd [10] MRO DIGIT ZERO..MRO DIGIT NINE +16A6E..16A6F ; Mro # Po [2] MRO DANDA..MRO DOUBLE 
DANDA + +# Total code points: 43 + +# ================================================ + +10A80..10A9C ; Old_North_Arabian # Lo [29] OLD NORTH ARABIAN LETTER HEH..OLD NORTH ARABIAN LETTER ZAH +10A9D..10A9F ; Old_North_Arabian # No [3] OLD NORTH ARABIAN NUMBER ONE..OLD NORTH ARABIAN NUMBER TWENTY + +# Total code points: 32 + +# ================================================ + +10880..1089E ; Nabataean # Lo [31] NABATAEAN LETTER FINAL ALEPH..NABATAEAN LETTER TAW +108A7..108AF ; Nabataean # No [9] NABATAEAN NUMBER ONE..NABATAEAN NUMBER ONE HUNDRED + +# Total code points: 40 + +# ================================================ + +10860..10876 ; Palmyrene # Lo [23] PALMYRENE LETTER ALEPH..PALMYRENE LETTER TAW +10877..10878 ; Palmyrene # So [2] PALMYRENE LEFT-POINTING FLEURON..PALMYRENE RIGHT-POINTING FLEURON +10879..1087F ; Palmyrene # No [7] PALMYRENE NUMBER ONE..PALMYRENE NUMBER TWENTY + +# Total code points: 32 + +# ================================================ + +11AC0..11AF8 ; Pau_Cin_Hau # Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL + +# Total code points: 57 + +# ================================================ + +10350..10375 ; Old_Permic # Lo [38] OLD PERMIC LETTER AN..OLD PERMIC LETTER IA +10376..1037A ; Old_Permic # Mn [5] COMBINING OLD PERMIC LETTER AN..COMBINING OLD PERMIC LETTER SII + +# Total code points: 43 + +# ================================================ + +10B80..10B91 ; Psalter_Pahlavi # Lo [18] PSALTER PAHLAVI LETTER ALEPH..PSALTER PAHLAVI LETTER TAW +10B99..10B9C ; Psalter_Pahlavi # Po [4] PSALTER PAHLAVI SECTION MARK..PSALTER PAHLAVI FOUR DOTS WITH DOT +10BA9..10BAF ; Psalter_Pahlavi # No [7] PSALTER PAHLAVI NUMBER ONE..PSALTER PAHLAVI NUMBER ONE HUNDRED + +# Total code points: 29 + +# ================================================ + +11580..115AE ; Siddham # Lo [47] SIDDHAM LETTER A..SIDDHAM LETTER HA +115AF..115B1 ; Siddham # Mc [3] SIDDHAM VOWEL SIGN AA..SIDDHAM VOWEL SIGN II +115B2..115B5 ; Siddham # Mn [4] 
SIDDHAM VOWEL SIGN U..SIDDHAM VOWEL SIGN VOCALIC RR +115B8..115BB ; Siddham # Mc [4] SIDDHAM VOWEL SIGN E..SIDDHAM VOWEL SIGN AU +115BC..115BD ; Siddham # Mn [2] SIDDHAM SIGN CANDRABINDU..SIDDHAM SIGN ANUSVARA +115BE ; Siddham # Mc SIDDHAM SIGN VISARGA +115BF..115C0 ; Siddham # Mn [2] SIDDHAM SIGN VIRAMA..SIDDHAM SIGN NUKTA +115C1..115D7 ; Siddham # Po [23] SIDDHAM SIGN SIDDHAM..SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES +115D8..115DB ; Siddham # Lo [4] SIDDHAM LETTER THREE-CIRCLE ALTERNATE I..SIDDHAM LETTER ALTERNATE U +115DC..115DD ; Siddham # Mn [2] SIDDHAM VOWEL SIGN ALTERNATE U..SIDDHAM VOWEL SIGN ALTERNATE UU + +# Total code points: 92 + +# ================================================ + +112B0..112DE ; Khudawadi # Lo [47] KHUDAWADI LETTER A..KHUDAWADI LETTER HA +112DF ; Khudawadi # Mn KHUDAWADI SIGN ANUSVARA +112E0..112E2 ; Khudawadi # Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II +112E3..112EA ; Khudawadi # Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA +112F0..112F9 ; Khudawadi # Nd [10] KHUDAWADI DIGIT ZERO..KHUDAWADI DIGIT NINE + +# Total code points: 69 + +# ================================================ + +11480..114AF ; Tirhuta # Lo [48] TIRHUTA ANJI..TIRHUTA LETTER HA +114B0..114B2 ; Tirhuta # Mc [3] TIRHUTA VOWEL SIGN AA..TIRHUTA VOWEL SIGN II +114B3..114B8 ; Tirhuta # Mn [6] TIRHUTA VOWEL SIGN U..TIRHUTA VOWEL SIGN VOCALIC LL +114B9 ; Tirhuta # Mc TIRHUTA VOWEL SIGN E +114BA ; Tirhuta # Mn TIRHUTA VOWEL SIGN SHORT E +114BB..114BE ; Tirhuta # Mc [4] TIRHUTA VOWEL SIGN AI..TIRHUTA VOWEL SIGN AU +114BF..114C0 ; Tirhuta # Mn [2] TIRHUTA SIGN CANDRABINDU..TIRHUTA SIGN ANUSVARA +114C1 ; Tirhuta # Mc TIRHUTA SIGN VISARGA +114C2..114C3 ; Tirhuta # Mn [2] TIRHUTA SIGN VIRAMA..TIRHUTA SIGN NUKTA +114C4..114C5 ; Tirhuta # Lo [2] TIRHUTA SIGN AVAGRAHA..TIRHUTA GVANG +114C6 ; Tirhuta # Po TIRHUTA ABBREVIATION SIGN +114C7 ; Tirhuta # Lo TIRHUTA OM +114D0..114D9 ; Tirhuta # Nd [10] TIRHUTA DIGIT ZERO..TIRHUTA DIGIT NINE + +# Total 
code points: 82 + +# ================================================ + +118A0..118DF ; Warang_Citi # L& [64] WARANG CITI CAPITAL LETTER NGAA..WARANG CITI SMALL LETTER VIYO +118E0..118E9 ; Warang_Citi # Nd [10] WARANG CITI DIGIT ZERO..WARANG CITI DIGIT NINE +118EA..118F2 ; Warang_Citi # No [9] WARANG CITI NUMBER TEN..WARANG CITI NUMBER NINETY +118FF ; Warang_Citi # Lo WARANG CITI OM + +# Total code points: 84 + +# ================================================ + +11700..1171A ; Ahom # Lo [27] AHOM LETTER KA..AHOM LETTER ALTERNATE BA +1171D..1171F ; Ahom # Mn [3] AHOM CONSONANT SIGN MEDIAL LA..AHOM CONSONANT SIGN MEDIAL LIGATING RA +11720..11721 ; Ahom # Mc [2] AHOM VOWEL SIGN A..AHOM VOWEL SIGN AA +11722..11725 ; Ahom # Mn [4] AHOM VOWEL SIGN I..AHOM VOWEL SIGN UU +11726 ; Ahom # Mc AHOM VOWEL SIGN E +11727..1172B ; Ahom # Mn [5] AHOM VOWEL SIGN AW..AHOM SIGN KILLER +11730..11739 ; Ahom # Nd [10] AHOM DIGIT ZERO..AHOM DIGIT NINE +1173A..1173B ; Ahom # No [2] AHOM NUMBER TEN..AHOM NUMBER TWENTY +1173C..1173E ; Ahom # Po [3] AHOM SIGN SMALL SECTION..AHOM SIGN RULAI +1173F ; Ahom # So AHOM SYMBOL VI +11740..11746 ; Ahom # Lo [7] AHOM LETTER CA..AHOM LETTER LLA + +# Total code points: 65 + +# ================================================ + +14400..14646 ; Anatolian_Hieroglyphs # Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 + +# Total code points: 583 + +# ================================================ + +108E0..108F2 ; Hatran # Lo [19] HATRAN LETTER ALEPH..HATRAN LETTER QOPH +108F4..108F5 ; Hatran # Lo [2] HATRAN LETTER SHIN..HATRAN LETTER TAW +108FB..108FF ; Hatran # No [5] HATRAN NUMBER ONE..HATRAN NUMBER ONE HUNDRED + +# Total code points: 26 + +# ================================================ + +11280..11286 ; Multani # Lo [7] MULTANI LETTER A..MULTANI LETTER GA +11288 ; Multani # Lo MULTANI LETTER GHA +1128A..1128D ; Multani # Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA +1128F..1129D ; Multani # Lo [15] MULTANI LETTER NYA..MULTANI LETTER 
BA +1129F..112A8 ; Multani # Lo [10] MULTANI LETTER BHA..MULTANI LETTER RHA +112A9 ; Multani # Po MULTANI SECTION MARK + +# Total code points: 38 + +# ================================================ + +10C80..10CB2 ; Old_Hungarian # L& [51] OLD HUNGARIAN CAPITAL LETTER A..OLD HUNGARIAN CAPITAL LETTER US +10CC0..10CF2 ; Old_Hungarian # L& [51] OLD HUNGARIAN SMALL LETTER A..OLD HUNGARIAN SMALL LETTER US +10CFA..10CFF ; Old_Hungarian # No [6] OLD HUNGARIAN NUMBER ONE..OLD HUNGARIAN NUMBER ONE THOUSAND + +# Total code points: 108 + +# ================================================ + +1D800..1D9FF ; SignWriting # So [512] SIGNWRITING HAND-FIST INDEX..SIGNWRITING HEAD +1DA00..1DA36 ; SignWriting # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN +1DA37..1DA3A ; SignWriting # So [4] SIGNWRITING AIR BLOW SMALL ROTATIONS..SIGNWRITING BREATH EXHALE +1DA3B..1DA6C ; SignWriting # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT +1DA6D..1DA74 ; SignWriting # So [8] SIGNWRITING SHOULDER HIP SPINE..SIGNWRITING TORSO-FLOORPLANE TWISTING +1DA75 ; SignWriting # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS +1DA76..1DA83 ; SignWriting # So [14] SIGNWRITING LIMB COMBINATION..SIGNWRITING LOCATION DEPTH +1DA84 ; SignWriting # Mn SIGNWRITING LOCATION HEAD NECK +1DA85..1DA86 ; SignWriting # So [2] SIGNWRITING LOCATION TORSO..SIGNWRITING LOCATION LIMBS DIGITS +1DA87..1DA8B ; SignWriting # Po [5] SIGNWRITING COMMA..SIGNWRITING PARENTHESIS +1DA9B..1DA9F ; SignWriting # Mn [5] SIGNWRITING FILL MODIFIER-2..SIGNWRITING FILL MODIFIER-6 +1DAA1..1DAAF ; SignWriting # Mn [15] SIGNWRITING ROTATION MODIFIER-2..SIGNWRITING ROTATION MODIFIER-16 + +# Total code points: 672 + +# ================================================ + +1E900..1E943 ; Adlam # L& [68] ADLAM CAPITAL LETTER ALIF..ADLAM SMALL LETTER SHA +1E944..1E94A ; Adlam # Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA +1E94B ; Adlam # Lm ADLAM NASALIZATION MARK +1E950..1E959 ; Adlam # Nd [10] ADLAM DIGIT ZERO..ADLAM 
DIGIT NINE +1E95E..1E95F ; Adlam # Po [2] ADLAM INITIAL EXCLAMATION MARK..ADLAM INITIAL QUESTION MARK + +# Total code points: 88 + +# ================================================ + +11C00..11C08 ; Bhaiksuki # Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L +11C0A..11C2E ; Bhaiksuki # Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA +11C2F ; Bhaiksuki # Mc BHAIKSUKI VOWEL SIGN AA +11C30..11C36 ; Bhaiksuki # Mn [7] BHAIKSUKI VOWEL SIGN I..BHAIKSUKI VOWEL SIGN VOCALIC L +11C38..11C3D ; Bhaiksuki # Mn [6] BHAIKSUKI VOWEL SIGN E..BHAIKSUKI SIGN ANUSVARA +11C3E ; Bhaiksuki # Mc BHAIKSUKI SIGN VISARGA +11C3F ; Bhaiksuki # Mn BHAIKSUKI SIGN VIRAMA +11C40 ; Bhaiksuki # Lo BHAIKSUKI SIGN AVAGRAHA +11C41..11C45 ; Bhaiksuki # Po [5] BHAIKSUKI DANDA..BHAIKSUKI GAP FILLER-2 +11C50..11C59 ; Bhaiksuki # Nd [10] BHAIKSUKI DIGIT ZERO..BHAIKSUKI DIGIT NINE +11C5A..11C6C ; Bhaiksuki # No [19] BHAIKSUKI NUMBER ONE..BHAIKSUKI HUNDREDS UNIT MARK + +# Total code points: 97 + +# ================================================ + +11C70..11C71 ; Marchen # Po [2] MARCHEN HEAD MARK..MARCHEN MARK SHAD +11C72..11C8F ; Marchen # Lo [30] MARCHEN LETTER KA..MARCHEN LETTER A +11C92..11CA7 ; Marchen # Mn [22] MARCHEN SUBJOINED LETTER KA..MARCHEN SUBJOINED LETTER ZA +11CA9 ; Marchen # Mc MARCHEN SUBJOINED LETTER YA +11CAA..11CB0 ; Marchen # Mn [7] MARCHEN SUBJOINED LETTER RA..MARCHEN VOWEL SIGN AA +11CB1 ; Marchen # Mc MARCHEN VOWEL SIGN I +11CB2..11CB3 ; Marchen # Mn [2] MARCHEN VOWEL SIGN U..MARCHEN VOWEL SIGN E +11CB4 ; Marchen # Mc MARCHEN VOWEL SIGN O +11CB5..11CB6 ; Marchen # Mn [2] MARCHEN SIGN ANUSVARA..MARCHEN SIGN CANDRABINDU + +# Total code points: 68 + +# ================================================ + +11400..11434 ; Newa # Lo [53] NEWA LETTER A..NEWA LETTER HA +11435..11437 ; Newa # Mc [3] NEWA VOWEL SIGN AA..NEWA VOWEL SIGN II +11438..1143F ; Newa # Mn [8] NEWA VOWEL SIGN U..NEWA VOWEL SIGN AI +11440..11441 ; Newa # Mc [2] NEWA VOWEL SIGN O..NEWA VOWEL SIGN AU +11442..11444 
; Newa # Mn [3] NEWA SIGN VIRAMA..NEWA SIGN ANUSVARA +11445 ; Newa # Mc NEWA SIGN VISARGA +11446 ; Newa # Mn NEWA SIGN NUKTA +11447..1144A ; Newa # Lo [4] NEWA SIGN AVAGRAHA..NEWA SIDDHI +1144B..1144F ; Newa # Po [5] NEWA DANDA..NEWA ABBREVIATION SIGN +11450..11459 ; Newa # Nd [10] NEWA DIGIT ZERO..NEWA DIGIT NINE +1145A..1145B ; Newa # Po [2] NEWA DOUBLE COMMA..NEWA PLACEHOLDER MARK +1145D ; Newa # Po NEWA INSERTION SIGN +1145E ; Newa # Mn NEWA SANDHI MARK +1145F..11461 ; Newa # Lo [3] NEWA LETTER VEDIC ANUSVARA..NEWA SIGN UPADHMANIYA + +# Total code points: 97 + +# ================================================ + +104B0..104D3 ; Osage # L& [36] OSAGE CAPITAL LETTER A..OSAGE CAPITAL LETTER ZHA +104D8..104FB ; Osage # L& [36] OSAGE SMALL LETTER A..OSAGE SMALL LETTER ZHA + +# Total code points: 72 + +# ================================================ + +16FE0 ; Tangut # Lm TANGUT ITERATION MARK +17000..187F7 ; Tangut # Lo [6136] TANGUT IDEOGRAPH-17000..TANGUT IDEOGRAPH-187F7 +18800..18AFF ; Tangut # Lo [768] TANGUT COMPONENT-001..TANGUT COMPONENT-768 +18D00..18D08 ; Tangut # Lo [9] TANGUT IDEOGRAPH-18D00..TANGUT IDEOGRAPH-18D08 + +# Total code points: 6914 + +# ================================================ + +11D00..11D06 ; Masaram_Gondi # Lo [7] MASARAM GONDI LETTER A..MASARAM GONDI LETTER E +11D08..11D09 ; Masaram_Gondi # Lo [2] MASARAM GONDI LETTER AI..MASARAM GONDI LETTER O +11D0B..11D30 ; Masaram_Gondi # Lo [38] MASARAM GONDI LETTER AU..MASARAM GONDI LETTER TRA +11D31..11D36 ; Masaram_Gondi # Mn [6] MASARAM GONDI VOWEL SIGN AA..MASARAM GONDI VOWEL SIGN VOCALIC R +11D3A ; Masaram_Gondi # Mn MASARAM GONDI VOWEL SIGN E +11D3C..11D3D ; Masaram_Gondi # Mn [2] MASARAM GONDI VOWEL SIGN AI..MASARAM GONDI VOWEL SIGN O +11D3F..11D45 ; Masaram_Gondi # Mn [7] MASARAM GONDI VOWEL SIGN AU..MASARAM GONDI VIRAMA +11D46 ; Masaram_Gondi # Lo MASARAM GONDI REPHA +11D47 ; Masaram_Gondi # Mn MASARAM GONDI RA-KARA +11D50..11D59 ; Masaram_Gondi # Nd [10] MASARAM GONDI DIGIT 
ZERO..MASARAM GONDI DIGIT NINE + +# Total code points: 75 + +# ================================================ + +16FE1 ; Nushu # Lm NUSHU ITERATION MARK +1B170..1B2FB ; Nushu # Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB + +# Total code points: 397 + +# ================================================ + +11A50 ; Soyombo # Lo SOYOMBO LETTER A +11A51..11A56 ; Soyombo # Mn [6] SOYOMBO VOWEL SIGN I..SOYOMBO VOWEL SIGN OE +11A57..11A58 ; Soyombo # Mc [2] SOYOMBO VOWEL SIGN AI..SOYOMBO VOWEL SIGN AU +11A59..11A5B ; Soyombo # Mn [3] SOYOMBO VOWEL SIGN VOCALIC R..SOYOMBO VOWEL LENGTH MARK +11A5C..11A89 ; Soyombo # Lo [46] SOYOMBO LETTER KA..SOYOMBO CLUSTER-INITIAL LETTER SA +11A8A..11A96 ; Soyombo # Mn [13] SOYOMBO FINAL CONSONANT SIGN G..SOYOMBO SIGN ANUSVARA +11A97 ; Soyombo # Mc SOYOMBO SIGN VISARGA +11A98..11A99 ; Soyombo # Mn [2] SOYOMBO GEMINATION MARK..SOYOMBO SUBJOINER +11A9A..11A9C ; Soyombo # Po [3] SOYOMBO MARK TSHEG..SOYOMBO MARK DOUBLE SHAD +11A9D ; Soyombo # Lo SOYOMBO MARK PLUTA +11A9E..11AA2 ; Soyombo # Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 + +# Total code points: 83 + +# ================================================ + +11A00 ; Zanabazar_Square # Lo ZANABAZAR SQUARE LETTER A +11A01..11A0A ; Zanabazar_Square # Mn [10] ZANABAZAR SQUARE VOWEL SIGN I..ZANABAZAR SQUARE VOWEL LENGTH MARK +11A0B..11A32 ; Zanabazar_Square # Lo [40] ZANABAZAR SQUARE LETTER KA..ZANABAZAR SQUARE LETTER KSSA +11A33..11A38 ; Zanabazar_Square # Mn [6] ZANABAZAR SQUARE FINAL CONSONANT MARK..ZANABAZAR SQUARE SIGN ANUSVARA +11A39 ; Zanabazar_Square # Mc ZANABAZAR SQUARE SIGN VISARGA +11A3A ; Zanabazar_Square # Lo ZANABAZAR SQUARE CLUSTER-INITIAL LETTER RA +11A3B..11A3E ; Zanabazar_Square # Mn [4] ZANABAZAR SQUARE CLUSTER-FINAL LETTER YA..ZANABAZAR SQUARE CLUSTER-FINAL LETTER VA +11A3F..11A46 ; Zanabazar_Square # Po [8] ZANABAZAR SQUARE INITIAL HEAD MARK..ZANABAZAR SQUARE CLOSING DOUBLE-LINED HEAD MARK +11A47 ; Zanabazar_Square # 
Mn ZANABAZAR SQUARE SUBJOINER + +# Total code points: 72 + +# ================================================ + +11800..1182B ; Dogra # Lo [44] DOGRA LETTER A..DOGRA LETTER RRA +1182C..1182E ; Dogra # Mc [3] DOGRA VOWEL SIGN AA..DOGRA VOWEL SIGN II +1182F..11837 ; Dogra # Mn [9] DOGRA VOWEL SIGN U..DOGRA SIGN ANUSVARA +11838 ; Dogra # Mc DOGRA SIGN VISARGA +11839..1183A ; Dogra # Mn [2] DOGRA SIGN VIRAMA..DOGRA SIGN NUKTA +1183B ; Dogra # Po DOGRA ABBREVIATION SIGN + +# Total code points: 60 + +# ================================================ + +11D60..11D65 ; Gunjala_Gondi # Lo [6] GUNJALA GONDI LETTER A..GUNJALA GONDI LETTER UU +11D67..11D68 ; Gunjala_Gondi # Lo [2] GUNJALA GONDI LETTER EE..GUNJALA GONDI LETTER AI +11D6A..11D89 ; Gunjala_Gondi # Lo [32] GUNJALA GONDI LETTER OO..GUNJALA GONDI LETTER SA +11D8A..11D8E ; Gunjala_Gondi # Mc [5] GUNJALA GONDI VOWEL SIGN AA..GUNJALA GONDI VOWEL SIGN UU +11D90..11D91 ; Gunjala_Gondi # Mn [2] GUNJALA GONDI VOWEL SIGN EE..GUNJALA GONDI VOWEL SIGN AI +11D93..11D94 ; Gunjala_Gondi # Mc [2] GUNJALA GONDI VOWEL SIGN OO..GUNJALA GONDI VOWEL SIGN AU +11D95 ; Gunjala_Gondi # Mn GUNJALA GONDI SIGN ANUSVARA +11D96 ; Gunjala_Gondi # Mc GUNJALA GONDI SIGN VISARGA +11D97 ; Gunjala_Gondi # Mn GUNJALA GONDI VIRAMA +11D98 ; Gunjala_Gondi # Lo GUNJALA GONDI OM +11DA0..11DA9 ; Gunjala_Gondi # Nd [10] GUNJALA GONDI DIGIT ZERO..GUNJALA GONDI DIGIT NINE + +# Total code points: 63 + +# ================================================ + +11EE0..11EF2 ; Makasar # Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA +11EF3..11EF4 ; Makasar # Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U +11EF5..11EF6 ; Makasar # Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O +11EF7..11EF8 ; Makasar # Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION + +# Total code points: 25 + +# ================================================ + +16E40..16E7F ; Medefaidrin # L& [64] MEDEFAIDRIN CAPITAL LETTER M..MEDEFAIDRIN SMALL LETTER Y +16E80..16E96 ; Medefaidrin # No [23] 
MEDEFAIDRIN DIGIT ZERO..MEDEFAIDRIN DIGIT THREE ALTERNATE FORM +16E97..16E9A ; Medefaidrin # Po [4] MEDEFAIDRIN COMMA..MEDEFAIDRIN EXCLAMATION OH + +# Total code points: 91 + +# ================================================ + +10D00..10D23 ; Hanifi_Rohingya # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA +10D24..10D27 ; Hanifi_Rohingya # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI +10D30..10D39 ; Hanifi_Rohingya # Nd [10] HANIFI ROHINGYA DIGIT ZERO..HANIFI ROHINGYA DIGIT NINE + +# Total code points: 50 + +# ================================================ + +10F30..10F45 ; Sogdian # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN +10F46..10F50 ; Sogdian # Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW +10F51..10F54 ; Sogdian # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED +10F55..10F59 ; Sogdian # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT + +# Total code points: 42 + +# ================================================ + +10F00..10F1C ; Old_Sogdian # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL +10F1D..10F26 ; Old_Sogdian # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF +10F27 ; Old_Sogdian # Lo OLD SOGDIAN LIGATURE AYIN-DALETH + +# Total code points: 40 + +# ================================================ + +10FE0..10FF6 ; Elymaic # Lo [23] ELYMAIC LETTER ALEPH..ELYMAIC LIGATURE ZAYIN-YODH + +# Total code points: 23 + +# ================================================ + +119A0..119A7 ; Nandinagari # Lo [8] NANDINAGARI LETTER A..NANDINAGARI LETTER VOCALIC RR +119AA..119D0 ; Nandinagari # Lo [39] NANDINAGARI LETTER E..NANDINAGARI LETTER RRA +119D1..119D3 ; Nandinagari # Mc [3] NANDINAGARI VOWEL SIGN AA..NANDINAGARI VOWEL SIGN II +119D4..119D7 ; Nandinagari # Mn [4] NANDINAGARI VOWEL SIGN U..NANDINAGARI VOWEL SIGN VOCALIC RR +119DA..119DB ; Nandinagari # Mn [2] NANDINAGARI VOWEL SIGN 
E..NANDINAGARI VOWEL SIGN AI +119DC..119DF ; Nandinagari # Mc [4] NANDINAGARI VOWEL SIGN O..NANDINAGARI SIGN VISARGA +119E0 ; Nandinagari # Mn NANDINAGARI SIGN VIRAMA +119E1 ; Nandinagari # Lo NANDINAGARI SIGN AVAGRAHA +119E2 ; Nandinagari # Po NANDINAGARI SIGN SIDDHAM +119E3 ; Nandinagari # Lo NANDINAGARI HEADSTROKE +119E4 ; Nandinagari # Mc NANDINAGARI VOWEL SIGN PRISHTHAMATRA E + +# Total code points: 65 + +# ================================================ + +1E100..1E12C ; Nyiakeng_Puachue_Hmong # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W +1E130..1E136 ; Nyiakeng_Puachue_Hmong # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D +1E137..1E13D ; Nyiakeng_Puachue_Hmong # Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER +1E140..1E149 ; Nyiakeng_Puachue_Hmong # Nd [10] NYIAKENG PUACHUE HMONG DIGIT ZERO..NYIAKENG PUACHUE HMONG DIGIT NINE +1E14E ; Nyiakeng_Puachue_Hmong # Lo NYIAKENG PUACHUE HMONG LOGOGRAM NYAJ +1E14F ; Nyiakeng_Puachue_Hmong # So NYIAKENG PUACHUE HMONG CIRCLED CA + +# Total code points: 71 + +# ================================================ + +1E2C0..1E2EB ; Wancho # Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH +1E2EC..1E2EF ; Wancho # Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI +1E2F0..1E2F9 ; Wancho # Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE +1E2FF ; Wancho # Sc WANCHO NGUN SIGN + +# Total code points: 59 + +# ================================================ + +10FB0..10FC4 ; Chorasmian # Lo [21] CHORASMIAN LETTER ALEPH..CHORASMIAN LETTER TAW +10FC5..10FCB ; Chorasmian # No [7] CHORASMIAN NUMBER ONE..CHORASMIAN NUMBER ONE HUNDRED + +# Total code points: 28 + +# ================================================ + +11900..11906 ; Dives_Akuru # Lo [7] DIVES AKURU LETTER A..DIVES AKURU LETTER E +11909 ; Dives_Akuru # Lo DIVES AKURU LETTER O +1190C..11913 ; Dives_Akuru # Lo [8] DIVES AKURU LETTER KA..DIVES AKURU LETTER JA +11915..11916 ; Dives_Akuru # Lo [2] 
DIVES AKURU LETTER NYA..DIVES AKURU LETTER TTA +11918..1192F ; Dives_Akuru # Lo [24] DIVES AKURU LETTER DDA..DIVES AKURU LETTER ZA +11930..11935 ; Dives_Akuru # Mc [6] DIVES AKURU VOWEL SIGN AA..DIVES AKURU VOWEL SIGN E +11937..11938 ; Dives_Akuru # Mc [2] DIVES AKURU VOWEL SIGN AI..DIVES AKURU VOWEL SIGN O +1193B..1193C ; Dives_Akuru # Mn [2] DIVES AKURU SIGN ANUSVARA..DIVES AKURU SIGN CANDRABINDU +1193D ; Dives_Akuru # Mc DIVES AKURU SIGN HALANTA +1193E ; Dives_Akuru # Mn DIVES AKURU VIRAMA +1193F ; Dives_Akuru # Lo DIVES AKURU PREFIXED NASAL SIGN +11940 ; Dives_Akuru # Mc DIVES AKURU MEDIAL YA +11941 ; Dives_Akuru # Lo DIVES AKURU INITIAL RA +11942 ; Dives_Akuru # Mc DIVES AKURU MEDIAL RA +11943 ; Dives_Akuru # Mn DIVES AKURU SIGN NUKTA +11944..11946 ; Dives_Akuru # Po [3] DIVES AKURU DOUBLE DANDA..DIVES AKURU END OF TEXT MARK +11950..11959 ; Dives_Akuru # Nd [10] DIVES AKURU DIGIT ZERO..DIVES AKURU DIGIT NINE + +# Total code points: 72 + +# ================================================ + +16FE4 ; Khitan_Small_Script # Mn KHITAN SMALL SCRIPT FILLER +18B00..18CD5 ; Khitan_Small_Script # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5 + +# Total code points: 471 + +# ================================================ + +10E80..10EA9 ; Yezidi # Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET +10EAB..10EAC ; Yezidi # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK +10EAD ; Yezidi # Pd YEZIDI HYPHENATION MARK +10EB0..10EB1 ; Yezidi # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE + +# Total code points: 47 + +# ================================================ + +12F90..12FF0 ; Cypro_Minoan # Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 +12FF1..12FF2 ; Cypro_Minoan # Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302 + +# Total code points: 99 + +# ================================================ + +10F70..10F81 ; Old_Uyghur # Lo [18] OLD UYGHUR LETTER ALEPH..OLD UYGHUR 
LETTER LESH +10F82..10F85 ; Old_Uyghur # Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW +10F86..10F89 ; Old_Uyghur # Po [4] OLD UYGHUR PUNCTUATION BAR..OLD UYGHUR PUNCTUATION FOUR DOTS + +# Total code points: 26 + +# ================================================ + +16A70..16ABE ; Tangsa # Lo [79] TANGSA LETTER OZ..TANGSA LETTER ZA +16AC0..16AC9 ; Tangsa # Nd [10] TANGSA DIGIT ZERO..TANGSA DIGIT NINE + +# Total code points: 89 + +# ================================================ + +1E290..1E2AD ; Toto # Lo [30] TOTO LETTER PA..TOTO LETTER A +1E2AE ; Toto # Mn TOTO SIGN RISING TONE + +# Total code points: 31 + +# ================================================ + +10570..1057A ; Vithkuqi # L& [11] VITHKUQI CAPITAL LETTER A..VITHKUQI CAPITAL LETTER GA +1057C..1058A ; Vithkuqi # L& [15] VITHKUQI CAPITAL LETTER HA..VITHKUQI CAPITAL LETTER RE +1058C..10592 ; Vithkuqi # L& [7] VITHKUQI CAPITAL LETTER SE..VITHKUQI CAPITAL LETTER XE +10594..10595 ; Vithkuqi # L& [2] VITHKUQI CAPITAL LETTER Y..VITHKUQI CAPITAL LETTER ZE +10597..105A1 ; Vithkuqi # L& [11] VITHKUQI SMALL LETTER A..VITHKUQI SMALL LETTER GA +105A3..105B1 ; Vithkuqi # L& [15] VITHKUQI SMALL LETTER HA..VITHKUQI SMALL LETTER RE +105B3..105B9 ; Vithkuqi # L& [7] VITHKUQI SMALL LETTER SE..VITHKUQI SMALL LETTER XE +105BB..105BC ; Vithkuqi # L& [2] VITHKUQI SMALL LETTER Y..VITHKUQI SMALL LETTER ZE + +# Total code points: 70 + +# EOF diff --git a/admin/unidata/unidata-gen.el b/admin/unidata/unidata-gen.el index d6b5a476bb0..f0538d70e21 100644 --- a/admin/unidata/unidata-gen.el +++ b/admin/unidata/unidata-gen.el @@ -1449,20 +1449,24 @@ Property value is a symbol `o' (Open), `c' (Close), or `n' (None)." 
(format ";;; %s ends here\n" basename))))) (or noninteractive (message "Generating %s...done" file))) -(defun unidata-gen-charprop (&optional charprop-file) +(defun unidata-gen-charprop (&optional charprop-file text) (or charprop-file (setq charprop-file (pop command-line-args-left))) (with-temp-file charprop-file (insert ";; Automatically generated by unidata-gen.el." " -*- lexical-binding: t -*-\n" ";; See the admin/unidata/ directory in the Emacs sources.\n") - (dolist (elt unidata-file-alist) - (dolist (proplist (cdr elt)) - (insert (format "(define-char-code-property '%S %S\n %S)\n" - (unidata-prop-prop proplist) (car elt) - (unidata-prop-docstring proplist))))) + (if text + (insert text) + (dolist (elt unidata-file-alist) + (dolist (proplist (cdr elt)) + (insert (format "(define-char-code-property '%S %S\n %S)\n" + (unidata-prop-prop proplist) (car elt) + (unidata-prop-docstring proplist)))))) (or noninteractive (message "Writing %s..." charprop-file)) (insert "\n" - "(provide 'charprop)\n" + (format "(provide '%s)\n" + (file-name-sans-extension + (file-name-nondirectory charprop-file))) "\n" ";; Local Variables:\n" ";; coding: utf-8\n" @@ -1473,6 +1477,105 @@ Property value is a symbol `o' (Open), `c' (Close), or `n' (None)." (format ";;; %s ends here\n" (file-name-nondirectory charprop-file))))) +(defun unidata-gen-scripts (&optional file) + ;; Running from Makefile. + (unless file + (setq file (pop command-line-args-left))) + (let ((aliases (unidata-gen--read-script-aliases)) + (table (make-char-table nil)) + (segmented (make-hash-table :test #'equal))) + ;; First parse the scripts. + (with-temp-buffer + (unidata-gen--insert-file "Scripts.txt") + (while (not (eobp)) + ;; 1700..1711 ; Tagalog # Lo [18] TAGALOG LETTER A..TAGALOG + (when (looking-at "\\([0-9A-F]+\\)\\(?:\\.\\.\\([0-9A-F]+\\)\\)? 
+; +\\([^ ]+\\) +#") + (let ((start (string-to-number (match-string 1) 16)) + (end (and (match-string 2) + (string-to-number (match-string 2) 16))) + (scripts (list (intern (string-replace + "_" "-" + (downcase (match-string 3))))))) + (set-char-table-range + table (if end (cons start end) start) scripts))) + (forward-line 1))) + + ;; Then parse the file that lists "other scripts" that characters + ;; may appear in, and add those. + (with-temp-buffer + (unidata-gen--insert-file "ScriptExtensions.txt") + (while (not (eobp)) + ;; 102E0 ; Arab Copt # Mn COPTIC EPACT THOUSANDS MARK + (when (looking-at "\\([0-9A-F]+\\)\\(?:\\.\\.\\([0-9A-F]+\\)\\)? +; +\\([^#]+\\)") + (let ((start (string-to-number (match-string 1) 16)) + (end (and (match-string 2) + (string-to-number (match-string 2) 16))) + (scripts + (mapcar + (lambda (alias) + (intern (string-replace + "_" "-" (downcase + (gethash alias aliases))))) + (split-string (string-trim (match-string 3)))))) + (dolist (script scripts) + (dotimes (i (- (1+ (or end start)) start)) + (set-char-table-range + table (+ i start) + (append (elt table (+ i start)) (list script))))))) + (forward-line 1))) + + ;; Then go through the data and collect into buckets based on + ;; identical script lists. + (map-char-table + (lambda (key value) + ;; The range cons `map-char-table' passes as KEY is reused, so copy it. + (push (if (consp key) + (cons (car key) (cdr key)) + key) + ;; Keep the first element first, but sort the rest. 
+ (gethash (cons (car value) + (sort (remq (car value) value) #'string<)) + segmented))) + table) + + ;; Then sort the buckets by their first script and write them out. + (let ((scripts nil)) + (maphash + (lambda (segment chars) + (push (cons segment chars) scripts)) + segmented) + (setq scripts (sort scripts (lambda (s1 s2) + (string< (caar s1) (caar s2))))) + (with-temp-buffer + (insert "(textsec--create-script-table '(\n") + (dolist (script scripts) + (insert "(" (prin1-to-string (car script)) "\n") + (insert " " (prin1-to-string (cdr script))) + (insert ")\n")) + (insert "))\n") + ;; Write the file. + (unidata-gen-charprop file (buffer-string)))))) + +(defun unidata-gen--read-script-aliases () + (let ((aliases (make-hash-table :test #'equal))) + (with-temp-buffer + (unidata-gen--insert-file "PropertyValueAliases.txt") + (unless (re-search-forward "^# Script " nil t) + (error "Can't find the Script section")) + (forward-line 2) + (while (looking-at "sc *;") + (let ((elem (split-string (buffer-substring (point) (line-end-position)) + ";" nil "[ \t]+"))) + (setf (gethash (nth 1 elem) aliases) + (nth 2 elem))) + (forward-line 1)) + aliases))) + +(defun unidata-gen--insert-file (name) + (insert-file-contents + (expand-file-name (concat "../admin/unidata/" name) + data-directory))) + ;;; unidata-gen.el ends here @@ -951,6 +951,10 @@ The input must be encoded text. * Lisp Changes in Emacs 29.1 +--- +** The Gnus range functions have been moved to a new library, range.el. +All the old names have been made obsolete. + +++ ** New function 'function-alias-p'. This predicate says whether an object is a function alias, and if it diff --git a/lisp/international/textsec.el b/lisp/international/textsec.el new file mode 100644 index 00000000000..ab17623ec52 --- /dev/null +++ b/lisp/international/textsec.el @@ -0,0 +1,95 @@ +;;; textsec.el --- Functions for handling homoglyphs and the like -*- lexical-binding: t; -*- + +;; Copyright (C) 2022 Free Software Foundation, Inc. 
+ +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. + +;;; Commentary: + +;; Script-based checks on strings (see Unicode TR39 mixed-script detection). + +;;; Code: + +(require 'cl-lib) + +(defvar textsec--char-scripts nil) + +(eval-and-compile + (defun textsec--create-script-table (data) + "Create the textsec--char-scripts char table." + (setq textsec--char-scripts (make-char-table nil)) + (dolist (scripts data) + (dolist (range (cadr scripts)) + (set-char-table-range textsec--char-scripts + range (car scripts))))) + (require 'uni-scripts)) + +(defun textsec-scripts (string) + "Return a list with the list of scripts of each character in STRING." + (seq-map (lambda (char) + (elt textsec--char-scripts char)) + string)) + +(defun textsec-single-script-p (string) + "Return non-nil if STRING is all in a single script. + +Note that the concept of \"single script\" used by this function +isn't obvious -- some mixtures of scripts count as a \"single +script\". See + + https://www.unicode.org/reports/tr39/#Mixed_Script_Detection + +for details." 
+ (let ((scripts (mapcar (lambda (s) + (append s + (mapcan (lambda (script) + (copy-sequence + (textsec--augment-script script))) + s))) + (textsec-scripts string)))) + (catch 'empty + (cl-loop for s1 in scripts + do (cl-loop for s2 in scripts + when (and (not (memq 'common s1)) + (not (memq 'common s2)) + (not (memq 'inherited s1)) + (not (memq 'inherited s2)) + (not (seq-intersection s1 s2))) + do (throw 'empty nil))) + t))) + +(defun textsec--augment-script (script) + (cond + ((eq script 'han) + '(han-bopomofo japan korea)) + ((or (eq script 'hiragana) + (eq script 'katakana)) + '(japan)) + ((eq script 'hangul) '(korea)) + ;; Per TR39, Bopomofo only pairs with Han, never with Hangul. + ((eq script 'bopomofo) '(han-bopomofo)))) + +(defun textsec-covering-scripts (string) + "Return the union of scripts in STRING, minus `common' and `inherited'." + (let* ((scripts (textsec-scripts string)) + (set (car scripts))) + (dolist (s scripts) + (setq set (seq-union set (seq-difference s set)))) + (delq 'common (delq 'inherited set)))) + +(provide 'textsec) + +;;; textsec.el ends here diff --git a/test/lisp/international/textsec-tests.el b/test/lisp/international/textsec-tests.el new file mode 100644 index 00000000000..c80b2ba0fdf --- /dev/null +++ b/test/lisp/international/textsec-tests.el @@ -0,0 +1,72 @@ +;;; textsec-tests.el --- Tests for textsec.el -*- lexical-binding: t; -*- + +;; Copyright (C) 2022 Free Software Foundation, Inc. + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. 
If not, see <https://www.gnu.org/licenses/>. + +;;; Commentary: + +;; ERT tests for the script functions defined in textsec.el. + +;;; Code: + +(require 'textsec) +(require 'ert) +(require 'ert-x) + +(ert-deftest test-scripts () + (should (equal (textsec-scripts "Circle") + '((latin) (latin) (latin) (latin) (latin) (latin)))) + (should (textsec-single-script-p "Circle")) + + (should (equal (textsec-scripts "СігсӀе") + '((cyrillic) (cyrillic) (cyrillic) + (cyrillic) (cyrillic) (cyrillic)))) + (should (textsec-single-script-p "СігсӀе")) + + (should (equal (textsec-scripts "Сirсlе") + '((cyrillic) (latin) (latin) (cyrillic) (latin) (cyrillic)))) + (should-not (textsec-single-script-p "Сirсlе")) + + (should (equal (textsec-scripts "Circ1e") + '((latin) (latin) (latin) (latin) (common) (latin)))) + (should (textsec-single-script-p "Circ1e")) + + (should (equal (textsec-scripts "C𝗂𝗋𝖼𝗅𝖾") + '((latin) (common) (common) (common) (common) (common)))) + (should (textsec-single-script-p "C𝗂𝗋𝖼𝗅𝖾")) + + (should (equal (textsec-scripts "𝖢𝗂𝗋𝖼𝗅𝖾") + '((common) (common) (common) (common) (common) (common)))) + (should (textsec-single-script-p "𝖢𝗂𝗋𝖼𝗅𝖾")) + + (should (equal (textsec-scripts "〆切") + '((common han) (han)))) + (should (textsec-single-script-p "〆切")) + + (should (equal (textsec-scripts "ねガ") + '((hiragana) (katakana)))) + (should (textsec-single-script-p "ねガ"))) + +(ert-deftest test-minimal-scripts () + (should (equal (textsec-covering-scripts "Circle") + '(latin))) + (should (equal (textsec-covering-scripts "Сirсlе") + '(cyrillic latin))) + (should (equal (textsec-covering-scripts "〆切") + '(han)))) + +;;; textsec-tests.el ends here |