summary refs log tree commit diff
path: root/lisp/international/characters.el
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/international/characters.el')
-rw-r--r-- lisp/international/characters.el 220
1 files changed, 134 insertions, 86 deletions
diff --git a/lisp/international/characters.el b/lisp/international/characters.el
index 97bf31acfc3..a2156ee01aa 100644
--- a/lisp/international/characters.el
+++ b/lisp/international/characters.el
@@ -116,11 +116,11 @@ Base characters (Unicode General Category L,N,P,S,Zs)")
Combining diacritic or mark (Unicode General Category M)")
;; bidi types
-(define-category ?R "Right-to-left (strong)
+(define-category ?R "Strong R2L
Characters with \"strong\" right-to-left directionality, i.e.
with R, AL, RLE, or RLO Unicode bidi character type.")
-(define-category ?L "Left-to-right (strong)
+(define-category ?L "Strong L2R
Characters with \"strong\" left-to-right directionality, i.e.
with L, LRE, or LRO Unicode bidi character type.")
@@ -214,6 +214,9 @@ with L, LRE, or LRO Unicode bidi character type.")
(modify-category-entry '(#x31F0 . #x31FF) ?K)
(modify-category-entry '(#x30A0 . #x30FA) ?\|)
(modify-category-entry #x30FF ?\|)
+(modify-category-entry '(#x1AFF0 . #x1B000) ?K)
+(modify-category-entry '(#x1B120 . #x1B122) ?K)
+(modify-category-entry '(#x1B164 . #x1B167) ?K)
;; Hiragana block
(modify-category-entry '(#x3040 . #x309F) ?H)
@@ -221,8 +224,12 @@ with L, LRE, or LRO Unicode bidi character type.")
(modify-category-entry #x309F ?\|)
(modify-category-entry #x30A0 ?H)
(modify-category-entry #x30FC ?H)
+(modify-category-entry #x1B001 ?H)
+(modify-category-entry #x1B11F ?H)
+(modify-category-entry '(#x1B150 . #x1B152) ?H)
+(modify-category-entry '(#x1B002 . #x1B11E) ?H) ; Hentaigana
-(modify-category-entry '(#x1B000 . #x1B1FF) ?j)
+(modify-category-entry '(#x1AFF0 . #x1B1FF) ?j)
;; JISX0208
@@ -295,7 +302,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(map-charset-chars #'modify-category-entry (car charsets) ?b)
(setq charsets (cdr charsets))))
(modify-category-entry '(#x600 . #x6ff) ?b)
-(modify-category-entry '(#x8a0 . #x8ff) ?b)
+(modify-category-entry '(#x870 . #x8ff) ?b)
(modify-category-entry '(#xfb50 . #xfdff) ?b)
(modify-category-entry '(#xfe70 . #xfefe) ?b)
@@ -306,7 +313,9 @@ with L, LRE, or LRO Unicode bidi character type.")
;; Ethiopic character set
(modify-category-entry '(#x1200 . #x1399) ?e)
-(modify-category-entry '(#x2d80 . #x2dde) ?e)
+(modify-category-entry '(#X2D80 . #X2DDE) ?e)
+(modify-category-entry '(#xAB01 . #xAB2E) ?e)
+(modify-category-entry '(#x1E7E0 . #x1E7FE) ?e)
(let ((chars '(?፡ ?። ?፣ ?፤ ?፥ ?፦ ?፧ ?፨)))
(while chars
(modify-syntax-entry (car chars) ".")
@@ -580,6 +589,12 @@ with L, LRE, or LRO Unicode bidi character type.")
(modify-category-entry c ?l)
(setq c (1+ c)))
+ ;; Latin Extended-G
+ (setq c #x1DF00)
+ (while (<= c #x1DFFF)
+ (modify-category-entry c ?l)
+ (setq c (1+ c)))
+
;; Greek
(modify-category-entry '(#x0370 . #x03FF) ?g)
@@ -1016,7 +1031,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x0D41 . #x0D44)
(#x0D4D . #x0D4D)
(#x0D62 . #x0D63)
- (#x0D81 . #x0D81)
+ (#x0D81 . #x0D81)
(#x0DCA . #x0DCA)
(#x0DD2 . #x0DD6)
(#x0E31 . #x0E31)
@@ -1045,7 +1060,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x1085 . #x1086)
(#x108D . #x108D)
(#x109D . #x109D)
- (#x1160 . #x11FF)
+ (#x1160 . #x11FF)
(#x135D . #x135F)
(#x1712 . #x1714)
(#x1732 . #x1734)
@@ -1111,7 +1126,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#xA806 . #xA806)
(#xA80B . #xA80B)
(#xA825 . #xA826)
- (#xA82C . #xA82C)
+ (#xA82C . #xA82C)
(#xA8C4 . #xA8C5)
(#xA8E0 . #xA8F1)
(#xA926 . #xA92D)
@@ -1136,7 +1151,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#xABE5 . #xABE5)
(#xABE8 . #xABE8)
(#xABED . #xABED)
- (#xD7B0 . #xD7FB)
+ (#xD7B0 . #xD7FB)
(#xFB1E . #xFB1E)
(#xFE00 . #xFE0F)
(#xFE20 . #xFE2F)
@@ -1148,7 +1163,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x10A01 . #x10A0F)
(#x10A38 . #x10A3F)
(#x10AE5 . #x10AE6)
- (#x10EAB . #x10EAC)
+ (#x10EAB . #x10EAC)
(#x11001 . #x11001)
(#x11038 . #x11046)
(#x1107F . #x11081)
@@ -1162,7 +1177,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x11180 . #x11181)
(#x111B6 . #x111BE)
(#x111CA . #x111CC)
- (#x111CF . #x111CF)
+ (#x111CF . #x111CF)
(#x1122F . #x11231)
(#x11234 . #x11234)
(#x11236 . #x11237)
@@ -1194,9 +1209,9 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x1171D . #x1171F)
(#x11722 . #x11725)
(#x11727 . #x1172B)
- (#x1193B . #x1193C)
- (#x1193E . #x1193E)
- (#x11943 . #x11943)
+ (#x1193B . #x1193C)
+ (#x1193E . #x1193E)
+ (#x11943 . #x11943)
(#x11C30 . #x11C36)
(#x11C38 . #x11C3D)
(#x11C92 . #x11CA7)
@@ -1206,7 +1221,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x16AF0 . #x16AF4)
(#x16B30 . #x16B36)
(#x16F8F . #x16F92)
- (#x16FE4 . #x16FE4)
+ (#x16FE4 . #x16FE4)
(#x1BC9D . #x1BC9E)
(#x1BCA0 . #x1BCA3)
(#x1D167 . #x1D169)
@@ -1280,18 +1295,19 @@ with L, LRE, or LRO Unicode bidi character type.")
(#xFF01 . #xFF60)
(#xFFE0 . #xFFE6)
(#x16FE0 . #x16FE4)
- (#x16FF0 . #x16FF1)
+ (#x16FF0 . #x16FF1)
(#x17000 . #x187F7)
(#x18800 . #x18AFF)
- (#x18B00 . #x18CD5)
+ (#x18B00 . #x18CD5)
+ (#x1AFF0 . #x1AFFF)
(#x1B000 . #x1B152)
- (#x1B164 . #x1B167)
- (#x1B170 . #x1B2FB)
+ (#x1B164 . #x1B167)
+ (#x1B170 . #x1B2FB)
(#x1F004 . #x1F004)
(#x1F0CF . #x1F0CF)
(#x1F18E . #x1F18E)
(#x1F191 . #x1F19A)
- (#x1F1AD . #x1F1AD)
+ (#x1F1AD . #x1F1AD)
(#x1F200 . #x1F320)
(#x1F32D . #x1F335)
(#x1F337 . #x1F37C)
@@ -1316,27 +1332,26 @@ with L, LRE, or LRO Unicode bidi character type.")
(#x1F680 . #x1F6C5)
(#x1F6CC . #x1F6CC)
(#x1F6D0 . #x1F6D2)
- (#x1F6D5 . #x1F6D7)
+ (#x1F6D5 . #x1F6D7)
+ (#x1F6DD . #x1F6DF)
(#x1F6EB . #x1F6EC)
(#x1F6F4 . #x1F6FC)
- (#x1F7E0 . #x1F7EB)
+ (#x1F7E0 . #x1F7F0)
(#x1F90C . #x1F93A)
- (#x1F93C . #x1F945)
- (#x1F947 . #x1F978)
- (#x1F97A . #x1F9CB)
- (#x1F9A5 . #x1F9AA)
- (#x1F9AE . #x1F9CA)
- (#x1F9CD . #x1F9FF)
- (#x1FA00 . #x1FA53)
- (#x1FA60 . #x1FA6D)
- (#x1FA70 . #x1FA74)
- (#x1FA78 . #x1FA7A)
- (#x1FA80 . #x1FA86)
- (#x1FA90 . #x1FAA8)
- (#x1FAB0 . #x1FAB6)
- (#x1FAC0 . #x1FAC2)
- (#x1FAD0 . #x1FAD6)
- (#x1FB00 . #x1FB92)
+ (#x1F93C . #x1F945)
+ (#x1F947 . #x1F9FF)
+ (#x1FA00 . #x1FA53)
+ (#x1FA60 . #x1FA6D)
+ (#x1FA70 . #x1FA74)
+ (#x1FA78 . #x1FA7C)
+ (#x1FA80 . #x1FA86)
+ (#x1FA90 . #x1FAAC)
+ (#x1FAB0 . #x1FABA)
+ (#x1FAC0 . #x1FAC5)
+ (#x1FAD0 . #x1FAD9)
+ (#x1FAE0 . #x1FAE7)
+ (#x1FAF0 . #x1FAF6)
+ (#x1FB00 . #x1FB92)
(#x20000 . #x2FFFF)
(#x30000 . #x3FFFF))))
(dolist (elt l)
@@ -1401,7 +1416,7 @@ with L, LRE, or LRO Unicode bidi character type.")
(defun use-default-char-width-table ()
"Internal use only.
-Setup char-width-table appropriate for non-CJK language environment."
+Setup `char-width-table' appropriate for non-CJK language environment."
(while (char-table-parent char-width-table)
(setq char-width-table (char-table-parent char-width-table))))
@@ -1413,8 +1428,12 @@ Setup char-width-table appropriate for non-CJK language environment."
(if dump-mode
;; While dumping, we can't use require, and international is not
;; in load-path.
- (load "international/charscript")
- (require 'charscript))
+ (progn
+ (load "international/charscript")
+ (load "international/emoji-zwj"))
+ (progn
+ (require 'charscript)
+ (require 'emoji-zwj)))
(map-charset-chars
(lambda (range _ignore)
@@ -1474,6 +1493,9 @@ Setup char-width-table appropriate for non-CJK language environment."
(aset char-acronym-table #x202D "LRO") ; LEFT-TO-RIGHT OVERRIDE
(aset char-acronym-table #x202E "RLO") ; RIGHT-TO-LEFT OVERRIDE
(aset char-acronym-table #x2060 "WJ") ; WORD JOINER
+(aset char-acronym-table #x2066 "LRI") ; LEFT-TO-RIGHT ISOLATE
+(aset char-acronym-table #x2067 "RLI") ; RIGHT-TO-LEFT ISOLATE
+(aset char-acronym-table #x2069 "PDI") ; POP DIRECTIONAL ISOLATE
(aset char-acronym-table #x206A "ISS") ; INHIBIT SYMMETRIC SWAPPING
(aset char-acronym-table #x206B "ASS") ; ACTIVATE SYMMETRIC SWAPPING
(aset char-acronym-table #x206C "IAFS") ; INHIBIT ARABIC FORM SHAPING
@@ -1498,18 +1520,32 @@ Setup char-width-table appropriate for non-CJK language environment."
(aset char-acronym-table (+ #xE0021 i) (format " %c TAG" (+ 33 i))))
(aset char-acronym-table #xE007F "->|TAG") ; CANCEL TAG
+;; We can't use the \N{name} things here, because this file is used
+;; too early in the build process.
+(defvar glyphless--bidi-control-characters
+ '(#x202a ; ?\N{left-to-right embedding}
+ #x202b ; ?\N{right-to-left embedding}
+ #x202d ; ?\N{left-to-right override}
+ #x202e ; ?\N{right-to-left override}
+ #x2066 ; ?\N{left-to-right isolate}
+ #x2067 ; ?\N{right-to-left isolate}
+ #x2068 ; ?\N{first strong isolate}
+ #x202c ; ?\N{pop directional formatting}
+ #x2069)) ; ?\N{pop directional isolate}
+
(defun update-glyphless-char-display (&optional variable value)
"Make the setting of `glyphless-char-display-control' take effect.
This function updates the char-table `glyphless-char-display',
and is intended to be used in the `:set' attribute of the
option `glyphless-char-display-control'."
- (when value
+ (when variable
(set-default variable value))
(dolist (elt value)
(let ((target (car elt))
(method (cdr elt)))
- (or (memq method '(zero-width thin-space empty-box acronym hex-code))
- (error "Invalid glyphless character display method: %s" method))
+ (unless (memq method '( zero-width thin-space empty-box
+ acronym hex-code bidi-control))
+ (error "Invalid glyphless character display method: %s" method))
(cond ((eq target 'c0-control)
(glyphless-set-char-table-range glyphless-char-display
#x00 #x1F method)
@@ -1521,24 +1557,32 @@ option `glyphless-char-display'."
((eq target 'c1-control)
(glyphless-set-char-table-range glyphless-char-display
#x80 #x9F method))
- ((eq target 'format-control)
+ ((eq target 'variation-selectors)
+ (glyphless-set-char-table-range glyphless-char-display
+ #xFE00 #xFE0F method))
+ ((or (eq target 'format-control)
+ (eq target 'bidi-control))
(when unicode-category-table
(map-char-table
(lambda (char category)
- (if (eq category 'Cf)
- (let ((this-method method)
- from to)
- (if (consp char)
- (setq from (car char) to (cdr char))
- (setq from char to char))
- (while (<= from to)
- (when (/= from #xAD)
- (if (eq method 'acronym)
- (setq this-method
- (aref char-acronym-table from)))
+ (when (eq category 'Cf)
+ (let ((this-method method)
+ from to)
+ (if (consp char)
+ (setq from (car char) to (cdr char))
+ (setq from char to char))
+ (while (<= from to)
+ (when (/= from #xAD)
+ (when (eq method 'acronym)
+ (setq this-method
+ (or (aref char-acronym-table from)
+ "UNK")))
+ (when (or (eq target 'format-control)
+ (memq from
+ glyphless--bidi-control-characters))
(set-char-table-range glyphless-char-display
- from this-method))
- (setq from (1+ from))))))
+ from this-method)))
+ (setq from (1+ from))))))
unicode-category-table)))
((eq target 'no-font)
(set-char-table-extra-slot glyphless-char-display 0 method))
@@ -1554,8 +1598,22 @@ option `glyphless-char-display'."
(set-char-table-range chartable (cons from to) method)))
;;; Control of displaying glyphless characters.
+(define-widget 'glyphless-char-display-method 'lazy
+ "Display method for glyphless characters."
+ :group 'mule
+ :format "%v"
+ :value 'thin-space
+ :type
+ '(choice
+ (const :tag "Don't display" zero-width)
+ (const :tag "Display as thin space" thin-space)
+ (const :tag "Display as empty box" empty-box)
+ (const :tag "Display acronym" acronym)
+ (const :tag "Display hex code in a box" hex-code)))
+
(defcustom glyphless-char-display-control
'((format-control . thin-space)
+ (variation-selectors . thin-space)
(no-font . hex-code))
"List of directives to control display of glyphless characters.
@@ -1571,9 +1629,17 @@ GROUP must be one of these symbols:
such as U+200C (ZWNJ), U+200E (LRM), but
excluding characters that have graphic images,
such as U+00AD (SHY).
- `no-font': characters for which no suitable font is found.
- For character terminals, characters that cannot
- be encoded by `terminal-coding-system'.
+ `bidi-control': A subset of `format-control', but only characters
+ that are relevant for bidirectional formatting control,
+ like U+2069 (PDI) and U+202B (RLE).
+ `variation-selectors':
+ Characters in the range U+FE00..U+FE0F, used for
+ selecting alternate glyph presentations, such as
+ Emoji vs Text presentation, of the preceding
+ character(s).
+ `no-font': For GUI frames, characters for which no suitable
+ font is found; for text-mode frames, characters
+ that cannot be encoded by `terminal-coding-system'.
METHOD must be one of these symbols:
`zero-width': don't display.
@@ -1588,33 +1654,15 @@ Do not set its value directly from Lisp; the value takes effect
only via a custom `:set'
function (`update-glyphless-char-display'), which updates
`glyphless-char-display'."
- :version "24.1"
+ :version "28.1"
:type '(alist :key-type (symbol :tag "Character Group")
:value-type (symbol :tag "Display Method"))
- :options '((c0-control
- (choice (const :tag "Don't display" zero-width)
- (const :tag "Display as thin space" thin-space)
- (const :tag "Display as empty box" empty-box)
- (const :tag "Display acronym" acronym)
- (const :tag "Display hex code in a box" hex-code)))
- (c1-control
- (choice (const :tag "Don't display" zero-width)
- (const :tag "Display as thin space" thin-space)
- (const :tag "Display as empty box" empty-box)
- (const :tag "Display acronym" acronym)
- (const :tag "Display hex code in a box" hex-code)))
- (format-control
- (choice (const :tag "Don't display" zero-width)
- (const :tag "Display as thin space" thin-space)
- (const :tag "Display as empty box" empty-box)
- (const :tag "Display acronym" acronym)
- (const :tag "Display hex code in a box" hex-code)))
- (no-font
- (choice (const :tag "Don't display" zero-width)
- (const :tag "Display as thin space" thin-space)
- (const :tag "Display as empty box" empty-box)
- (const :tag "Display acronym" acronym)
- (const :tag "Display hex code in a box" hex-code))))
+ :options '((c0-control glyphless-char-display-method)
+ (c1-control glyphless-char-display-method)
+ (format-control glyphless-char-display-method)
+ (bidi-control glyphless-char-display-method)
+ (variation-selectors glyphless-char-display-method)
+ (no-font (glyphless-char-display-method :value hex-code)))
:set 'update-glyphless-char-display
:group 'display)