summaryrefslogtreecommitdiff
path: root/lisp/json.el
diff options
context:
space:
mode:
author Philipp Stephani <phst@google.com> 2016-10-24 21:54:51 +0200
committer Philipp Stephani <phst@google.com> 2017-01-01 13:24:14 +0100
commit 93be35e038bbb19e8d64d3c1f9d1be76a9083d09 (patch)
tree 7a9074fa83d51aa8136f57be267c83016c4f3978 /lisp/json.el
parent baa370f255d2f9d3f662fac0de98eaadd3242aa6 (diff)
download emacs-93be35e038bbb19e8d64d3c1f9d1be76a9083d09.tar.gz
Fix encoding of JSON surrogate pairs
JSON requires that such pairs be treated as UTF-16 surrogate pairs, not
individual code points; cf. Bug #24784.

* lisp/json.el (json-read-escaped-char): Fix decoding of surrogate pairs.
(json--decode-utf-16-surrogates): New defun.
* test/lisp/json-tests.el (test-json-read-string): Add test for surrogate pairs.
Diffstat (limited to 'lisp/json.el')
-rw-r--r-- lisp/json.el 15
1 files changed, 15 insertions, 0 deletions
diff --git a/lisp/json.el b/lisp/json.el
index 38f828e8fbb..b2ac356641b 100644
--- a/lisp/json.el
+++ b/lisp/json.el
@@ -363,6 +363,10 @@ representation will be parsed correctly."
;; String parsing
+(defun json--decode-utf-16-surrogates (high low)
+ "Return the code point represented by the UTF-16 surrogates HIGH and LOW."
+ (+ (lsh (- high #xD800) 10) (- low #xDC00) #x10000)) ; ((HIGH - #xD800) << 10) + (LOW - #xDC00) + #x10000
+
(defun json-read-escaped-char ()
"Read the JSON string escaped character at point."
;; Skip over the '\'
@@ -372,6 +376,17 @@ representation will be parsed correctly."
(cond
(special (cdr special))
((not (eq char ?u)) char)
+ ;; Special-case UTF-16 surrogate pairs,
+ ;; cf. https://tools.ietf.org/html/rfc7159#section-7. Note that
+ ;; this clause overlaps with the next one and therefore has to
+ ;; come first.
+ ((looking-at
+ (rx (group (any "Dd") (any "89ABab") (= 2 (any "0-9A-Fa-f")))
+ "\\u" (group (any "Dd") (any "C-Fc-f") (= 2 (any "0-9A-Fa-f")))))
+ (json-advance 10)
+ (json--decode-utf-16-surrogates
+ (string-to-number (match-string 1) 16)
+ (string-to-number (match-string 2) 16)))
((looking-at "[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]")
(let ((hex (match-string 0)))
(json-advance 4)