summaryrefslogtreecommitdiff
path: root/lisp/net/shr.el
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/net/shr.el')
-rw-r--r--lisp/net/shr.el349
1 files changed, 139 insertions, 210 deletions
diff --git a/lisp/net/shr.el b/lisp/net/shr.el
index 48590fd675a..44fb5ec6e9a 100644
--- a/lisp/net/shr.el
+++ b/lisp/net/shr.el
@@ -40,6 +40,7 @@
(require 'image)
(require 'puny)
(require 'url-cookie)
+(require 'pixel-fill)
(require 'text-property-search)
(defgroup shr nil
@@ -56,8 +57,15 @@ fit these criteria."
:version "24.1"
:type 'float)
+(defcustom shr-allowed-images nil
+ "If non-nil, only images that match this regexp are displayed.
+If nil, all URLs are allowed. Also see `shr-blocked-images'."
+ :version "29.1"
+ :type '(choice (const nil) regexp))
+
(defcustom shr-blocked-images nil
- "Images that have URLs matching this regexp will be blocked."
+ "Images that have URLs matching this regexp will be blocked.
+If nil, no images are blocked. Also see `shr-allowed-images'."
:version "24.1"
:type '(choice (const nil) regexp))
@@ -162,6 +170,10 @@ cid: URL as the argument.")
(defvar shr-put-image-function #'shr-put-image
"Function called to put image and alt string.")
+(defface shr-text '((t :inherit variable-pitch-text))
+ "Face used for rendering text."
+ :version "29.1")
+
(defface shr-strike-through '((t :strike-through t))
"Face for <s> elements."
:version "24.1")
@@ -183,6 +195,11 @@ temporarily blinks with this face."
"Face for <abbr> elements."
:version "27.1")
+(defface shr-sup
+ '((t :height 0.8))
+ "Face for <sup> and <sub> elements."
+ :version "29.1")
+
(defface shr-h1
'((t :height 1.3 :weight bold))
"Face for <h1> elements."
@@ -231,7 +248,6 @@ and other things:
(defvar shr-internal-width nil)
(defvar shr-list-mode nil)
(defvar shr-content-cache nil)
-(defvar shr-kinsoku-shorten nil)
(defvar shr-table-depth 0)
(defvar shr-stylesheet nil)
(defvar shr-base nil)
@@ -246,24 +262,23 @@ and other things:
(defvar shr-target-id nil
"Target fragment identifier anchor.")
-
-(defvar shr-map
- (let ((map (make-sparse-keymap)))
- (define-key map "a" #'shr-show-alt-text)
- (define-key map "i" #'shr-browse-image)
- (define-key map "z" #'shr-zoom-image)
- (define-key map [?\t] #'shr-next-link)
- (define-key map [?\M-\t] #'shr-previous-link)
- (define-key map [follow-link] 'mouse-face)
- (define-key map [mouse-2] #'shr-browse-url)
- (define-key map [C-down-mouse-1] #'shr-mouse-browse-url-new-window)
- (define-key map "I" #'shr-insert-image)
- (define-key map "w" #'shr-maybe-probe-and-copy-url)
- (define-key map "u" #'shr-maybe-probe-and-copy-url)
- (define-key map "v" #'shr-browse-url)
- (define-key map "O" #'shr-save-contents)
- (define-key map "\r" #'shr-browse-url)
- map))
+(defvar shr--link-targets nil)
+
+(defvar-keymap shr-map
+ "a" #'shr-show-alt-text
+ "i" #'shr-browse-image
+ "z" #'shr-zoom-image
+ "TAB" #'shr-next-link
+ "C-M-i" #'shr-previous-link
+ "<follow-link>" 'mouse-face
+ "<mouse-2>" #'shr-browse-url
+ "C-<down-mouse-1>" #'shr-mouse-browse-url-new-window
+ "I" #'shr-insert-image
+ "w" #'shr-maybe-probe-and-copy-url
+ "u" #'shr-maybe-probe-and-copy-url
+ "v" #'shr-browse-url
+ "O" #'shr-save-contents
+ "RET" #'shr-browse-url)
(defvar shr-image-map
(let ((map (copy-keymap shr-map)))
@@ -305,6 +320,18 @@ and other things:
(or (not (zerop (fringe-columns 'right)))
(not (zerop (fringe-columns 'left))))))
+(defun shr--window-width ()
+ ;; Compute the width based on the window width. We need to
+ ;; adjust the available width for when the user disables
+ ;; the fringes, which will cause the display engine usurp
+ ;; one column for the continuation glyph.
+ (if (not shr-use-fonts)
+ (- (window-body-width) 1
+ (if (shr--have-one-fringe-p)
+ 1
+ 0))
+ (pixel-fill-width)))
+
;;;###autoload
(defun shr-insert-document (dom)
"Render the parsed document DOM into the current buffer.
@@ -326,22 +353,9 @@ DOM should be a parse tree as generated by
(if (not shr-use-fonts)
shr-width
(* shr-width (frame-char-width)))
- ;; Compute the width based on the window width. We need to
- ;; adjust the available width for when the user disables
- ;; the fringes, which will cause the display engine usurp
- ;; one column for the continuation glyph.
- (if (not shr-use-fonts)
- (- (window-body-width) 1
- (if (shr--have-one-fringe-p)
- 1
- 0))
- (- (window-body-width nil t)
- (* 2 (frame-char-width))
- (if (shr--have-one-fringe-p)
- 0
- (* (frame-char-width) 2))
- 1))))
+ (shr--window-width)))
(max-specpdl-size max-specpdl-size)
+ (shr--link-targets nil)
;; `bidi-display-reordering' is supposed to be only used for
;; debugging purposes, but Shr's naïve filling algorithm
;; cannot cope with the complexity of RTL text in an LTR
@@ -365,9 +379,22 @@ DOM should be a parse tree as generated by
(shr-descend dom)
(shr-fill-lines start (point))
(shr--remove-blank-lines-at-the-end start (point))
+ (shr--set-target-ids shr--link-targets)
(when shr-warning
(message "%s" shr-warning))))
+(defun shr--set-target-ids (ids)
+ ;; If the buffer is empty, there's no point in setting targets.
+ (unless (zerop (buffer-size))
+ ;; We may have several targets in the same place (if you have
+ ;; several <span id='foo'> things after one another). So group
+ ;; them by position.
+ (dolist (group (seq-group-by #'cdr ids))
+ (let ((point (min (1- (point-max)) (car group))))
+ (put-text-property point (1+ point)
+ 'shr-target-id
+ (mapcar #'car (cdr group)))))))
+
(defun shr--remove-blank-lines-at-the-end (start end)
(save-restriction
(save-excursion
@@ -547,6 +574,12 @@ size, and full-buffer size."
(shr-insert sub)
(shr-descend sub))))
+(defun shr-image-blocked-p (url)
+ (or (and shr-blocked-images
+ (string-match shr-blocked-images url))
+ (and shr-allowed-images
+ (not (string-match shr-allowed-images url)))))
+
(defun shr-indirect-call (tag-name dom &rest args)
(let ((function (intern (concat "shr-tag-" (symbol-name tag-name)) obarray))
;; Allow other packages to override (or provide) rendering
@@ -577,7 +610,7 @@ size, and full-buffer size."
(setq shr-warning
"Not rendering the complete page because of too-deep nesting")
(when style
- (if (string-match "color\\|display\\|border-collapse" style)
+ (if (string-match-p "color\\|display\\|border-collapse" style)
(setq shr-stylesheet (nconc (shr-parse-style style)
shr-stylesheet))
(setq style nil)))
@@ -596,16 +629,8 @@ size, and full-buffer size."
(funcall function dom))
(t
(shr-generic dom)))
- (when-let* ((id (dom-attr dom 'id)))
- ;; If the element was empty, we don't have anything to put the
- ;; anchor on. So just insert a dummy character.
- (when (= start (point))
- (if (not (bolp))
- (insert ? )
- (insert ? )
- (shr-mark-fill start))
- (put-text-property (1- (point)) (point) 'display ""))
- (put-text-property (1- (point)) (point) 'shr-target-id id))
+ (when-let ((id (dom-attr dom 'id)))
+ (push (cons id (point)) shr--link-targets))
;; If style is set, then this node has set the color.
(when style
(shr-colorize-region
@@ -619,43 +644,11 @@ size, and full-buffer size."
(with-temp-buffer
(let ((shr-indentation 0)
(shr-start nil)
- (shr-internal-width (- (window-body-width nil t)
- (* 2 (frame-char-width))
- ;; Adjust the window width for when
- ;; the user disables the fringes,
- ;; which causes the display engine
- ;; to usurp one column for the
- ;; continuation glyph.
- (if (and (null shr-width)
- (not (shr--have-one-fringe-p)))
- (* (frame-char-width) 2)
- 0))))
+ (shr-internal-width (shr--window-width)))
(shr-insert text)
(shr-fill-lines (point-min) (point-max))
(buffer-string)))))
-(define-inline shr-char-breakable-p (char)
- "Return non-nil if a line can be broken before and after CHAR."
- (inline-quote (aref fill-find-break-point-function-table ,char)))
-(define-inline shr-char-nospace-p (char)
- "Return non-nil if no space is required before and after CHAR."
- (inline-quote (aref fill-nospace-between-words-table ,char)))
-
-;; KINSOKU is a Japanese word meaning a rule that should not be violated.
-;; In Emacs, it is a term used for characters, e.g. punctuation marks,
-;; parentheses, and so on, that should not be placed in the beginning
-;; of a line or the end of a line.
-(define-inline shr-char-kinsoku-bol-p (char)
- "Return non-nil if a line ought not to begin with CHAR."
- (inline-letevals (char)
- (inline-quote (and (not (eq ,char ?'))
- (aref (char-category-set ,char) ?>)))))
-(define-inline shr-char-kinsoku-eol-p (char)
- "Return non-nil if a line ought not to end with CHAR."
- (inline-quote (aref (char-category-set ,char) ?<)))
-(unless (shr-char-kinsoku-bol-p (make-char 'japanese-jisx0208 33 35))
- (load "kinsoku" nil t))
-
(defun shr-pixel-column ()
(if (not shr-use-fonts)
(current-column)
@@ -669,6 +662,7 @@ size, and full-buffer size."
(car (window-text-pixel-size nil (line-beginning-position) (point))))))
(defun shr-pixel-region ()
+ (declare (obsolete nil "29.1"))
(- (shr-pixel-column)
(save-excursion
(goto-char (mark))
@@ -711,7 +705,7 @@ size, and full-buffer size."
(goto-char (point-max)))))
(t
(let ((font-start (point)))
- (when (and (string-match "\\`[ \t\n\r]" text)
+ (when (and (string-match-p "\\`[ \t\n\r]" text)
(not (bolp))
(not (eq (char-after (1- (point))) ? )))
(insert " "))
@@ -739,7 +733,7 @@ size, and full-buffer size."
(when shr-use-fonts
(put-text-property font-start (point)
'face
- (or shr-current-font 'variable-pitch)))))))))
+ (or shr-current-font 'shr-text)))))))))
(defun shr-fill-lines (start end)
(if (<= shr-internal-width 0)
@@ -788,7 +782,7 @@ size, and full-buffer size."
(while (not (eolp))
;; We have to do some folding. First find the first
;; previous point suitable for folding.
- (if (or (not (shr-find-fill-point (line-beginning-position)))
+ (if (or (not (pixel-fill-find-fill-point (line-beginning-position)))
(= (point) start))
;; We had unbreakable text (for this width), so just go to
;; the first space and carry on.
@@ -829,84 +823,6 @@ size, and full-buffer size."
(when (looking-at " $")
(delete-region (point) (line-end-position)))))))
-(defun shr-find-fill-point (start)
- (let ((bp (point))
- (end (point))
- failed)
- (while (not (or (setq failed (<= (point) start))
- (eq (preceding-char) ? )
- (eq (following-char) ? )
- (shr-char-breakable-p (preceding-char))
- (shr-char-breakable-p (following-char))
- (and (shr-char-kinsoku-bol-p (preceding-char))
- (shr-char-breakable-p (following-char))
- (not (shr-char-kinsoku-bol-p (following-char))))
- (shr-char-kinsoku-eol-p (following-char))
- (bolp)))
- (backward-char 1))
- (if failed
- ;; There's no breakable point, so we give it up.
- (let (found)
- (goto-char bp)
- ;; Don't overflow the window edge, even if
- ;; shr-kinsoku-shorten is nil.
- (unless (or shr-kinsoku-shorten (null shr-width))
- (while (setq found (re-search-forward
- "\\(\\c>\\)\\| \\|\\c<\\|\\c|"
- (line-end-position) 'move)))
- (if (and found
- (not (match-beginning 1)))
- (goto-char (match-beginning 0)))))
- (or
- (eolp)
- ;; Don't put kinsoku-bol characters at the beginning of a line,
- ;; or kinsoku-eol characters at the end of a line.
- (cond
- ;; Don't overflow the window edge, even if shr-kinsoku-shorten
- ;; is nil.
- ((or shr-kinsoku-shorten (null shr-width))
- (while (and (not (memq (preceding-char) (list ?\C-@ ?\n ? )))
- (or (shr-char-kinsoku-eol-p (preceding-char))
- (shr-char-kinsoku-bol-p (following-char))))
- (backward-char 1))
- (when (setq failed (<= (point) start))
- ;; There's no breakable point that doesn't violate kinsoku,
- ;; so we look for the second best position.
- (while (and (progn
- (forward-char 1)
- (<= (point) end))
- (progn
- (setq bp (point))
- (shr-char-kinsoku-eol-p (following-char)))))
- (goto-char bp)))
- ((shr-char-kinsoku-eol-p (preceding-char))
- ;; Find backward the point where kinsoku-eol characters begin.
- (let ((count 4))
- (while
- (progn
- (backward-char 1)
- (and (> (setq count (1- count)) 0)
- (not (memq (preceding-char) (list ?\C-@ ?\n ? )))
- (or (shr-char-kinsoku-eol-p (preceding-char))
- (shr-char-kinsoku-bol-p (following-char)))))))
- (when (setq failed (<= (point) start))
- ;; There's no breakable point that doesn't violate kinsoku,
- ;; so we go to the second best position.
- (if (looking-at "\\(\\c<+\\)\\c<")
- (goto-char (match-end 1))
- (forward-char 1))))
- ((shr-char-kinsoku-bol-p (following-char))
- ;; Find forward the point where kinsoku-bol characters end.
- (let ((count 4))
- (while (progn
- (forward-char 1)
- (and (>= (setq count (1- count)) 0)
- (shr-char-kinsoku-bol-p (following-char))
- (shr-char-breakable-p (following-char))))))))
- (when (eq (following-char) ? )
- (forward-char 1))))
- (not failed)))
-
(defun shr-parse-base (url)
;; Always chop off anchors.
(when (string-match "#.*" url)
@@ -947,7 +863,7 @@ size, and full-buffer size."
(cond ((zerop (length url))
(nth 3 base))
((or (not base)
- (string-match "\\`[a-z]*:" url))
+ (string-match-p "\\`[a-z]*:" url))
;; Absolute or empty URI
url)
((eq (aref url 0) ?/)
@@ -984,22 +900,6 @@ size, and full-buffer size."
(looking-at " *$")))
;; We're already at a new paragraph; do nothing.
)
- ((and (not (bolp))
- (save-excursion
- (beginning-of-line)
- (looking-at " *$"))
- (save-excursion
- (forward-line -1)
- (looking-at " *$"))
- ;; Check all chars on the current line and see whether
- ;; they're all placeholders.
- (cl-loop for pos from (line-beginning-position) upto (1- (point))
- unless (get-text-property pos 'shr-target-id)
- return nil
- finally return t))
- ;; We have some invisible markers from <div id="foo"></div>;
- ;; do nothing.
- )
((and prefix
(= prefix (- (point) (line-beginning-position))))
;; Do nothing; we're at the start of a <li>.
@@ -1132,14 +1032,14 @@ the mouse click event."
(let ((param (match-string 4 data))
(payload (url-unhex-string (match-string 5 data))))
(when (and param
- (string-match "^.*\\(;[ \t]*base64\\)$" param))
+ (string-match-p "^.*\\(;[ \t]*base64\\)$" param))
(setq payload (ignore-errors
(base64-decode-string payload))))
payload)))
;; Behind display-graphic-p test.
(declare-function image-size "image.c" (spec &optional pixels frame))
-(declare-function image-animate "image" (image &optional index limit))
+(declare-function image-animate "image" (image &optional index limit position))
(defun shr-put-image (spec alt &optional flags)
"Insert image SPEC with a string ALT. Return image.
@@ -1176,13 +1076,14 @@ element is the data blob and the second element is the content-type."
(when (and (> (current-column) 0)
(> (car (image-size image t)) 400))
(insert "\n"))
- (if (eq size 'original)
- (insert-sliced-image image (or alt "*") nil 20 1)
- (insert-image image (or alt "*")))
- (put-text-property start (point) 'image-size size)
- (when (and shr-image-animate
- (cdr (image-multi-frame-p image)))
- (image-animate image nil 60)))
+ (let ((image-pos (point)))
+ (if (eq size 'original)
+ (insert-sliced-image image (or alt "*") nil 20 1)
+ (insert-image image (or alt "*")))
+ (put-text-property start (point) 'image-size size)
+ (when (and shr-image-animate
+ (cdr (image-multi-frame-p image)))
+ (image-animate image nil 60 image-pos))))
image)
(insert (or alt ""))))
@@ -1268,7 +1169,7 @@ Return a string with image data."
;; SVG images may contain references to further images that we may
;; want to block. So special-case these by parsing the XML data
;; and remove anything that looks like a blocked bit.
- (when (and shr-blocked-images
+ (when (and (or shr-allowed-images shr-blocked-images)
(eq content-type 'image/svg+xml))
(setq data
;; Note that libxml2 doesn't parse everything perfectly,
@@ -1447,8 +1348,7 @@ ones, in case fg and bg are nil."
((or (not (eq (dom-tag elem) 'image))
;; Filter out blocked elements inside the SVG image.
(not (setq url (dom-attr elem ':xlink:href)))
- (not shr-blocked-images)
- (not (string-match shr-blocked-images url)))
+ (not (shr-image-blocked-p url)))
(insert " ")
(shr-dom-print elem)))))
(insert (format "</%s>" (dom-tag dom))))
@@ -1465,12 +1365,14 @@ ones, in case fg and bg are nil."
(defun shr-tag-sup (dom)
(let ((start (point)))
(shr-generic dom)
- (put-text-property start (point) 'display '(raise 0.2))))
+ (put-text-property start (point) 'display '(raise 0.2))
+ (add-face-text-property start (point) 'shr-sup)))
(defun shr-tag-sub (dom)
(let ((start (point)))
(shr-generic dom)
- (put-text-property start (point) 'display '(raise -0.2))))
+ (put-text-property start (point) 'display '(raise -0.2))
+ (add-face-text-property start (point) 'shr-sup)))
(defun shr-tag-p (dom)
(shr-ensure-paragraph)
@@ -1532,9 +1434,7 @@ ones, in case fg and bg are nil."
(defun shr-parse-style (style)
(when style
- (save-match-data
- (when (string-match "\n" style)
- (setq style (replace-match " " t t style))))
+ (setq style (replace-regexp-in-string "\n" " " style))
(let ((plist nil))
(dolist (elem (split-string style ";"))
(when elem
@@ -1563,13 +1463,9 @@ ones, in case fg and bg are nil."
(start (point))
shr-start)
(shr-generic dom)
- (when-let* ((id (unless (dom-attr dom 'id) ; Handled by `shr-descend'.
- (dom-attr dom 'name)))) ; Obsolete since HTML5.
- ;; We have an empty element, so just insert... something.
- (when (= start (point))
- (insert ?\s)
- (put-text-property (1- (point)) (point) 'display ""))
- (put-text-property start (1+ start) 'shr-target-id id))
+ (when-let* ((id (and (not (dom-attr dom 'id)) ; Handled by `shr-descend'.
+ (dom-attr dom 'name)))) ; Obsolete since HTML5.
+ (push (cons id (point)) shr--link-targets))
(when url
(shr-urlify (or shr-start start) (shr-expand-url url) title))))
@@ -1592,7 +1488,7 @@ ones, in case fg and bg are nil."
(let ((start (point))
url multimedia image)
(when-let* ((type (dom-attr dom 'type)))
- (when (string-match "\\`image/svg" type)
+ (when (string-match-p "\\`image/svg" type)
(setq url (dom-attr dom 'data)
image t)))
(dolist (child (dom-non-text-children dom))
@@ -1628,6 +1524,14 @@ url if no type is specified. The value should be a float in the range 0.0 to
:version "24.4"
:type '(alist :key-type regexp :value-type float))
+(defcustom shr-use-xwidgets-for-media nil
+ "If non-nil, use xwidgets to display video and audio elements.
+This also depends on Emacs being built with xwidgets capability.
+Note that this is experimental, and may lead to instability on
+some platforms."
+ :type 'boolean
+ :version "29.1")
+
(defun shr--get-media-pref (elem)
"Determine the preference for ELEM.
The preference is a float determined from `shr-prefer-media-type'."
@@ -1664,16 +1568,39 @@ The preference is a float determined from `shr-prefer-media-type'."
pref (cdr ret)))))))))
(cons url pref))
+(declare-function xwidget-webkit-execute-script "xwidget.c"
+ (xwidget script &optional callback))
+
(defun shr-tag-video (dom)
(let ((image (dom-attr dom 'poster))
(url (dom-attr dom 'src))
(start (point)))
(unless url
(setq url (car (shr--extract-best-source dom))))
- (if (> (length image) 0)
- (shr-indirect-call 'img nil image)
- (shr-insert " [video] "))
- (shr-urlify start (shr-expand-url url))))
+ (if (and shr-use-xwidgets-for-media
+ (fboundp 'make-xwidget))
+ ;; Play the video.
+ (progn
+ (require 'xwidget)
+ (let ((widget (make-xwidget
+ 'webkit
+ "Video"
+ (truncate (* (window-pixel-width) 0.8))
+ (truncate (* (window-pixel-width) 0.8 0.75)))))
+ (insert
+ (propertize
+ " [video] "
+ 'display (list 'xwidget :xwidget widget)))
+ (xwidget-webkit-execute-script
+ widget (format "document.body.innerHTML = %S;"
+ (format
+ "<style>body { margin: 0px; }</style><div style='background: black; height: 100%%; display: flex; align-items: center; justify-content: center;'><video autoplay loop muted controls style='max-width: 100%%; max-height: 100%%;'><source src=%S type='video/mp4'></source></video></div>"
+ url)))))
+ ;; No xwidgets.
+ (if (> (length image) 0)
+ (shr-indirect-call 'img nil image)
+ (shr-insert " [video] "))
+ (shr-urlify start (shr-expand-url url)))))
(defun shr-tag-audio (dom)
(let ((url (dom-attr dom 'src))
@@ -1723,8 +1650,7 @@ The preference is a float determined from `shr-prefer-media-type'."
(funcall shr-put-image-function image alt
(list :width width :height height)))))
((or shr-inhibit-images
- (and shr-blocked-images
- (string-match shr-blocked-images url)))
+ (shr-image-blocked-p url))
(setq shr-start (point))
(shr-insert alt))
((and (not shr-ignore-cache)
@@ -2036,7 +1962,8 @@ BASE is the URL of the HTML being rendered."
(setq dom (or (dom-child-by-tag dom 'tbody) dom))
(let* ((shr-inhibit-images t)
(shr-table-depth (1+ shr-table-depth))
- (shr-kinsoku-shorten t)
+ ;; Fill hard in CJK languages.
+ (pixel-fill-respect-kinsoku nil)
;; Find all suggested widths.
(columns (shr-column-specs dom))
;; Compute how many pixels wide each TD should be.
@@ -2530,9 +2457,10 @@ flags that control whether to collect or render objects."
(style (dom-attr dom 'style))
(shr-stylesheet shr-stylesheet)
(max-width 0)
+ (shr--link-targets nil)
natural-width)
(when style
- (setq style (and (string-match "color" style)
+ (setq style (and (string-search "color" style)
(shr-parse-style style))))
(when bgcolor
(setq style (nconc (list (cons 'background-color bgcolor))
@@ -2571,6 +2499,7 @@ flags that control whether to collect or render objects."
(end-of-line)
(point)))
(goto-char (point-min))
+ (shr--set-target-ids shr--link-targets)
(list max-width
natural-width
(count-lines (point-min) (point-max))