summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJim Porter <jporterbugs@gmail.com>2024-03-17 12:01:59 -0700
committerJim Porter <jporterbugs@gmail.com>2024-03-23 10:17:06 -0700
commit72972118e6f5831f200108cd7b80bf86538c265e (patch)
treee096274d388abcf57ffff132f13566b02fdc5578
parentaf1e36d0c66350113869df9e840e5f21b750ce9d (diff)
downloademacs-72972118e6f5831f200108cd7b80bf86538c265e.tar.gz
Allow toggling "readable" mode in EWW
Additionally, add an option to prevent adding a new history entry for each call of 'eww-readable' (bug#68254). * lisp/net/eww.el (eww-retrieve): * lisp/net/eww.el (eww-readable-adds-to-history): New option. (eww-retrieve): Make sure we call CALLBACK in all configurations. (eww-render): Simplify how to pass encoding. (eww--parse-html-region, eww-display-document): New functions, extracted from... (eww-display-html): ... here. (eww-document-base): New function. (eww-readable): Toggle "readable" mode interactively, like with a minor mode. Consult 'eww-readable-adds-to-history'. (eww-reload): Use 'eshell-display-document'. * test/lisp/net/eww-tests.el (eww-test--with-mock-retrieve): Fix indent. (eww-test/display/html, eww-test/readable/toggle-display): New tests. * doc/misc/eww.texi (Basics): Describe the new behavior. * etc/NEWS: Announce this change.
-rw-r--r--doc/misc/eww.texi5
-rw-r--r--etc/NEWS12
-rw-r--r--lisp/net/eww.el127
-rw-r--r--test/lisp/net/eww-tests.el57
4 files changed, 155 insertions, 46 deletions
diff --git a/doc/misc/eww.texi b/doc/misc/eww.texi
index d31fcf1802b..522034c874d 100644
--- a/doc/misc/eww.texi
+++ b/doc/misc/eww.texi
@@ -146,6 +146,11 @@ a new tab is created on the frame tab bar.
which part of the document contains the ``readable'' text, and will
only display this part. This usually gets rid of menus and the like.
+ When called interactively, this command toggles the display of the
+readable parts. With a positive prefix argument, this command always
+displays the readable parts, and with a zero or negative prefix, it
+always displays the full page.
+
@findex eww-toggle-fonts
@vindex shr-use-fonts
@kindex F
diff --git a/etc/NEWS b/etc/NEWS
index e9cb455aa40..30eaaf40385 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -1066,6 +1066,18 @@ entries newer than the current page. To change the behavior when
browsing from "historical" pages, you can customize
'eww-before-browse-history-function'.
++++
+*** 'eww-readable' now toggles display of the readable parts of a web page.
+When called interactively, 'eww-readable' toggles whether to display
+only the readable parts of a page or the full page. With a positive
+prefix argument, it always displays the readable parts, and with a zero
+or negative prefix, it always displays the full page.
+
+---
+*** New option 'eww-readable-adds-to-history'.
+When non-nil (the default), calling 'eww-readable' adds a new entry to
+the EWW page history.
+
** go-ts-mode
+++
diff --git a/lisp/net/eww.el b/lisp/net/eww.el
index 54847bdf396..54b65d35164 100644
--- a/lisp/net/eww.el
+++ b/lisp/net/eww.el
@@ -275,6 +275,11 @@ parameter, and should return the (possibly) transformed URL."
:type '(repeat function)
:version "29.1")
+(defcustom eww-readable-adds-to-history t
+ "If non-nil, calling `eww-readable' adds a new entry to the history."
+ :type 'boolean
+ :version "30.1")
+
(defface eww-form-submit
'((((type x w32 ns haiku pgtk android) (class color)) ; Like default mode line
:box (:line-width 2 :style released-button)
@@ -464,11 +469,11 @@ For more information, see Info node `(eww) Top'."
(defun eww-retrieve (url callback cbargs)
(cond
((null eww-retrieve-command)
- (url-retrieve url #'eww-render cbargs))
+ (url-retrieve url callback cbargs))
((eq eww-retrieve-command 'sync)
(let ((data-buffer (url-retrieve-synchronously url)))
(with-current-buffer data-buffer
- (apply #'eww-render nil cbargs))))
+ (apply callback nil cbargs))))
(t
(let ((buffer (generate-new-buffer " *eww retrieve*"))
(error-buffer (generate-new-buffer " *eww error*")))
@@ -673,9 +678,9 @@ The renaming scheme is performed in accordance with
(insert (format "<a href=%S>Direct link to the document</a>"
url))
(goto-char (point-min))
- (eww-display-html charset url nil point buffer encode))
+ (eww-display-html (or encode charset) url nil point buffer))
((eww-html-p (car content-type))
- (eww-display-html charset url nil point buffer encode))
+ (eww-display-html (or encode charset) url nil point buffer))
((equal (car content-type) "application/pdf")
(eww-display-pdf))
((string-match-p "\\`image/" (car content-type))
@@ -726,34 +731,40 @@ The renaming scheme is performed in accordance with
(declare-function libxml-parse-html-region "xml.c"
(start end &optional base-url discard-comments))
-(defun eww-display-html (charset url &optional document point buffer encode)
+(defun eww--parse-html-region (start end &optional coding-system)
+ "Parse the HTML between START and END, returning the DOM as an S-expression.
+Use CODING-SYSTEM to decode the region; if nil, decode as UTF-8.
+
+This replaces the region with the preprocessed HTML."
+ (setq coding-system (or coding-system 'utf-8))
+ (with-restriction start end
+ (condition-case nil
+ (decode-coding-region (point-min) (point-max) coding-system)
+ (coding-system-error nil))
+ ;; Remove CRLF and replace NUL with &#0; before parsing.
+ (while (re-search-forward "\\(\r$\\)\\|\0" nil t)
+ (replace-match (if (match-beginning 1) "" "&#0;") t t))
+ (eww--preprocess-html (point-min) (point-max))
+ (libxml-parse-html-region (point-min) (point-max))))
+
+(defsubst eww-document-base (url dom)
+ `(base ((href . ,url)) ,dom))
+
+(defun eww-display-document (document &optional point buffer)
(unless (fboundp 'libxml-parse-html-region)
(error "This function requires Emacs to be compiled with libxml2"))
+ (setq buffer (or buffer (current-buffer)))
(unless (buffer-live-p buffer)
(error "Buffer %s doesn't exist" buffer))
;; There should be a better way to abort loading images
;; asynchronously.
(setq url-queue nil)
- (let ((document
- (or document
- (list
- 'base (list (cons 'href url))
- (progn
- (setq encode (or encode charset 'utf-8))
- (condition-case nil
- (decode-coding-region (point) (point-max) encode)
- (coding-system-error nil))
- (save-excursion
- ;; Remove CRLF and replace NUL with &#0; before parsing.
- (while (re-search-forward "\\(\r$\\)\\|\0" nil t)
- (replace-match (if (match-beginning 1) "" "&#0;") t t)))
- (eww--preprocess-html (point) (point-max))
- (libxml-parse-html-region (point) (point-max))))))
- (source (and (null document)
- (buffer-substring (point) (point-max)))))
+ (let ((url (when (eq (car document) 'base)
+ (alist-get 'href (cadr document)))))
+ (unless url
+ (error "Document is missing base URL"))
(with-current-buffer buffer
(setq bidi-paragraph-direction nil)
- (plist-put eww-data :source source)
(plist-put eww-data :dom document)
(let ((inhibit-read-only t)
(inhibit-modification-hooks t)
@@ -794,6 +805,16 @@ The renaming scheme is performed in accordance with
(forward-line 1)))))
(eww-size-text-inputs))))
+(defun eww-display-html (charset url &optional document point buffer)
+ (let ((source (buffer-substring (point) (point-max))))
+ (with-current-buffer buffer
+ (plist-put eww-data :source source)))
+ (eww-display-document
+ (or document
+ (eww-document-base
+ url (eww--parse-html-region (point) (point-max) charset)))
+ point buffer))
+
(defun eww-handle-link (dom)
(let* ((rel (dom-attr dom 'rel))
(href (dom-attr dom 'href))
@@ -1055,30 +1076,47 @@ The renaming scheme is performed in accordance with
"automatic"
bidi-paragraph-direction)))
-(defun eww-readable ()
- "View the main \"readable\" parts of the current web page.
+(defun eww-readable (&optional arg)
+ "Toggle display of only the main \"readable\" parts of the current web page.
This command uses heuristics to find the parts of the web page that
-contains the main textual portion, leaving out navigation menus and
-the like."
- (interactive nil eww-mode)
+contain the main textual portion, leaving out navigation menus and the
+like.
+
+If called interactively, toggle the display of the readable parts. If
+the prefix argument is positive, display the readable parts, and if it
+is zero or negative, display the full page.
+
+If called from Lisp, toggle the display of the readable parts if ARG is
+`toggle'. Display the readable parts if ARG is nil, omitted, or is a
+positive number. Display the full page if ARG is a negative number.
+
+When `eww-readable-adds-to-history' is non-nil, calling this function
+adds a new entry to `eww-history'."
+ (interactive (list (if current-prefix-arg
+ (prefix-numeric-value current-prefix-arg)
+ 'toggle))
+ eww-mode)
(let* ((old-data eww-data)
- (dom (with-temp-buffer
+ (make-readable (cond
+ ((eq arg 'toggle)
+ (not (plist-get old-data :readable)))
+ ((and (numberp arg) (< arg 1))
+ nil)
+ (t t)))
+ (dom (with-temp-buffer
(insert (plist-get old-data :source))
- (condition-case nil
- (decode-coding-region (point-min) (point-max) 'utf-8)
- (coding-system-error nil))
- (eww--preprocess-html (point-min) (point-max))
- (libxml-parse-html-region (point-min) (point-max))))
+ (eww--parse-html-region (point-min) (point-max))))
(base (plist-get eww-data :url)))
- (eww-score-readability dom)
- (eww-save-history)
- (eww--before-browse)
- (eww-display-html nil nil
- (list 'base (list (cons 'href base))
- (eww-highest-readability dom))
- nil (current-buffer))
- (dolist (elem '(:source :url :title :next :previous :up :peer))
- (plist-put eww-data elem (plist-get old-data elem)))
+ (when make-readable
+ (eww-score-readability dom)
+ (setq dom (eww-highest-readability dom)))
+ (when eww-readable-adds-to-history
+ (eww-save-history)
+ (eww--before-browse)
+ (dolist (elem '(:source :url :title :next :previous :up :peer))
+ (plist-put eww-data elem (plist-get old-data elem))))
+ (eww-display-document (eww-document-base base dom))
+ (plist-put eww-data :readable make-readable)
(eww--after-page-change)))
(defun eww-score-readability (node)
@@ -1398,8 +1436,7 @@ just re-display the HTML already fetched."
(if local
(if (null (plist-get eww-data :dom))
(error "No current HTML data")
- (eww-display-html 'utf-8 url (plist-get eww-data :dom)
- (point) (current-buffer)))
+ (eww-display-document (plist-get eww-data :dom) (point)))
(let ((parsed (url-generic-parse-url url)))
(if (equal (url-type parsed) "file")
;; Use Tramp instead of url.el for files (since url.el
diff --git a/test/lisp/net/eww-tests.el b/test/lisp/net/eww-tests.el
index bd00893d503..a09e0a4f279 100644
--- a/test/lisp/net/eww-tests.el
+++ b/test/lisp/net/eww-tests.el
@@ -33,7 +33,7 @@ body.")
"Evaluate BODY with a mock implementation of `eww-retrieve'.
This avoids network requests during our tests. Additionally, prepare a
temporary EWW buffer for our tests."
- (declare (indent 1))
+ (declare (indent 0))
`(cl-letf (((symbol-function 'eww-retrieve)
(lambda (url callback args)
(with-temp-buffer
@@ -48,6 +48,24 @@ temporary EWW buffer for our tests."
;;; Tests:
+(ert-deftest eww-test/display/html ()
+ "Test displaying a simple HTML page."
+ (eww-test--with-mock-retrieve
+ (let ((eww-test--response-function
+ (lambda (url)
+ (concat "Content-Type: text/html\n\n"
+ (format "<html><body><h1>Hello</h1>%s</body></html>"
+ url)))))
+ (eww "example.invalid")
+ ;; Check that the buffer contains the rendered HTML.
+ (should (equal (buffer-string) "Hello\n\n\nhttp://example.invalid/\n"))
+ (should (equal (get-text-property (point-min) 'face)
+ '(shr-text shr-h1)))
+ ;; Check that the DOM includes the `base'.
+ (should (equal (pcase (plist-get eww-data :dom)
+ (`(base ((href . ,url)) ,_) url))
+ "http://example.invalid/")))))
+
(ert-deftest eww-test/history/new-page ()
"Test that when visiting a new page, the previous one goes into the history."
(eww-test--with-mock-retrieve
@@ -176,5 +194,42 @@ This sets `eww-before-browse-history-function' to
"http://one.invalid/")))
(should (= eww-history-position 0)))))
+(ert-deftest eww-test/readable/toggle-display ()
+ "Test toggling the display of the \"readable\" parts of a web page."
+ (eww-test--with-mock-retrieve
+ (let* ((shr-width most-positive-fixnum)
+ (shr-use-fonts nil)
+ (words (string-join
+ (make-list
+ 20 "All work and no play makes Jack a dull boy.")
+ " "))
+ (eww-test--response-function
+ (lambda (_url)
+ (concat "Content-Type: text/html\n\n"
+ "<html><body>"
+ "<a>This is an uninteresting sentence.</a>"
+ "<div>"
+ words
+ "</div>"
+ "</body></html>"))))
+ (eww "example.invalid")
+ ;; Make sure EWW renders the whole document.
+ (should-not (plist-get eww-data :readable))
+ (should (string-prefix-p
+ "This is an uninteresting sentence."
+ (buffer-substring-no-properties (point-min) (point-max))))
+ (eww-readable 'toggle)
+ ;; Now, EWW should render just the "readable" parts.
+ (should (plist-get eww-data :readable))
+ (should (string-match-p
+ (concat "\\`" (regexp-quote words) "\n*\\'")
+ (buffer-substring-no-properties (point-min) (point-max))))
+ (eww-readable 'toggle)
+ ;; Finally, EWW should render the whole document again.
+ (should-not (plist-get eww-data :readable))
+ (should (string-prefix-p
+ "This is an uninteresting sentence."
+ (buffer-substring-no-properties (point-min) (point-max)))))))
+
(provide 'eww-tests)
;; eww-tests.el ends here