diff options
author | Dave Love <fx@gnu.org> | 2002-10-29 17:28:10 +0000 |
---|---|---|
committer | Dave Love <fx@gnu.org> | 2002-10-29 17:28:10 +0000 |
commit | 0e5e6e4fe7f18194fbf6d1f3d409ede19298e56e (patch) | |
tree | 5dec3c8bce4cf3584bf4c9f39b72bf848a8db2fe | |
parent | 91379542b1866e4b74a04db3ebce488d190fd37c (diff) | |
download | emacs-0e5e6e4fe7f18194fbf6d1f3d409ede19298e56e.tar.gz |
(ietf-drums): Don't require.
(message-posting-charset): defvar when compiling.
(rfc2047-header-encoding-alist): Add `address-mime' part.
(rfc2047-charset-encoding-alist): Use B for iso-8859-7,
iso-8859-8. Doc fix.
(rfc2047-q-encoding-alist): Augment header list.
(rfc2047-encoding-type, rfc2047-syntax-table): New.
(rfc2047-encode-message-header): Account for address-mime method.
(rfc2047-dissect-region): Rewritten for RFC2822 rules. Now just
return list of strings.
(rfc2047-encode-region): Change test for need to encode word.
Change rfc2047-encode call.
(rfc2047-encode): Remove CHARSET arg and decide encoding from
region contents.
-rw-r--r-- | lisp/gnus/ChangeLog | 17 | ||||
-rw-r--r-- | lisp/gnus/rfc2047.el | 271 |
2 files changed, 185 insertions, 103 deletions
diff --git a/lisp/gnus/ChangeLog b/lisp/gnus/ChangeLog index 83923546834..213f4418085 100644 --- a/lisp/gnus/ChangeLog +++ b/lisp/gnus/ChangeLog @@ -1,3 +1,20 @@ +2002-10-29 Dave Love <fx@gnu.org> + + * rfc2047.el (ietf-drums): Don't require. + (message-posting-charset): defvar when compiling. + (rfc2047-header-encoding-alist): Add `address-mime' part. + (rfc2047-charset-encoding-alist): Use B for iso-8859-7, + iso-8859-8. Doc fix. + (rfc2047-q-encoding-alist): Augment header list. + (rfc2047-encoding-type, rfc2047-syntax-table): New. + (rfc2047-encode-message-header): Account for address-mime method. + (rfc2047-dissect-region): Rewritten for RFC2822 rules. Now just + return list of strings. + (rfc2047-encode-region): Change test for need to encode word. + Change rfc2047-encode call. + (rfc2047-encode): Remove CHARSET arg and decide encoding from + region contents. + 2002-10-21 Juanma Barranquero <lektu@terra.es> * gnus-cus.el (gnus-group-customize): Fix typo. diff --git a/lisp/gnus/rfc2047.el b/lisp/gnus/rfc2047.el index a5f3151d436..9c768f24bea 100644 --- a/lisp/gnus/rfc2047.el +++ b/lisp/gnus/rfc2047.el @@ -1,5 +1,5 @@ ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages -;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. +;; Copyright (C) 1998, 1999, 2000, 2002 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> @@ -27,11 +27,12 @@ ;;; Code: -(eval-when-compile (require 'cl)) +(eval-when-compile + (require 'cl) + (defvar message-posting-charset)) (require 'qp) (require 'mm-util) -(require 'ietf-drums) (require 'mail-prsvr) (require 'base64) ;; Fixme: Avoid this (for gnus-point-at-...) mm dependence on gnus. @@ -41,6 +42,8 @@ (defvar rfc2047-header-encoding-alist '(("Newsgroups" . nil) ("Message-ID" . nil) + ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\)" . + address-mime) (t . mime)) "*Header/encoding method alist. The list is traversed sequentially. The keys can either be @@ -50,8 +53,10 @@ The values can be: 1) nil, in which case no encoding is done; 2) `mime', in which case the header will be encoded according to RFC2047; -3) a charset, in which case it will be encoded as that charset; -4) `default', in which case the field will be encoded as the rest +3) `address-mime', like `mime', but takes account of the rules for address + fields (where quoted strings and comments must be treated separately); +4) a charset, in which case it will be encoded as that charset; +5) `default', in which case the field will be encoded as the rest of the article.") (defvar rfc2047-charset-encoding-alist @@ -62,8 +67,8 @@ The values can be: (iso-8859-4 . Q) (iso-8859-5 . B) (koi8-r . B) - (iso-8859-7 . Q) - (iso-8859-8 . Q) + (iso-8859-7 . B) + (iso-8859-8 . B) (iso-8859-9 . Q) (iso-8859-14 . Q) (iso-8859-15 . Q) @@ -78,7 +83,8 @@ The values can be: (iso-2022-jp-2 . B) (iso-2022-int-1 . B)) "Alist of MIME charsets to RFC2047 encodings. -Valid encodings are nil, `Q' and `B'.") +Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding, +quoted-printable and base64 respectively.") (defvar rfc2047-encoding-function-alist '((Q . rfc2047-q-encode-region) @@ -87,7 +93,8 @@ Valid encodings are nil, `Q' and `B'.") "Alist of RFC2047 encodings to encoding functions.") (defvar rfc2047-q-encoding-alist - '(("\\(From\\|Cc\\|To\\|Bcc\||Reply-To\\):" . "-A-Za-z0-9!*+/") + '(("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\):" + . "-A-Za-z0-9!*+/" ) ;; = (\075), _ (\137), ? (\077) are used in the encoded word. ;; Avoid using 8bit characters. ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" @@ -112,6 +119,12 @@ Valid encodings are nil, `Q' and `B'.") (point-max)))) (goto-char (point-min))) +(defvar rfc2047-encoding-type 'address-mime + "The type of encoding done by `rfc2047-encode-region'. +This should be dynamically bound around calls to +`rfc2047-encode-region' to either `mime' or `address-mime'. See +`rfc2047-header-encoding-alist', for definitions.") + (defun rfc2047-encode-message-header () "Encode the message header according to `rfc2047-header-encoding-alist'. Should be called narrowed to the head of the message." @@ -142,8 +155,11 @@ Should be called narrowed to the head of the message." (setq alist nil method (cdr elem)))) (cond - ((eq method 'mime) - (rfc2047-encode-region (point-min) (point-max))) + ((or (eq method 'address-mime) (eq method 'mime)) + (goto-char (point-min)) + (re-search-forward "^[^:]+: *" nil t) + (let ((rfc2047-encoding-type method)) + (rfc2047-encode-region (point) (point-max)))) ((eq method 'default) (if (and (featurep 'mule) (if (boundp 'default-enable-multibyte-characters) @@ -179,80 +195,120 @@ The buffer may be narrowed." (setq found t))) found)) -(defun rfc2047-dissect-region (b e) - "Dissect the region between B and E into words." - (let ((word-chars "-A-Za-z0-9!*+/") - ;; Not using ietf-drums-specials-token makes life simple. - mail-parse-mule-charset - words point current - result word) - (save-restriction - (narrow-to-region b e) - (goto-char (point-min)) - (skip-chars-forward "\000-\177") - (while (not (eobp)) - (setq point (point)) - (skip-chars-backward word-chars b) - (unless (eq b (point)) - (push (cons (buffer-substring b (point)) nil) words)) - (setq b (point)) - (goto-char point) - (setq current (mm-charset-after)) - (forward-char 1) - (skip-chars-forward word-chars) - (while (and (not (eobp)) - (eq (mm-charset-after) current)) - (forward-char 1) - (skip-chars-forward word-chars)) - (unless (eq b (point)) - (push (cons (buffer-substring b (point)) current) words)) - (setq b (point)) - (skip-chars-forward "\000-\177")) - (unless (eq b (point)) - (push (cons (buffer-substring b (point)) nil) words))) - ;; merge adjacent words - (setq word (pop words)) - (while word - (if (and (cdr word) - (caar words) - (not (cdar words)) - (not (string-match "[^ \t]" (caar words)))) - (if (eq (cdr (nth 1 words)) (cdr word)) - (progn - (setq word (cons (concat - (car (nth 1 words)) (caar words) - (car word)) - (cdr word))) - (pop words) - (pop words)) - (push (cons (concat (caar words) (car word)) (cdr word)) - result) - (pop words) - (setq word (pop words))) - (push word result) - (setq word (pop words)))) - result)) +;; Use this syntax table when parsing into regions that may need +;; encoding. Double quotes are string delimiters, backslash is +;; character quoting, and all other RFC 2822 special characters are +;; treated as punctuation so we can use forward-sexp/forward-word to +;; skip to the end of regions appropriately. Nb. ietf-drums does +;; things differently. +(defconst rfc2047-syntax-table + (let ((table (make-char-table 'syntax-table '(2)))) + (modify-syntax-entry ?\\ "\\" table) + (modify-syntax-entry ?\" "\"" table) + (modify-syntax-entry ?\( "." table) + (modify-syntax-entry ?\) "." table) + (modify-syntax-entry ?\< "." table) + (modify-syntax-entry ?\> "." table) + (modify-syntax-entry ?\[ "." table) + (modify-syntax-entry ?\] "." table) + (modify-syntax-entry ?: "." table) + (modify-syntax-entry ?\; "." table) + (modify-syntax-entry ?, "." table) + (modify-syntax-entry ?@ "." table) + table)) +(defun rfc2047-dissect-region (b e) + "Dissect the region between B and E into tokens. +The tokens comprise sequences of atoms, quoted strings, special +characters and whitespace." + (save-restriction + (narrow-to-region b e) + (if (eq 'mime rfc2047-encoding-type) + ;; Simple case -- no need to tokenize. + (list (buffer-substring b e)) + ;; `address-mime' case -- take care of quoted words, comments. + (with-syntax-table rfc2047-syntax-table + (let ((start (point)) + words last-encoded end) + (goto-char (point-min)) + (condition-case nil ; in case of unbalanced specials + ;; Look for rfc2822-style: sequences of atoms, quoted + ;; strings, specials, whitespace. (Specials mustn't be + ;; encoded.) + (while (not (eobp)) + (setq start (point)) + (cond + ((not (char-after))) ; eob + ((or (eq ?\ (char-after)) + (eq ?\t (char-after))) + (skip-chars-forward " \t") + (push (buffer-substring start (point)) words)) + ;; else token start + ((eq ?\" (char-syntax (char-after))) + ;; Quoted word. + (forward-sexp) + (setq end (point)) + ;; Does it need encoding? + (goto-char start) + (skip-chars-forward "\000-\177" end) + (if (= end (point)) + ;; It doesn't need encoding. + (progn (push (buffer-substring start end) words) + (setq last-encoded nil)) + ;; It needs encoding. Strip the quotes first, + ;; since encoded words can't occur in quotes. + (goto-char end) + (setq end (1- end) + start (1+ start)) + (if (and last-encoded + (string-match "^[ \t]+\\'" (car-safe words))) + ;; There was a preceding quoted word followed by + ;; whitespace. Include the whitespace in this + ;; word to avoid it getting lost. + (push (concat (pop words) (buffer-substring start end)) + words) + (push (buffer-substring start end) words)) + (setq last-encoded t))) ; record that it was encoded + ((eq ?. (char-syntax (char-after))) + ;; Skip other delimiters, but record that they've + ;; potentially separated quoted words. + (forward-char) + (push (string (char-before)) words) + (setq last-encoded nil)) + (t ; normal token/whitespace sequence + ;; Find the end. + (forward-word 1) + (skip-chars-backward " \t") + (setq end (point)) + ;; Deal with encoding and leading space as for + ;; quoted words. + (goto-char start) + (skip-chars-forward "\000-\177" end) + (setq last-encoded (/= end (point))) + (goto-char end) + (push (buffer-substring start end) words)))) + (error (error "Invalid data for rfc2047 encoding: %s" + (buffer-substring b e)))) + (nreverse words)))))) + +;; Fixme: why does this cons a list of words and insert them, rather +;; than encoding in place? (defun rfc2047-encode-region (b e) - "Encode all encodable words in region B to E." + "Encode all encodable words in region B to E. +By default, the region is treated as containing addresses (see +`rfc2047-special-chars')." (let ((words (rfc2047-dissect-region b e)) word) (save-restriction (narrow-to-region b e) (delete-region (point-min) (point-max)) (while (setq word (pop words)) - (if (not (cdr word)) - (insert (car word)) - (rfc2047-fold-region (gnus-point-at-bol) (point)) + (if (string-match "\\`[\0-\177]*\\'" word) ; including whitespace + (insert word) (goto-char (point-max)) - (if (> (- (point) (save-restriction - (widen) - (gnus-point-at-bol))) 76) - (insert "\n ")) ;; Insert blank between encoded words (if (eq (char-before) ?=) (insert " ")) (rfc2047-encode (point) - (progn (insert (car word)) (point)) - (cdr word)))) + (progn (insert word) (point))))) (rfc2047-fold-region (point-min) (point-max))))) (defun rfc2047-encode-string (string) @@ -262,40 +318,49 @@ The buffer may be narrowed." (rfc2047-encode-region (point-min) (point-max)) (buffer-string))) -(defun rfc2047-encode (b e charset) - "Encode the word in the region B to E with CHARSET." - (let* ((mime-charset (mm-mime-charset charset)) - (cs (mm-charset-to-coding-system mime-charset)) - (encoding (or (cdr (assq mime-charset +(defun rfc2047-encode (b e) + "Encode the word(s) in the region B to E." + (let* ((mime-charset (mm-find-mime-charset-region b e)) + (cs (if (> (length mime-charset) 1) + ;; Fixme: instead of this, try to break region into + ;; parts that can be encoded separately. + (error "Can't rfc2047-encode `%s'" + (buffer-substring b e)) + (setq mime-charset (car mime-charset)) + (mm-charset-to-coding-system mime-charset))) + (encoding (if (assq mime-charset + rfc2047-charset-encoding-alist) + (cdr (assq mime-charset rfc2047-charset-encoding-alist)) - 'B)) + 'B)) (start (concat "=?" (downcase (symbol-name mime-charset)) "?" (downcase (symbol-name encoding)) "?")) (first t)) - (save-restriction - (narrow-to-region b e) - (when (eq encoding 'B) - ;; break into lines before encoding - (goto-char (point-min)) - (while (not (eobp)) - (goto-char (min (point-max) (+ 15 (point)))) - (unless (eobp) - (insert "\n")))) - (if (and (mm-multibyte-p) - (mm-coding-system-p cs)) - (mm-encode-coding-region (point-min) (point-max) cs)) - (funcall (cdr (assq encoding rfc2047-encoding-function-alist)) - (point-min) (point-max)) - (goto-char (point-min)) - (while (not (eobp)) - (unless first - (insert " ")) - (setq first nil) - (insert start) - (end-of-line) - (insert "?=") - (forward-line 1))))) + (if mime-charset + (save-restriction + (narrow-to-region b e) + (when (eq encoding 'B) + ;; break into lines before encoding + (goto-char (point-min)) + (while (not (eobp)) + (goto-char (min (point-max) (+ 15 (point)))) + (unless (eobp) + (insert "\n")))) + (if (and (mm-multibyte-p) + (mm-coding-system-p cs)) + (mm-encode-coding-region (point-min) (point-max) cs)) + (funcall (cdr (assq encoding rfc2047-encoding-function-alist)) + (point-min) (point-max)) + (goto-char (point-min)) + (while (not (eobp)) + (unless first + (insert " ")) + (setq first nil) + (insert start) + (end-of-line) + (insert "?=") + (forward-line 1)))))) (defun rfc2047-fold-region (b e) "Fold long lines in region B to E." |