summaryrefslogtreecommitdiff
path: root/lisp/tar-mode.el
diff options
context:
space:
mode:
authorEli Zaretskii <eliz@gnu.org>2023-07-27 11:36:00 +0300
committerEli Zaretskii <eliz@gnu.org>2023-07-27 11:36:00 +0300
commit2dc5f17c3ecf6864fdcb8ebae73c02a8d04c415a (patch)
tree2321e4abe8ab2ad3d52ec5abf55304584892ee59 /lisp/tar-mode.el
parentb936ff0963e69d30d77cec5323a95bc2385cf212 (diff)
downloademacs-2dc5f17c3ecf6864fdcb8ebae73c02a8d04c415a.tar.gz
Support Posix-standard pax extended header in tar files
* lisp/tar-mode.el (pax-extended-attribute-record-regexp) (tar-attr-vector): New variables. (pax-gid-index, pax-gname-index, pax-linkpath-index) (pax-mtime-index, pax-path-index, pax-size-index, pax-uid-index) (pax-uname-index): New constants. (pax-header-gid, pax-header-gname, pax-header-linkpath) (pax-header-mtime, pax-header-path, pax-header-size) (pax-header-uid, pax-header-uname): New accessors to pax header. (pax-decode-string, tar-parse-pax-extended-header): New functions. (tar-header-block-tokenize): Recognize and handle Posix-standard pax extended header, and use its attributes instead of those in the standard tar header. (Bug#64686)
Diffstat (limited to 'lisp/tar-mode.el')
-rw-r--r--lisp/tar-mode.el260
1 files changed, 199 insertions, 61 deletions
diff --git a/lisp/tar-mode.el b/lisp/tar-mode.el
index c9206028e94..4e9843123b0 100644
--- a/lisp/tar-mode.el
+++ b/lisp/tar-mode.el
@@ -215,6 +215,99 @@ Preserve the modified states of the buffers and set `tar-data-swapped'."
"Round S up to the next multiple of 512."
(ash (ash (+ s 511) -9) 9))
+;; Reference:
+;; https://pubs.opengroup.org/onlinepubs/9699919799/utilities/pax.html#tag_20_92_13_02
+(defconst pax-extended-attribute-record-regexp
+ ;; We omit attributes that are "reserved" by Posix, since no
+ ;; processing has been defined for them.
+ "\\([0-9]+\\) \\(gid\\|gname\\|hdrcharset\\|linkpath\\|mtime\\|path\\|size\\|uid\\|uname\\)="
+ "Regular expression for looking up extended attributes in a
+Posix-standard pax extended header of a tar file.
+Only attributes that `tar-mode' can grok are mentioned.")
+
+(defconst pax-gid-index 0)
+(defconst pax-gname-index 1)
+(defconst pax-linkpath-index 2)
+(defconst pax-mtime-index 3)
+(defconst pax-path-index 4)
+(defconst pax-size-index 5)
+(defconst pax-uid-index 6)
+(defconst pax-uname-index 7)
+(defsubst pax-header-gid (attr-vec)
+ (aref attr-vec pax-gid-index))
+(defsubst pax-header-gname (attr-vec)
+ (aref attr-vec pax-gname-index))
+(defsubst pax-header-linkpath (attr-vec)
+ (aref attr-vec pax-linkpath-index))
+(defsubst pax-header-mtime (attr-vec)
+ (aref attr-vec pax-mtime-index))
+(defsubst pax-header-path (attr-vec)
+ (aref attr-vec pax-path-index))
+(defsubst pax-header-size (attr-vec)
+ (aref attr-vec pax-size-index))
+(defsubst pax-header-uid (attr-vec)
+ (aref attr-vec pax-uid-index))
+(defsubst pax-header-uname (attr-vec)
+ (aref attr-vec pax-uid-index))
+
+(defsubst pax-decode-string (str coding)
+ (if str
+ (decode-coding-string str coding)
+ str))
+
+(defvar tar-attr-vector '[nil nil nil nil nil nil nil nil])
+(defun tar-parse-pax-extended-header (pos)
+ "Parse a pax external header of a Posix-format tar file."
+ (let ((end (+ pos 512))
+ (result tar-attr-vector)
+ (coding 'utf-8-unix)
+ attr value record-len value-len)
+ (dotimes (i 8)
+ (aset result i nil))
+ (goto-char pos)
+ (while (and (< pos end)
+ (re-search-forward pax-extended-attribute-record-regexp
+ end 'move))
+ (setq record-len (string-to-number (match-string 1))
+ attr (match-string 2)
+ value-len (- record-len
+ (length (match-string 1))
+ 1
+ (length (match-string 2))
+ 2)
+ value (buffer-substring (point) (+ (point) value-len)))
+ (setq pos (goto-char (+ (point) value-len 1)))
+ (cond
+ ((equal attr "gid")
+ (aset result pax-gid-index value))
+ ((equal attr "gname")
+ (aset result pax-gname-index value))
+ ((equal attr "linkpath")
+ (aset result pax-linkpath-index value))
+ ((equal attr "mtime")
+ (aset result pax-mtime-index (string-to-number value)))
+ ((equal attr "path")
+ (aset result pax-path-index value))
+ ((equal attr "size")
+ (aset result pax-size-index value))
+ ((equal attr "uid")
+ (aset result pax-uid-index value))
+ ((equal attr "uname")
+ (aset result pax-uname-index value))
+ ((equal attr "hdrcharset")
+ (setq coding (if (equal value "BINARY") 'no-conversion 'utf-8-unix))))
+ (setq pos (+ pos (skip-chars-forward "\000"))))
+ ;; Decode string-valued attributes.
+ (aset result pax-gname-index
+ (pax-decode-string (aref result pax-gname-index) coding))
+ (aset result pax-linkpath-index
+ (pax-decode-string (aref result pax-linkpath-index) coding))
+ (aset result pax-path-index
+ (pax-decode-string (aref result pax-path-index) coding))
+ (aset result pax-uname-index
+ (pax-decode-string (aref result pax-uname-index) coding))
+ result))
+
(defun tar-header-block-tokenize (pos coding &optional disable-slash)
"Return a `tar-header' structure.
This is a list of name, mode, uid, gid, size,
@@ -271,67 +364,112 @@ of the file header. This is used for \"old GNU\" Tar format."
(if (and (null link-p) (null disable-slash) (string-match "/\\'" name))
(setq link-p 5)) ; directory
- (if (and (equal name "././@LongLink")
- ;; Supposedly @LongLink is only used for GNUTAR
- ;; format (i.e. "ustar ") but some POSIX Tar files
- ;; (with "ustar\0") have been seen using it as well.
- (member magic-str '("ustar " "ustar\0")))
- ;; This is a GNU Tar long-file-name header.
- (let* ((size (tar-parse-octal-integer
- string tar-size-offset tar-time-offset))
- ;; The long name is in the next 512-byte block.
- ;; We've already moved POS there, when we computed
- ;; STRING above.
- (name (decode-coding-string
- ;; -1 so as to strip the terminating 0 byte.
- (buffer-substring pos (+ pos size -1)) coding))
- ;; Tokenize the header of the _real_ file entry,
- ;; which is further 512 bytes into the archive.
- (descriptor (tar-header-block-tokenize
- (+ pos (tar-roundup-512 size)) coding
- ;; Don't intuit directories from
- ;; the trailing slash, because the
- ;; truncated name might by chance end
- ;; in a slash.
- 'ignore-trailing-slash)))
- ;; Fix the descriptor of the real file entry by using
- ;; the information from the long name entry.
- (cond
- ((eq link-p (- ?L ?0)) ;GNUTYPE_LONGNAME.
- (setf (tar-header-name descriptor) name))
- ((eq link-p (- ?K ?0)) ;GNUTYPE_LONGLINK.
- (setf (tar-header-link-name descriptor) name))
- (t
- (message "Unrecognized GNU Tar @LongLink format")))
- ;; Fix the "link-type" attribute, based on the long name.
- (if (and (null (tar-header-link-type descriptor))
- (string-match "/\\'" name))
- (setf (tar-header-link-type descriptor) 5)) ; directory
- (setf (tar-header-header-start descriptor)
- (copy-marker (- pos 512) t))
- descriptor)
-
- (make-tar-header
- (copy-marker pos nil)
- name
- (tar-parse-octal-integer string tar-mode-offset tar-uid-offset)
- (tar-parse-octal-integer string tar-uid-offset tar-gid-offset)
- (tar-parse-octal-integer string tar-gid-offset tar-size-offset)
- (tar-parse-octal-integer string tar-size-offset tar-time-offset)
- (tar-parse-octal-integer string tar-time-offset tar-chk-offset)
- (tar-parse-octal-integer string tar-chk-offset tar-linkp-offset)
- link-p
- linkname
- uname-valid-p
- (when uname-valid-p
- (decode-coding-string
- (substring string tar-uname-offset uname-end) coding))
- (when uname-valid-p
- (decode-coding-string
- (substring string tar-gname-offset gname-end) coding))
- (tar-parse-octal-integer string tar-dmaj-offset tar-dmin-offset)
- (tar-parse-octal-integer string tar-dmin-offset tar-prefix-offset)
- ))))))
+ (if (member magic-str '("ustar " "ustar\0"))
+ (if (equal name "././@LongLink")
+ ;; Supposedly @LongLink is only used for GNUTAR
+ ;; format (i.e. "ustar ") but some POSIX Tar files
+ ;; (with "ustar\0") have been seen using it as well.
+ ;; This is a GNU Tar long-file-name header.
+ (let* ((size (tar-parse-octal-integer
+ string tar-size-offset tar-time-offset))
+ ;; The long name is in the next 512-byte block.
+ ;; We've already moved POS there, when we
+ ;; computed STRING above.
+ (name (decode-coding-string
+ ;; -1 so as to strip the terminating 0 byte.
+ (buffer-substring pos (+ pos size -1)) coding))
+ ;; Tokenize the header of the _real_ file entry,
+ ;; which is further 512 bytes into the archive.
+ (descriptor (tar-header-block-tokenize
+ (+ pos (tar-roundup-512 size)) coding
+ ;; Don't intuit directories from
+ ;; the trailing slash, because the
+ ;; truncated name might by chance end
+ ;; in a slash.
+ 'ignore-trailing-slash)))
+ ;; Fix the descriptor of the real file entry by using
+ ;; the information from the long name entry.
+ (cond
+ ((eq link-p (- ?L ?0)) ;GNUTYPE_LONGNAME.
+ (setf (tar-header-name descriptor) name))
+ ((eq link-p (- ?K ?0)) ;GNUTYPE_LONGLINK.
+ (setf (tar-header-link-name descriptor) name))
+ (t
+ (message "Unrecognized GNU Tar @LongLink format")))
+ ;; Fix the "link-type" attribute, based on the long name.
+ (if (and (null (tar-header-link-type descriptor))
+ (string-match "/\\'" name))
+ (setf (tar-header-link-type descriptor) 5)) ; directory
+ (setf (tar-header-header-start descriptor)
+ (copy-marker (- pos 512) t))
+ descriptor)
+ ;; Posix pax extended header. FIXME: support ?g as well.
+ (if (eq link-p (- ?x ?0))
+ ;; Get whatever attributes are in the extended header,
+ (let* ((pax-attrs (tar-parse-pax-extended-header pos))
+ (gid (pax-header-gid pax-attrs))
+ (gname (pax-header-gname pax-attrs))
+ (linkpath (pax-header-linkpath pax-attrs))
+ (mtime (pax-header-mtime pax-attrs))
+ (path (pax-header-path pax-attrs))
+ (size (pax-header-size pax-attrs))
+ (uid (pax-header-uid pax-attrs))
+ (uname (pax-header-uname pax-attrs))
+ ;; Tokenize the header of the _real_ file entry,
+ ;; which is further 512 bytes into the archive.
+ (descriptor
+ (tar-header-block-tokenize (+ pos 512) coding
+ 'ignore-trailing-slash)))
+ ;; Fix the descriptor of the real file entry by
+ ;; overriding some of the fields with the information
+ ;; from the extended header.
+ (if gid
+ (setf (tar-header-gid descriptor) gid))
+ (if gname
+ (setf (tar-header-gname descriptor) gname))
+ (if linkpath
+ (setf (tar-header-link-name descriptor) linkpath))
+ (if mtime
+ (setf (tar-header-date descriptor) mtime))
+ (if path
+ (setf (tar-header-name descriptor) path))
+ (if size
+ (setf (tar-header-size descriptor) size))
+ (if uid
+ (setf (tar-header-uid descriptor) uid))
+ (if uname
+ (setf (tar-header-uname descriptor) uname))
+ descriptor)
+
+ (make-tar-header
+ (copy-marker pos nil)
+ name
+ (tar-parse-octal-integer string tar-mode-offset
+ tar-uid-offset)
+ (tar-parse-octal-integer string tar-uid-offset
+ tar-gid-offset)
+ (tar-parse-octal-integer string tar-gid-offset
+ tar-size-offset)
+ (tar-parse-octal-integer string tar-size-offset
+ tar-time-offset)
+ (tar-parse-octal-integer string tar-time-offset
+ tar-chk-offset)
+ (tar-parse-octal-integer string tar-chk-offset
+ tar-linkp-offset)
+ link-p
+ linkname
+ uname-valid-p
+ (when uname-valid-p
+ (decode-coding-string
+ (substring string tar-uname-offset uname-end) coding))
+ (when uname-valid-p
+ (decode-coding-string
+ (substring string tar-gname-offset gname-end) coding))
+ (tar-parse-octal-integer string tar-dmaj-offset
+ tar-dmin-offset)
+ (tar-parse-octal-integer string tar-dmin-offset
+ tar-prefix-offset)
+ ))))))))
;; Pseudo-field.
(defun tar-header-data-end (descriptor)