aboutsummaryrefslogtreecommitdiff
path: root/src/connection
diff options
context:
space:
mode:
authorSean Whitton <spwhitton@spwhitton.name>2021-07-25 13:03:57 -0700
committerSean Whitton <spwhitton@spwhitton.name>2021-11-08 12:31:48 -0700
commit42489752b4c78f6bbc80bb56a4347b692a067c29 (patch)
treeb7df4b0d7ad0fdd8dc6c25124947c586ba6d2d45 /src/connection
parent9d857f62af05ff2f9a4ec22f1cfacecf071b668a (diff)
downloadconsfigurator-42489752b4c78f6bbc80bb56a4347b692a067c29.tar.gz
add Linux namespace-entering connections
Signed-off-by: Sean Whitton <spwhitton@spwhitton.name>
Diffstat (limited to 'src/connection')
-rw-r--r--src/connection/fork.lisp49
-rw-r--r--src/connection/linux-namespace.lisp253
2 files changed, 302 insertions, 0 deletions
diff --git a/src/connection/fork.lisp b/src/connection/fork.lisp
index 16c9328..aa1626e 100644
--- a/src/connection/fork.lisp
+++ b/src/connection/fork.lisp
@@ -35,3 +35,52 @@ Must not start up any threads."))
exit
:on-failure (failed-change
"~&Fork connection child failed; stderr was ~%~%~A" err))))
+
+
+;;;; Dumping and then immediately reinvoking Lisp
+
+(defclass init-hooks-connection (fork-connection) ()
+ (:documentation "On SBCL, call POST-FORK using SB-EXT:*INIT-HOOKS*.
+
+The primary purpose of this connection type is to obtain a truly
+single-threaded context for the execution of POST-FORK."))
+
+#+(and sbcl sb-thread)
+(eval-when (:compile-toplevel :load-toplevel :execute)
+ ;; UIOP:VERSION< cannot handle Debian-patched SBCL version numbers, so we
+ ;; split it up ourselves.
+ (destructuring-bind (major minor patch . rest)
+ (mapcar (lambda (s) (parse-integer s :junk-allowed t))
+ (split-string (lisp-implementation-version) :separator '(#\.)))
+ (declare (ignore rest))
+ (unless (or (> major 2)
+ (and (= major 2)
+ (or (> minor 1) (and (= minor 1) (> patch 7)))))
+ (pushnew 'older-sbcl *features*))))
+
+#+sbcl
+(defmethod continue-connection ((connection init-hooks-connection) remaining)
+ (eval-in-reinvoked
+ `(push
+ (lambda ()
+ (handler-bind
+ ((serious-condition
+ (lambda (c)
+ (trivial-backtrace:print-backtrace c :output *error-output*)
+ (uiop:quit 3))))
+ ;; Handle the finaliser thread in older SBCL, before the change in
+ ;; 2.1.8 to call *INIT-HOOKS* before starting system threads.
+ #+consfigurator.connection.fork::older-sbcl
+ (sb-int:with-system-mutex (sb-thread::*make-thread-lock*)
+ (sb-impl::finalizer-thread-stop))
+ (post-fork ,connection)
+ #+consfigurator.connection.fork::older-sbcl
+ (sb-impl::finalizer-thread-start)))
+ sb-ext:*init-hooks*)
+ `(continue-deploy* ,connection ',remaining) (out err exit)
+ (when-let ((lines (lines out)))
+ (inform t lines))
+ (return-exit
+ exit
+ :on-failure (failed-change
+ "~&Reinvoked Lisp image failed; stderr was ~%~%~A" err))))
diff --git a/src/connection/linux-namespace.lisp b/src/connection/linux-namespace.lisp
new file mode 100644
index 0000000..89b5daf
--- /dev/null
+++ b/src/connection/linux-namespace.lisp
@@ -0,0 +1,253 @@
+;;; Consfigurator -- Lisp declarative configuration management system
+
+;;; Copyright (C) 2021 Sean Whitton <spwhitton@spwhitton.name>
+
+;;; This file is free software; you can redistribute it and/or modify
+;;; it under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3, or (at your option)
+;;; any later version.
+
+;;; This file is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+(in-package :consfigurator.connection.linux-namespace)
+(named-readtables:in-readtable :consfigurator)
+
+;;; 'SETNS is SBCL-specific due to handling of non-user threads like the
+;;; finaliser thread: we must be truly single-threaded in order to enter a
+;;; different user namespace. If we can't use it then we fall back to a
+;;; POSIX-type connection into the container. In the latter situation the
+;;; user could follow the :SYSTEMD-MACHINED connection with a connection type
+;;; which starts up a remote Lisp image within the container. This will be
+;;; significantly slower, so if there is more than one container it will
+;;; probably be worth arranging for the use of 'SETNS.
+;;;
+;;; An alternative to calling setns(2) ourselves, in the Lisp-type connection
+;;; case, might be to dump an image, write it to a temporary file within the
+;;; container using WITH-REMOTE-TEMPORARY-FILE & nsenter(1), and then reinvoke
+;;; the image, again using nsenter(1). This would be more portable with
+;;; respect to Lisp implementations. However, it would run into the problems
+;;; described in "Dumping and reinvoking Lisp" in pitfalls.rst. With the
+;;; current approach, shared library dependencies need be available only
+;;; outside the container, and then after entering the container they will
+;;; still be usable.
+
+(defmethod establish-connection
+ ((type (eql :systemd-machined)) remaining &key name uid gid)
+ (establish-connection
+ #+sbcl (if (lisp-connection-p) 'setns :nsenter) #-sbcl :nsenter remaining
+ :pid (or (loop for line in (runlines "machinectl" "show" name)
+ when (string-prefix-p "Leader=" line)
+ return (subseq line 7))
+ (error "Could not determine PID for machine ~A." name))
+ :uid uid :gid gid))
+
+
+;;;; :NSENTER
+
+(defmethod establish-connection
+ ((type (eql :nsenter)) remaining &key pid uid gid)
+ (declare (ignore remaining))
+ (informat 1 "~&Entering namespaces of PID ~D with nsenter(1)" pid)
+ (make-instance 'nsenter-connection :pid pid :uid uid :gid gid))
+
+(defclass linux-namespace-connection ()
+ ((pid :type integer :initarg :pid
+ :initform (simple-program-error "Must supply namespace leader PID.")
+ :documentation
+ "A PID of a process which is already within all of the namespaces.")
+ (env :type list :initform nil)
+ (uid :initarg :uid :initform nil)
+ (gid :initarg :gid :initform nil))
+ (:documentation
+ "A connection which works by reassociating to a set of Linux namespaces."))
+
+(defmethod initialize-instance :after
+ ((connection linux-namespace-connection) &key)
+ (with-slots (pid env) connection
+ ;; In the case that PID is in another user namespace, we won't be able to
+ ;; read the environment unless we're either root or we first enter the
+ ;; other user namespace.
+ (let ((cmd
+#?"cat /proc/${pid}/environ || nsenter -U -t ${pid} cat /proc/${pid}/environ"))
+ (dolist (entry (split-string (run cmd) :separator '(#.(code-char 0))))
+ (when (plusp (length entry))
+ (push entry env))))))
+
+(defclass nsenter-connection
+ (linux-namespace-connection shell-wrap-connection) ())
+
+(defmethod connection-shell-wrap ((connection nsenter-connection) cmd)
+ (with-slots (pid uid gid env) connection
+ (format
+ nil
+ "nsenter ~@[-S ~D ~]~@[-G ~D ~]-at ~D env -i ~{~A~^ ~} sh -c ~A"
+ uid gid pid (mapcar #'escape-sh-token env) (escape-sh-token cmd))))
+
+
+;;;; 'SETNS
+
+;;; Whether to setuid/setgid: setting aside --preserve-credentials, which we
+;;; don't support, nsenter(1) will setuid (resp. setgid) when a UID
+;;; (resp. GID) is supplied on the command line, or when asked to enter a new
+;;; user namespace, either with -U or -t. In the latter case both default to
+;;; 0. So that we can abstract over 'SETNS and :NSENTER using a connection
+;;; type like :SYSTEMD-MACHINED, we mirror these semantics in our 'SETNS.
+#+sbcl
+(defmethod establish-connection
+ ((type (eql 'setns)) remaining &key pid uid gid)
+ "Use setns(2) to enter the Linux namespaces of process PID. Additionally,
+
+- If PID has a distinct user namespace and we have permission to setgroups(2)
+ in the initial user namespace, then before entering the target userns,
+
+ - if the target userns is owned by root, clear supplementary groups
+
+ - if the target userns is owned by nonroot, call initgroups(3) to assume the
+ supplementary groups of the owner.
+
+- After entering the target namespaces:
+
+ - If UID, or PID has a distinct userns, attempt to setuid(2) to UID, in the
+ latter case defaulting UID to 0. Also change to UID's home directory, and
+ update HOME, PATH, USER and LOGNAME environment variables.
+
+ - If GID, or PID has a distinct userns, attempt to setgid(2) to GID, in the
+ latter case defaulting GID to 0. Also, if setgroups(2) is permitted
+ within the target user namespace,
+
+ - if we also called setuid(2) then call initgroups(3) to assume the
+ supplementary groups belonging to UID
+
+ - if we called only setgid(2), clear supplementary groups.
+
+Thus, if PID has a distinct userns then the userns's uid_map and gid_map must
+already have been written, and must include mappings for UID and GID, which
+default to 0 and 0. It is not an error if we do not have the ability to
+setgroups(2) in either the starting user namespace or the target user
+namespace, in each case either due to a lack of privilege or because
+setgroups(2) is denied in the namespace."
+ (informat 1 "~&Reassociating to Linux namespaces of PID ~D" pid)
+ (alet (make-instance 'nsenter-connection :pid pid :uid uid :gid gid)
+ (upload-all-prerequisite-data it)
+ (change-class it 'setns-connection)
+ (continue-connection it remaining)))
+
+(defclass setns-connection
+ (linux-namespace-connection init-hooks-connection) ())
+
+(define-constant +namespace-types+ `(("user" . ,+CLONE_NEWUSER+)
+ ("cgroup" . ,+CLONE_NEWCGROUP+)
+ ("ipc" . ,+CLONE_NEWIPC+)
+ ("uts" . ,+CLONE_NEWUTS+)
+ ("net" . ,+CLONE_NEWNET+)
+ ("pid" . ,+CLONE_NEWPID+)
+ ("mnt" . ,+CLONE_NEWNS+)
+ ("time" . ,+CLONE_NEWTIME+))
+ :test #'equal)
+
+(define-error-retval-cfun () "setns" :int (fd :int) (type :int))
+
+#+sbcl
+(defmethod post-fork ((connection setns-connection))
+ (with-slots (pid uid gid env) connection
+ (let* (user opened-fds
+ ;; Check whether the target user namespace is the current user
+ ;; namespace because it is never permitted to reenter the current
+ ;; user namespace using setns(2).
+ (us (nix:stat #?"/proc/${(nix:getpid)}/ns/user"))
+ (them (nix:stat #?"/proc/${pid}/ns/user"))
+ (setuserns (not (and (= (nix:stat-dev us) (nix:stat-dev them))
+ (= (nix:stat-ino us) (nix:stat-ino them)))))
+ (uid (or uid (and setuserns 0)))
+ (gid (or gid (and setuserns 0))))
+ (unwind-protect
+ (flet ((sysopen (path)
+ (aprog1 (nix:open path nix:O-RDONLY)
+ (push it opened-fds))))
+ (let ((ns-fds
+ (loop for (name . type) in (if setuserns
+ +namespace-types+
+ (cdr +namespace-types+))
+ collect
+ (cons (sysopen #?"/proc/${pid}/ns/${name}") type)))
+ (root-fd (sysopen #?"/proc/${pid}/root")))
+ (when (and setuserns (setgroups-p))
+ (let ((owner (get-userns-owner (caar ns-fds))))
+ (if (zerop owner)
+ (nix:setgroups nil)
+ (alet (osicat:user-info owner)
+ ;; As a precaution, we could also setuid & setgid to
+ ;; OWNER here. However, it ought to be meaningless
+ ;; to do so, because a process loses all capabilities
+ ;; in the parent or previous user namespace.
+ (nix:initgroups (cdr (assoc :name it))
+ (cdr (assoc :group-id it)))))))
+ ;; Reset to a standard SHELL and PATH & clear out rest of env.
+ (posix-login-environment)
+ ;; It might be that we need to enter the userns in order to
+ ;; have the capability to enter the other namespaces, or it
+ ;; might be that we want to enter the userns in order to reduce
+ ;; our privilege. So that we can handle both of these, try
+ ;; entering the other namespaces both before and after entering
+ ;; the new user namespace; the first set of attempts will fail
+ ;; silently in the former case.
+ ;;
+ ;; This technique is based on part of nsenter(1) from
+ ;; util-linux, but additionally, in the first pass we accept
+ ;; errors only of type EPERM, and see below about EPERM in the
+ ;; second pass.
+ (when setuserns
+ (loop for cell on (cdr ns-fds)
+ do (handler-case (setns (caar cell) (cdar cell))
+ (nix:eperm ())
+ (:no-error (&rest ignore)
+ (declare (ignore ignore))
+ (rplaca (car cell) nil)))))
+ (loop for (fd . type) in ns-fds
+ ;; Accept failures due to insufficient capabilities which
+ ;; occur after we've entered the new userns, as that
+ ;; indicates that the namespace we tried to join belongs
+ ;; to a parent userns, in which case if we were ever
+ ;; going to join it would have to have been on 1st pass.
+ if (and fd setuserns (not (eql type +CLONE_NEWUSER+)))
+ do (handler-case (setns fd type) (nix:eperm ()))
+ else if fd do (setns fd type))
+ ;; If we entered new PID or time namespaces then need to fork
+ ;; so we're actually within them; for simplicity, always fork.
+ (mapc-open-output-streams
+ #'force-output
+ *standard-output* *error-output* *debug-io* *terminal-io*)
+ (let ((child (nix:fork)))
+ (when (plusp child)
+ (let ((status (nth-value 1 (nix:waitpid child))))
+ (if (nix:WIFEXITED status)
+ (uiop:quit (nix:WEXITSTATUS status))
+ (error
+ "PID namespace child did not exit normally.")))))
+ ;; If the namespace leader is chrooted then we want to be too.
+ (nix:fchdir root-fd) (chroot ".")))
+ (mapc #'nix:close opened-fds))
+ (when uid
+ (alet (or (osicat:user-info uid)
+ (error "~&Could not look up user info for UID ~A." uid))
+ (setf user (cdr (assoc :name it)))
+ (posix-login-environment uid user (cdr (assoc :home it)))))
+ (dolist (entry env)
+ (let* ((pos (position #\= entry))
+ (var (subseq entry 0 pos))
+ (val (subseq entry (1+ pos))))
+ (unless
+ (and uid
+ (memstring= var '("HOME" "SHELL" "USER" "LOGNAME" "PATH")))
+ (setf (getenv var) val))))
+ (when gid
+ (nix:setgid gid)
+ (when (setgroups-p)
+ (if uid (nix:initgroups user gid) (nix:setgroups nil))))
+ (when uid (nix:setuid uid)))))