diff options
author | Sean Whitton <spwhitton@spwhitton.name> | 2021-08-04 17:09:47 -0700 |
---|---|---|
committer | Sean Whitton <spwhitton@spwhitton.name> | 2021-11-08 16:00:27 -0700 |
commit | 1e99ee6ff7f47db2052e226d7b071e31ff33b56c (patch) | |
tree | c27a22b6cb4e7d2c8b0b1aad4dc747c31102958d /src | |
parent | 42489752b4c78f6bbc80bb56a4347b692a067c29 (diff) | |
download | consfigurator-1e99ee6ff7f47db2052e226d7b071e31ff33b56c.tar.gz |
add LXC properties, :LXC{,-UNPRIV-ATTACH} connections, WITH-HOMEDIR
Signed-off-by: Sean Whitton <spwhitton@spwhitton.name>
Diffstat (limited to 'src')
-rw-r--r-- | src/combinator.lisp | 31 | ||||
-rw-r--r-- | src/connection/chroot.lisp | 4 | ||||
-rw-r--r-- | src/connection/linux-namespace.lisp | 157 | ||||
-rw-r--r-- | src/connection/shell-wrap.lisp | 3 | ||||
-rw-r--r-- | src/data.lisp | 12 | ||||
-rw-r--r-- | src/libc.lisp | 1 | ||||
-rw-r--r-- | src/package.lisp | 46 | ||||
-rw-r--r-- | src/property/lxc.lisp | 329 | ||||
-rw-r--r-- | src/util/linux-namespace.lisp | 122 |
9 files changed, 666 insertions, 39 deletions
diff --git a/src/combinator.lisp b/src/combinator.lisp index 26a6767..088d3cd 100644 --- a/src/combinator.lisp +++ b/src/combinator.lisp @@ -360,3 +360,34 @@ an :UNAPPLY subroutine for a property which works by calling other properties." :unapply (lambda-ignoring-args (propappapply unapply-propapp))) apply-propapp))) + +(defmacro with-homedir ((&key user dir) &body propapps) + "Apply PROPAPPS with a different home and initial working directory, either +DIR or the home directory of USER." + (when (and user dir) + (simple-program-error + "WITH-HOMEDIR: Both USER and DIR arguments supplied.")) + `(with-homedir* ,user ,dir + ,(if (cdr propapps) `(eseqprops ,@propapps) (car propapps)))) + +(define-function-property-combinator with-homedir* (user dir propapp) + (flet ((change (f) + ;; Ensure the :CONSFIGURATOR-CACHE connattr is populated because + ;; determining it may look at HOME. In particular, we want to + ;; avoid looking in the new HOME for cached data to upload. + (when (lisp-connection-p) (get-connattr :consfigurator-cache)) + (let ((new (or dir (stripln (run (strcat "echo ~" user)))))) + (with-connattrs (:remote-home new) + (with-remote-current-directory (new) + (if (lisp-connection-p) + (let ((orig (getenv "HOME"))) + (setf (getenv "HOME") new) + (unwind-protect (funcall f propapp) + (setf (getenv "HOME") orig))) + (funcall f propapp))))))) + (:retprop :type (propapptype propapp) + :desc (get (car propapp) 'desc) + :hostattrs (get (car propapp) 'hostattrs) + :apply (lambda-ignoring-args (change #'propappapply)) + :unapply (lambda-ignoring-args (change #'propappunapply)) + :args (cdr propapp)))) diff --git a/src/connection/chroot.lisp b/src/connection/chroot.lisp index 8f829d3..4c1db70 100644 --- a/src/connection/chroot.lisp +++ b/src/connection/chroot.lisp @@ -100,7 +100,9 @@ should be the mount point, without the chroot's root prefixed.") (unless (and (lisp-connection-p) (zerop (nix:geteuid))) (error "~&Forking into a chroot requires a Lisp image running as root")) (informat 1 "~&Forking into chroot at ~A" into) - (let* ((into (ensure-pathname into :want-absolute t :ensure-directory t)) + (let* ((into (ensure-pathname into + :defaults (uiop:getcwd) + :ensure-absolute t :ensure-directory t)) (connection (make-instance 'shell-chroot-connection :into into))) ;; Populate the CONSFIGURATOR::ID and :REMOTE-HOME connattrs correctly to ;; ensure they don't get bogus values when this connection object is used diff --git a/src/connection/linux-namespace.lisp b/src/connection/linux-namespace.lisp index 89b5daf..dc913e8 100644 --- a/src/connection/linux-namespace.lisp +++ b/src/connection/linux-namespace.lisp @@ -22,42 +22,36 @@ ;;; finaliser thread: we must be truly single-threaded in order to enter a ;;; different user namespace. If we can't use it then we fall back to a ;;; POSIX-type connection into the container. In the latter situation the -;;; user could follow the :SYSTEMD-MACHINED connection with a connection type -;;; which starts up a remote Lisp image within the container. This will be -;;; significantly slower, so if there is more than one container it will -;;; probably be worth arranging for the use of 'SETNS. +;;; user could follow the :SYSTEMD-MACHINED/:LXC/etc. connection with a +;;; connection type which starts up a remote Lisp image within the container. +;;; This will be significantly slower, so if there is more than one container +;;; it will probably be worth arranging for the use of 'SETNS. +;;; +;;; For containers which can be launched only by root, like systemd-nspawn, we +;;; use nsenter(1) for the POSIX-type connection into them. For containers +;;; owned by non-root, however, we use container-specific commands like +;;; lxc-unpriv-attach(1). This is because nsenter(1) with --all can fail with +;;; permission errors depending on which namespaces are had in common inside +;;; and outside of the container. For example, if the container has the same +;;; time namespace, nsenter(1) will fail to re-enter it because the non-root +;;; user lacks CAP_SYS_ADMIN for the user namespace owning that time +;;; namespace. 'SETNS handles this case by ignoring EPERM in the second pass. ;;; ;;; An alternative to calling setns(2) ourselves, in the Lisp-type connection ;;; case, might be to dump an image, write it to a temporary file within the ;;; container using WITH-REMOTE-TEMPORARY-FILE & nsenter(1), and then reinvoke -;;; the image, again using nsenter(1). This would be more portable with -;;; respect to Lisp implementations. However, it would run into the problems -;;; described in "Dumping and reinvoking Lisp" in pitfalls.rst. With the -;;; current approach, shared library dependencies need be available only -;;; outside the container, and then after entering the container they will -;;; still be usable. - -(defmethod establish-connection - ((type (eql :systemd-machined)) remaining &key name uid gid) - (establish-connection - #+sbcl (if (lisp-connection-p) 'setns :nsenter) #-sbcl :nsenter remaining - :pid (or (loop for line in (runlines "machinectl" "show" name) - when (string-prefix-p "Leader=" line) - return (subseq line 7)) - (error "Could not determine PID for machine ~A." name)) - :uid uid :gid gid)) - - -;;;; :NSENTER - -(defmethod establish-connection - ((type (eql :nsenter)) remaining &key pid uid gid) - (declare (ignore remaining)) - (informat 1 "~&Entering namespaces of PID ~D with nsenter(1)" pid) - (make-instance 'nsenter-connection :pid pid :uid uid :gid gid)) +;;; the image, again using nsenter(1), or using lxc-unpriv-attach(1), etc. +;;; This would be more portable with respect to Lisp implementations. +;;; However, it would run into the problems described in "Dumping and +;;; reinvoking Lisp" in pitfalls.rst. With the current approach, shared +;;; library dependencies need be available only outside the container, and +;;; then after entering the container they will still be usable. (defclass linux-namespace-connection () - ((pid :type integer :initarg :pid + ((name :type string :initarg :name + :documentation + "The name of the container as output by commands like lxc-ls(1).") + (pid :type integer :initarg :pid :initform (simple-program-error "Must supply namespace leader PID.") :documentation "A PID of a process which is already within all of the namespaces.") @@ -79,6 +73,9 @@ (when (plusp (length entry)) (push entry env)))))) + +;;;; :NSENTER + (defclass nsenter-connection (linux-namespace-connection shell-wrap-connection) ()) @@ -89,6 +86,101 @@ "nsenter ~@[-S ~D ~]~@[-G ~D ~]-at ~D env -i ~{~A~^ ~} sh -c ~A" uid gid pid (mapcar #'escape-sh-token env) (escape-sh-token cmd)))) +(defmethod establish-connection + ((type (eql :nsenter)) remaining &key name pid uid gid) + (declare (ignore remaining)) + (informat 1 "~&Entering namespaces of PID ~D with nsenter(1)" pid) + (make-instance 'nsenter-connection :name name :pid pid :uid uid :gid gid)) + +(defmethod establish-connection + ((type (eql :systemd-machined)) remaining &key name uid gid) + (let ((type #+sbcl (if (lisp-connection-p) 'setns :nsenter) #-sbcl :nsenter) + (pid (or (loop for line in (runlines "machinectl" "show" name) + when (string-prefix-p "Leader=" line) + return (subseq line 7)) + (error "Could not determine PID for container ~A." name)))) + (apply #'establish-connection type remaining + :name name :pid pid :uid uid :gid gid + (and (eql 'setns type) '(:posix-type nsenter-connection))))) + + +;;;; :LXC-UNPRIV-ATTACH + +(defclass lxc-unpriv-attach-connection + (linux-namespace-connection shell-wrap-connection) + ((owner :initarg :owner :initform nil) + (owner-uid :initarg :owner-uid :initform nil))) + +(defmethod initialize-instance :after + ((connection lxc-unpriv-attach-connection) &key) + (with-slots (owner owner-uid) connection + (when owner (setf owner-uid (user:passwd-entry 2 owner))))) + +(defmethod connection-shell-wrap + ((connection lxc-unpriv-attach-connection) cmd) + (with-slots (owner owner-uid name uid gid env) connection + ;; Here we reimplement lxc-unpriv-attach(1) in order to pass --quiet to + ;; systemd-run(1), else we get extra output on stderr. + (let ((args `("systemd-run" "--scope" "--quiet" "-p" "Delegate=yes" + "/usr/bin/lxc-attach" "-n" ,name "--clear-env" + ,@(loop for env in env collect "--set-var" collect env) + ,@(and uid `("-u" ,(write-to-string uid))) + ,@(and gid `("-g" ,(write-to-string gid))) + "--" "sh" "-c" ,cmd))) + (if (and owner (not (string= owner (get-connattr :remote-user)))) + (with-connattrs (:remote-uid owner-uid) + (list* "runuser" "-u" owner "--" (apply #'systemd--user args))) + (apply #'systemd--user args))))) + +(defmethod establish-connection + ((type (eql :lxc-unpriv-attach)) remaining &key owner name pid uid gid) + (declare (ignore remaining)) + (informat 1 "~&Entering namespaces of PID ~D with lxc-unpriv-attach(1)" pid) + (make-instance 'lxc-unpriv-attach-connection + :owner owner :name name :pid pid :uid uid :gid gid)) + +(defmethod establish-connection + ((type (eql :lxc)) remaining + &key owner (name (and (not remaining) (get-hostname))) uid gid) + "Attach to the LXC named NAME and owned by OWNER, defaulting to the current +user. Switch to UID and GID inside the LXC. + +When the previously established connection hop is a Lisp-type connection, this +connection type will dump and reinvoke Lisp. Thus, connections established +since the Lisp image was started up but before this one must not have rendered +the original ~/.cache/common-lisp/ unreadable, or the reinvoked image will +fail to start. For example, + + (:ssh :sbcl (:lxc :name \"foo\")) + +and + + ((:ssh :user \"root\") :sbcl (:lxc :owner \"user\" :name \"foo\")) + +will work but + + ((:ssh :user \"root\") :sbcl (:setuid :to \"user\") (:lxc :name \"foo\")) + +will not. See \"Dumping and reinvoking Lisp\" in the \"Pitfalls and +limitations\" section of the Consfigurator manual. + +When the current connection is a Lisp-type connection, this internally uses +setns(2) to enter the container. See \"Connections which use setns(2) to +enter containers\" in the Consfigurator manual for security implications." + (let ((type #+sbcl (if (lisp-connection-p) 'setns :lxc-unpriv-attach) + #-sbcl :lxc-unpriv-attach) + (pid (loop + for (lxc pid) + in (mapcar #'words (cdr (lxc:lxc-ls owner "-1fFNAME,PID"))) + when (string= lxc name) + return pid + finally + (error "Could not determine PID for container ~A." name)))) + (apply #'establish-connection type remaining + :owner owner :name name :pid pid :uid uid :gid gid + (and (eql 'setns type) + '(:posix-type lxc-unpriv-attach-connection))))) + ;;;; 'SETNS @@ -100,7 +192,8 @@ ;;; type like :SYSTEMD-MACHINED, we mirror these semantics in our 'SETNS. #+sbcl (defmethod establish-connection - ((type (eql 'setns)) remaining &key pid uid gid) + ((type (eql 'setns)) remaining + &rest args &key pid posix-type &allow-other-keys) "Use setns(2) to enter the Linux namespaces of process PID. Additionally, - If PID has a distinct user namespace and we have permission to setgroups(2) @@ -133,7 +226,7 @@ setgroups(2) in either the starting user namespace or the target user namespace, in each case either due to a lack of privilege or because setgroups(2) is denied in the namespace." (informat 1 "~&Reassociating to Linux namespaces of PID ~D" pid) - (alet (make-instance 'nsenter-connection :pid pid :uid uid :gid gid) + (alet (apply #'make-instance posix-type (remove-from-plist args :posix-type)) (upload-all-prerequisite-data it) (change-class it 'setns-connection) (continue-connection it remaining))) diff --git a/src/connection/shell-wrap.lisp b/src/connection/shell-wrap.lisp index bcc8fc8..49cfd83 100644 --- a/src/connection/shell-wrap.lisp +++ b/src/connection/shell-wrap.lisp @@ -23,7 +23,8 @@ (defgeneric connection-shell-wrap (connection cmd)) (defmethod connection-run ((c shell-wrap-connection) cmd input) - (mrun :may-fail :input input (connection-shell-wrap c cmd))) + (apply #'mrun :may-fail :input input + (ensure-cons (connection-shell-wrap c cmd)))) (defun %readfile (c path &optional delete) (multiple-value-bind (out exit) diff --git a/src/data.lisp b/src/data.lisp index 4962ae0..1b74338 100644 --- a/src/data.lisp +++ b/src/data.lisp @@ -469,7 +469,17 @@ properties, or data sources which return objects referencing existing files." :version (caddr triple)))) (defun get-local-data-cache-dir () - (merge-pathnames "consfigurator/data/" (uiop:xdg-cache-home))) + (merge-pathnames + "data/" + ;; A combinator like WITH-HOMEDIR might have temporarily set the HOME + ;; and/or XDG_CACHE_HOME environment variables, so use a cached value if we + ;; can find one. + (or (loop for conn = *connection* then (connection-parent conn) + while conn + when (subtypep (type-of conn) 'lisp-connection) + return (connection-connattr conn :consfigurator-cache)) + (connection-connattr + (establish-connection :local nil) :consfigurator-cache)))) ;;;; Passphrases diff --git a/src/libc.lisp b/src/libc.lisp index bd1bd48..41a601b 100644 --- a/src/libc.lisp +++ b/src/libc.lisp @@ -3,6 +3,7 @@ (include "unistd.h") (ctype uid_t "uid_t") +(ctype gid_t "gid_t") #+linux (progn diff --git a/src/package.lisp b/src/package.lisp index a12b008..c2d0348 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -86,6 +86,7 @@ ;; libc.lisp #:uid_t + #:gid_t #:+CLONE_NEWCGROUP+ #:+CLONE_NEWIPC+ @@ -259,6 +260,7 @@ #:as #:with-flagfile #:with-unapply + #:with-homedir ;; host.lisp #:host @@ -406,8 +408,16 @@ #:capability-p)) (defpackage :consfigurator.util.linux-namespace - (:use #:cl #:consfigurator #:consfigurator.util.posix1e #:cffi) - (:export #:setgroups-p + (:use #:cl + #:anaphora + #:alexandria + #:consfigurator + #:consfigurator.util.posix1e + #:cffi) + (:export #:get-ids-offset + #:reduce-id-maps + #:shift-ids + #:setgroups-p #:get-userns-owner)) (defpackage :consfigurator.property.cmd @@ -944,12 +954,38 @@ (#:os #:consfigurator.property.os)) (:export #:has-swap-file)) +(defpackage :consfigurator.property.lxc + (:use #:cl + #:anaphora + #:alexandria + #:consfigurator + #:consfigurator.util.linux-namespace + #:cffi) + (:local-nicknames (#:file #:consfigurator.property.file) + (#:apt #:consfigurator.property.apt) + (#:os #:consfigurator.property.os) + (#:service #:consfigurator.property.service) + (#:chroot #:consfigurator.property.chroot) + (#:user #:consfigurator.property.user) + (#:systemd #:consfigurator.property.systemd)) + (:export #:installed + #:user-container-started + #:when-user-container-running + #:user-containers-autostart + #:usernet-usable-by + #:user-container-for + #:user-container-for. + #:user-container + #:user-container. + + #:lxc-ls)) + (defpackage :consfigurator.connection.local (:use #:cl #:consfigurator #:alexandria) (:export #:local-connection)) (defpackage :consfigurator.connection.shell-wrap - (:use #:cl #:consfigurator) + (:use #:cl #:alexandria #:consfigurator) (:export #:shell-wrap-connection #:connection-shell-wrap)) (defpackage :consfigurator.connection.fork @@ -1016,7 +1052,9 @@ #:consfigurator #:consfigurator.util.linux-namespace #:consfigurator.connection.fork - #:consfigurator.connection.shell-wrap)) + #:consfigurator.connection.shell-wrap) + (:local-nicknames (#:user #:consfigurator.property.user) + (#:lxc #:consfigurator.property.lxc))) (defpackage :consfigurator.data.asdf (:use #:cl #:alexandria #:consfigurator)) diff --git a/src/property/lxc.lisp b/src/property/lxc.lisp new file mode 100644 index 0000000..4feb34f --- /dev/null +++ b/src/property/lxc.lisp @@ -0,0 +1,329 @@ +;;; Consfigurator -- Lisp declarative configuration management system + +;;; Copyright (C) 2021 Sean Whitton <spwhitton@spwhitton.name> + +;;; This file is free software; you can redistribute it and/or modify +;;; it under the terms of the GNU General Public License as published by +;;; the Free Software Foundation; either version 3, or (at your option) +;;; any later version. + +;;; This file is distributed in the hope that it will be useful, +;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;;; GNU General Public License for more details. + +;;; You should have received a copy of the GNU General Public License +;;; along with this program. If not, see <http://www.gnu.org/licenses/>. + +(in-package :consfigurator.property.lxc) +(named-readtables:in-readtable :consfigurator) + +;;;; Properties and combinators + +(defproplist installed :posix () + "Install the LXC userspace tools." + (:desc "LXC installed") + (os:etypecase + (debianlike (apt:installed "lxc")))) + +(defmacro default-maps-params (uid-maps-param gid-maps-param) + `(setq ,uid-maps-param + (or ,uid-maps-param + (list (cons 0 (multiple-value-list + (get-ids-offset user "/etc/subuid"))))) + ,gid-maps-param + (or ,gid-maps-param + (list (cons 0 (multiple-value-list + (get-ids-offset user "/etc/subgid"))))))) + +(defprop user-container-started :posix (host &optional owner) + "Ensure the LXC unprivileged container for the host designated by HOST owned +by OWNER, defaulting to the current user, is started. +(I.e., if HOST is a string, ensure the container named HOST is started; if +HOST is a HOST value, start the container whose name is HOST's hostname.)" + (:desc #?"LXC container ${(get-hostname host)} started") + (:check (or (service:no-services-p) (user-container-active-p host owner))) + (:apply (lxc-cmd owner "lxc-unpriv-start" "-n" (get-hostname host)))) + +(defmacro when-user-container-running ((host &key owner) &body propapps) + "Apply PROPAPPS only when the unprivileged LXC for the host designated by HOST +and owned by OWNER, defaulting to the current user, is already started." + `(when-user-container-running* + ,host ,owner + ,(if (cdr propapps) `(eseqprops ,@propapps) (car propapps)))) + +(define-function-property-combinator + when-user-container-running* (host owner propapp) + (macrolet ((check-running (form) + `(if (user-container-running-p host owner) ,form :no-change))) + (:retprop :type (propapptype propapp) + :desc (get (car propapp) 'desc) + :hostattrs (get (car propapp) 'hostattrs) + :apply (lambda-ignoring-args + (check-running (propappapply propapp))) + :unapply (lambda-ignoring-args + (check-running (propappunapply propapp))) + :args (cdr propapp)))) + +(defproplist user-containers-autostart :posix (user) + "Install a systemd user unit for USER to autostart all LXC user containers +owned by that user which have lxc.start.auto turned on. Also ensures that +lingering is enabled for USER, so the user unit triggers at system boot. + +A limitation of the current implementation is that it assumes XDG_CONFIG_HOME +is ~/.config." + (:desc #?"LXC autostart systemd user unit installed for ${user}") + (user:has-account user) + (systemd:lingering-enabled user) + (as user + (file:directory-exists ".config/systemd/user") + (file:has-content ".config/systemd/user/lxc-autostart.service" + '("[Unit]" + "Description=\"lxc-autostart\"" + + "[Service]" + "Type=oneshot" + "Delegate=yes" + "ExecStart=/usr/bin/lxc-autostart" + "ExecStop=/usr/bin/lxc-autostart --shutdown" + "RemainAfterExit=yes" + + "[Install]" + "WantedBy=default.target")) + (systemd:enabled "lxc-autostart" t))) + +(defprop usernet-usable-by :posix + (user &optional (interface "lxcbr0") (count 10)) + "Ensure that USER is allowed to attach up to COUNT unprivileged LXCs to the +LXC-managed bridge INTERFACE. + +As a special case, INTERFACE may also be \"none\", which gives USER permission +to create veth pairs where the peer outside the container is not attached to +any bridge." + (:desc #?"${user} may attach LXC veth devices to ${interface}") + (:apply (file:map-file-lines + "/etc/lxc/lxc-usernet" + (lambda (lines) + (loop with done + and want = (format nil "~A veth ~A ~D" user interface count) + and prefix = (strcat user " veth " interface) + for line in lines + if (string-prefix-p prefix line) + unless done collect want into accum and do (setq done t) + end + else collect line into accum + finally (return + (if done accum (nconc accum (list want))))))))) + +(defprop %ids-shifted-for :lisp + (user directory uid-maps gid-maps + &optional + (rootfs + (merge-pathnames "rootfs/" + (ensure-directory-pathname directory)))) + "Recursively shift the user and group ownership of ROOTFS according to +UID-MAPS and GID-MAPS and chown DIRECTORY to root's UID according to UID-MAPS. +Not idempotent! Also set the mode of DIRECTORY to 0770, as is standard for +unprivileged LXCs." + (:apply + (default-maps-params uid-maps gid-maps) + (let ((dir (ensure-directory-pathname directory)) + (uidmap (reduce-id-maps uid-maps)) + (gidmap (reduce-id-maps gid-maps))) + (handler-bind ((serious-condition + ;; Don't leave a partially-shifted tree. + (lambda-ignoring-args (delete-remote-trees rootfs)))) + (shift-ids rootfs uidmap gidmap)) + ;; Don't see how to pass (gid_t)-1 as the third argument via CFFI. Note + ;; that gid_t is not guaranteed to be unsigned. + (nix:chown dir (funcall uidmap 0) (nix:stat-gid (nix:stat dir))) + (nix:chmod dir #o770)))) + +(defprop %container-config-populated :posix + (prelude-lines user uid-maps gid-maps directory autostart hostname + additional-lines) + (:apply + (default-maps-params uid-maps gid-maps) + (let ((uid-maps (loop for (inside outside count) in uid-maps + collect (format nil "lxc.idmap = u ~D ~D ~D" + inside outside count))) + (gid-maps (loop for (inside outside count) in gid-maps + collect (format nil "lxc.idmap = g ~D ~D ~D" + inside outside count))) + (rootfs + (strcat + "dir:" + (unix-namestring + (merge-pathnames + "rootfs" + (merge-pathnames directory (get-connattr :remote-home))))))) + (file:has-content (merge-pathnames "config" directory) + (append prelude-lines uid-maps gid-maps + (list (strcat "lxc.rootfs.path = " rootfs) + (strcat "lxc.start.auto = " (if autostart "1" "0")) + (strcat "lxc.uts.name = " hostname)) + additional-lines) + :mode #o640)))) + +(defpropspec user-container-for :lisp + (options user host &optional additional-properties + &aux (host* (preprocess-host + (make-child-host + :hostattrs (hostattrs host) + :propspec (host-propspec + (union-propspec-into-host + host additional-properties)))))) + "Build an unprivileged, non-system-wide LXC container for HOST. +Must be applied using a connection chain which grants root access, primarily +for the sake of bootstrapping the container's root filesystem. Once built, +however, the container will be launched by USER, which should be non-root. + +If the container has already been bootstrapped and is running at the time this +property is applied, enter the container and apply all its properties. + +OPTIONS is a plist of keyword parameters: + + - :AUTOSTART -- Lisp boolean corresponding to lxc.start.auto in the + container's config file, and also determines whether applying this + property attempts to start the container. Defaults to nil. See also + LXC:USER-CONTAINERS-AUTOSTART. + + - :PRELUDE-LINES -- additional lines to prepend to the container's + configuration file, before the lines generated by this property. See + lxc.container.conf(5). The default value is usually sufficient; if you + add lines, you will probably want to include the lines from the default + value too. + + - :ADDITIONAL-LINES -- additional lines to append to the container's + configuration file, after the lines generated by this property. See + lxc.container.conf(5). In most cases you will need to include, at a + minimum, lines setting up a network interface for the container. The + default value serves as an example of a standard way to do this; if you + use them unmodified, you will also need to apply LXC:USERNET-USABLE-BY for + USER before this property. + + - :UID-MAPS -- a list of the form (INSIDE OUTSIDE COUNT), or a list of such + lists, specifying the subordinate UIDs for the container's user namespace. + OUTSIDE is the beginning of a UID range, as seen from outside the + container, and INSIDE is the UID that OUTSIDE is mapped to, as seen from + inside the container. COUNT is the number of consecutive UIDs mapped. + This property will ensure that USER has permission to use that range of + UIDs by updating /etc/subuid if necessary. + + As a special case, if NIL, instead use the first range of UIDs assigned to + USER in /etc/subuid, with a value of zero for INSIDE, and do not modify + /etc/subuid. (If you want to use the first range of UIDs assigned to USER + in /etc/subuid and also other ranges, you must specify them all explicitly + and cannot rely on this special case.) + + It is usually sufficient not to specify this parameter, as distribution + scripts automatically add an entry to /etc/subuid for each regular user, + and most containers use a value of zero for INSIDE. + + - :GID-MAPS -- as :UID-MAPS, but for GIDs and /etc/subgid. + + - :CHROOT-OPTIONS -- passed on to CHROOT:OS-BOOTSTRAPPED-FOR, which see. + +A limitation of the current implementation is the the root filesystem of the +container is always created under ~/.local/share/lxc/HOSTNAME where HOSTNAME +is the hostname of HOST, ignoring any configured XDG_DATA_HOME for USER. + +Internally we use setns(2) to enter the container. See \"Connections which +use setns(2) to enter containers\" in the Consfigurator manual for security +implications." + (:desc #?"LXC container for ${(get-hostname host*)} configured") + ;; Same hostname probably means that the container HOST inherited the + ;; container host's hostname as one was not explicitly set; probably a + ;; mistake. + (when (string= (get-hostname host*) (get-hostname)) + (aborted-change "LXC container has same hostname as container host.")) + (destructuring-bind + (&key chroot-options autostart uid-maps gid-maps + (prelude-lines '("lxc.include = /usr/share/lxc/config/common.conf" + "lxc.include = /usr/share/lxc/config/userns.conf")) + (additional-lines '("lxc.net.0.type = veth" + "lxc.net.0.flags = up" + "lxc.net.0.link = lxcbr0")) + &aux + (directory + (ensure-directory-pathname + (merge-pathnames (get-hostname host*) ".local/share/lxc/"))) + (rootfs (merge-pathnames "rootfs/" directory)) + (uid-maps (if (listp (car uid-maps)) uid-maps (list uid-maps))) + (gid-maps (if (listp (car gid-maps)) gid-maps (list gid-maps)))) + options + `(eseqprops + (installed) + (user:has-account ,user) + (systemd:lingering-enabled ,user) ; required for lxc-ls(1) to work at all + ,@(aand (loop for (inside outside count) in uid-maps + collect (format nil "~A:~D:~D" user outside count)) + `((desc ,#?"/etc/subuid has mappings for ${(get-hostname host*)}" + (file:contains-lines "/etc/subuid" ,@it)))) + ,@(aand (loop for (inside outside count) in gid-maps + collect (format nil "~A:~D:~D" user outside count)) + `((desc ,#?"/etc/subgid has mappings for ${(get-hostname host*)}" + (file:contains-lines "/etc/subgid" ,@it)))) + ,(propapp (desc "Base directory for container exists" + (as user (file:directory-exists directory)))) + (with-homedir (:user ,user) + (with-flagfile ,(merge-pathnames "rootfs.bootstrapped" directory) + ;; It would be nice to branch here such that if we are about to + ;; start up the container and enter it, just bootstrap a minimal + ;; root filesystem, and only otherwise get all the other properties + ;; applied before the ID shifting. I.e. + ;; + ;; (chroot:os-bootstrapped-for + ;; ,chroot-options ,rootfs + ;; ,@(if autostart + ;; `(,(make-host :hostattrs + ;; (list :os (get-hostattrs :os host*)))) + ;; `(,host ,additional-properties))) + ;; + ;; However, it might be that we need to apply the other properties + ;; in order that the container is startable; for example, getting + ;; systemd installed. + (chroot:os-bootstrapped-for + ,chroot-options ,rootfs ,host ,additional-properties) + (%ids-shifted-for ,user ,directory ,uid-maps ,gid-maps))) + ,(propapp + (desc "Container configuration file populated" + (as user + (%container-config-populated + prelude-lines user uid-maps gid-maps directory autostart + (car (split-string (get-hostname host*) :separator ".")) + additional-lines)))) + ,@(and autostart `((user-container-started ,host ,user))) + (when-user-container-running (,host :owner ,user) + (deploys ((:lxc :owner ,user :name ,(get-hostname host*))) + ,host ,additional-properties))))) + +(defproplist user-container :lisp (options user properties) + "Like LXC:USER-CONTAINER-FOR but define a new host using PROPERTIES." + (:desc "LXC container defined") + (user-container-for options user (make-host :propspec properties))) + + +;;;; Utility functions + +(defun lxc-cmd (&optional owner &rest cmd-and-args) + (let* ((runuser + (and owner (not (string= owner (get-connattr :remote-user))))) + (uid (if runuser + (user:passwd-entry 2 owner) + (get-connattr :remote-uid)))) + (apply #'run :env `(:DBUS_SESSION_BUS_ADDRESS nil + :XDG_RUNTIME_DIR ,(format nil "/run/user/~D" uid)) + (and runuser (list "runuser" "-u" owner "--")) cmd-and-args))) + +(defun lxc-ls (&optional owner &rest args) + "Return the lines of output from lxc-ls(1) called with ARGS and for OWNER." + (lines (apply #'lxc-cmd owner "lxc-ls" args))) + +(defun user-container-active-p (host &optional owner) + (and (not (service:no-services-p)) + (memstring= (get-hostname host) (lxc-ls owner "--active")))) + +(defun user-container-running-p (host &optional owner) + (and (not (service:no-services-p)) + (memstring= (get-hostname host) (lxc-ls owner "--running")))) diff --git a/src/util/linux-namespace.lisp b/src/util/linux-namespace.lisp index e362868..9acdd0b 100644 --- a/src/util/linux-namespace.lisp +++ b/src/util/linux-namespace.lisp @@ -18,6 +18,128 @@ (in-package :consfigurator.util.linux-namespace) (named-readtables:in-readtable :consfigurator) +(defun get-ids-offset (identifier file) + "Where IDENTIFIER is a username or uid, and FILE is structured like +/etc/subuid and /etc/subuid (see subuid(5) and subgid(5)), return the +numerical subordinate ID and numerical subordinate ID count for the first +entry in FILE for IDENTIFIER." + (with-open-file (file file) + (loop with info = (osicat:user-info identifier) + with fields + = (list (cdr (assoc :name info)) + (write-to-string (cdr (assoc :user-id info)))) + for line = (read-line file) + for (field start count) = (split-string line :separator '(#\:)) + when (memstring= field fields) + return (values (parse-integer start) (parse-integer count))))) + +(defun reduce-id-maps (id-maps) + "Where each of ID-MAPS is a list of three integers corresponding to the lines +of the uid_map (resp. gid_map) of a process in a different user namespace as +would be read by a process in the current user namespace, return a function +which maps UIDs (resp. GIDs) in the current user namespace to UIDs +(resp. GIDs) in the user namespace of the process. The function returns NIL, +not 65534, for values which are unmapped. + +A process's uid_map & gid_map files are under /proc; see user_namespaces(7)." + (let ((cache (make-hash-table)) + (sorted (sort (copy-list id-maps) #'< :key #'car))) + (labels ((make-subtree (limit) + (and (plusp limit) + (let* ((new-limit (floor limit 2)) + (left (make-subtree new-limit)) + (next (pop sorted))) + (destructuring-bind (inside outside count) next + (let ((end (1- (+ inside count)))) + (assert (and (every #'integerp next) + (not (minusp inside)) + (not (minusp outside)) + (plusp count)) + nil "Invalid ID map ~S" next) + (assert + (or (not sorted) (> (caar sorted) end)) + nil "ID maps overlap: ~S & ~S" next (car sorted)) + (list inside end (- outside inside) left + (make-subtree (1- (- limit new-limit))))))))) + (tree-lookup (id tree) + (and tree + (destructuring-bind (start end offset left right) tree + (cond ((> id end) (tree-lookup id right)) + ((< id start) (tree-lookup id left)) + (t (+ id offset)))))) + ;; Memoisation ought to be really worthwhile because we will + ;; likely be looking up the same few IDs over and over (e.g. 0). + (cache-or-tree-lookup (id tree) + (multiple-value-bind (result found) (gethash id cache) + (if found + result + (setf (gethash id cache) (tree-lookup id tree)))))) + (rcurry #'cache-or-tree-lookup (make-subtree (length sorted)))))) + +(defun shift-ids (root uidmap gidmap + &aux (seen (make-hash-table :test #'equal))) + "Recursively map the ownership and POSIX ACLs of files under ROOT by applying +the function UIDMAP to user ownership and UIDs appearing in ACLs, and the +function GIDMAP to group ownership and GIDs appearing in ACLs. Each of UIDMAP +and GIDMAP should return a non-negative integer or NIL for each non-negative +integer input; in the latter case, no update will be made to the UID or GID. + +For example, to recursively shift the ownership and POSIX ACLs of a filesystem +hierarchy to render it suitable for use as a root filesystem in a different +user namespace, you might use + + (shift-ids \"/var/lib/lxc/mycontainer/rootfs\" + (reduce-id-maps '(0 100000 65536)) + (reduce-id-maps '(0 100000 65536))) + +Here the list (0 100000 65536) describes the relationship between the present +user namespace and the container's user namespace; see the docstring for +CONSFIGURATOR.UTIL.LINUX-NAMESPACE:REDUCE-ID-MAPS and user_namespaces(7)." + (labels + ((shift (file) + (let* ((file (drop-trailing-slash (unix-namestring file))) + (stat (nix:lstat file)) + (pair (cons (nix:stat-dev stat) (nix:stat-ino stat))) + (uid (nix:stat-uid stat)) + (gid (nix:stat-gid stat)) + (mode (nix:stat-mode stat)) + (dirp (nix:s-isdir mode)) + (linkp (nix:s-islnk mode))) + (unless (gethash pair seen) + (setf (gethash pair seen) t) + (nix:lchown file + (or (funcall uidmap uid) uid) + (or (funcall gidmap gid) gid)) + (unless linkp + ;; Restore mode because chown wipes setuid/setgid. + (nix:chmod file mode) + ;; Now do the ACL shifts; directories have two. + (shift-acl file +ACL-TYPE-ACCESS+) + (when dirp (shift-acl file +ACL-TYPE-DEFAULT+))) + (when (and dirp (not linkp)) + (mapc #'shift (directory-contents file)))))) + (shift-acl (file type) + (with-acl-free (acl (acl-get-file file type)) + (with-foreign-objects + ((uid 'uid_t) (gid 'gid_t) (entry-p 'acl_entry_t)) + (loop with setp + for etype = +ACL-FIRST-ENTRY+ then +ACL-NEXT-ENTRY+ + while (plusp (acl-get-entry acl etype entry-p)) + for entry = (mem-ref entry-p 'acl_entry_t) + for tag-type = (acl-get-tag-type entry) + when (= tag-type +ACL-USER+) + do (awhen + (funcall uidmap (acl-get-qualifier entry 'uid_t)) + (setf setp t (mem-ref uid 'uid_t) it) + (acl-set-qualifier entry uid)) + when (= tag-type +ACL-GROUP+) + do (awhen + (funcall gidmap (acl-get-qualifier entry 'gid_t)) + (setf setp t (mem-ref gid 'gid_t) it) + (acl-set-qualifier entry gid)) + finally (when setp (acl-set-file file type acl))))))) + (shift (ensure-directory-pathname root)))) + #+linux (defun get-userns-owner (fd) (with-foreign-object (owner 'uid_t) |