aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorSean Whitton <spwhitton@spwhitton.name>2021-08-04 17:09:47 -0700
committerSean Whitton <spwhitton@spwhitton.name>2021-11-08 16:00:27 -0700
commit1e99ee6ff7f47db2052e226d7b071e31ff33b56c (patch)
treec27a22b6cb4e7d2c8b0b1aad4dc747c31102958d /src
parent42489752b4c78f6bbc80bb56a4347b692a067c29 (diff)
downloadconsfigurator-1e99ee6ff7f47db2052e226d7b071e31ff33b56c.tar.gz
add LXC properties, :LXC{,-UNPRIV-ATTACH} connections, WITH-HOMEDIR
Signed-off-by: Sean Whitton <spwhitton@spwhitton.name>
Diffstat (limited to 'src')
-rw-r--r--src/combinator.lisp31
-rw-r--r--src/connection/chroot.lisp4
-rw-r--r--src/connection/linux-namespace.lisp157
-rw-r--r--src/connection/shell-wrap.lisp3
-rw-r--r--src/data.lisp12
-rw-r--r--src/libc.lisp1
-rw-r--r--src/package.lisp46
-rw-r--r--src/property/lxc.lisp329
-rw-r--r--src/util/linux-namespace.lisp122
9 files changed, 666 insertions, 39 deletions
diff --git a/src/combinator.lisp b/src/combinator.lisp
index 26a6767..088d3cd 100644
--- a/src/combinator.lisp
+++ b/src/combinator.lisp
@@ -360,3 +360,34 @@ an :UNAPPLY subroutine for a property which works by calling other properties."
:unapply (lambda-ignoring-args
(propappapply unapply-propapp)))
apply-propapp)))
+
+(defmacro with-homedir ((&key user dir) &body propapps)
+ "Apply PROPAPPS with a different home and initial working directory, either
+DIR or the home directory of USER."
+ (when (and user dir)
+ (simple-program-error
+ "WITH-HOMEDIR: Both USER and DIR arguments supplied."))
+ `(with-homedir* ,user ,dir
+ ,(if (cdr propapps) `(eseqprops ,@propapps) (car propapps))))
+
+(define-function-property-combinator with-homedir* (user dir propapp)
+ (flet ((change (f)
+ ;; Ensure the :CONSFIGURATOR-CACHE connattr is populated because
+ ;; determining it may look at HOME. In particular, we want to
+ ;; avoid looking in the new HOME for cached data to upload.
+ (when (lisp-connection-p) (get-connattr :consfigurator-cache))
+ (let ((new (or dir (stripln (run (strcat "echo ~" user))))))
+ (with-connattrs (:remote-home new)
+ (with-remote-current-directory (new)
+ (if (lisp-connection-p)
+ (let ((orig (getenv "HOME")))
+ (setf (getenv "HOME") new)
+ (unwind-protect (funcall f propapp)
+ (setf (getenv "HOME") orig)))
+ (funcall f propapp)))))))
+ (:retprop :type (propapptype propapp)
+ :desc (get (car propapp) 'desc)
+ :hostattrs (get (car propapp) 'hostattrs)
+ :apply (lambda-ignoring-args (change #'propappapply))
+ :unapply (lambda-ignoring-args (change #'propappunapply))
+ :args (cdr propapp))))
diff --git a/src/connection/chroot.lisp b/src/connection/chroot.lisp
index 8f829d3..4c1db70 100644
--- a/src/connection/chroot.lisp
+++ b/src/connection/chroot.lisp
@@ -100,7 +100,9 @@ should be the mount point, without the chroot's root prefixed.")
(unless (and (lisp-connection-p) (zerop (nix:geteuid)))
(error "~&Forking into a chroot requires a Lisp image running as root"))
(informat 1 "~&Forking into chroot at ~A" into)
- (let* ((into (ensure-pathname into :want-absolute t :ensure-directory t))
+ (let* ((into (ensure-pathname into
+ :defaults (uiop:getcwd)
+ :ensure-absolute t :ensure-directory t))
(connection (make-instance 'shell-chroot-connection :into into)))
;; Populate the CONSFIGURATOR::ID and :REMOTE-HOME connattrs correctly to
;; ensure they don't get bogus values when this connection object is used
diff --git a/src/connection/linux-namespace.lisp b/src/connection/linux-namespace.lisp
index 89b5daf..dc913e8 100644
--- a/src/connection/linux-namespace.lisp
+++ b/src/connection/linux-namespace.lisp
@@ -22,42 +22,36 @@
;;; finaliser thread: we must be truly single-threaded in order to enter a
;;; different user namespace. If we can't use it then we fall back to a
;;; POSIX-type connection into the container. In the latter situation the
-;;; user could follow the :SYSTEMD-MACHINED connection with a connection type
-;;; which starts up a remote Lisp image within the container. This will be
-;;; significantly slower, so if there is more than one container it will
-;;; probably be worth arranging for the use of 'SETNS.
+;;; user could follow the :SYSTEMD-MACHINED/:LXC/etc. connection with a
+;;; connection type which starts up a remote Lisp image within the container.
+;;; This will be significantly slower, so if there is more than one container
+;;; it will probably be worth arranging for the use of 'SETNS.
+;;;
+;;; For containers which can be launched only by root, like systemd-nspawn, we
+;;; use nsenter(1) for the POSIX-type connection into them. For containers
+;;; owned by non-root, however, we use container-specific commands like
+;;; lxc-unpriv-attach(1). This is because nsenter(1) with --all can fail with
+;;; permission errors depending on which namespaces are had in common inside
+;;; and outside of the container. For example, if the container has the same
+;;; time namespace, nsenter(1) will fail to re-enter it because the non-root
+;;; user lacks CAP_SYS_ADMIN for the user namespace owning that time
+;;; namespace. 'SETNS handles this case by ignoring EPERM in the second pass.
;;;
;;; An alternative to calling setns(2) ourselves, in the Lisp-type connection
;;; case, might be to dump an image, write it to a temporary file within the
;;; container using WITH-REMOTE-TEMPORARY-FILE & nsenter(1), and then reinvoke
-;;; the image, again using nsenter(1). This would be more portable with
-;;; respect to Lisp implementations. However, it would run into the problems
-;;; described in "Dumping and reinvoking Lisp" in pitfalls.rst. With the
-;;; current approach, shared library dependencies need be available only
-;;; outside the container, and then after entering the container they will
-;;; still be usable.
-
-(defmethod establish-connection
- ((type (eql :systemd-machined)) remaining &key name uid gid)
- (establish-connection
- #+sbcl (if (lisp-connection-p) 'setns :nsenter) #-sbcl :nsenter remaining
- :pid (or (loop for line in (runlines "machinectl" "show" name)
- when (string-prefix-p "Leader=" line)
- return (subseq line 7))
- (error "Could not determine PID for machine ~A." name))
- :uid uid :gid gid))
-
-
-;;;; :NSENTER
-
-(defmethod establish-connection
- ((type (eql :nsenter)) remaining &key pid uid gid)
- (declare (ignore remaining))
- (informat 1 "~&Entering namespaces of PID ~D with nsenter(1)" pid)
- (make-instance 'nsenter-connection :pid pid :uid uid :gid gid))
+;;; the image, again using nsenter(1), or using lxc-unpriv-attach(1), etc.
+;;; This would be more portable with respect to Lisp implementations.
+;;; However, it would run into the problems described in "Dumping and
+;;; reinvoking Lisp" in pitfalls.rst. With the current approach, shared
+;;; library dependencies need be available only outside the container, and
+;;; then after entering the container they will still be usable.
(defclass linux-namespace-connection ()
- ((pid :type integer :initarg :pid
+ ((name :type string :initarg :name
+ :documentation
+ "The name of the container as output by commands like lxc-ls(1).")
+ (pid :type integer :initarg :pid
:initform (simple-program-error "Must supply namespace leader PID.")
:documentation
"A PID of a process which is already within all of the namespaces.")
@@ -79,6 +73,9 @@
(when (plusp (length entry))
(push entry env))))))
+
+;;;; :NSENTER
+
(defclass nsenter-connection
(linux-namespace-connection shell-wrap-connection) ())
@@ -89,6 +86,101 @@
"nsenter ~@[-S ~D ~]~@[-G ~D ~]-at ~D env -i ~{~A~^ ~} sh -c ~A"
uid gid pid (mapcar #'escape-sh-token env) (escape-sh-token cmd))))
+(defmethod establish-connection
+ ((type (eql :nsenter)) remaining &key name pid uid gid)
+ (declare (ignore remaining))
+ (informat 1 "~&Entering namespaces of PID ~D with nsenter(1)" pid)
+ (make-instance 'nsenter-connection :name name :pid pid :uid uid :gid gid))
+
+(defmethod establish-connection
+ ((type (eql :systemd-machined)) remaining &key name uid gid)
+ (let ((type #+sbcl (if (lisp-connection-p) 'setns :nsenter) #-sbcl :nsenter)
+ (pid (or (loop for line in (runlines "machinectl" "show" name)
+ when (string-prefix-p "Leader=" line)
+ return (subseq line 7))
+ (error "Could not determine PID for container ~A." name))))
+ (apply #'establish-connection type remaining
+ :name name :pid pid :uid uid :gid gid
+ (and (eql 'setns type) '(:posix-type nsenter-connection)))))
+
+
+;;;; :LXC-UNPRIV-ATTACH
+
+(defclass lxc-unpriv-attach-connection
+ (linux-namespace-connection shell-wrap-connection)
+ ((owner :initarg :owner :initform nil)
+ (owner-uid :initarg :owner-uid :initform nil)))
+
+(defmethod initialize-instance :after
+ ((connection lxc-unpriv-attach-connection) &key)
+ (with-slots (owner owner-uid) connection
+ (when owner (setf owner-uid (user:passwd-entry 2 owner)))))
+
+(defmethod connection-shell-wrap
+ ((connection lxc-unpriv-attach-connection) cmd)
+ (with-slots (owner owner-uid name uid gid env) connection
+ ;; Here we reimplement lxc-unpriv-attach(1) in order to pass --quiet to
+ ;; systemd-run(1), else we get extra output on stderr.
+ (let ((args `("systemd-run" "--scope" "--quiet" "-p" "Delegate=yes"
+ "/usr/bin/lxc-attach" "-n" ,name "--clear-env"
+ ,@(loop for env in env collect "--set-var" collect env)
+ ,@(and uid `("-u" ,(write-to-string uid)))
+ ,@(and gid `("-g" ,(write-to-string gid)))
+ "--" "sh" "-c" ,cmd)))
+ (if (and owner (not (string= owner (get-connattr :remote-user))))
+ (with-connattrs (:remote-uid owner-uid)
+ (list* "runuser" "-u" owner "--" (apply #'systemd--user args)))
+ (apply #'systemd--user args)))))
+
+(defmethod establish-connection
+ ((type (eql :lxc-unpriv-attach)) remaining &key owner name pid uid gid)
+ (declare (ignore remaining))
+ (informat 1 "~&Entering namespaces of PID ~D with lxc-unpriv-attach(1)" pid)
+ (make-instance 'lxc-unpriv-attach-connection
+ :owner owner :name name :pid pid :uid uid :gid gid))
+
+(defmethod establish-connection
+ ((type (eql :lxc)) remaining
+ &key owner (name (and (not remaining) (get-hostname))) uid gid)
+ "Attach to the LXC named NAME and owned by OWNER, defaulting to the current
+user. Switch to UID and GID inside the LXC.
+
+When the previously established connection hop is a Lisp-type connection, this
+connection type will dump and reinvoke Lisp. Thus, connections established
+since the Lisp image was started up but before this one must not have rendered
+the original ~/.cache/common-lisp/ unreadable, or the reinvoked image will
+fail to start. For example,
+
+ (:ssh :sbcl (:lxc :name \"foo\"))
+
+and
+
+ ((:ssh :user \"root\") :sbcl (:lxc :owner \"user\" :name \"foo\"))
+
+will work but
+
+ ((:ssh :user \"root\") :sbcl (:setuid :to \"user\") (:lxc :name \"foo\"))
+
+will not. See \"Dumping and reinvoking Lisp\" in the \"Pitfalls and
+limitations\" section of the Consfigurator manual.
+
+When the current connection is a Lisp-type connection, this internally uses
+setns(2) to enter the container. See \"Connections which use setns(2) to
+enter containers\" in the Consfigurator manual for security implications."
+ (let ((type #+sbcl (if (lisp-connection-p) 'setns :lxc-unpriv-attach)
+ #-sbcl :lxc-unpriv-attach)
+ (pid (loop
+ for (lxc pid)
+ in (mapcar #'words (cdr (lxc:lxc-ls owner "-1fFNAME,PID")))
+ when (string= lxc name)
+ return pid
+ finally
+ (error "Could not determine PID for container ~A." name))))
+ (apply #'establish-connection type remaining
+ :owner owner :name name :pid pid :uid uid :gid gid
+ (and (eql 'setns type)
+ '(:posix-type lxc-unpriv-attach-connection)))))
+
;;;; 'SETNS
@@ -100,7 +192,8 @@
;;; type like :SYSTEMD-MACHINED, we mirror these semantics in our 'SETNS.
#+sbcl
(defmethod establish-connection
- ((type (eql 'setns)) remaining &key pid uid gid)
+ ((type (eql 'setns)) remaining
+ &rest args &key pid posix-type &allow-other-keys)
"Use setns(2) to enter the Linux namespaces of process PID. Additionally,
- If PID has a distinct user namespace and we have permission to setgroups(2)
@@ -133,7 +226,7 @@ setgroups(2) in either the starting user namespace or the target user
namespace, in each case either due to a lack of privilege or because
setgroups(2) is denied in the namespace."
(informat 1 "~&Reassociating to Linux namespaces of PID ~D" pid)
- (alet (make-instance 'nsenter-connection :pid pid :uid uid :gid gid)
+ (alet (apply #'make-instance posix-type (remove-from-plist args :posix-type))
(upload-all-prerequisite-data it)
(change-class it 'setns-connection)
(continue-connection it remaining)))
diff --git a/src/connection/shell-wrap.lisp b/src/connection/shell-wrap.lisp
index bcc8fc8..49cfd83 100644
--- a/src/connection/shell-wrap.lisp
+++ b/src/connection/shell-wrap.lisp
@@ -23,7 +23,8 @@
(defgeneric connection-shell-wrap (connection cmd))
(defmethod connection-run ((c shell-wrap-connection) cmd input)
- (mrun :may-fail :input input (connection-shell-wrap c cmd)))
+ (apply #'mrun :may-fail :input input
+ (ensure-cons (connection-shell-wrap c cmd))))
(defun %readfile (c path &optional delete)
(multiple-value-bind (out exit)
diff --git a/src/data.lisp b/src/data.lisp
index 4962ae0..1b74338 100644
--- a/src/data.lisp
+++ b/src/data.lisp
@@ -469,7 +469,17 @@ properties, or data sources which return objects referencing existing files."
:version (caddr triple))))
(defun get-local-data-cache-dir ()
- (merge-pathnames "consfigurator/data/" (uiop:xdg-cache-home)))
+ (merge-pathnames
+ "data/"
+ ;; A combinator like WITH-HOMEDIR might have temporarily set the HOME
+ ;; and/or XDG_CACHE_HOME environment variables, so use a cached value if we
+ ;; can find one.
+ (or (loop for conn = *connection* then (connection-parent conn)
+ while conn
+ when (subtypep (type-of conn) 'lisp-connection)
+ return (connection-connattr conn :consfigurator-cache))
+ (connection-connattr
+ (establish-connection :local nil) :consfigurator-cache))))
;;;; Passphrases
diff --git a/src/libc.lisp b/src/libc.lisp
index bd1bd48..41a601b 100644
--- a/src/libc.lisp
+++ b/src/libc.lisp
@@ -3,6 +3,7 @@
(include "unistd.h")
(ctype uid_t "uid_t")
+(ctype gid_t "gid_t")
#+linux
(progn
diff --git a/src/package.lisp b/src/package.lisp
index a12b008..c2d0348 100644
--- a/src/package.lisp
+++ b/src/package.lisp
@@ -86,6 +86,7 @@
;; libc.lisp
#:uid_t
+ #:gid_t
#:+CLONE_NEWCGROUP+
#:+CLONE_NEWIPC+
@@ -259,6 +260,7 @@
#:as
#:with-flagfile
#:with-unapply
+ #:with-homedir
;; host.lisp
#:host
@@ -406,8 +408,16 @@
#:capability-p))
(defpackage :consfigurator.util.linux-namespace
- (:use #:cl #:consfigurator #:consfigurator.util.posix1e #:cffi)
- (:export #:setgroups-p
+ (:use #:cl
+ #:anaphora
+ #:alexandria
+ #:consfigurator
+ #:consfigurator.util.posix1e
+ #:cffi)
+ (:export #:get-ids-offset
+ #:reduce-id-maps
+ #:shift-ids
+ #:setgroups-p
#:get-userns-owner))
(defpackage :consfigurator.property.cmd
@@ -944,12 +954,38 @@
(#:os #:consfigurator.property.os))
(:export #:has-swap-file))
+(defpackage :consfigurator.property.lxc
+ (:use #:cl
+ #:anaphora
+ #:alexandria
+ #:consfigurator
+ #:consfigurator.util.linux-namespace
+ #:cffi)
+ (:local-nicknames (#:file #:consfigurator.property.file)
+ (#:apt #:consfigurator.property.apt)
+ (#:os #:consfigurator.property.os)
+ (#:service #:consfigurator.property.service)
+ (#:chroot #:consfigurator.property.chroot)
+ (#:user #:consfigurator.property.user)
+ (#:systemd #:consfigurator.property.systemd))
+ (:export #:installed
+ #:user-container-started
+ #:when-user-container-running
+ #:user-containers-autostart
+ #:usernet-usable-by
+ #:user-container-for
+ #:user-container-for.
+ #:user-container
+ #:user-container.
+
+ #:lxc-ls))
+
(defpackage :consfigurator.connection.local
(:use #:cl #:consfigurator #:alexandria)
(:export #:local-connection))
(defpackage :consfigurator.connection.shell-wrap
- (:use #:cl #:consfigurator)
+ (:use #:cl #:alexandria #:consfigurator)
(:export #:shell-wrap-connection #:connection-shell-wrap))
(defpackage :consfigurator.connection.fork
@@ -1016,7 +1052,9 @@
#:consfigurator
#:consfigurator.util.linux-namespace
#:consfigurator.connection.fork
- #:consfigurator.connection.shell-wrap))
+ #:consfigurator.connection.shell-wrap)
+ (:local-nicknames (#:user #:consfigurator.property.user)
+ (#:lxc #:consfigurator.property.lxc)))
(defpackage :consfigurator.data.asdf
(:use #:cl #:alexandria #:consfigurator))
diff --git a/src/property/lxc.lisp b/src/property/lxc.lisp
new file mode 100644
index 0000000..4feb34f
--- /dev/null
+++ b/src/property/lxc.lisp
@@ -0,0 +1,329 @@
+;;; Consfigurator -- Lisp declarative configuration management system
+
+;;; Copyright (C) 2021 Sean Whitton <spwhitton@spwhitton.name>
+
+;;; This file is free software; you can redistribute it and/or modify
+;;; it under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3, or (at your option)
+;;; any later version.
+
+;;; This file is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+(in-package :consfigurator.property.lxc)
+(named-readtables:in-readtable :consfigurator)
+
+;;;; Properties and combinators
+
+(defproplist installed :posix ()
+ "Install the LXC userspace tools."
+ (:desc "LXC installed")
+ (os:etypecase
+ (debianlike (apt:installed "lxc"))))
+
+(defmacro default-maps-params (uid-maps-param gid-maps-param)
+ `(setq ,uid-maps-param
+ (or ,uid-maps-param
+ (list (cons 0 (multiple-value-list
+ (get-ids-offset user "/etc/subuid")))))
+ ,gid-maps-param
+ (or ,gid-maps-param
+ (list (cons 0 (multiple-value-list
+ (get-ids-offset user "/etc/subgid")))))))
+
+(defprop user-container-started :posix (host &optional owner)
+ "Ensure the LXC unprivileged container for the host designated by HOST owned
+by OWNER, defaulting to the current user, is started.
+(I.e., if HOST is a string, ensure the container named HOST is started; if
+HOST is a HOST value, start the container whose name is HOST's hostname.)"
+ (:desc #?"LXC container ${(get-hostname host)} started")
+ (:check (or (service:no-services-p) (user-container-active-p host owner)))
+ (:apply (lxc-cmd owner "lxc-unpriv-start" "-n" (get-hostname host))))
+
+(defmacro when-user-container-running ((host &key owner) &body propapps)
+ "Apply PROPAPPS only when the unprivileged LXC for the host designated by HOST
+and owned by OWNER, defaulting to the current user, is already started."
+ `(when-user-container-running*
+ ,host ,owner
+ ,(if (cdr propapps) `(eseqprops ,@propapps) (car propapps))))
+
+(define-function-property-combinator
+ when-user-container-running* (host owner propapp)
+ (macrolet ((check-running (form)
+ `(if (user-container-running-p host owner) ,form :no-change)))
+ (:retprop :type (propapptype propapp)
+ :desc (get (car propapp) 'desc)
+ :hostattrs (get (car propapp) 'hostattrs)
+ :apply (lambda-ignoring-args
+ (check-running (propappapply propapp)))
+ :unapply (lambda-ignoring-args
+ (check-running (propappunapply propapp)))
+ :args (cdr propapp))))
+
+(defproplist user-containers-autostart :posix (user)
+ "Install a systemd user unit for USER to autostart all LXC user containers
+owned by that user which have lxc.start.auto turned on. Also ensures that
+lingering is enabled for USER, so the user unit triggers at system boot.
+
+A limitation of the current implementation is that it assumes XDG_CONFIG_HOME
+is ~/.config."
+ (:desc #?"LXC autostart systemd user unit installed for ${user}")
+ (user:has-account user)
+ (systemd:lingering-enabled user)
+ (as user
+ (file:directory-exists ".config/systemd/user")
+ (file:has-content ".config/systemd/user/lxc-autostart.service"
+ '("[Unit]"
+ "Description=\"lxc-autostart\""
+
+ "[Service]"
+ "Type=oneshot"
+ "Delegate=yes"
+ "ExecStart=/usr/bin/lxc-autostart"
+ "ExecStop=/usr/bin/lxc-autostart --shutdown"
+ "RemainAfterExit=yes"
+
+ "[Install]"
+ "WantedBy=default.target"))
+ (systemd:enabled "lxc-autostart" t)))
+
+(defprop usernet-usable-by :posix
+ (user &optional (interface "lxcbr0") (count 10))
+ "Ensure that USER is allowed to attach up to COUNT unprivileged LXCs to the
+LXC-managed bridge INTERFACE.
+
+As a special case, INTERFACE may also be \"none\", which gives USER permission
+to create veth pairs where the peer outside the container is not attached to
+any bridge."
+ (:desc #?"${user} may attach LXC veth devices to ${interface}")
+ (:apply (file:map-file-lines
+ "/etc/lxc/lxc-usernet"
+ (lambda (lines)
+ (loop with done
+ and want = (format nil "~A veth ~A ~D" user interface count)
+ and prefix = (strcat user " veth " interface)
+ for line in lines
+ if (string-prefix-p prefix line)
+ unless done collect want into accum and do (setq done t)
+ end
+ else collect line into accum
+ finally (return
+ (if done accum (nconc accum (list want)))))))))
+
+(defprop %ids-shifted-for :lisp
+ (user directory uid-maps gid-maps
+ &optional
+ (rootfs
+ (merge-pathnames "rootfs/"
+ (ensure-directory-pathname directory))))
+ "Recursively shift the user and group ownership of ROOTFS according to
+UID-MAPS and GID-MAPS and chown DIRECTORY to root's UID according to UID-MAPS.
+Not idempotent! Also set the mode of DIRECTORY to 0770, as is standard for
+unprivileged LXCs."
+ (:apply
+ (default-maps-params uid-maps gid-maps)
+ (let ((dir (ensure-directory-pathname directory))
+ (uidmap (reduce-id-maps uid-maps))
+ (gidmap (reduce-id-maps gid-maps)))
+ (handler-bind ((serious-condition
+ ;; Don't leave a partially-shifted tree.
+ (lambda-ignoring-args (delete-remote-trees rootfs))))
+ (shift-ids rootfs uidmap gidmap))
+ ;; Don't see how to pass (gid_t)-1 as the third argument via CFFI. Note
+ ;; that gid_t is not guaranteed to be unsigned.
+ (nix:chown dir (funcall uidmap 0) (nix:stat-gid (nix:stat dir)))
+ (nix:chmod dir #o770))))
+
+(defprop %container-config-populated :posix
+ (prelude-lines user uid-maps gid-maps directory autostart hostname
+ additional-lines)
+ (:apply
+ (default-maps-params uid-maps gid-maps)
+ (let ((uid-maps (loop for (inside outside count) in uid-maps
+ collect (format nil "lxc.idmap = u ~D ~D ~D"
+ inside outside count)))
+ (gid-maps (loop for (inside outside count) in gid-maps
+ collect (format nil "lxc.idmap = g ~D ~D ~D"
+ inside outside count)))
+ (rootfs
+ (strcat
+ "dir:"
+ (unix-namestring
+ (merge-pathnames
+ "rootfs"
+ (merge-pathnames directory (get-connattr :remote-home)))))))
+ (file:has-content (merge-pathnames "config" directory)
+ (append prelude-lines uid-maps gid-maps
+ (list (strcat "lxc.rootfs.path = " rootfs)
+ (strcat "lxc.start.auto = " (if autostart "1" "0"))
+ (strcat "lxc.uts.name = " hostname))
+ additional-lines)
+ :mode #o640))))
+
+(defpropspec user-container-for :lisp
+ (options user host &optional additional-properties
+ &aux (host* (preprocess-host
+ (make-child-host
+ :hostattrs (hostattrs host)
+ :propspec (host-propspec
+ (union-propspec-into-host
+ host additional-properties))))))
+ "Build an unprivileged, non-system-wide LXC container for HOST.
+Must be applied using a connection chain which grants root access, primarily
+for the sake of bootstrapping the container's root filesystem. Once built,
+however, the container will be launched by USER, which should be non-root.
+
+If the container has already been bootstrapped and is running at the time this
+property is applied, enter the container and apply all its properties.
+
+OPTIONS is a plist of keyword parameters:
+
+ - :AUTOSTART -- Lisp boolean corresponding to lxc.start.auto in the
+ container's config file, and also determines whether applying this
+ property attempts to start the container. Defaults to nil. See also
+ LXC:USER-CONTAINERS-AUTOSTART.
+
+ - :PRELUDE-LINES -- additional lines to prepend to the container's
+ configuration file, before the lines generated by this property. See
+ lxc.container.conf(5). The default value is usually sufficient; if you
+ add lines, you will probably want to include the lines from the default
+ value too.
+
+ - :ADDITIONAL-LINES -- additional lines to append to the container's
+ configuration file, after the lines generated by this property. See
+ lxc.container.conf(5). In most cases you will need to include, at a
+ minimum, lines setting up a network interface for the container. The
+ default value serves as an example of a standard way to do this; if you
+ use them unmodified, you will also need to apply LXC:USERNET-USABLE-BY for
+ USER before this property.
+
+ - :UID-MAPS -- a list of the form (INSIDE OUTSIDE COUNT), or a list of such
+ lists, specifying the subordinate UIDs for the container's user namespace.
+ OUTSIDE is the beginning of a UID range, as seen from outside the
+ container, and INSIDE is the UID that OUTSIDE is mapped to, as seen from
+ inside the container. COUNT is the number of consecutive UIDs mapped.
+ This property will ensure that USER has permission to use that range of
+ UIDs by updating /etc/subuid if necessary.
+
+ As a special case, if NIL, instead use the first range of UIDs assigned to
+ USER in /etc/subuid, with a value of zero for INSIDE, and do not modify
+ /etc/subuid. (If you want to use the first range of UIDs assigned to USER
+ in /etc/subuid and also other ranges, you must specify them all explicitly
+ and cannot rely on this special case.)
+
+ It is usually sufficient not to specify this parameter, as distribution
+ scripts automatically add an entry to /etc/subuid for each regular user,
+ and most containers use a value of zero for INSIDE.
+
+ - :GID-MAPS -- as :UID-MAPS, but for GIDs and /etc/subgid.
+
+ - :CHROOT-OPTIONS -- passed on to CHROOT:OS-BOOTSTRAPPED-FOR, which see.
+
+A limitation of the current implementation is the the root filesystem of the
+container is always created under ~/.local/share/lxc/HOSTNAME where HOSTNAME
+is the hostname of HOST, ignoring any configured XDG_DATA_HOME for USER.
+
+Internally we use setns(2) to enter the container. See \"Connections which
+use setns(2) to enter containers\" in the Consfigurator manual for security
+implications."
+ (:desc #?"LXC container for ${(get-hostname host*)} configured")
+ ;; Same hostname probably means that the container HOST inherited the
+ ;; container host's hostname as one was not explicitly set; probably a
+ ;; mistake.
+ (when (string= (get-hostname host*) (get-hostname))
+ (aborted-change "LXC container has same hostname as container host."))
+ (destructuring-bind
+ (&key chroot-options autostart uid-maps gid-maps
+ (prelude-lines '("lxc.include = /usr/share/lxc/config/common.conf"
+ "lxc.include = /usr/share/lxc/config/userns.conf"))
+ (additional-lines '("lxc.net.0.type = veth"
+ "lxc.net.0.flags = up"
+ "lxc.net.0.link = lxcbr0"))
+ &aux
+ (directory
+ (ensure-directory-pathname
+ (merge-pathnames (get-hostname host*) ".local/share/lxc/")))
+ (rootfs (merge-pathnames "rootfs/" directory))
+ (uid-maps (if (listp (car uid-maps)) uid-maps (list uid-maps)))
+ (gid-maps (if (listp (car gid-maps)) gid-maps (list gid-maps))))
+ options
+ `(eseqprops
+ (installed)
+ (user:has-account ,user)
+ (systemd:lingering-enabled ,user) ; required for lxc-ls(1) to work at all
+ ,@(aand (loop for (inside outside count) in uid-maps
+ collect (format nil "~A:~D:~D" user outside count))
+ `((desc ,#?"/etc/subuid has mappings for ${(get-hostname host*)}"
+ (file:contains-lines "/etc/subuid" ,@it))))
+ ,@(aand (loop for (inside outside count) in gid-maps
+ collect (format nil "~A:~D:~D" user outside count))
+ `((desc ,#?"/etc/subgid has mappings for ${(get-hostname host*)}"
+ (file:contains-lines "/etc/subgid" ,@it))))
+ ,(propapp (desc "Base directory for container exists"
+ (as user (file:directory-exists directory))))
+ (with-homedir (:user ,user)
+ (with-flagfile ,(merge-pathnames "rootfs.bootstrapped" directory)
+ ;; It would be nice to branch here such that if we are about to
+ ;; start up the container and enter it, just bootstrap a minimal
+ ;; root filesystem, and only otherwise get all the other properties
+ ;; applied before the ID shifting. I.e.
+ ;;
+ ;; (chroot:os-bootstrapped-for
+ ;; ,chroot-options ,rootfs
+ ;; ,@(if autostart
+ ;; `(,(make-host :hostattrs
+ ;; (list :os (get-hostattrs :os host*))))
+ ;; `(,host ,additional-properties)))
+ ;;
+ ;; However, it might be that we need to apply the other properties
+ ;; in order that the container is startable; for example, getting
+ ;; systemd installed.
+ (chroot:os-bootstrapped-for
+ ,chroot-options ,rootfs ,host ,additional-properties)
+ (%ids-shifted-for ,user ,directory ,uid-maps ,gid-maps)))
+ ,(propapp
+ (desc "Container configuration file populated"
+ (as user
+ (%container-config-populated
+ prelude-lines user uid-maps gid-maps directory autostart
+ (car (split-string (get-hostname host*) :separator "."))
+ additional-lines))))
+ ,@(and autostart `((user-container-started ,host ,user)))
+ (when-user-container-running (,host :owner ,user)
+ (deploys ((:lxc :owner ,user :name ,(get-hostname host*)))
+ ,host ,additional-properties)))))
+
+(defproplist user-container :lisp (options user properties)
+ "Like LXC:USER-CONTAINER-FOR but define a new host using PROPERTIES."
+ (:desc "LXC container defined")
+ (user-container-for options user (make-host :propspec properties)))
+
+
+;;;; Utility functions
+
+(defun lxc-cmd (&optional owner &rest cmd-and-args)
+ (let* ((runuser
+ (and owner (not (string= owner (get-connattr :remote-user)))))
+ (uid (if runuser
+ (user:passwd-entry 2 owner)
+ (get-connattr :remote-uid))))
+ (apply #'run :env `(:DBUS_SESSION_BUS_ADDRESS nil
+ :XDG_RUNTIME_DIR ,(format nil "/run/user/~D" uid))
+ (and runuser (list "runuser" "-u" owner "--")) cmd-and-args)))
+
+(defun lxc-ls (&optional owner &rest args)
+ "Return the lines of output from lxc-ls(1) called with ARGS and for OWNER."
+ (lines (apply #'lxc-cmd owner "lxc-ls" args)))
+
+(defun user-container-active-p (host &optional owner)
+ (and (not (service:no-services-p))
+ (memstring= (get-hostname host) (lxc-ls owner "--active"))))
+
+(defun user-container-running-p (host &optional owner)
+ (and (not (service:no-services-p))
+ (memstring= (get-hostname host) (lxc-ls owner "--running"))))
diff --git a/src/util/linux-namespace.lisp b/src/util/linux-namespace.lisp
index e362868..9acdd0b 100644
--- a/src/util/linux-namespace.lisp
+++ b/src/util/linux-namespace.lisp
@@ -18,6 +18,128 @@
(in-package :consfigurator.util.linux-namespace)
(named-readtables:in-readtable :consfigurator)
+(defun get-ids-offset (identifier file)
+ "Where IDENTIFIER is a username or uid, and FILE is structured like
+/etc/subuid and /etc/subuid (see subuid(5) and subgid(5)), return the
+numerical subordinate ID and numerical subordinate ID count for the first
+entry in FILE for IDENTIFIER."
+ (with-open-file (file file)
+ (loop with info = (osicat:user-info identifier)
+ with fields
+ = (list (cdr (assoc :name info))
+ (write-to-string (cdr (assoc :user-id info))))
+ for line = (read-line file)
+ for (field start count) = (split-string line :separator '(#\:))
+ when (memstring= field fields)
+ return (values (parse-integer start) (parse-integer count)))))
+
+(defun reduce-id-maps (id-maps)
+ "Where each of ID-MAPS is a list of three integers corresponding to the lines
+of the uid_map (resp. gid_map) of a process in a different user namespace as
+would be read by a process in the current user namespace, return a function
+which maps UIDs (resp. GIDs) in the current user namespace to UIDs
+(resp. GIDs) in the user namespace of the process. The function returns NIL,
+not 65534, for values which are unmapped.
+
+A process's uid_map & gid_map files are under /proc; see user_namespaces(7)."
+ (let ((cache (make-hash-table))
+ (sorted (sort (copy-list id-maps) #'< :key #'car)))
+ (labels ((make-subtree (limit)
+ (and (plusp limit)
+ (let* ((new-limit (floor limit 2))
+ (left (make-subtree new-limit))
+ (next (pop sorted)))
+ (destructuring-bind (inside outside count) next
+ (let ((end (1- (+ inside count))))
+ (assert (and (every #'integerp next)
+ (not (minusp inside))
+ (not (minusp outside))
+ (plusp count))
+ nil "Invalid ID map ~S" next)
+ (assert
+ (or (not sorted) (> (caar sorted) end))
+ nil "ID maps overlap: ~S & ~S" next (car sorted))
+ (list inside end (- outside inside) left
+ (make-subtree (1- (- limit new-limit)))))))))
+ (tree-lookup (id tree)
+ (and tree
+ (destructuring-bind (start end offset left right) tree
+ (cond ((> id end) (tree-lookup id right))
+ ((< id start) (tree-lookup id left))
+ (t (+ id offset))))))
+ ;; Memoisation ought to be really worthwhile because we will
+ ;; likely be looking up the same few IDs over and over (e.g. 0).
+ (cache-or-tree-lookup (id tree)
+ (multiple-value-bind (result found) (gethash id cache)
+ (if found
+ result
+ (setf (gethash id cache) (tree-lookup id tree))))))
+ (rcurry #'cache-or-tree-lookup (make-subtree (length sorted))))))
+
+(defun shift-ids (root uidmap gidmap
+ &aux (seen (make-hash-table :test #'equal)))
+ "Recursively map the ownership and POSIX ACLs of files under ROOT by applying
+the function UIDMAP to user ownership and UIDs appearing in ACLs, and the
+function GIDMAP to group ownership and GIDs appearing in ACLs. Each of UIDMAP
+and GIDMAP should return a non-negative integer or NIL for each non-negative
+integer input; in the latter case, no update will be made to the UID or GID.
+
+For example, to recursively shift the ownership and POSIX ACLs of a filesystem
+hierarchy to render it suitable for use as a root filesystem in a different
+user namespace, you might use
+
+ (shift-ids \"/var/lib/lxc/mycontainer/rootfs\"
+ (reduce-id-maps '(0 100000 65536))
+ (reduce-id-maps '(0 100000 65536)))
+
+Here the list (0 100000 65536) describes the relationship between the present
+user namespace and the container's user namespace; see the docstring for
+CONSFIGURATOR.UTIL.LINUX-NAMESPACE:REDUCE-ID-MAPS and user_namespaces(7)."
+ (labels
+ ((shift (file)
+ (let* ((file (drop-trailing-slash (unix-namestring file)))
+ (stat (nix:lstat file))
+ (pair (cons (nix:stat-dev stat) (nix:stat-ino stat)))
+ (uid (nix:stat-uid stat))
+ (gid (nix:stat-gid stat))
+ (mode (nix:stat-mode stat))
+ (dirp (nix:s-isdir mode))
+ (linkp (nix:s-islnk mode)))
+ (unless (gethash pair seen)
+ (setf (gethash pair seen) t)
+ (nix:lchown file
+ (or (funcall uidmap uid) uid)
+ (or (funcall gidmap gid) gid))
+ (unless linkp
+ ;; Restore mode because chown wipes setuid/setgid.
+ (nix:chmod file mode)
+ ;; Now do the ACL shifts; directories have two.
+ (shift-acl file +ACL-TYPE-ACCESS+)
+ (when dirp (shift-acl file +ACL-TYPE-DEFAULT+)))
+ (when (and dirp (not linkp))
+ (mapc #'shift (directory-contents file))))))
+ (shift-acl (file type)
+ (with-acl-free (acl (acl-get-file file type))
+ (with-foreign-objects
+ ((uid 'uid_t) (gid 'gid_t) (entry-p 'acl_entry_t))
+ (loop with setp
+ for etype = +ACL-FIRST-ENTRY+ then +ACL-NEXT-ENTRY+
+ while (plusp (acl-get-entry acl etype entry-p))
+ for entry = (mem-ref entry-p 'acl_entry_t)
+ for tag-type = (acl-get-tag-type entry)
+ when (= tag-type +ACL-USER+)
+ do (awhen
+ (funcall uidmap (acl-get-qualifier entry 'uid_t))
+ (setf setp t (mem-ref uid 'uid_t) it)
+ (acl-set-qualifier entry uid))
+ when (= tag-type +ACL-GROUP+)
+ do (awhen
+ (funcall gidmap (acl-get-qualifier entry 'gid_t))
+ (setf setp t (mem-ref gid 'gid_t) it)
+ (acl-set-qualifier entry gid))
+ finally (when setp (acl-set-file file type acl)))))))
+ (shift (ensure-directory-pathname root))))
+
#+linux
(defun get-userns-owner (fd)
(with-foreign-object (owner 'uid_t)