aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean Whitton <spwhitton@spwhitton.name>2021-08-04 17:09:47 -0700
committerSean Whitton <spwhitton@spwhitton.name>2021-11-08 16:00:27 -0700
commit1e99ee6ff7f47db2052e226d7b071e31ff33b56c (patch)
treec27a22b6cb4e7d2c8b0b1aad4dc747c31102958d
parent42489752b4c78f6bbc80bb56a4347b692a067c29 (diff)
downloadconsfigurator-1e99ee6ff7f47db2052e226d7b071e31ff33b56c.tar.gz
add LXC properties, :LXC{,-UNPRIV-ATTACH} connections, WITH-HOMEDIR
Signed-off-by: Sean Whitton <spwhitton@spwhitton.name>
-rw-r--r--README.rst3
-rw-r--r--consfigurator.asd1
-rw-r--r--doc/connections.rst10
-rw-r--r--doc/ideas.rst5
-rw-r--r--src/combinator.lisp31
-rw-r--r--src/connection/chroot.lisp4
-rw-r--r--src/connection/linux-namespace.lisp157
-rw-r--r--src/connection/shell-wrap.lisp3
-rw-r--r--src/data.lisp12
-rw-r--r--src/libc.lisp1
-rw-r--r--src/package.lisp46
-rw-r--r--src/property/lxc.lisp329
-rw-r--r--src/util/linux-namespace.lisp122
13 files changed, 675 insertions, 49 deletions
diff --git a/README.rst b/README.rst
index 3eadba0..548ad45 100644
--- a/README.rst
+++ b/README.rst
@@ -34,9 +34,6 @@ distro package managers, package archives and daemon configuration mechanisms,
rather than seeking to replace any of those. Let's get as much as we can out
of all that existing distro policy-compliant work!
-*Most of the features described above are implemented; a few are works in
-progress.*
-
.. _Propellor: https://propellor.branchable.com/
.. _GNU Guix System: https://guix.gnu.org/
.. _NixOS: https://nixos.org/
diff --git a/consfigurator.asd b/consfigurator.asd
index a25cf2e..cf1cb2c 100644
--- a/consfigurator.asd
+++ b/consfigurator.asd
@@ -75,6 +75,7 @@
(:file "src/property/firewalld")
(:file "src/property/timezone")
(:file "src/property/swap")
+ (:file "src/property/lxc")
(:file "src/connection/shell-wrap")
(:file "src/connection/fork")
(:file "src/connection/rehome")
diff --git a/doc/connections.rst b/doc/connections.rst
index 1f88f71..236df5b 100644
--- a/doc/connections.rst
+++ b/doc/connections.rst
@@ -144,11 +144,11 @@ Connections which use setns(2) to enter containers
--------------------------------------------------
When the current connection is a Lisp-type connection, connection types which
-enter Linux containers, such as ``:SYSTEMD-MACHINED``, invoke the setns(2)
-system call directly. The implementation of this is the connection type
-``CONSFIGURATOR.CONNECTION.LINUX-NAMESPACE::SETNS``. The implementation of
-the ``POST-FORK`` generic for that connection type is structured similarly to
-the nsenter(1) command from util-linux. This has the advantage that
+enter Linux containers, such as ``:LXC`` and ``:SYSTEMD-MACHINED``, invoke the
+setns(2) system call directly. The implementation of this is the connection
+type ``CONSFIGURATOR.CONNECTION.LINUX-NAMESPACE::SETNS``. The implementation
+of the ``POST-FORK`` generic for that connection type is structured similarly
+to the nsenter(1) command from util-linux. This has the advantage that
``CONSFIGURATOR.CONNECTION.LINUX-NAMESPACE::SETNS`` should be reusable for
implementing connection types which enter other kinds of Linux container; the
container runtime-specific code is limited to determining the PID of the
diff --git a/doc/ideas.rst b/doc/ideas.rst
index 8ce7fb9..72f02ff 100644
--- a/doc/ideas.rst
+++ b/doc/ideas.rst
@@ -52,8 +52,9 @@ Core
number of hosts. Now that we don't call fork(2) while executing
deployments, we ought to be able to do this using threads, and so it can
work in the root Lisp too. However, we still use ``WITH-CURRENT-DIRECTORY``
- in various places. Perhaps that macro could be changed to only affect RUN,
- MRUN etc. for the sake of enabling multithreading.
+ in various places, and temporarily set HOME in ``WITH-HOMEDIR``. Perhaps
+ ``WITH-CURRENT-DIRECTORY`` could be changed to only affect RUN, MRUN
+ etc. for the sake of enabling multithreading.
- It might be useful to have a restart for the case where an attempt is made
to apply a list of properties containing some ``:LISP`` properties with a
diff --git a/src/combinator.lisp b/src/combinator.lisp
index 26a6767..088d3cd 100644
--- a/src/combinator.lisp
+++ b/src/combinator.lisp
@@ -360,3 +360,34 @@ an :UNAPPLY subroutine for a property which works by calling other properties."
:unapply (lambda-ignoring-args
(propappapply unapply-propapp)))
apply-propapp)))
+
+(defmacro with-homedir ((&key user dir) &body propapps)
+ "Apply PROPAPPS with a different home and initial working directory, either
+DIR or the home directory of USER."
+ (when (and user dir)
+ (simple-program-error
+ "WITH-HOMEDIR: Both USER and DIR arguments supplied."))
+ `(with-homedir* ,user ,dir
+ ,(if (cdr propapps) `(eseqprops ,@propapps) (car propapps))))
+
+(define-function-property-combinator with-homedir* (user dir propapp)
+ (flet ((change (f)
+ ;; Ensure the :CONSFIGURATOR-CACHE connattr is populated because
+ ;; determining it may look at HOME. In particular, we want to
+ ;; avoid looking in the new HOME for cached data to upload.
+ (when (lisp-connection-p) (get-connattr :consfigurator-cache))
+ (let ((new (or dir (stripln (run (strcat "echo ~" user))))))
+ (with-connattrs (:remote-home new)
+ (with-remote-current-directory (new)
+ (if (lisp-connection-p)
+ (let ((orig (getenv "HOME")))
+ (setf (getenv "HOME") new)
+ (unwind-protect (funcall f propapp)
+ (setf (getenv "HOME") orig)))
+ (funcall f propapp)))))))
+ (:retprop :type (propapptype propapp)
+ :desc (get (car propapp) 'desc)
+ :hostattrs (get (car propapp) 'hostattrs)
+ :apply (lambda-ignoring-args (change #'propappapply))
+ :unapply (lambda-ignoring-args (change #'propappunapply))
+ :args (cdr propapp))))
diff --git a/src/connection/chroot.lisp b/src/connection/chroot.lisp
index 8f829d3..4c1db70 100644
--- a/src/connection/chroot.lisp
+++ b/src/connection/chroot.lisp
@@ -100,7 +100,9 @@ should be the mount point, without the chroot's root prefixed.")
(unless (and (lisp-connection-p) (zerop (nix:geteuid)))
(error "~&Forking into a chroot requires a Lisp image running as root"))
(informat 1 "~&Forking into chroot at ~A" into)
- (let* ((into (ensure-pathname into :want-absolute t :ensure-directory t))
+ (let* ((into (ensure-pathname into
+ :defaults (uiop:getcwd)
+ :ensure-absolute t :ensure-directory t))
(connection (make-instance 'shell-chroot-connection :into into)))
;; Populate the CONSFIGURATOR::ID and :REMOTE-HOME connattrs correctly to
;; ensure they don't get bogus values when this connection object is used
diff --git a/src/connection/linux-namespace.lisp b/src/connection/linux-namespace.lisp
index 89b5daf..dc913e8 100644
--- a/src/connection/linux-namespace.lisp
+++ b/src/connection/linux-namespace.lisp
@@ -22,42 +22,36 @@
;;; finaliser thread: we must be truly single-threaded in order to enter a
;;; different user namespace. If we can't use it then we fall back to a
;;; POSIX-type connection into the container. In the latter situation the
-;;; user could follow the :SYSTEMD-MACHINED connection with a connection type
-;;; which starts up a remote Lisp image within the container. This will be
-;;; significantly slower, so if there is more than one container it will
-;;; probably be worth arranging for the use of 'SETNS.
+;;; user could follow the :SYSTEMD-MACHINED/:LXC/etc. connection with a
+;;; connection type which starts up a remote Lisp image within the container.
+;;; This will be significantly slower, so if there is more than one container
+;;; it will probably be worth arranging for the use of 'SETNS.
+;;;
+;;; For containers which can be launched only by root, like systemd-nspawn, we
+;;; use nsenter(1) for the POSIX-type connection into them. For containers
+;;; owned by non-root, however, we use container-specific commands like
+;;; lxc-unpriv-attach(1). This is because nsenter(1) with --all can fail with
+;;; permission errors depending on which namespaces are had in common inside
+;;; and outside of the container. For example, if the container has the same
+;;; time namespace, nsenter(1) will fail to re-enter it because the non-root
+;;; user lacks CAP_SYS_ADMIN for the user namespace owning that time
+;;; namespace. 'SETNS handles this case by ignoring EPERM in the second pass.
;;;
;;; An alternative to calling setns(2) ourselves, in the Lisp-type connection
;;; case, might be to dump an image, write it to a temporary file within the
;;; container using WITH-REMOTE-TEMPORARY-FILE & nsenter(1), and then reinvoke
-;;; the image, again using nsenter(1). This would be more portable with
-;;; respect to Lisp implementations. However, it would run into the problems
-;;; described in "Dumping and reinvoking Lisp" in pitfalls.rst. With the
-;;; current approach, shared library dependencies need be available only
-;;; outside the container, and then after entering the container they will
-;;; still be usable.
-
-(defmethod establish-connection
- ((type (eql :systemd-machined)) remaining &key name uid gid)
- (establish-connection
- #+sbcl (if (lisp-connection-p) 'setns :nsenter) #-sbcl :nsenter remaining
- :pid (or (loop for line in (runlines "machinectl" "show" name)
- when (string-prefix-p "Leader=" line)
- return (subseq line 7))
- (error "Could not determine PID for machine ~A." name))
- :uid uid :gid gid))
-
-
-;;;; :NSENTER
-
-(defmethod establish-connection
- ((type (eql :nsenter)) remaining &key pid uid gid)
- (declare (ignore remaining))
- (informat 1 "~&Entering namespaces of PID ~D with nsenter(1)" pid)
- (make-instance 'nsenter-connection :pid pid :uid uid :gid gid))
+;;; the image, again using nsenter(1), or using lxc-unpriv-attach(1), etc.
+;;; This would be more portable with respect to Lisp implementations.
+;;; However, it would run into the problems described in "Dumping and
+;;; reinvoking Lisp" in pitfalls.rst. With the current approach, shared
+;;; library dependencies need be available only outside the container, and
+;;; then after entering the container they will still be usable.
(defclass linux-namespace-connection ()
- ((pid :type integer :initarg :pid
+ ((name :type string :initarg :name
+ :documentation
+ "The name of the container as output by commands like lxc-ls(1).")
+ (pid :type integer :initarg :pid
:initform (simple-program-error "Must supply namespace leader PID.")
:documentation
"A PID of a process which is already within all of the namespaces.")
@@ -79,6 +73,9 @@
(when (plusp (length entry))
(push entry env))))))
+
+;;;; :NSENTER
+
(defclass nsenter-connection
(linux-namespace-connection shell-wrap-connection) ())
@@ -89,6 +86,101 @@
"nsenter ~@[-S ~D ~]~@[-G ~D ~]-at ~D env -i ~{~A~^ ~} sh -c ~A"
uid gid pid (mapcar #'escape-sh-token env) (escape-sh-token cmd))))
+(defmethod establish-connection
+ ((type (eql :nsenter)) remaining &key name pid uid gid)
+ (declare (ignore remaining))
+ (informat 1 "~&Entering namespaces of PID ~D with nsenter(1)" pid)
+ (make-instance 'nsenter-connection :name name :pid pid :uid uid :gid gid))
+
+(defmethod establish-connection
+ ((type (eql :systemd-machined)) remaining &key name uid gid)
+ (let ((type #+sbcl (if (lisp-connection-p) 'setns :nsenter) #-sbcl :nsenter)
+ (pid (or (loop for line in (runlines "machinectl" "show" name)
+ when (string-prefix-p "Leader=" line)
+ return (subseq line 7))
+ (error "Could not determine PID for container ~A." name))))
+ (apply #'establish-connection type remaining
+ :name name :pid pid :uid uid :gid gid
+ (and (eql 'setns type) '(:posix-type nsenter-connection)))))
+
+
+;;;; :LXC-UNPRIV-ATTACH
+
+(defclass lxc-unpriv-attach-connection
+ (linux-namespace-connection shell-wrap-connection)
+ ((owner :initarg :owner :initform nil)
+ (owner-uid :initarg :owner-uid :initform nil)))
+
+(defmethod initialize-instance :after
+ ((connection lxc-unpriv-attach-connection) &key)
+ (with-slots (owner owner-uid) connection
+ (when owner (setf owner-uid (user:passwd-entry 2 owner)))))
+
+(defmethod connection-shell-wrap
+ ((connection lxc-unpriv-attach-connection) cmd)
+ (with-slots (owner owner-uid name uid gid env) connection
+ ;; Here we reimplement lxc-unpriv-attach(1) in order to pass --quiet to
+ ;; systemd-run(1), else we get extra output on stderr.
+ (let ((args `("systemd-run" "--scope" "--quiet" "-p" "Delegate=yes"
+ "/usr/bin/lxc-attach" "-n" ,name "--clear-env"
+ ,@(loop for env in env collect "--set-var" collect env)
+ ,@(and uid `("-u" ,(write-to-string uid)))
+ ,@(and gid `("-g" ,(write-to-string gid)))
+ "--" "sh" "-c" ,cmd)))
+ (if (and owner (not (string= owner (get-connattr :remote-user))))
+ (with-connattrs (:remote-uid owner-uid)
+ (list* "runuser" "-u" owner "--" (apply #'systemd--user args)))
+ (apply #'systemd--user args)))))
+
+(defmethod establish-connection
+ ((type (eql :lxc-unpriv-attach)) remaining &key owner name pid uid gid)
+ (declare (ignore remaining))
+ (informat 1 "~&Entering namespaces of PID ~D with lxc-unpriv-attach(1)" pid)
+ (make-instance 'lxc-unpriv-attach-connection
+ :owner owner :name name :pid pid :uid uid :gid gid))
+
+(defmethod establish-connection
+ ((type (eql :lxc)) remaining
+ &key owner (name (and (not remaining) (get-hostname))) uid gid)
+ "Attach to the LXC named NAME and owned by OWNER, defaulting to the current
+user. Switch to UID and GID inside the LXC.
+
+When the previously established connection hop is a Lisp-type connection, this
+connection type will dump and reinvoke Lisp. Thus, connections established
+since the Lisp image was started up but before this one must not have rendered
+the original ~/.cache/common-lisp/ unreadable, or the reinvoked image will
+fail to start. For example,
+
+ (:ssh :sbcl (:lxc :name \"foo\"))
+
+and
+
+ ((:ssh :user \"root\") :sbcl (:lxc :owner \"user\" :name \"foo\"))
+
+will work but
+
+ ((:ssh :user \"root\") :sbcl (:setuid :to \"user\") (:lxc :name \"foo\"))
+
+will not. See \"Dumping and reinvoking Lisp\" in the \"Pitfalls and
+limitations\" section of the Consfigurator manual.
+
+When the current connection is a Lisp-type connection, this internally uses
+setns(2) to enter the container. See \"Connections which use setns(2) to
+enter containers\" in the Consfigurator manual for security implications."
+ (let ((type #+sbcl (if (lisp-connection-p) 'setns :lxc-unpriv-attach)
+ #-sbcl :lxc-unpriv-attach)
+ (pid (loop
+ for (lxc pid)
+ in (mapcar #'words (cdr (lxc:lxc-ls owner "-1fFNAME,PID")))
+ when (string= lxc name)
+ return pid
+ finally
+ (error "Could not determine PID for container ~A." name))))
+ (apply #'establish-connection type remaining
+ :owner owner :name name :pid pid :uid uid :gid gid
+ (and (eql 'setns type)
+ '(:posix-type lxc-unpriv-attach-connection)))))
+
;;;; 'SETNS
@@ -100,7 +192,8 @@
;;; type like :SYSTEMD-MACHINED, we mirror these semantics in our 'SETNS.
#+sbcl
(defmethod establish-connection
- ((type (eql 'setns)) remaining &key pid uid gid)
+ ((type (eql 'setns)) remaining
+ &rest args &key pid posix-type &allow-other-keys)
"Use setns(2) to enter the Linux namespaces of process PID. Additionally,
- If PID has a distinct user namespace and we have permission to setgroups(2)
@@ -133,7 +226,7 @@ setgroups(2) in either the starting user namespace or the target user
namespace, in each case either due to a lack of privilege or because
setgroups(2) is denied in the namespace."
(informat 1 "~&Reassociating to Linux namespaces of PID ~D" pid)
- (alet (make-instance 'nsenter-connection :pid pid :uid uid :gid gid)
+ (alet (apply #'make-instance posix-type (remove-from-plist args :posix-type))
(upload-all-prerequisite-data it)
(change-class it 'setns-connection)
(continue-connection it remaining)))
diff --git a/src/connection/shell-wrap.lisp b/src/connection/shell-wrap.lisp
index bcc8fc8..49cfd83 100644
--- a/src/connection/shell-wrap.lisp
+++ b/src/connection/shell-wrap.lisp
@@ -23,7 +23,8 @@
(defgeneric connection-shell-wrap (connection cmd))
(defmethod connection-run ((c shell-wrap-connection) cmd input)
- (mrun :may-fail :input input (connection-shell-wrap c cmd)))
+ (apply #'mrun :may-fail :input input
+ (ensure-cons (connection-shell-wrap c cmd))))
(defun %readfile (c path &optional delete)
(multiple-value-bind (out exit)
diff --git a/src/data.lisp b/src/data.lisp
index 4962ae0..1b74338 100644
--- a/src/data.lisp
+++ b/src/data.lisp
@@ -469,7 +469,17 @@ properties, or data sources which return objects referencing existing files."
:version (caddr triple))))
(defun get-local-data-cache-dir ()
- (merge-pathnames "consfigurator/data/" (uiop:xdg-cache-home)))
+ (merge-pathnames
+ "data/"
+ ;; A combinator like WITH-HOMEDIR might have temporarily set the HOME
+ ;; and/or XDG_CACHE_HOME environment variables, so use a cached value if we
+ ;; can find one.
+ (or (loop for conn = *connection* then (connection-parent conn)
+ while conn
+ when (subtypep (type-of conn) 'lisp-connection)
+ return (connection-connattr conn :consfigurator-cache))
+ (connection-connattr
+ (establish-connection :local nil) :consfigurator-cache))))
;;;; Passphrases
diff --git a/src/libc.lisp b/src/libc.lisp
index bd1bd48..41a601b 100644
--- a/src/libc.lisp
+++ b/src/libc.lisp
@@ -3,6 +3,7 @@
(include "unistd.h")
(ctype uid_t "uid_t")
+(ctype gid_t "gid_t")
#+linux
(progn
diff --git a/src/package.lisp b/src/package.lisp
index a12b008..c2d0348 100644
--- a/src/package.lisp
+++ b/src/package.lisp
@@ -86,6 +86,7 @@
;; libc.lisp
#:uid_t
+ #:gid_t
#:+CLONE_NEWCGROUP+
#:+CLONE_NEWIPC+
@@ -259,6 +260,7 @@
#:as
#:with-flagfile
#:with-unapply
+ #:with-homedir
;; host.lisp
#:host
@@ -406,8 +408,16 @@
#:capability-p))
(defpackage :consfigurator.util.linux-namespace
- (:use #:cl #:consfigurator #:consfigurator.util.posix1e #:cffi)
- (:export #:setgroups-p
+ (:use #:cl
+ #:anaphora
+ #:alexandria
+ #:consfigurator
+ #:consfigurator.util.posix1e
+ #:cffi)
+ (:export #:get-ids-offset
+ #:reduce-id-maps
+ #:shift-ids
+ #:setgroups-p
#:get-userns-owner))
(defpackage :consfigurator.property.cmd
@@ -944,12 +954,38 @@
(#:os #:consfigurator.property.os))
(:export #:has-swap-file))
+(defpackage :consfigurator.property.lxc
+ (:use #:cl
+ #:anaphora
+ #:alexandria
+ #:consfigurator
+ #:consfigurator.util.linux-namespace
+ #:cffi)
+ (:local-nicknames (#:file #:consfigurator.property.file)
+ (#:apt #:consfigurator.property.apt)
+ (#:os #:consfigurator.property.os)
+ (#:service #:consfigurator.property.service)
+ (#:chroot #:consfigurator.property.chroot)
+ (#:user #:consfigurator.property.user)
+ (#:systemd #:consfigurator.property.systemd))
+ (:export #:installed
+ #:user-container-started
+ #:when-user-container-running
+ #:user-containers-autostart
+ #:usernet-usable-by
+ #:user-container-for
+ #:user-container-for.
+ #:user-container
+ #:user-container.
+
+ #:lxc-ls))
+
(defpackage :consfigurator.connection.local
(:use #:cl #:consfigurator #:alexandria)
(:export #:local-connection))
(defpackage :consfigurator.connection.shell-wrap
- (:use #:cl #:consfigurator)
+ (:use #:cl #:alexandria #:consfigurator)
(:export #:shell-wrap-connection #:connection-shell-wrap))
(defpackage :consfigurator.connection.fork
@@ -1016,7 +1052,9 @@
#:consfigurator
#:consfigurator.util.linux-namespace
#:consfigurator.connection.fork
- #:consfigurator.connection.shell-wrap))
+ #:consfigurator.connection.shell-wrap)
+ (:local-nicknames (#:user #:consfigurator.property.user)
+ (#:lxc #:consfigurator.property.lxc)))
(defpackage :consfigurator.data.asdf
(:use #:cl #:alexandria #:consfigurator))
diff --git a/src/property/lxc.lisp b/src/property/lxc.lisp
new file mode 100644
index 0000000..4feb34f
--- /dev/null
+++ b/src/property/lxc.lisp
@@ -0,0 +1,329 @@
+;;; Consfigurator -- Lisp declarative configuration management system
+
+;;; Copyright (C) 2021 Sean Whitton <spwhitton@spwhitton.name>
+
+;;; This file is free software; you can redistribute it and/or modify
+;;; it under the terms of the GNU General Public License as published by
+;;; the Free Software Foundation; either version 3, or (at your option)
+;;; any later version.
+
+;;; This file is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+(in-package :consfigurator.property.lxc)
+(named-readtables:in-readtable :consfigurator)
+
+;;;; Properties and combinators
+
+(defproplist installed :posix ()
+ "Install the LXC userspace tools."
+ (:desc "LXC installed")
+ (os:etypecase
+ (debianlike (apt:installed "lxc"))))
+
+(defmacro default-maps-params (uid-maps-param gid-maps-param)
+ `(setq ,uid-maps-param
+ (or ,uid-maps-param
+ (list (cons 0 (multiple-value-list
+ (get-ids-offset user "/etc/subuid")))))
+ ,gid-maps-param
+ (or ,gid-maps-param
+ (list (cons 0 (multiple-value-list
+ (get-ids-offset user "/etc/subgid")))))))
+
+(defprop user-container-started :posix (host &optional owner)
+ "Ensure the LXC unprivileged container for the host designated by HOST owned
+by OWNER, defaulting to the current user, is started.
+(I.e., if HOST is a string, ensure the container named HOST is started; if
+HOST is a HOST value, start the container whose name is HOST's hostname.)"
+ (:desc #?"LXC container ${(get-hostname host)} started")
+ (:check (or (service:no-services-p) (user-container-active-p host owner)))
+ (:apply (lxc-cmd owner "lxc-unpriv-start" "-n" (get-hostname host))))
+
+(defmacro when-user-container-running ((host &key owner) &body propapps)
+ "Apply PROPAPPS only when the unprivileged LXC for the host designated by HOST
+and owned by OWNER, defaulting to the current user, is already started."
+ `(when-user-container-running*
+ ,host ,owner
+ ,(if (cdr propapps) `(eseqprops ,@propapps) (car propapps))))
+
+(define-function-property-combinator
+ when-user-container-running* (host owner propapp)
+ (macrolet ((check-running (form)
+ `(if (user-container-running-p host owner) ,form :no-change)))
+ (:retprop :type (propapptype propapp)
+ :desc (get (car propapp) 'desc)
+ :hostattrs (get (car propapp) 'hostattrs)
+ :apply (lambda-ignoring-args
+ (check-running (propappapply propapp)))
+ :unapply (lambda-ignoring-args
+ (check-running (propappunapply propapp)))
+ :args (cdr propapp))))
+
+(defproplist user-containers-autostart :posix (user)
+ "Install a systemd user unit for USER to autostart all LXC user containers
+owned by that user which have lxc.start.auto turned on. Also ensures that
+lingering is enabled for USER, so the user unit triggers at system boot.
+
+A limitation of the current implementation is that it assumes XDG_CONFIG_HOME
+is ~/.config."
+ (:desc #?"LXC autostart systemd user unit installed for ${user}")
+ (user:has-account user)
+ (systemd:lingering-enabled user)
+ (as user
+ (file:directory-exists ".config/systemd/user")
+ (file:has-content ".config/systemd/user/lxc-autostart.service"
+ '("[Unit]"
+ "Description=\"lxc-autostart\""
+
+ "[Service]"
+ "Type=oneshot"
+ "Delegate=yes"
+ "ExecStart=/usr/bin/lxc-autostart"
+ "ExecStop=/usr/bin/lxc-autostart --shutdown"
+ "RemainAfterExit=yes"
+
+ "[Install]"
+ "WantedBy=default.target"))
+ (systemd:enabled "lxc-autostart" t)))
+
+(defprop usernet-usable-by :posix
+ (user &optional (interface "lxcbr0") (count 10))
+ "Ensure that USER is allowed to attach up to COUNT unprivileged LXCs to the
+LXC-managed bridge INTERFACE.
+
+As a special case, INTERFACE may also be \"none\", which gives USER permission
+to create veth pairs where the peer outside the container is not attached to
+any bridge."
+ (:desc #?"${user} may attach LXC veth devices to ${interface}")
+ (:apply (file:map-file-lines
+ "/etc/lxc/lxc-usernet"
+ (lambda (lines)
+ (loop with done
+ and want = (format nil "~A veth ~A ~D" user interface count)
+ and prefix = (strcat user " veth " interface)
+ for line in lines
+ if (string-prefix-p prefix line)
+ unless done collect want into accum and do (setq done t)
+ end
+ else collect line into accum
+ finally (return
+ (if done accum (nconc accum (list want)))))))))
+
+(defprop %ids-shifted-for :lisp
+ (user directory uid-maps gid-maps
+ &optional
+ (rootfs
+ (merge-pathnames "rootfs/"
+ (ensure-directory-pathname directory))))
+ "Recursively shift the user and group ownership of ROOTFS according to
+UID-MAPS and GID-MAPS and chown DIRECTORY to root's UID according to UID-MAPS.
+Not idempotent! Also set the mode of DIRECTORY to 0770, as is standard for
+unprivileged LXCs."
+ (:apply
+ (default-maps-params uid-maps gid-maps)
+ (let ((dir (ensure-directory-pathname directory))
+ (uidmap (reduce-id-maps uid-maps))
+ (gidmap (reduce-id-maps gid-maps)))
+ (handler-bind ((serious-condition
+ ;; Don't leave a partially-shifted tree.
+ (lambda-ignoring-args (delete-remote-trees rootfs))))
+ (shift-ids rootfs uidmap gidmap))
+ ;; Don't see how to pass (gid_t)-1 as the third argument via CFFI. Note
+ ;; that gid_t is not guaranteed to be unsigned.
+ (nix:chown dir (funcall uidmap 0) (nix:stat-gid (nix:stat dir)))
+ (nix:chmod dir #o770))))
+
+(defprop %container-config-populated :posix
+ (prelude-lines user uid-maps gid-maps directory autostart hostname
+ additional-lines)
+ (:apply
+ (default-maps-params uid-maps gid-maps)
+ (let ((uid-maps (loop for (inside outside count) in uid-maps
+ collect (format nil "lxc.idmap = u ~D ~D ~D"
+ inside outside count)))
+ (gid-maps (loop for (inside outside count) in gid-maps
+ collect (format nil "lxc.idmap = g ~D ~D ~D"
+ inside outside count)))
+ (rootfs
+ (strcat
+ "dir:"
+ (unix-namestring
+ (merge-pathnames
+ "rootfs"
+ (merge-pathnames directory (get-connattr :remote-home)))))))
+ (file:has-content (merge-pathnames "config" directory)
+ (append prelude-lines uid-maps gid-maps
+ (list (strcat "lxc.rootfs.path = " rootfs)
+ (strcat "lxc.start.auto = " (if autostart "1" "0"))
+ (strcat "lxc.uts.name = " hostname))
+ additional-lines)
+ :mode #o640))))
+
+(defpropspec user-container-for :lisp
+ (options user host &optional additional-properties
+ &aux (host* (preprocess-host
+ (make-child-host
+ :hostattrs (hostattrs host)
+ :propspec (host-propspec
+ (union-propspec-into-host
+ host additional-properties))))))
+ "Build an unprivileged, non-system-wide LXC container for HOST.
+Must be applied using a connection chain which grants root access, primarily
+for the sake of bootstrapping the container's root filesystem. Once built,
+however, the container will be launched by USER, which should be non-root.
+
+If the container has already been bootstrapped and is running at the time this
+property is applied, enter the container and apply all its properties.
+
+OPTIONS is a plist of keyword parameters:
+
+ - :AUTOSTART -- Lisp boolean corresponding to lxc.start.auto in the
+ container's config file, and also determines whether applying this
+ property attempts to start the container. Defaults to nil. See also
+ LXC:USER-CONTAINERS-AUTOSTART.
+
+ - :PRELUDE-LINES -- additional lines to prepend to the container's
+ configuration file, before the lines generated by this property. See
+ lxc.container.conf(5). The default value is usually sufficient; if you
+ add lines, you will probably want to include the lines from the default
+ value too.
+
+ - :ADDITIONAL-LINES -- additional lines to append to the container's
+ configuration file, after the lines generated by this property. See
+ lxc.container.conf(5). In most cases you will need to include, at a
+ minimum, lines setting up a network interface for the container. The
+ default value serves as an example of a standard way to do this; if you
+ use them unmodified, you will also need to apply LXC:USERNET-USABLE-BY for
+ USER before this property.
+
+ - :UID-MAPS -- a list of the form (INSIDE OUTSIDE COUNT), or a list of such
+ lists, specifying the subordinate UIDs for the container's user namespace.
+ OUTSIDE is the beginning of a UID range, as seen from outside the
+ container, and INSIDE is the UID that OUTSIDE is mapped to, as seen from
+ inside the container. COUNT is the number of consecutive UIDs mapped.
+ This property will ensure that USER has permission to use that range of
+ UIDs by updating /etc/subuid if necessary.
+
+ As a special case, if NIL, instead use the first range of UIDs assigned to
+ USER in /etc/subuid, with a value of zero for INSIDE, and do not modify
+ /etc/subuid. (If you want to use the first range of UIDs assigned to USER
+ in /etc/subuid and also other ranges, you must specify them all explicitly
+ and cannot rely on this special case.)
+
+ It is usually sufficient not to specify this parameter, as distribution
+ scripts automatically add an entry to /etc/subuid for each regular user,
+ and most containers use a value of zero for INSIDE.
+
+ - :GID-MAPS -- as :UID-MAPS, but for GIDs and /etc/subgid.
+
+ - :CHROOT-OPTIONS -- passed on to CHROOT:OS-BOOTSTRAPPED-FOR, which see.
+
+A limitation of the current implementation is the the root filesystem of the
+container is always created under ~/.local/share/lxc/HOSTNAME where HOSTNAME
+is the hostname of HOST, ignoring any configured XDG_DATA_HOME for USER.
+
+Internally we use setns(2) to enter the container. See \"Connections which
+use setns(2) to enter containers\" in the Consfigurator manual for security
+implications."
+ (:desc #?"LXC container for ${(get-hostname host*)} configured")
+ ;; Same hostname probably means that the container HOST inherited the
+ ;; container host's hostname as one was not explicitly set; probably a
+ ;; mistake.
+ (when (string= (get-hostname host*) (get-hostname))
+ (aborted-change "LXC container has same hostname as container host."))
+ (destructuring-bind
+ (&key chroot-options autostart uid-maps gid-maps
+ (prelude-lines '("lxc.include = /usr/share/lxc/config/common.conf"
+ "lxc.include = /usr/share/lxc/config/userns.conf"))
+ (additional-lines '("lxc.net.0.type = veth"
+ "lxc.net.0.flags = up"
+ "lxc.net.0.link = lxcbr0"))
+ &aux
+ (directory
+ (ensure-directory-pathname
+ (merge-pathnames (get-hostname host*) ".local/share/lxc/")))
+ (rootfs (merge-pathnames "rootfs/" directory))
+ (uid-maps (if (listp (car uid-maps)) uid-maps (list uid-maps)))
+ (gid-maps (if (listp (car gid-maps)) gid-maps (list gid-maps))))
+ options
+ `(eseqprops
+ (installed)
+ (user:has-account ,user)
+ (systemd:lingering-enabled ,user) ; required for lxc-ls(1) to work at all
+ ,@(aand (loop for (inside outside count) in uid-maps
+ collect (format nil "~A:~D:~D" user outside count))
+ `((desc ,#?"/etc/subuid has mappings for ${(get-hostname host*)}"
+ (file:contains-lines "/etc/subuid" ,@it))))
+ ,@(aand (loop for (inside outside count) in gid-maps
+ collect (format nil "~A:~D:~D" user outside count))
+ `((desc ,#?"/etc/subgid has mappings for ${(get-hostname host*)}"
+ (file:contains-lines "/etc/subgid" ,@it))))
+ ,(propapp (desc "Base directory for container exists"
+ (as user (file:directory-exists directory))))
+ (with-homedir (:user ,user)
+ (with-flagfile ,(merge-pathnames "rootfs.bootstrapped" directory)
+ ;; It would be nice to branch here such that if we are about to
+ ;; start up the container and enter it, just bootstrap a minimal
+ ;; root filesystem, and only otherwise get all the other properties
+ ;; applied before the ID shifting. I.e.
+ ;;
+ ;; (chroot:os-bootstrapped-for
+ ;; ,chroot-options ,rootfs
+ ;; ,@(if autostart
+ ;; `(,(make-host :hostattrs
+ ;; (list :os (get-hostattrs :os host*))))
+ ;; `(,host ,additional-properties)))
+ ;;
+ ;; However, it might be that we need to apply the other properties
+ ;; in order that the container is startable; for example, getting
+ ;; systemd installed.
+ (chroot:os-bootstrapped-for
+ ,chroot-options ,rootfs ,host ,additional-properties)
+ (%ids-shifted-for ,user ,directory ,uid-maps ,gid-maps)))
+ ,(propapp
+ (desc "Container configuration file populated"
+ (as user
+ (%container-config-populated
+ prelude-lines user uid-maps gid-maps directory autostart
+ (car (split-string (get-hostname host*) :separator "."))
+ additional-lines))))
+ ,@(and autostart `((user-container-started ,host ,user)))
+ (when-user-container-running (,host :owner ,user)
+ (deploys ((:lxc :owner ,user :name ,(get-hostname host*)))
+ ,host ,additional-properties)))))
+
+(defproplist user-container :lisp (options user properties)
+ "Like LXC:USER-CONTAINER-FOR but define a new host using PROPERTIES."
+ (:desc "LXC container defined")
+ (user-container-for options user (make-host :propspec properties)))
+
+
+;;;; Utility functions
+
+(defun lxc-cmd (&optional owner &rest cmd-and-args)
+ (let* ((runuser
+ (and owner (not (string= owner (get-connattr :remote-user)))))
+ (uid (if runuser
+ (user:passwd-entry 2 owner)
+ (get-connattr :remote-uid))))
+ (apply #'run :env `(:DBUS_SESSION_BUS_ADDRESS nil
+ :XDG_RUNTIME_DIR ,(format nil "/run/user/~D" uid))
+ (and runuser (list "runuser" "-u" owner "--")) cmd-and-args)))
+
+(defun lxc-ls (&optional owner &rest args)
+ "Return the lines of output from lxc-ls(1) called with ARGS and for OWNER."
+ (lines (apply #'lxc-cmd owner "lxc-ls" args)))
+
+(defun user-container-active-p (host &optional owner)
+ (and (not (service:no-services-p))
+ (memstring= (get-hostname host) (lxc-ls owner "--active"))))
+
+(defun user-container-running-p (host &optional owner)
+ (and (not (service:no-services-p))
+ (memstring= (get-hostname host) (lxc-ls owner "--running"))))
diff --git a/src/util/linux-namespace.lisp b/src/util/linux-namespace.lisp
index e362868..9acdd0b 100644
--- a/src/util/linux-namespace.lisp
+++ b/src/util/linux-namespace.lisp
@@ -18,6 +18,128 @@
(in-package :consfigurator.util.linux-namespace)
(named-readtables:in-readtable :consfigurator)
+(defun get-ids-offset (identifier file)
+ "Where IDENTIFIER is a username or uid, and FILE is structured like
+/etc/subuid and /etc/subuid (see subuid(5) and subgid(5)), return the
+numerical subordinate ID and numerical subordinate ID count for the first
+entry in FILE for IDENTIFIER."
+ (with-open-file (file file)
+ (loop with info = (osicat:user-info identifier)
+ with fields
+ = (list (cdr (assoc :name info))
+ (write-to-string (cdr (assoc :user-id info))))
+ for line = (read-line file)
+ for (field start count) = (split-string line :separator '(#\:))
+ when (memstring= field fields)
+ return (values (parse-integer start) (parse-integer count)))))
+
+(defun reduce-id-maps (id-maps)
+ "Where each of ID-MAPS is a list of three integers corresponding to the lines
+of the uid_map (resp. gid_map) of a process in a different user namespace as
+would be read by a process in the current user namespace, return a function
+which maps UIDs (resp. GIDs) in the current user namespace to UIDs
+(resp. GIDs) in the user namespace of the process. The function returns NIL,
+not 65534, for values which are unmapped.
+
+A process's uid_map & gid_map files are under /proc; see user_namespaces(7)."
+ (let ((cache (make-hash-table))
+ (sorted (sort (copy-list id-maps) #'< :key #'car)))
+ (labels ((make-subtree (limit)
+ (and (plusp limit)
+ (let* ((new-limit (floor limit 2))
+ (left (make-subtree new-limit))
+ (next (pop sorted)))
+ (destructuring-bind (inside outside count) next
+ (let ((end (1- (+ inside count))))
+ (assert (and (every #'integerp next)
+ (not (minusp inside))
+ (not (minusp outside))
+ (plusp count))
+ nil "Invalid ID map ~S" next)
+ (assert
+ (or (not sorted) (> (caar sorted) end))
+ nil "ID maps overlap: ~S & ~S" next (car sorted))
+ (list inside end (- outside inside) left
+ (make-subtree (1- (- limit new-limit)))))))))
+ (tree-lookup (id tree)
+ (and tree
+ (destructuring-bind (start end offset left right) tree
+ (cond ((> id end) (tree-lookup id right))
+ ((< id start) (tree-lookup id left))
+ (t (+ id offset))))))
+ ;; Memoisation ought to be really worthwhile because we will
+ ;; likely be looking up the same few IDs over and over (e.g. 0).
+ (cache-or-tree-lookup (id tree)
+ (multiple-value-bind (result found) (gethash id cache)
+ (if found
+ result
+ (setf (gethash id cache) (tree-lookup id tree))))))
+ (rcurry #'cache-or-tree-lookup (make-subtree (length sorted))))))
+
+(defun shift-ids (root uidmap gidmap
+ &aux (seen (make-hash-table :test #'equal)))
+ "Recursively map the ownership and POSIX ACLs of files under ROOT by applying
+the function UIDMAP to user ownership and UIDs appearing in ACLs, and the
+function GIDMAP to group ownership and GIDs appearing in ACLs. Each of UIDMAP
+and GIDMAP should return a non-negative integer or NIL for each non-negative
+integer input; in the latter case, no update will be made to the UID or GID.
+
+For example, to recursively shift the ownership and POSIX ACLs of a filesystem
+hierarchy to render it suitable for use as a root filesystem in a different
+user namespace, you might use
+
+ (shift-ids \"/var/lib/lxc/mycontainer/rootfs\"
+ (reduce-id-maps '(0 100000 65536))
+ (reduce-id-maps '(0 100000 65536)))
+
+Here the list (0 100000 65536) describes the relationship between the present
+user namespace and the container's user namespace; see the docstring for
+CONSFIGURATOR.UTIL.LINUX-NAMESPACE:REDUCE-ID-MAPS and user_namespaces(7)."
+ (labels
+ ((shift (file)
+ (let* ((file (drop-trailing-slash (unix-namestring file)))
+ (stat (nix:lstat file))
+ (pair (cons (nix:stat-dev stat) (nix:stat-ino stat)))
+ (uid (nix:stat-uid stat))
+ (gid (nix:stat-gid stat))
+ (mode (nix:stat-mode stat))
+ (dirp (nix:s-isdir mode))
+ (linkp (nix:s-islnk mode)))
+ (unless (gethash pair seen)
+ (setf (gethash pair seen) t)
+ (nix:lchown file
+ (or (funcall uidmap uid) uid)
+ (or (funcall gidmap gid) gid))
+ (unless linkp
+ ;; Restore mode because chown wipes setuid/setgid.
+ (nix:chmod file mode)
+ ;; Now do the ACL shifts; directories have two.
+ (shift-acl file +ACL-TYPE-ACCESS+)
+ (when dirp (shift-acl file +ACL-TYPE-DEFAULT+)))
+ (when (and dirp (not linkp))
+ (mapc #'shift (directory-contents file))))))
+ (shift-acl (file type)
+ (with-acl-free (acl (acl-get-file file type))
+ (with-foreign-objects
+ ((uid 'uid_t) (gid 'gid_t) (entry-p 'acl_entry_t))
+ (loop with setp
+ for etype = +ACL-FIRST-ENTRY+ then +ACL-NEXT-ENTRY+
+ while (plusp (acl-get-entry acl etype entry-p))
+ for entry = (mem-ref entry-p 'acl_entry_t)
+ for tag-type = (acl-get-tag-type entry)
+ when (= tag-type +ACL-USER+)
+ do (awhen
+ (funcall uidmap (acl-get-qualifier entry 'uid_t))
+ (setf setp t (mem-ref uid 'uid_t) it)
+ (acl-set-qualifier entry uid))
+ when (= tag-type +ACL-GROUP+)
+ do (awhen
+ (funcall gidmap (acl-get-qualifier entry 'gid_t))
+ (setf setp t (mem-ref gid 'gid_t) it)
+ (acl-set-qualifier entry gid))
+ finally (when setp (acl-set-file file type acl)))))))
+ (shift (ensure-directory-pathname root))))
+
#+linux
(defun get-userns-owner (fd)
(with-foreign-object (owner 'uid_t)