{ config, lib, pkgs, ... }: with lib; let nixos-container = pkgs.substituteAll { name = "nixos-container"; dir = "bin"; isExecutable = true; src = ./nixos-container.pl; perl = "${pkgs.perl}/bin/perl -I${pkgs.perlPackages.FileSlurp}/lib/perl5/site_perl"; inherit (pkgs) socat; }; # The container's init script, a small wrapper around the regular # NixOS stage-2 init script. containerInit = pkgs.writeScript "container-init" '' #! ${pkgs.stdenv.shell} -e # Initialise the container side of the veth pair. if [ "$PRIVATE_NETWORK" = 1 ]; then ip link set host0 name eth0 ip link set dev eth0 up if [ -n "$HOST_ADDRESS" ]; then ip route add $HOST_ADDRESS dev eth0 ip route add default via $HOST_ADDRESS fi if [ -n "$LOCAL_ADDRESS" ]; then ip addr add $LOCAL_ADDRESS dev eth0 fi fi exec "$1" ''; system = config.nixpkgs.system; in { options = { boot.isContainer = mkOption { type = types.bool; default = false; description = '' Whether this NixOS machine is a lightweight container running in another NixOS system. ''; }; containers = mkOption { type = types.attrsOf (types.submodule ( { config, options, name, ... }: { options = { config = mkOption { description = '' A specification of the desired configuration of this container, as a NixOS module. ''; }; path = mkOption { type = types.path; example = "/nix/var/nix/profiles/containers/webserver"; description = '' As an alternative to specifying , you can specify the path to the evaluated NixOS system configuration, typically a symlink to a system profile. ''; }; privateNetwork = mkOption { type = types.bool; default = false; description = '' Whether to give the container its own private virtual Ethernet interface. The interface is called eth0, and is hooked up to the interface ve-container-name on the host. If this option is not set, then the container shares the network interfaces of the host, and can bind to any port on any interface. ''; }; hostAddress = mkOption { type = types.nullOr types.string; default = null; example = "10.231.136.1"; description = '' The IPv4 address assigned to the host interface. ''; }; localAddress = mkOption { type = types.nullOr types.string; default = null; example = "10.231.136.2"; description = '' The IPv4 address assigned to eth0 in the container. ''; }; }; config = mkMerge [ (mkIf options.config.isDefined { path = (import ../../lib/eval-config.nix { inherit system; modules = let extraConfig = { boot.isContainer = true; networking.hostName = mkDefault name; networking.useDHCP = false; }; in [ extraConfig config.config ]; prefix = [ "containers" name ]; }).config.system.build.toplevel; }) ]; })); default = {}; example = literalExample '' { webserver = { path = "/nix/var/nix/profiles/webserver"; }; database = { config = { config, pkgs, ... }: { services.postgresql.enable = true; services.postgresql.package = pkgs.postgresql92; }; }; } ''; description = '' A set of NixOS system configurations to be run as lightweight containers. Each container appears as a service container-name on the host system, allowing it to be started and stopped via systemctl . ''; }; }; config = mkIf (!config.boot.isContainer) { systemd.services."container@" = { description = "Container '%i'"; unitConfig.RequiresMountsFor = [ "/var/lib/containers/%i" ]; path = [ pkgs.iproute ]; environment.INSTANCE = "%i"; environment.root = "/var/lib/containers/%i"; preStart = '' # Clean up existing machined registration and interfaces. machinectl terminate "$INSTANCE" 2> /dev/null || true if [ "$PRIVATE_NETWORK" = 1 ]; then ip link del dev "ve-$INSTANCE" 2> /dev/null || true fi mkdir -p -m 0755 $root/var/lib # Create a named pipe to get a signal when the container # has finished booting. rm -f $root/var/lib/startup-done mkfifo -m 0600 $root/var/lib/startup-done ''; script = '' mkdir -p -m 0755 "$root/etc" "$root/var/lib" if ! [ -e "$root/etc/os-release" ]; then touch "$root/etc/os-release" fi mkdir -p -m 0755 \ "/nix/var/nix/profiles/per-container/$INSTANCE" \ "/nix/var/nix/gcroots/per-container/$INSTANCE" if [ "$PRIVATE_NETWORK" = 1 ]; then extraFlags+=" --network-veth" fi for iface in $MACVLANS; do extraFlags+=" --network-macvlan=$iface" done # If the host is 64-bit and the container is 32-bit, add a # --personality flag. ${optionalString (config.nixpkgs.system == "x86_64-linux") '' if [ "$(< ''${SYSTEM_PATH:-/nix/var/nix/profiles/per-container/$INSTANCE/system}/system)" = i686-linux ]; then extraFlags+=" --personality=x86" fi ''} EXIT_ON_REBOOT=1 \ exec ${config.systemd.package}/bin/systemd-nspawn \ --keep-unit \ -M "$INSTANCE" -D "$root" $extraFlags \ --bind-ro=/nix/store \ --bind-ro=/nix/var/nix/db \ --bind-ro=/nix/var/nix/daemon-socket \ --bind="/nix/var/nix/profiles/per-container/$INSTANCE:/nix/var/nix/profiles" \ --bind="/nix/var/nix/gcroots/per-container/$INSTANCE:/nix/var/nix/gcroots" \ --setenv PRIVATE_NETWORK="$PRIVATE_NETWORK" \ --setenv HOST_ADDRESS="$HOST_ADDRESS" \ --setenv LOCAL_ADDRESS="$LOCAL_ADDRESS" \ --setenv PATH="$PATH" \ ${containerInit} "''${SYSTEM_PATH:-/nix/var/nix/profiles/system}/init" ''; postStart = '' if [ "$PRIVATE_NETWORK" = 1 ]; then ifaceHost=ve-$INSTANCE ip link set dev $ifaceHost up if [ -n "$HOST_ADDRESS" ]; then ip addr add $HOST_ADDRESS dev $ifaceHost fi if [ -n "$LOCAL_ADDRESS" ]; then ip route add $LOCAL_ADDRESS dev $ifaceHost fi fi # This blocks until the container-startup-done service # writes something to this pipe. FIXME: it also hangs # until the start timeout expires if systemd-nspawn exits. read x < $root/var/lib/startup-done rm -f $root/var/lib/startup-done ''; preStop = '' machinectl poweroff "$INSTANCE" || true ''; restartIfChanged = false; #reloadIfChanged = true; # FIXME serviceConfig = { ExecReload = pkgs.writeScript "reload-container" '' #! ${pkgs.stdenv.shell} -e SYSTEM_PATH=/nix/var/nix/profiles/system echo $SYSTEM_PATH/bin/switch-to-configuration test | \ ${pkgs.socat}/bin/socat unix:$root/var/lib/run-command.socket - ''; SyslogIdentifier = "container %i"; EnvironmentFile = "-/etc/containers/%i.conf"; Type = "notify"; # Note that on reboot, systemd-nspawn returns 10, so this # unit will be restarted. On poweroff, it returns 0, so the # unit won't be restarted. Restart = "on-failure"; # Hack: we don't want to kill systemd-nspawn, since we call # "machinectl poweroff" in preStop to shut down the # container cleanly. But systemd requires sending a signal # (at least if we want remaining processes to be killed # after the timeout). So send an ignored signal. KillMode = "mixed"; KillSignal = "WINCH"; }; }; # Generate a configuration file in /etc/containers for each # container so that container@.target can get the container # configuration. environment.etc = mapAttrs' (name: cfg: nameValuePair "containers/${name}.conf" { text = '' SYSTEM_PATH=${cfg.path} ${optionalString cfg.privateNetwork '' PRIVATE_NETWORK=1 ${optionalString (cfg.hostAddress != null) '' HOST_ADDRESS=${cfg.hostAddress} ''} ${optionalString (cfg.localAddress != null) '' LOCAL_ADDRESS=${cfg.localAddress} ''} ''} ''; }) config.containers; # FIXME: auto-start containers. # Generate /etc/hosts entries for the containers. networking.extraHosts = concatStrings (mapAttrsToList (name: cfg: optionalString (cfg.localAddress != null) '' ${cfg.localAddress} ${name}.containers '') config.containers); networking.dhcpcd.denyInterfaces = [ "ve-*" ]; environment.systemPackages = [ nixos-container ]; }; }