diff --git a/nixos/doc/manual/release-notes/rl-2105.xml b/nixos/doc/manual/release-notes/rl-2105.xml index 0666b4300ec..2f87869fbe3 100644 --- a/nixos/doc/manual/release-notes/rl-2105.xml +++ b/nixos/doc/manual/release-notes/rl-2105.xml @@ -788,6 +788,15 @@ self: super: and use Maturin as their build tool. + + + Kubernetes has deprecated docker as container runtime. + As a consequence, the Kubernetes module now has support for configuration of custom remote container runtimes and enables containerd by default. + Note that containerd is more strict regarding container image OCI-compliance. + As an example, images with CMD or ENTRYPOINT defined as strings (not lists) will fail on containerd, while working fine on docker. + Please test your setup and container images with containerd prior to upgrading. + + diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix index f91c21ad5cb..9bb81d085c9 100644 --- a/nixos/modules/module-list.nix +++ b/nixos/modules/module-list.nix @@ -1053,6 +1053,7 @@ ./testing/service-runner.nix ./virtualisation/anbox.nix ./virtualisation/container-config.nix + ./virtualisation/containerd.nix ./virtualisation/containers.nix ./virtualisation/nixos-containers.nix ./virtualisation/oci-containers.nix diff --git a/nixos/modules/services/cluster/kubernetes/apiserver.nix b/nixos/modules/services/cluster/kubernetes/apiserver.nix index 616389dfaac..a5b13215476 100644 --- a/nixos/modules/services/cluster/kubernetes/apiserver.nix +++ b/nixos/modules/services/cluster/kubernetes/apiserver.nix @@ -260,7 +260,6 @@ in account token issuer. The issuer will sign issued ID tokens with this private key. ''; - default = top.serviceAccountSigningKeyFile; type = path; }; @@ -272,7 +271,6 @@ in different files. If unspecified, --tls-private-key-file is used. Must be specified when --service-account-signing-key is provided ''; - default = top.serviceAccountKeyFile; type = path; }; diff --git a/nixos/modules/services/cluster/kubernetes/default.nix b/nixos/modules/services/cluster/kubernetes/default.nix index 3a11a6513a4..19edc338bba 100644 --- a/nixos/modules/services/cluster/kubernetes/default.nix +++ b/nixos/modules/services/cluster/kubernetes/default.nix @@ -5,6 +5,29 @@ with lib; let cfg = config.services.kubernetes; + defaultContainerdConfigFile = pkgs.writeText "containerd.toml" '' + version = 2 + root = "/var/lib/containerd/daemon" + state = "/var/run/containerd/daemon" + oom_score = 0 + + [grpc] + address = "/var/run/containerd/containerd.sock" + + [plugins."io.containerd.grpc.v1.cri"] + sandbox_image = "pause:latest" + + [plugins."io.containerd.grpc.v1.cri".cni] + bin_dir = "/opt/cni/bin" + max_conf_num = 0 + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc] + runtime_type = "io.containerd.runc.v2" + + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes."io.containerd.runc.v2".options] + SystemdCgroup = true + ''; + mkKubeConfig = name: conf: pkgs.writeText "${name}-kubeconfig" (builtins.toJSON { apiVersion = "v1"; kind = "Config"; @@ -222,14 +245,9 @@ in { }) (mkIf cfg.kubelet.enable { - virtualisation.docker = { + virtualisation.containerd = { enable = mkDefault true; - - # kubernetes needs access to logs - logDriver = mkDefault "json-file"; - - # iptables must be disabled for kubernetes - extraOptions = "--iptables=false --ip-masq=false"; + configFile = mkDefault defaultContainerdConfigFile; }; }) @@ -269,7 +287,6 @@ in { users.users.kubernetes = { uid = config.ids.uids.kubernetes; description = "Kubernetes user"; - extraGroups = [ "docker" ]; group = "kubernetes"; home = cfg.dataDir; createHome = true; diff --git a/nixos/modules/services/cluster/kubernetes/flannel.nix b/nixos/modules/services/cluster/kubernetes/flannel.nix index 548ffed1ddb..3f55719027f 100644 --- a/nixos/modules/services/cluster/kubernetes/flannel.nix +++ b/nixos/modules/services/cluster/kubernetes/flannel.nix @@ -8,16 +8,6 @@ let # we want flannel to use kubernetes itself as configuration backend, not direct etcd storageBackend = "kubernetes"; - - # needed for flannel to pass options to docker - mkDockerOpts = pkgs.runCommand "mk-docker-opts" { - buildInputs = [ pkgs.makeWrapper ]; - } '' - mkdir -p $out - - # bashInteractive needed for `compgen` - makeWrapper ${pkgs.bashInteractive}/bin/bash $out/mk-docker-opts --add-flags "${pkgs.kubernetes}/bin/mk-docker-opts.sh" - ''; in { ###### interface @@ -43,43 +33,17 @@ in cniVersion = "0.3.1"; delegate = { isDefaultGateway = true; - bridge = "docker0"; + bridge = "mynet"; }; }]; }; - systemd.services.mk-docker-opts = { - description = "Pre-Docker Actions"; - path = with pkgs; [ gawk gnugrep ]; - script = '' - ${mkDockerOpts}/mk-docker-opts -d /run/flannel/docker - systemctl restart docker - ''; - serviceConfig.Type = "oneshot"; - }; - - systemd.paths.flannel-subnet-env = { - wantedBy = [ "flannel.service" ]; - pathConfig = { - PathModified = "/run/flannel/subnet.env"; - Unit = "mk-docker-opts.service"; - }; - }; - - systemd.services.docker = { - environment.DOCKER_OPTS = "-b none"; - serviceConfig.EnvironmentFile = "-/run/flannel/docker"; - }; - - # read environment variables generated by mk-docker-opts - virtualisation.docker.extraOptions = "$DOCKER_OPTS"; - networking = { firewall.allowedUDPPorts = [ 8285 # flannel udp 8472 # flannel vxlan ]; - dhcpcd.denyInterfaces = [ "docker*" "flannel*" ]; + dhcpcd.denyInterfaces = [ "mynet*" "flannel*" ]; }; services.kubernetes.pki.certs = { diff --git a/nixos/modules/services/cluster/kubernetes/kubelet.nix b/nixos/modules/services/cluster/kubernetes/kubelet.nix index 4da6efca535..ef6da26a024 100644 --- a/nixos/modules/services/cluster/kubernetes/kubelet.nix +++ b/nixos/modules/services/cluster/kubernetes/kubelet.nix @@ -23,7 +23,7 @@ let name = "pause"; tag = "latest"; contents = top.package.pause; - config.Cmd = "/bin/pause"; + config.Cmd = ["/bin/pause"]; }; kubeconfig = top.lib.mkKubeConfig "kubelet" cfg.kubeconfig; @@ -134,7 +134,7 @@ in containerRuntimeEndpoint = mkOption { description = "Endpoint at which to find the container runtime api interface/socket"; type = str; - default = "unix:///var/run/docker/containerd/containerd.sock"; + default = "unix:///var/run/containerd/containerd.sock"; }; enable = mkEnableOption "Kubernetes kubelet."; @@ -247,16 +247,24 @@ in ###### implementation config = mkMerge [ (mkIf cfg.enable { + + environment.etc."cni/net.d".source = cniConfig; + services.kubernetes.kubelet.seedDockerImages = [infraContainer]; + boot.kernel.sysctl = { + "net.bridge.bridge-nf-call-iptables" = 1; + "net.ipv4.ip_forward" = 1; + "net.bridge.bridge-nf-call-ip6tables" = 1; + }; + systemd.services.kubelet = { description = "Kubernetes Kubelet Service"; wantedBy = [ "kubernetes.target" ]; - after = [ "network.target" "kube-apiserver.service" "sockets.target" ]; + after = [ "containerd.service" "network.target" "kube-apiserver.service" ]; path = with pkgs; [ gitMinimal openssh - docker util-linux iproute ethtool @@ -266,8 +274,12 @@ in ] ++ lib.optional config.boot.zfs.enabled config.boot.zfs.package ++ top.path; preStart = '' ${concatMapStrings (img: '' - echo "Seeding docker image: ${img}" - docker load <${img} + echo "Seeding container image: ${img}" + ${if (lib.hasSuffix "gz" img) then + ''${pkgs.gzip}/bin/zcat "${img}" | ${pkgs.containerd}/bin/ctr -n k8s.io image import -'' + else + ''${pkgs.coreutils}/bin/cat "${img}" | ${pkgs.containerd}/bin/ctr -n k8s.io image import -'' + } '') cfg.seedDockerImages} rm /opt/cni/bin/* || true @@ -320,6 +332,7 @@ in ${optionalString (cfg.verbosity != null) "--v=${toString cfg.verbosity}"} \ --container-runtime=${cfg.containerRuntime} \ --container-runtime-endpoint=${cfg.containerRuntimeEndpoint} \ + --cgroup-driver=systemd \ ${cfg.extraOpts} ''; WorkingDirectory = top.dataDir; @@ -329,7 +342,7 @@ in # Allways include cni plugins services.kubernetes.kubelet.cni.packages = [pkgs.cni-plugins]; - boot.kernelModules = ["br_netfilter"]; + boot.kernelModules = ["br_netfilter" "overlay"]; services.kubernetes.kubelet.hostname = with config.networking; mkDefault (hostName + optionalString (domain != null) ".${domain}"); diff --git a/nixos/modules/services/networking/flannel.nix b/nixos/modules/services/networking/flannel.nix index 4c040112d28..32a7eb3ed69 100644 --- a/nixos/modules/services/networking/flannel.nix +++ b/nixos/modules/services/networking/flannel.nix @@ -162,10 +162,7 @@ in { NODE_NAME = cfg.nodeName; }; path = [ pkgs.iptables ]; - preStart = '' - mkdir -p /run/flannel - touch /run/flannel/docker - '' + optionalString (cfg.storageBackend == "etcd") '' + preStart = optionalString (cfg.storageBackend == "etcd") '' echo "setting network configuration" until ${pkgs.etcdctl}/bin/etcdctl set /coreos.com/network/config '${builtins.toJSON networkConfig}' do @@ -177,6 +174,7 @@ in { ExecStart = "${cfg.package}/bin/flannel"; Restart = "always"; RestartSec = "10s"; + RuntimeDirectory = "flannel"; }; }; diff --git a/nixos/modules/virtualisation/containerd.nix b/nixos/modules/virtualisation/containerd.nix new file mode 100644 index 00000000000..194276d1695 --- /dev/null +++ b/nixos/modules/virtualisation/containerd.nix @@ -0,0 +1,60 @@ +{ pkgs, lib, config, ... }: +let + cfg = config.virtualisation.containerd; + containerdConfigChecked = pkgs.runCommand "containerd-config-checked.toml" { nativeBuildInputs = [pkgs.containerd]; } '' + containerd -c ${cfg.configFile} config dump >/dev/null + ln -s ${cfg.configFile} $out + ''; +in +{ + + options.virtualisation.containerd = with lib.types; { + enable = lib.mkEnableOption "containerd container runtime"; + + configFile = lib.mkOption { + default = null; + description = "path to containerd config file"; + type = nullOr path; + }; + + args = lib.mkOption { + default = {}; + description = "extra args to append to the containerd cmdline"; + type = attrsOf str; + }; + }; + + config = lib.mkIf cfg.enable { + virtualisation.containerd.args.config = lib.mkIf (cfg.configFile != null) (toString containerdConfigChecked); + + environment.systemPackages = [pkgs.containerd]; + + systemd.services.containerd = { + description = "containerd - container runtime"; + wantedBy = [ "multi-user.target" ]; + after = [ "network.target" ]; + path = with pkgs; [ + containerd + runc + iptables + ]; + serviceConfig = { + ExecStart = ''${pkgs.containerd}/bin/containerd ${lib.concatStringsSep " " (lib.cli.toGNUCommandLine {} cfg.args)}''; + Delegate = "yes"; + KillMode = "process"; + Type = "notify"; + Restart = "always"; + RestartSec = "5"; + StartLimitBurst = "8"; + StartLimitIntervalSec = "120s"; + + # "limits" defined below are adopted from upstream: https://github.com/containerd/containerd/blob/master/containerd.service + LimitNPROC = "infinity"; + LimitCORE = "infinity"; + LimitNOFILE = "infinity"; + TasksMax = "infinity"; + OOMScoreAdjust = "-999"; + }; + }; + }; +} diff --git a/nixos/tests/kubernetes/dns.nix b/nixos/tests/kubernetes/dns.nix index 890499a0fb8..b6cd811c5ae 100644 --- a/nixos/tests/kubernetes/dns.nix +++ b/nixos/tests/kubernetes/dns.nix @@ -34,7 +34,7 @@ let name = "redis"; tag = "latest"; contents = [ pkgs.redis pkgs.bind.host ]; - config.Entrypoint = "/bin/redis-server"; + config.Entrypoint = ["/bin/redis-server"]; }; probePod = pkgs.writeText "probe-pod.json" (builtins.toJSON { @@ -55,12 +55,11 @@ let name = "probe"; tag = "latest"; contents = [ pkgs.bind.host pkgs.busybox ]; - config.Entrypoint = "/bin/tail"; + config.Entrypoint = ["/bin/tail"]; }; - extraConfiguration = { config, pkgs, ... }: { + extraConfiguration = { config, pkgs, lib, ... }: { environment.systemPackages = [ pkgs.bind.host ]; - # virtualisation.docker.extraOptions = "--dns=${config.services.kubernetes.addons.dns.clusterIp}"; services.dnsmasq.enable = true; services.dnsmasq.servers = [ "/cluster.local/${config.services.kubernetes.addons.dns.clusterIp}#53" @@ -77,7 +76,7 @@ let # prepare machine1 for test machine1.wait_until_succeeds("kubectl get node machine1.${domain} | grep -w Ready") machine1.wait_until_succeeds( - "docker load < ${redisImage}" + "${pkgs.gzip}/bin/zcat ${redisImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -" ) machine1.wait_until_succeeds( "kubectl create -f ${redisPod}" @@ -86,7 +85,7 @@ let "kubectl create -f ${redisService}" ) machine1.wait_until_succeeds( - "docker load < ${probeImage}" + "${pkgs.gzip}/bin/zcat ${probeImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -" ) machine1.wait_until_succeeds( "kubectl create -f ${probePod}" @@ -118,7 +117,7 @@ let # prepare machines for test machine1.wait_until_succeeds("kubectl get node machine2.${domain} | grep -w Ready") machine2.wait_until_succeeds( - "docker load < ${redisImage}" + "${pkgs.gzip}/bin/zcat ${redisImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -" ) machine1.wait_until_succeeds( "kubectl create -f ${redisPod}" @@ -127,7 +126,7 @@ let "kubectl create -f ${redisService}" ) machine2.wait_until_succeeds( - "docker load < ${probeImage}" + "${pkgs.gzip}/bin/zcat ${probeImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -" ) machine1.wait_until_succeeds( "kubectl create -f ${probePod}" diff --git a/nixos/tests/kubernetes/rbac.nix b/nixos/tests/kubernetes/rbac.nix index c922da515d9..3fc8ed0fbe3 100644 --- a/nixos/tests/kubernetes/rbac.nix +++ b/nixos/tests/kubernetes/rbac.nix @@ -85,7 +85,7 @@ let name = "kubectl"; tag = "latest"; contents = [ kubectl pkgs.busybox kubectlPod2 ]; - config.Entrypoint = "/bin/sh"; + config.Entrypoint = ["/bin/sh"]; }; base = { @@ -97,7 +97,7 @@ let machine1.wait_until_succeeds("kubectl get node machine1.my.zyx | grep -w Ready") machine1.wait_until_succeeds( - "docker load < ${kubectlImage}" + "${pkgs.gzip}/bin/zcat ${kubectlImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -" ) machine1.wait_until_succeeds( @@ -134,7 +134,7 @@ let machine1.wait_until_succeeds("kubectl get node machine2.my.zyx | grep -w Ready") machine2.wait_until_succeeds( - "docker load < ${kubectlImage}" + "${pkgs.gzip}/bin/zcat ${kubectlImage} | ${pkgs.containerd}/bin/ctr -n k8s.io image import -" ) machine1.wait_until_succeeds( diff --git a/pkgs/applications/networking/cluster/kubernetes/default.nix b/pkgs/applications/networking/cluster/kubernetes/default.nix index cb669615f63..c218e1b492b 100644 --- a/pkgs/applications/networking/cluster/kubernetes/default.nix +++ b/pkgs/applications/networking/cluster/kubernetes/default.nix @@ -77,8 +77,6 @@ stdenv.mkDerivation rec { cp cluster/addons/addon-manager/kube-addons.sh $out/bin/kube-addons-lib.sh - cp ${./mk-docker-opts.sh} $out/bin/mk-docker-opts.sh - for tool in kubeadm kubectl; do installShellCompletion --cmd $tool \ --bash <($out/bin/$tool completion bash) \ diff --git a/pkgs/applications/networking/cluster/kubernetes/mk-docker-opts.sh b/pkgs/applications/networking/cluster/kubernetes/mk-docker-opts.sh deleted file mode 100755 index 22a459f5134..00000000000 --- a/pkgs/applications/networking/cluster/kubernetes/mk-docker-opts.sh +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/env bash - -# Copyright 2014 The Kubernetes Authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Generate Docker daemon options based on flannel env file. - -# exit on any error -set -e - -usage() { - echo "$0 [-f FLANNEL-ENV-FILE] [-d DOCKER-ENV-FILE] [-i] [-c] [-m] [-k COMBINED-KEY] - -Generate Docker daemon options based on flannel env file -OPTIONS: - -f Path to flannel env file. Defaults to /run/flannel/subnet.env - -d Path to Docker env file to write to. Defaults to /run/docker_opts.env - -i Output each Docker option as individual var. e.g. DOCKER_OPT_MTU=1500 - -c Output combined Docker options into DOCKER_OPTS var - -k Set the combined options key to this value (default DOCKER_OPTS=) - -m Do not output --ip-masq (useful for older Docker version) -" >/dev/stderr - exit 1 -} - -flannel_env="/run/flannel/subnet.env" -docker_env="/run/docker_opts.env" -combined_opts_key="DOCKER_OPTS" -indiv_opts=false -combined_opts=false -ipmasq=true -val="" - -while getopts "f:d:icmk:" opt; do - case $opt in - f) - flannel_env=$OPTARG - ;; - d) - docker_env=$OPTARG - ;; - i) - indiv_opts=true - ;; - c) - combined_opts=true - ;; - m) - ipmasq=false - ;; - k) - combined_opts_key=$OPTARG - ;; - \?) - usage - ;; - esac -done - -if [[ $indiv_opts = false ]] && [[ $combined_opts = false ]]; then - indiv_opts=true - combined_opts=true -fi - -if [[ -f "${flannel_env}" ]]; then - source "${flannel_env}" -fi - -if [[ -n "$FLANNEL_SUBNET" ]]; then - # shellcheck disable=SC2034 # Variable name referenced in OPT_LOOP below - DOCKER_OPT_BIP="--bip=$FLANNEL_SUBNET" -fi - -if [[ -n "$FLANNEL_MTU" ]]; then - # shellcheck disable=SC2034 # Variable name referenced in OPT_LOOP below - DOCKER_OPT_MTU="--mtu=$FLANNEL_MTU" -fi - -if [[ "$FLANNEL_IPMASQ" = true ]] && [[ $ipmasq = true ]]; then - # shellcheck disable=SC2034 # Variable name referenced in OPT_LOOP below - DOCKER_OPT_IPMASQ="--ip-masq=false" -fi - -eval docker_opts="\$${combined_opts_key}" -docker_opts+=" " - -echo -n "" >"${docker_env}" - -# OPT_LOOP -for opt in $(compgen -v DOCKER_OPT_); do - eval val=\$"${opt}" - - if [[ "$indiv_opts" = true ]]; then - echo "$opt=\"$val\"" >>"${docker_env}" - fi - - docker_opts+="$val " -done - -if [[ "$combined_opts" = true ]]; then - echo "${combined_opts_key}=\"${docker_opts}\"" >>"${docker_env}" -fi