nixpkgs/nixos/modules/system/activation/switch-to-configuration.pl
Oliver Charles 0d18d39e98 switch-to-configuration.pl: Handle successful auto-restarts
switch-to-configuration.pl is currently hard-coded to assume that if a
unit is in the "auto-restart" state that something has gone wrong, but
this is not strictly true. For example, I run offlineimap as a oneshot
service restarting itself every minute (on success). NixOS currently
thinks that offlineimap has failed to start as it enters the
auto-restart state, because it doesn't consider why the unit failed.

This commit changes switch-to-configuration.pl to inspect the full
status of a unit in auto-restart state, and now only considers it failed
if the ExecMainStatus is non-zero.
2014-02-02 15:56:22 +01:00

380 lines
14 KiB
Perl
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#! @perl@
use strict;
use warnings;
use File::Basename;
use File::Slurp;
use Sys::Syslog qw(:standard :macros);
use Cwd 'abs_path';
my $out = "@out@";
my $startListFile = "/run/systemd/start-list";
my $restartListFile = "/run/systemd/restart-list";
my $reloadListFile = "/run/systemd/reload-list";
my $action = shift @ARGV;
if (!defined $action || ($action ne "switch" && $action ne "boot" && $action ne "test")) {
print STDERR <<EOF;
Usage: $0 [switch|boot|test]
switch: make the configuration the boot default and activate now
boot: make the configuration the boot default
test: activate the configuration, but don\'t make it the boot default
EOF
exit 1;
}
die "This is not a NixOS installation (/etc/NIXOS is missing)!\n" unless -f "/etc/NIXOS";
openlog("nixos", "", LOG_USER);
# Install or update the bootloader.
if ($action eq "switch" || $action eq "boot") {
system("@installBootLoader@ $out") == 0 or exit 1;
}
# Just in case the new configuration hangs the system, do a sync now.
system("@coreutils@/bin/sync") unless ($ENV{"NIXOS_NO_SYNC"} // "") eq "1";
exit 0 if $action eq "boot";
# Check if we can activate the new configuration.
my $oldVersion = read_file("/run/current-system/init-interface-version", err_mode => 'quiet') // "";
my $newVersion = read_file("$out/init-interface-version");
if ($newVersion ne $oldVersion) {
print STDERR <<EOF;
Warning: the new NixOS configuration has an init that is
incompatible with the current configuration. The new configuration
won\'t take effect until you reboot the system.
EOF
exit 100;
}
syslog(LOG_NOTICE, "switching to system configuration $out");
# Ignore SIGHUP so that we're not killed if we're running on (say)
# virtual console 1 and we restart the "tty1" unit.
$SIG{PIPE} = "IGNORE";
sub getActiveUnits {
# FIXME: use D-Bus or whatever to query this, since parsing the
# output of list-units is likely to break.
my $lines = `@systemd@/bin/systemctl list-units --full`;
my $res = {};
foreach my $line (split '\n', $lines) {
chomp $line;
last if $line eq "";
$line =~ /^(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s/ or next;
next if $1 eq "UNIT";
$res->{$1} = { load => $2, state => $3, substate => $4 };
}
return $res;
}
sub parseFstab {
my ($filename) = @_;
my ($fss, $swaps);
foreach my $line (read_file($filename, err_mode => 'quiet')) {
chomp $line;
$line =~ s/^\s*#.*//;
next if $line =~ /^\s*$/;
my @xs = split / /, $line;
if ($xs[2] eq "swap") {
$swaps->{$xs[0]} = { options => $xs[3] // "" };
} else {
$fss->{$xs[1]} = { device => $xs[0], fsType => $xs[2], options => $xs[3] // "" };
}
}
return ($fss, $swaps);
}
sub parseUnit {
my ($filename) = @_;
parseKeyValues(read_file($filename));
}
sub parseKeyValues {
my @lines = @_;
my $info = {};
foreach my $line (@_) {
# FIXME: not quite correct.
$line =~ /^([^=]+)=(.*)$/ or next;
$info->{$1} = $2;
}
return $info;
}
sub boolIsTrue {
my ($s) = @_;
return $s eq "yes" || $s eq "true";
}
# Stop all services that no longer exist or have changed in the new
# configuration.
my (@unitsToStop, @unitsToSkip);
my $activePrev = getActiveUnits;
while (my ($unit, $state) = each %{$activePrev}) {
my $baseUnit = $unit;
# Recognise template instances.
$baseUnit = "$1\@.$2" if $unit =~ /^(.*)@[^\.]*\.(.*)$/;
my $prevUnitFile = "/etc/systemd/system/$baseUnit";
my $newUnitFile = "$out/etc/systemd/system/$baseUnit";
my $baseName = $baseUnit;
$baseName =~ s/\.[a-z]*$//;
if (-e $prevUnitFile && ($state->{state} eq "active" || $state->{state} eq "activating")) {
if (! -e $newUnitFile) {
push @unitsToStop, $unit;
}
elsif ($unit =~ /\.target$/) {
my $unitInfo = parseUnit($newUnitFile);
# Cause all active target units to be restarted below.
# This should start most changed units we stop here as
# well as any new dependencies (including new mounts and
# swap devices). FIXME: the suspend target is sometimes
# active after the system has resumed, which probably
# should not be the case. Just ignore it.
if ($unit ne "suspend.target" && $unit ne "hibernate.target" && $unit ne "hybrid-sleep.target") {
unless (boolIsTrue($unitInfo->{'RefuseManualStart'} // "no")) {
write_file($startListFile, { append => 1 }, "$unit\n");
}
}
# Stop targets that have X-StopOnReconfiguration set.
# This is necessary to respect dependency orderings
# involving targets: if unit X starts after target Y and
# target Y starts after unit Z, then if X and Z have both
# changed, then X should be restarted after Z. However,
# if target Y is in the "active" state, X and Z will be
# restarted at the same time because X's dependency on Y
# is already satisfied. Thus, we need to stop Y first.
# Stopping a target generally has no effect on other units
# (unless there is a PartOf dependency), so this is just a
# bookkeeping thing to get systemd to do the right thing.
if (boolIsTrue($unitInfo->{'X-StopOnReconfiguration'} // "no")) {
push @unitsToStop, $unit;
}
}
elsif (abs_path($prevUnitFile) ne abs_path($newUnitFile)) {
if ($unit eq "sysinit.target" || $unit eq "basic.target" || $unit eq "multi-user.target" || $unit eq "graphical.target") {
# Do nothing. These cannot be restarted directly.
} elsif ($unit =~ /\.mount$/) {
# Reload the changed mount unit to force a remount.
write_file($reloadListFile, { append => 1 }, "$unit\n");
} elsif ($unit =~ /\.socket$/ || $unit =~ /\.path$/) {
# FIXME: do something?
} else {
my $unitInfo = parseUnit($newUnitFile);
if (!boolIsTrue($unitInfo->{'X-RestartIfChanged'} // "yes")) {
push @unitsToSkip, $unit;
} else {
# If this unit is socket-activated, then stop the
# socket unit(s) as well, and restart the
# socket(s) instead of the service.
my $socketActivated = 0;
if ($unit =~ /\.service$/) {
my @sockets = split / /, ($unitInfo->{Sockets} // "");
if (scalar @sockets == 0) {
@sockets = ("$baseName.socket");
}
foreach my $socket (@sockets) {
if (defined $activePrev->{$socket}) {
push @unitsToStop, $socket;
write_file($startListFile, { append => 1 }, "$socket\n");
$socketActivated = 1;
}
}
}
if (!boolIsTrue($unitInfo->{'X-StopIfChanged'} // "yes")) {
# This unit should be restarted instead of
# stopped and started.
write_file($restartListFile, { append => 1 }, "$unit\n");
} else {
# If the unit is not socket-activated, record
# that this unit needs to be started below.
# We write this to a file to ensure that the
# service gets restarted if we're interrupted.
if (!$socketActivated) {
write_file($startListFile, { append => 1 }, "$unit\n");
}
push @unitsToStop, $unit;
}
}
}
}
}
}
sub pathToUnitName {
my ($path) = @_;
die unless substr($path, 0, 1) eq "/";
return "-" if $path eq "/";
$path = substr($path, 1);
$path =~ s/\//-/g;
# FIXME: handle - and unprintable characters.
return $path;
}
sub unique {
my %seen;
my @res;
foreach my $name (@_) {
next if $seen{$name};
$seen{$name} = 1;
push @res, $name;
}
return @res;
}
# Compare the previous and new fstab to figure out which filesystems
# need a remount or need to be unmounted. New filesystems are mounted
# automatically by starting local-fs.target. FIXME: might be nicer if
# we generated units for all mounts; then we could unify this with the
# unit checking code above.
my ($prevFss, $prevSwaps) = parseFstab "/etc/fstab";
my ($newFss, $newSwaps) = parseFstab "$out/etc/fstab";
foreach my $mountPoint (keys %$prevFss) {
my $prev = $prevFss->{$mountPoint};
my $new = $newFss->{$mountPoint};
my $unit = pathToUnitName($mountPoint) . ".mount";
if (!defined $new) {
# Filesystem entry disappeared, so unmount it.
push @unitsToStop, $unit;
} elsif ($prev->{fsType} ne $new->{fsType} || $prev->{device} ne $new->{device}) {
# Filesystem type or device changed, so unmount and mount it.
write_file($startListFile, { append => 1 }, "$unit\n");
push @unitsToStop, $unit;
} elsif ($prev->{options} ne $new->{options}) {
# Mount options changes, so remount it.
write_file($reloadListFile, { append => 1 }, "$unit\n");
}
}
# Also handles swap devices.
foreach my $device (keys %$prevSwaps) {
my $prev = $prevSwaps->{$device};
my $new = $newSwaps->{$device};
if (!defined $new) {
# Swap entry disappeared, so turn it off. Can't use
# "systemctl stop" here because systemd has lots of alias
# units that prevent a stop from actually calling
# "swapoff".
print STDERR "stopping swap device: $device\n";
system("@utillinux@/sbin/swapoff", $device);
}
# FIXME: update swap options (i.e. its priority).
}
if (scalar @unitsToStop > 0) {
@unitsToStop = unique(@unitsToStop);
print STDERR "stopping the following units: ", join(", ", sort(@unitsToStop)), "\n";
system("@systemd@/bin/systemctl", "stop", "--", @unitsToStop); # FIXME: ignore errors?
}
print STDERR "NOT restarting the following units: ", join(", ", sort(@unitsToSkip)), "\n"
if scalar @unitsToSkip > 0;
# Activate the new configuration (i.e., update /etc, make accounts,
# and so on).
my $res = 0;
print STDERR "activating the configuration...\n";
system("$out/activate", "$out") == 0 or $res = 2;
# Restart systemd if necessary.
if (abs_path("/proc/1/exe") ne abs_path("@systemd@/lib/systemd/systemd")) {
print STDERR "restarting systemd...\n";
system("@systemd@/bin/systemctl", "daemon-reexec") == 0 or $res = 2;
}
# Forget about previously failed services.
system("@systemd@/bin/systemctl", "reset-failed");
# Make systemd reload its units.
system("@systemd@/bin/systemctl", "daemon-reload") == 0 or $res = 3;
# Restart changed services (those that have to be restarted rather
# than stopped and started).
my @restart = unique(split('\n', read_file($restartListFile, err_mode => 'quiet') // ""));
if (scalar @restart > 0) {
print STDERR "restarting the following units: ", join(", ", sort(@restart)), "\n";
system("@systemd@/bin/systemctl", "restart", "--", @restart) == 0 or $res = 4;
unlink($restartListFile);
}
# Start all active targets, as well as changed units we stopped above.
# The latter is necessary because some may not be dependencies of the
# targets (i.e., they were manually started). FIXME: detect units
# that are symlinks to other units. We shouldn't start both at the
# same time because we'll get a "Failed to add path to set" error from
# systemd.
my @start = unique("default.target", "timers.target", split('\n', read_file($startListFile, err_mode => 'quiet') // ""));
print STDERR "starting the following units: ", join(", ", sort(@start)), "\n";
system("@systemd@/bin/systemctl", "start", "--", @start) == 0 or $res = 4;
unlink($startListFile);
# Reload units that need it. This includes remounting changed mount
# units.
my @reload = unique(split '\n', read_file($reloadListFile, err_mode => 'quiet') // "");
if (scalar @reload > 0) {
print STDERR "reloading the following units: ", join(", ", sort(@reload)), "\n";
system("@systemd@/bin/systemctl", "reload", "--", @reload) == 0 or $res = 4;
unlink($reloadListFile);
}
# Signal dbus to reload its configuration.
system("@systemd@/bin/systemctl", "reload", "dbus.service");
# Print failed and new units.
my (@failed, @new, @restarting);
my $activeNew = getActiveUnits;
while (my ($unit, $state) = each %{$activeNew}) {
if ($state->{state} eq "failed") {
push @failed, $unit;
}
elsif ($state->{state} eq "auto-restart") {
# A unit in auto-restart state is a failure *if* it previously failed to start
my $lines = `@systemd@/bin/systemctl show '$unit'`;
my $info = parseKeyValues(split "\n", $lines);
if ($info->{ExecMainStatus} ne '0') {
push @failed, $unit;
}
}
elsif ($state->{state} ne "failed" && !defined $activePrev->{$unit}) {
push @new, $unit;
}
}
print STDERR "the following new units were started: ", join(", ", sort(@new)), "\n"
if scalar @new > 0;
if (scalar @failed > 0) {
print STDERR "warning: the following units failed: ", join(", ", sort(@failed)), "\n";
foreach my $unit (@failed) {
print STDERR "\n";
system("COLUMNS=1000 @systemd@/bin/systemctl status --no-pager '$unit' >&2");
}
$res = 4;
}
if ($res == 0) {
syslog(LOG_NOTICE, "finished switching to system configuration $out");
} else {
syslog(LOG_ERR, "switching to system configuration $out failed (status $res)");
}
exit $res;