commit b9a1fa30eb3296b169f51ffa8ee05513c5c1dbae
Author: aszlig
Date:   Thu May 16 14:17:56 2013 +0200

    zygote: Add support for user namespaces on Linux.

    The implementation is done by patching the Zygote host to execute the
    sandbox binary with CLONE_NEWUSER and setting the uid and gid mapping so
    that the child process is using uid 0 and gid 0 which map to the current
    user of the parent.

    Afterwards, the sandbox will continue as if it was called as a setuid
    binary.

    In addition, this adds new_user_namespace as an option in process_util in
    order to set the UID and GID mapping correctly. The reason for this is
    that just passing CLONE_NEWUSER to clone_flags doesn't help in
    LaunchProcess(), because without setting the mappings exec*() will clear
    the process's capability sets.

    If the kernel doesn't support unprivileged user namespaces and the
    sandbox binary doesn't have the setuid flag, the Zygote main process will
    run without a sandbox. This is to mimic the behaviour if no SUID sandbox
    binary path is set.

    Signed-off-by: aszlig

diff --git a/base/process/launch.h b/base/process/launch.h
index 45b1053..ce71418 100644
--- a/base/process/launch.h
+++ b/base/process/launch.h
@@ -51,6 +51,7 @@ struct LaunchOptions {
         new_process_group(false)
 #if defined(OS_LINUX)
         , clone_flags(0)
+        , new_user_namespace(false)
 #endif  // OS_LINUX
 #if defined(OS_CHROMEOS)
         , ctrl_terminal_fd(-1)
@@ -125,6 +126,9 @@ struct LaunchOptions {
 #if defined(OS_LINUX)
   // If non-zero, start the process using clone(), using flags as provided.
   int clone_flags;
+
+  // If true, start the process in a new user namespace.
+  bool new_user_namespace;
 #endif  // defined(OS_LINUX)
 
 #if defined(OS_CHROMEOS)
diff --git a/base/process/launch_posix.cc b/base/process/launch_posix.cc
index 336633c..4b50a5d 100644
--- a/base/process/launch_posix.cc
+++ b/base/process/launch_posix.cc
@@ -36,6 +36,13 @@
 #include "base/threading/platform_thread.h"
 #include "base/threading/thread_restrictions.h"
 
+#if defined(OS_LINUX)
+#include <sched.h>
+#if !defined(CLONE_NEWUSER)
+#define CLONE_NEWUSER 0x10000000
+#endif
+#endif
+
 #if defined(OS_CHROMEOS)
 #include <sys/ioctl.h>
 #endif
@@ -395,8 +402,27 @@ bool LaunchProcess(const std::vector<std::string>& argv,
 
   pid_t pid;
 #if defined(OS_LINUX)
-  if (options.clone_flags) {
-    pid = syscall(__NR_clone, options.clone_flags, 0, 0, 0);
+  int map_pipe_fd[2];
+  int flags = options.clone_flags;
+
+  if (options.new_user_namespace) {
+    flags |= CLONE_NEWUSER;
+    if (pipe(map_pipe_fd) < 0) {
+      DPLOG(ERROR) << "user namespace pipe";
+      return false;
+    }
+  }
+
+  if (options.clone_flags || options.new_user_namespace) {
+    pid = syscall(__NR_clone, flags, 0, 0, 0);
+    if (pid < 0 && options.new_user_namespace) {
+      // clone() failed, so the pipe will never be used. Close both ends here
+      // (preserving errno) because no later code knows about these FDs.
+      const int saved_errno = errno;
+      close(map_pipe_fd[0]);
+      close(map_pipe_fd[1]);
+      errno = saved_errno;
+    }
   } else
 #endif
   {
@@ -409,6 +435,24 @@ bool LaunchProcess(const std::vector<std::string>& argv,
   } else if (pid == 0) {
     // Child process
 
+#if defined(OS_LINUX)
+    if (options.new_user_namespace) {
+      // Close the write end of the pipe so we get an EOF when the parent closes
+      // the FD. This is to avoid race conditions when the UID/GID mappings are
+      // written _after_ execvp().
+      close(map_pipe_fd[1]);
+
+      char dummy;
+      if (HANDLE_EINTR(read(map_pipe_fd[0], &dummy, 1)) != 0) {
+        RAW_LOG(ERROR, "Unexpected input in uid/gid mapping pipe.");
+        _exit(127);
+      }
+
+      // EOF seen: the parent has closed its write end, so the read end is of
+      // no further use.
+      close(map_pipe_fd[0]);
+    }
+#endif
+
     // DANGER: fork() rule: in the child, if you don't end up doing exec*(),
     // you call _exit() instead of exit(). This is because _exit() does not
     // call any previously-registered (in the parent) exit handlers, which
@@ -523,6 +567,44 @@ bool LaunchProcess(const std::vector<std::string>& argv,
     _exit(127);
   } else {
     // Parent process
+#if defined(OS_LINUX)
+    if (options.new_user_namespace) {
+      // Only the child reads from the pipe. Close the parent's copy of the
+      // read end so that it is not leaked on every launch.
+      close(map_pipe_fd[0]);
+
+      // We need to write UID/GID mapping here to map the current user outside
+      // the namespace to the root user inside the namespace in order to
+      // correctly "fool" the child process.
+      char buf[256];
+      int map_fd, map_len;
+
+      snprintf(buf, sizeof(buf), "/proc/%d/uid_map", pid);
+      map_fd = open(buf, O_RDWR);
+      DPCHECK(map_fd >= 0);
+      snprintf(buf, sizeof(buf), "0 %d 1", geteuid());
+      map_len = strlen(buf);
+      if (write(map_fd, buf, map_len) != map_len) {
+        RAW_LOG(WARNING, "Can't write to uid_map.");
+      }
+      close(map_fd);
+
+      snprintf(buf, sizeof(buf), "/proc/%d/gid_map", pid);
+      map_fd = open(buf, O_RDWR);
+      DPCHECK(map_fd >= 0);
+      snprintf(buf, sizeof(buf), "0 %d 1", getegid());
+      map_len = strlen(buf);
+      if (write(map_fd, buf, map_len) != map_len) {
+        RAW_LOG(WARNING, "Can't write to gid_map.");
+      }
+      close(map_fd);
+
+      // Close the pipe on the parent, so the child can continue doing the
+      // execvp() call.
+      close(map_pipe_fd[1]);
+    }
+#endif
+
     if (options.wait) {
       // While this isn't strictly disk IO, waiting for another process to
       // finish is the sort of thing ThreadRestrictions is trying to prevent.
diff --git a/content/browser/zygote_host/zygote_host_impl_linux.cc b/content/browser/zygote_host/zygote_host_impl_linux.cc
index bb84e62..bce0d18 100644
--- a/content/browser/zygote_host/zygote_host_impl_linux.cc
+++ b/content/browser/zygote_host/zygote_host_impl_linux.cc
@@ -119,25 +119,31 @@ void ZygoteHostImpl::Init(const std::string& sandbox_cmd) {
 
   sandbox_binary_ = sandbox_cmd.c_str();
 
-  // A non empty sandbox_cmd means we want a SUID sandbox.
-  using_suid_sandbox_ = !sandbox_cmd.empty();
+  bool userns_sandbox = false;
+  const std::vector<std::string> cmd_line_unwrapped(cmd_line.argv());
 
-  if (using_suid_sandbox_) {
+  if (!sandbox_cmd.empty()) {
     struct stat st;
     if (stat(sandbox_binary_.c_str(), &st) != 0) {
       LOG(FATAL) << "The SUID sandbox helper binary is missing: "
                  << sandbox_binary_ << " Aborting now.";
     }
 
-    if (access(sandbox_binary_.c_str(), X_OK) == 0 &&
-        (st.st_uid == 0) &&
-        (st.st_mode & S_ISUID) &&
-        (st.st_mode & S_IXOTH)) {
+    if (access(sandbox_binary_.c_str(), X_OK) == 0) {
+      using_suid_sandbox_ = true;
+
       cmd_line.PrependWrapper(sandbox_binary_);
 
       scoped_ptr<sandbox::SetuidSandboxClient>
           sandbox_client(sandbox::SetuidSandboxClient::Create());
       sandbox_client->SetupLaunchEnvironment();
+
+      if (!((st.st_uid == 0) &&
+            (st.st_mode & S_ISUID) &&
+            (st.st_mode & S_IXOTH))) {
+        userns_sandbox = true;
+        sandbox_client->SetNoSuid();
+      }
     } else {
       LOG(FATAL) << "The SUID sandbox helper binary was found, but is not "
                     "configured correctly. Rather than run without sandboxing "
@@ -161,7 +167,19 @@ void ZygoteHostImpl::Init(const std::string& sandbox_cmd) {
   base::ProcessHandle process = -1;
   base::LaunchOptions options;
   options.fds_to_remap = &fds_to_map;
+  if (userns_sandbox)
+    options.new_user_namespace = true;
   base::LaunchProcess(cmd_line.argv(), options, &process);
+
+  if (process == -1 && userns_sandbox) {
+    LOG(ERROR) << "User namespace sandbox failed to start, running without "
+               << "sandbox! You need at least kernel 3.8.0 with CONFIG_USER_NS "
+               << "enabled in order to use the sandbox without setuid bit.";
+    using_suid_sandbox_ = false;
+    options.new_user_namespace = false;
+    base::LaunchProcess(cmd_line_unwrapped, options, &process);
+  }
+
   CHECK(process != -1) << "Failed to launch zygote process";
 
   if (using_suid_sandbox_) {
diff --git a/content/zygote/zygote_main_linux.cc b/content/zygote/zygote_main_linux.cc
index dcea4c0..c06b4ae 100644
--- a/content/zygote/zygote_main_linux.cc
+++ b/content/zygote/zygote_main_linux.cc
@@ -398,6 +398,13 @@ static bool EnterSandbox(sandbox::SetuidSandboxClient* setuid_sandbox,
     *has_started_new_init = true;
   }
 
+  // Don't set non-dumpable, as it causes trouble when the host tries to find
+  // the zygote process (XXX: Not quite sure why this happens with user
+  // namespaces). Fortunately, we also have the seccomp filter sandbox which
+  // should disallow the use of ptrace.
+  if (setuid_sandbox->IsNoSuid())
+    return true;
+
 #if !defined(OS_OPENBSD)
   // Previously, we required that the binary be non-readable. This causes the
   // kernel to mark the process as non-dumpable at startup. The thinking was
diff --git a/sandbox/linux/suid/client/setuid_sandbox_client.cc b/sandbox/linux/suid/client/setuid_sandbox_client.cc
index 34231d4..36e3201 100644
--- a/sandbox/linux/suid/client/setuid_sandbox_client.cc
+++ b/sandbox/linux/suid/client/setuid_sandbox_client.cc
@@ -166,6 +166,10 @@ bool SetuidSandboxClient::IsInNewNETNamespace() const {
   return env_->HasVar(kSandboxNETNSEnvironmentVarName);
 }
 
+bool SetuidSandboxClient::IsNoSuid() const {
+  return env_->HasVar(kSandboxNoSuidVarName);
+}
+
 bool SetuidSandboxClient::IsSandboxed() const {
   return sandboxed_;
 }
@@ -175,5 +179,9 @@ void SetuidSandboxClient::SetupLaunchEnvironment() {
   SetSandboxAPIEnvironmentVariable(env_);
 }
 
+void SetuidSandboxClient::SetNoSuid() {
+  env_->SetVar(kSandboxNoSuidVarName, "1");
+}
+
 }  // namespace sandbox
 
diff --git a/sandbox/linux/suid/client/setuid_sandbox_client.h b/sandbox/linux/suid/client/setuid_sandbox_client.h
index a9f6536..2e8113a 100644
--- a/sandbox/linux/suid/client/setuid_sandbox_client.h
+++ b/sandbox/linux/suid/client/setuid_sandbox_client.h
@@ -39,6 +39,8 @@ class SetuidSandboxClient {
   bool IsInNewPIDNamespace() const;
   // Did the setuid helper create a new network namespace ?
   bool IsInNewNETNamespace() const;
+  // Is sandboxed without SUID binary ?
+  bool IsNoSuid() const;
   // Are we done and fully sandboxed ?
   bool IsSandboxed() const;
 
@@ -46,6 +48,10 @@ class SetuidSandboxClient {
   // helper.
   void SetupLaunchEnvironment();
 
+  // Mark the launch environment as using the helper without the SUID bit
+  // (sets the variable that IsNoSuid() checks).
+  void SetNoSuid();
+
  private:
   // Holds the environment. Will never be NULL.
   base::Environment* env_;
diff --git a/sandbox/linux/suid/common/sandbox.h b/sandbox/linux/suid/common/sandbox.h
index aad4ff8..bd710d5 100644
--- a/sandbox/linux/suid/common/sandbox.h
+++ b/sandbox/linux/suid/common/sandbox.h
@@ -18,6 +18,7 @@ static const char kAdjustLowMemMarginSwitch[] = "--adjust-low-mem";
 
 static const char kSandboxDescriptorEnvironmentVarName[] = "SBX_D";
 static const char kSandboxHelperPidEnvironmentVarName[] = "SBX_HELPER_PID";
+static const char kSandboxNoSuidVarName[] = "SBX_NO_SUID";
 
 static const long kSUIDSandboxApiNumber = 1;
 static const char kSandboxEnvironmentApiRequest[] = "SBX_CHROME_API_RQ";