357 lines
9.9 KiB
C++
357 lines
9.9 KiB
C++
/*

   nsjail - isolating the binary
   -----------------------------------------

   Copyright 2014 Google Inc. All Rights Reserved.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

*/
|
|
|
|
#include "contain.h"
|
|
|
|
#include <dirent.h>
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <inttypes.h>
|
|
#include <limits.h>
|
|
#include <signal.h>
|
|
#include <stdbool.h>
|
|
#include <stddef.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <sys/personality.h>
|
|
#include <sys/prctl.h>
|
|
#include <sys/resource.h>
|
|
#include <unistd.h>
|
|
|
|
#include <algorithm>
|
|
|
|
#include "caps.h"
|
|
#include "cgroup.h"
|
|
#include "cpu.h"
|
|
#include "logs.h"
|
|
#include "macros.h"
|
|
#include "mnt.h"
|
|
#include "net.h"
|
|
#include "pid.h"
|
|
#include "user.h"
|
|
#include "util.h"
|
|
#include "uts.h"
|
|
|
|
namespace contain {
|
|
|
|
/*
 * Containment step: forwards to user::initNsFromChild() and hands its
 * result back to the caller unchanged.
 */
static bool containUserNs(nsjconf_t* nsjconf) {
    const bool ok = user::initNsFromChild(nsjconf);
    return ok;
}
|
|
|
|
/*
 * Containment step: forwards to pid::initNs() and hands its result back
 * to the caller unchanged.
 */
static bool containInitPidNs(nsjconf_t* nsjconf) {
    const bool ok = pid::initNs(nsjconf);
    return ok;
}
|
|
|
|
/*
 * Containment step: forwards to net::initNsFromChild() and hands its
 * result back to the caller unchanged.
 */
static bool containInitNetNs(nsjconf_t* nsjconf) {
    const bool ok = net::initNsFromChild(nsjconf);
    return ok;
}
|
|
|
|
/*
 * Containment step: forwards to uts::initNs() and hands its result back
 * to the caller unchanged.
 */
static bool containInitUtsNs(nsjconf_t* nsjconf) {
    const bool ok = uts::initNs(nsjconf);
    return ok;
}
|
|
|
|
static bool containInitCgroupNs(void) {
|
|
return cgroup::initNs();
|
|
}
|
|
|
|
/*
 * Optionally sets the no_new_privs process attribute, then runs
 * caps::initNs().
 *
 * A failing prctl(PR_SET_NO_NEW_PRIVS) only produces a warning; the
 * return value is determined solely by caps::initNs().
 */
static bool containDropPrivs(nsjconf_t* nsjconf) {
/* Fallback definition for build environments whose headers lack the constant */
#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38
#endif
    const bool want_no_new_privs = !nsjconf->disable_no_new_privs;
    if (want_no_new_privs && prctl(PR_SET_NO_NEW_PRIVS, 1UL, 0UL, 0UL, 0UL) == -1) {
        /* Only new kernels support it */
        PLOG_W("prctl(PR_SET_NO_NEW_PRIVS, 1)");
    }

    return caps::initNs(nsjconf);
}
|
|
|
|
/*
 * Prepares process-level attributes of the current process:
 *  - requests SIGKILL when the parent dies (failure is fatal),
 *  - applies nsjconf->personality when it is non-zero (failure is fatal),
 *  - applies nsjconf->nice_level (failure only logs a warning),
 *  - calls setsid() unless nsjconf->skip_setsid is set (result is ignored).
 *
 * Returns false only when one of the fatal steps fails.
 */
static bool containPrepareEnv(nsjconf_t* nsjconf) {
    const int pdeathsig_rc = prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0);
    if (pdeathsig_rc == -1) {
        PLOG_E("prctl(PR_SET_PDEATHSIG, SIGKILL)");
        return false;
    }

    if (nsjconf->personality != 0) {
        if (personality(nsjconf->personality) == -1) {
            PLOG_E("personality(%lx)", nsjconf->personality);
            return false;
        }
    }

    LOG_D("setpriority(%d)", nsjconf->nice_level);
    /* -1 can be a legitimate return value of setpriority(), so errno is the real indicator */
    errno = 0;
    const int prio_rc = setpriority(PRIO_PROCESS, 0, nsjconf->nice_level);
    if (prio_rc == -1 && errno != 0) {
        PLOG_W("setpriority(%d)", nsjconf->nice_level);
    }

    const bool new_session = !nsjconf->skip_setsid;
    if (new_session) {
        setsid();
    }

    return true;
}
|
|
|
|
/*
 * Containment step: forwards to mnt::initNs() and hands its result back
 * to the caller unchanged.
 */
static bool containInitMountNs(nsjconf_t* nsjconf) {
    const bool ok = mnt::initNs(nsjconf);
    return ok;
}
|
|
|
|
/*
 * Containment step: forwards to cpu::initCpu() and hands its result back
 * to the caller unchanged.
 */
static bool containCPU(nsjconf_t* nsjconf) {
    const bool ok = cpu::initCpu(nsjconf);
    return ok;
}
|
|
|
|
/*
 * When nsjconf->disable_tsc is set, asks the kernel (PR_SET_TSC with
 * PR_TSC_SIGSEGV) to deliver SIGSEGV on timestamp-counter reads.
 *
 * The prctl() is compiled in for x86/x86-64 only; on other architectures
 * the request is logged as a warning and otherwise ignored.
 *
 * Returns false only if the prctl() call itself fails.
 */
static bool containTSC(nsjconf_t* nsjconf) {
    if (!nsjconf->disable_tsc) {
        /* Nothing requested */
        return true;
    }

#if defined(__x86_64__) || defined(__i386__)
    const int tsc_rc = prctl(PR_SET_TSC, PR_TSC_SIGSEGV, 0, 0, 0);
    if (tsc_rc == -1) {
        PLOG_E("prctl(PR_SET_TSC, PR_TSC_SIGSEGV)");
        return false;
    }
#else  /* defined(__x86_64__) || defined(__i386__) */
    LOG_W("prctl(PR_SET_TSC, PR_TSC_SIGSEGV) requested, but it's supported under "
          "x86/x86-64 CPU architectures only. Ignoring it!");
#endif /* defined(__x86_64__) || defined(__i386__) */

    return true;
}
|
|
|
|
/*
 * Applies the resource limits stored in nsjconf (rl_as, rl_core, rl_cpu,
 * rl_fsize, rl_nofile, rl_nproc, rl_stack, rl_mlock, rl_rtpr, rl_msgq)
 * via util::setrlimit(). Each value is used for both the soft and the
 * hard limit.
 *
 * Does nothing and returns true when nsjconf->disable_rl is set.
 * Returns false as soon as one limit cannot be applied; limits earlier in
 * the table have already been applied at that point.
 */
static bool containSetLimits(nsjconf_t* nsjconf) {
    if (nsjconf->disable_rl) {
        return true;
    }

    /* One row per limit, applied in this order */
    const struct {
        int resource;
        const char* name;
        uint64_t value;
    } limits[] = {
        {RLIMIT_AS, "RLIMIT_AS", nsjconf->rl_as},
        {RLIMIT_CORE, "RLIMIT_CORE", nsjconf->rl_core},
        {RLIMIT_CPU, "RLIMIT_CPU", nsjconf->rl_cpu},
        {RLIMIT_FSIZE, "RLIMIT_FSIZE", nsjconf->rl_fsize},
        {RLIMIT_NOFILE, "RLIMIT_NOFILE", nsjconf->rl_nofile},
        {RLIMIT_NPROC, "RLIMIT_NPROC", nsjconf->rl_nproc},
        {RLIMIT_STACK, "RLIMIT_STACK", nsjconf->rl_stack},
        {RLIMIT_MEMLOCK, "RLIMIT_MEMLOCK", nsjconf->rl_mlock},
        {RLIMIT_RTPRIO, "RLIMIT_RTPRIO", nsjconf->rl_rtpr},
        {RLIMIT_MSGQUEUE, "RLIMIT_MSGQUEUE", nsjconf->rl_msgq},
    };
    const size_t limits_cnt = sizeof(limits) / sizeof(limits[0]);

    for (size_t i = 0; i < limits_cnt; i++) {
        struct rlimit64 rl;
        rl.rlim_cur = rl.rlim_max = limits[i].value;
        if (util::setrlimit(limits[i].resource, rl) == -1) {
            PLOG_E("util::setrlimit(0, %s, %" PRIu64 ")", limits[i].name, limits[i].value);
            return false;
        }
    }

    return true;
}
|
|
|
|
/*
 * Tells whether fd is listed in nsjconf->openfds.
 */
static bool containPassFd(nsjconf_t* nsjconf, int fd) {
    const bool listed = std::count(nsjconf->openfds.begin(), nsjconf->openfds.end(), fd) != 0;
    return listed;
}
|
|
|
|
/*
 * Walks descriptor numbers 0..1023 and, for every one that is open,
 * clears FD_CLOEXEC when containPassFd() reports the descriptor as listed
 * in nsjconf->openfds, or sets FD_CLOEXEC otherwise.
 *
 * Descriptor numbers that are not open (EBADF) are skipped silently.
 * Returns false on the first fcntl() failure of any other kind.
 */
static bool containMakeFdsCOENaive(nsjconf_t* nsjconf) {
    /*
     * Don't use getrlimit(RLIMIT_NOFILE) here, as it can return an artificially small value
     * (e.g. 32), which could be smaller than a maximum assigned number to file-descriptors
     * in this process. Just use some reasonably sane value (e.g. 1024)
     */
    const int max_fd = 1024;

    /* Signed index: fcntl(), containPassFd() and the %d format all take an int */
    for (int fd = 0; fd < max_fd; fd++) {
        int flags = TEMP_FAILURE_RETRY(fcntl(fd, F_GETFD, 0));
        if (flags == -1 && errno == EBADF) {
            continue;
        }
        if (flags == -1) {
            PLOG_E("Couldn't get flags for fd=%d", fd);
            return false;
        }
        if (containPassFd(nsjconf, fd)) {
            LOG_D("fd=%d will be passed to the child process", fd);
            if (TEMP_FAILURE_RETRY(fcntl(fd, F_SETFD, flags & ~(FD_CLOEXEC))) == -1) {
                PLOG_E("Could not clear FD_CLOEXEC for fd=%d", fd);
                return false;
            }
        } else {
            if (TEMP_FAILURE_RETRY(fcntl(fd, F_SETFD, flags | FD_CLOEXEC)) == -1) {
                PLOG_E("Could not set FD_CLOEXEC for fd=%d", fd);
                return false;
            }
        }
    }

    return true;
}
|
|
|
|
/*
 * Enumerates the entries of /proc/self/fd and, for each descriptor found,
 * clears FD_CLOEXEC when containPassFd() reports it as listed in
 * nsjconf->openfds, or sets FD_CLOEXEC otherwise.
 *
 * Returns false when the directory cannot be opened or read, or when an
 * fcntl() call fails. Entries whose name is not a valid non-negative int
 * are skipped with a warning.
 */
static bool containMakeFdsCOEProc(nsjconf_t* nsjconf) {
    int dirfd = open("/proc/self/fd", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
    if (dirfd == -1) {
        PLOG_D("open('/proc/self/fd', O_DIRECTORY|O_RDONLY|O_CLOEXEC)");
        return false;
    }
    DIR* dir = fdopendir(dirfd);
    if (dir == NULL) {
        PLOG_W("fdopendir(fd=%d)", dirfd);
        close(dirfd);
        return false;
    }

    /* Every path out of the loop falls through to the single closedir() below */
    bool ok = true;
    for (;;) {
        /* readdir() returns NULL both at end-of-directory and on error; errno tells them apart */
        errno = 0;
        struct dirent* entry = readdir(dir);
        if (entry == NULL) {
            if (errno != 0) {
                PLOG_D("readdir('/proc/self/fd')");
                ok = false;
            }
            break;
        }
        if (strcmp(".", entry->d_name) == 0 || strcmp("..", entry->d_name) == 0) {
            continue;
        }

        errno = 0;
        char* endp = NULL;
        const intmax_t parsed = strtoimax(entry->d_name, &endp, 10);
        if (errno != 0) {
            PLOG_W("Cannot convert /proc/self/fd/%s to a number", entry->d_name);
            continue;
        }
        /*
         * strtoimax() yields 0 without touching errno for non-numeric input, and its
         * result may not fit in an int, so validate before treating it as a descriptor
         */
        if (endp == entry->d_name || *endp != '\0' || parsed < 0 || parsed > INT_MAX) {
            LOG_W("Cannot convert /proc/self/fd/%s to a number", entry->d_name);
            continue;
        }
        const int fd = static_cast<int>(parsed);

        int flags = TEMP_FAILURE_RETRY(fcntl(fd, F_GETFD, 0));
        if (flags == -1) {
            PLOG_D("fcntl(fd=%d, F_GETFD, 0)", fd);
            ok = false;
            break;
        }
        if (containPassFd(nsjconf, fd)) {
            LOG_D("fd=%d will be passed to the child process", fd);
            if (TEMP_FAILURE_RETRY(fcntl(fd, F_SETFD, flags & ~(FD_CLOEXEC))) == -1) {
                PLOG_E("Could not clear FD_CLOEXEC for fd=%d", fd);
                ok = false;
                break;
            }
        } else {
            LOG_D("fd=%d will be closed before execve()", fd);
            if (TEMP_FAILURE_RETRY(fcntl(fd, F_SETFD, flags | FD_CLOEXEC)) == -1) {
                PLOG_E("Could not set FD_CLOEXEC for fd=%d", fd);
                ok = false;
                break;
            }
        }
    }

    /* closedir() also closes the underlying dirfd */
    closedir(dir);
    return ok;
}
|
|
|
|
/*
 * Marks file-descriptors as close-on-exec, trying containMakeFdsCOEProc()
 * first and containMakeFdsCOENaive() only if the former fails.
 *
 * Logs an error and returns false when both methods fail.
 */
static bool containMakeFdsCOE(nsjconf_t* nsjconf) {
    /* Short-circuit keeps the naive method from running when the first one succeeded */
    const bool done = containMakeFdsCOEProc(nsjconf) || containMakeFdsCOENaive(nsjconf);
    if (!done) {
        LOG_E("Couldn't mark relevant file-descriptors as close-on-exec with any known method");
    }
    return done;
}
|
|
|
|
/*
 * Installs fd_in/fd_out/fd_err as the process' stdin/stdout/stderr via
 * dup2().
 *
 * When nsjconf->stderr_to_null is set, stderr is pointed at /dev/null
 * instead of fd_err; when nsjconf->is_silent is set, all three standard
 * descriptors are pointed at /dev/null. /dev/null is opened at most once,
 * and the temporary descriptor is closed again once it has been duplicated
 * (unless it already is one of the standard descriptors).
 *
 * Descriptors passed in by the caller are never closed here.
 * Returns false if /dev/null cannot be opened or any dup2() fails.
 */
bool setupFD(nsjconf_t* nsjconf, int fd_in, int fd_out, int fd_err) {
    int null_fd = -1;
    if (nsjconf->stderr_to_null || nsjconf->is_silent) {
        null_fd = TEMP_FAILURE_RETRY(open("/dev/null", O_RDWR));
        if (null_fd == -1) {
            PLOG_E("open('/dev/null', O_RDWR)");
            return false;
        }
    }
    if (nsjconf->stderr_to_null) {
        LOG_D("Redirecting fd=2 (STDERR_FILENO) to /dev/null");
        fd_err = null_fd;
    }
    if (nsjconf->is_silent) {
        LOG_D("Redirecting fd=0-2 (STDIN/OUT/ERR_FILENO) to /dev/null");
        fd_in = fd_out = fd_err = null_fd;
    }

    /* Install the chosen descriptors as stdin/stdout/stderr; stop at the first failure */
    bool ok = false;
    if (fd_in != STDIN_FILENO && TEMP_FAILURE_RETRY(dup2(fd_in, STDIN_FILENO)) == -1) {
        PLOG_E("dup2(%d, STDIN_FILENO)", fd_in);
    } else if (fd_out != STDOUT_FILENO &&
               TEMP_FAILURE_RETRY(dup2(fd_out, STDOUT_FILENO)) == -1) {
        PLOG_E("dup2(%d, STDOUT_FILENO)", fd_out);
    } else if (fd_err != STDERR_FILENO &&
               TEMP_FAILURE_RETRY(dup2(fd_err, STDERR_FILENO)) == -1) {
        PLOG_E("dup2(%d, STDERR_FILENO)", fd_err);
    } else {
        ok = true;
    }

    /*
     * The temporary /dev/null descriptor is no longer needed once duplicated. If open()
     * happened to return 0, 1 or 2 it is itself a standard descriptor and must stay open
     */
    if (null_fd > STDERR_FILENO) {
        close(null_fd);
    }
    return ok;
}
|
|
|
|
/*
 * Runs every containment step in a fixed order, each wrapped in
 * RETURN_ON_FAILURE() so that a failing step ends the sequence.
 *
 * Returns true once all steps have completed.
 */
bool containProc(nsjconf_t* nsjconf) {
    /* Namespace initialization, followed by dropping privileges */
    RETURN_ON_FAILURE(containUserNs(nsjconf));
    RETURN_ON_FAILURE(containInitPidNs(nsjconf));
    RETURN_ON_FAILURE(containInitMountNs(nsjconf));
    RETURN_ON_FAILURE(containInitNetNs(nsjconf));
    RETURN_ON_FAILURE(containInitUtsNs(nsjconf));
    RETURN_ON_FAILURE(containInitCgroupNs());
    RETURN_ON_FAILURE(containDropPrivs(nsjconf));

    /* As non-root */
    RETURN_ON_FAILURE(containCPU(nsjconf));
    RETURN_ON_FAILURE(containTSC(nsjconf));
    RETURN_ON_FAILURE(containSetLimits(nsjconf));
    RETURN_ON_FAILURE(containPrepareEnv(nsjconf));
    RETURN_ON_FAILURE(containMakeFdsCOE(nsjconf));

    return true;
}
|
|
|
|
} // namespace contain
|