2015-05-15 05:44:48 +08:00
|
|
|
/*

   nsjail - isolating the binary
   -----------------------------------------

   Copyright 2014 Google Inc. All Rights Reserved.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.

*/
|
2016-03-02 00:03:11 +08:00
|
|
|
|
2015-05-15 05:44:48 +08:00
|
|
|
#include "contain.h"
|
|
|
|
|
|
|
|
#include <dirent.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <fcntl.h>
|
2017-10-18 20:46:17 +08:00
|
|
|
#include <inttypes.h>
|
2015-05-15 05:44:48 +08:00
|
|
|
#include <signal.h>
|
|
|
|
#include <stdbool.h>
|
|
|
|
#include <stddef.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <sys/personality.h>
|
|
|
|
#include <sys/prctl.h>
|
|
|
|
#include <sys/resource.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
|
2018-02-10 05:47:00 +08:00
|
|
|
#include <algorithm>
|
|
|
|
|
2018-02-10 00:49:13 +08:00
|
|
|
#include "caps.h"
|
2018-02-10 01:13:17 +08:00
|
|
|
#include "cgroup.h"
|
2018-02-10 00:41:16 +08:00
|
|
|
#include "cpu.h"
|
2018-02-10 01:55:42 +08:00
|
|
|
#include "log.h"
|
2018-02-10 01:26:16 +08:00
|
|
|
#include "mnt.h"
|
2018-02-10 00:27:28 +08:00
|
|
|
#include "net.h"
|
2018-02-10 00:57:19 +08:00
|
|
|
#include "pid.h"
|
2018-02-10 01:08:11 +08:00
|
|
|
#include "user.h"
|
2018-02-10 00:59:51 +08:00
|
|
|
#include "uts.h"
|
2018-02-10 00:27:28 +08:00
|
|
|
|
2018-02-10 00:09:58 +08:00
|
|
|
namespace contain {
|
2015-05-15 05:44:48 +08:00
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/* User-namespace step: forwards to user::initNsFromChild() and returns its result. */
static bool containUserNs(nsjconf_t* nsjconf) {
	return user::initNsFromChild(nsjconf);
}
|
2017-02-08 01:31:50 +08:00
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/* PID-namespace step: forwards to pid::initNs() and returns its result. */
static bool containInitPidNs(nsjconf_t* nsjconf) {
	return pid::initNs(nsjconf);
}
|
2016-05-13 23:07:44 +08:00
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/* Network-namespace step: forwards to net::initNsFromChild() and returns its result. */
static bool containInitNetNs(nsjconf_t* nsjconf) {
	return net::initNsFromChild(nsjconf);
}
|
2016-02-29 07:14:36 +08:00
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/* UTS-namespace step: forwards to uts::initNs() and returns its result. */
static bool containInitUtsNs(nsjconf_t* nsjconf) {
	return uts::initNs(nsjconf);
}
|
2015-08-16 02:48:48 +08:00
|
|
|
|
2018-02-10 01:13:17 +08:00
|
|
|
/* Cgroup-namespace step: forwards to cgroup::initNs() (takes no configuration) and returns its result. */
static bool containInitCgroupNs(void) {
	return cgroup::initNs();
}
|
2016-06-19 18:47:28 +08:00
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/* Fallback for headers which do not define PR_SET_NO_NEW_PRIVS yet */
#ifndef PR_SET_NO_NEW_PRIVS
#define PR_SET_NO_NEW_PRIVS 38
#endif

/*
 * Privilege-dropping step.
 *
 * Unless nsjconf->disable_no_new_privs is set, requests PR_SET_NO_NEW_PRIVS via prctl(); a failure
 * of that call is only logged as a warning. Afterwards the result of caps::initNs() decides the
 * return value of this function.
 */
static bool containDropPrivs(nsjconf_t* nsjconf) {
	const bool want_no_new_privs = (nsjconf->disable_no_new_privs == false);

	if (want_no_new_privs && prctl(PR_SET_NO_NEW_PRIVS, 1UL, 0UL, 0UL, 0UL) == -1) {
		/* Only new kernels support it */
		PLOG_W("prctl(PR_SET_NO_NEW_PRIVS, 1)");
	}

	if (caps::initNs(nsjconf) == false) {
		return false;
	}
	return true;
}
|
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/*
 * Prepares the process environment of the jailed process:
 *  - asks the kernel to deliver SIGKILL when the parent dies (fatal on failure),
 *  - applies nsjconf->personality when it is non-zero (fatal on failure),
 *  - sets the process priority to 19 (a failure is only logged as a warning),
 *  - calls setsid() unless nsjconf->skip_setsid is set (its result is not checked).
 *
 * Returns false only for the two fatal failures listed above.
 */
static bool containPrepareEnv(nsjconf_t* nsjconf) {
	if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0) == -1) {
		PLOG_E("prctl(PR_SET_PDEATHSIG, SIGKILL)");
		return false;
	}

	if (nsjconf->personality) {
		if (personality(nsjconf->personality) == -1) {
			PLOG_E("personality(%lx)", nsjconf->personality);
			return false;
		}
	}

	/* setpriority() can legitimately return -1, so errno is what signals an error here */
	errno = 0;
	int prio_ret = setpriority(PRIO_PROCESS, 0, 19);
	if (prio_ret == -1 && errno != 0) {
		PLOG_W("setpriority(19)");
	}

	if (!nsjconf->skip_setsid) {
		setsid();
	}

	return true;
}
|
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/* Mount-namespace step: forwards to mnt::initNs() and returns its result. */
static bool containInitMountNs(nsjconf_t* nsjconf) {
	return mnt::initNs(nsjconf);
}
|
2015-05-15 05:44:48 +08:00
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/* CPU step: forwards to cpu::initCpu() and returns its result. */
static bool containCPU(nsjconf_t* nsjconf) {
	return cpu::initCpu(nsjconf);
}
|
2017-06-19 23:01:50 +08:00
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/*
 * Applies the resource limits stored in nsjconf (rl_as, rl_core, rl_cpu, rl_fsize, rl_nofile,
 * rl_nproc, rl_stack) with setrlimit64(). For every resource the soft and the hard limit are
 * set to the same configured value.
 *
 * Stops at the first failing setrlimit64() call, logs it and returns false; returns true when
 * all limits have been applied.
 */
static bool containSetLimits(nsjconf_t* nsjconf) {
	struct rlimit64 limit;

	/* Address space */
	limit.rlim_max = nsjconf->rl_as;
	limit.rlim_cur = limit.rlim_max;
	if (setrlimit64(RLIMIT_AS, &limit) == -1) {
		PLOG_E("setrlimit64(0, RLIMIT_AS, %" PRIu64 ")", nsjconf->rl_as);
		return false;
	}

	/* Core dump size */
	limit.rlim_max = nsjconf->rl_core;
	limit.rlim_cur = limit.rlim_max;
	if (setrlimit64(RLIMIT_CORE, &limit) == -1) {
		PLOG_E("setrlimit64(0, RLIMIT_CORE, %" PRIu64 ")", nsjconf->rl_core);
		return false;
	}

	/* CPU time */
	limit.rlim_max = nsjconf->rl_cpu;
	limit.rlim_cur = limit.rlim_max;
	if (setrlimit64(RLIMIT_CPU, &limit) == -1) {
		PLOG_E("setrlimit64(0, RLIMIT_CPU, %" PRIu64 ")", nsjconf->rl_cpu);
		return false;
	}

	/* File size */
	limit.rlim_max = nsjconf->rl_fsize;
	limit.rlim_cur = limit.rlim_max;
	if (setrlimit64(RLIMIT_FSIZE, &limit) == -1) {
		PLOG_E("setrlimit64(0, RLIMIT_FSIZE, %" PRIu64 ")", nsjconf->rl_fsize);
		return false;
	}

	/* Number of open files */
	limit.rlim_max = nsjconf->rl_nofile;
	limit.rlim_cur = limit.rlim_max;
	if (setrlimit64(RLIMIT_NOFILE, &limit) == -1) {
		PLOG_E("setrlimit64(0, RLIMIT_NOFILE, %" PRIu64 ")", nsjconf->rl_nofile);
		return false;
	}

	/* Number of processes */
	limit.rlim_max = nsjconf->rl_nproc;
	limit.rlim_cur = limit.rlim_max;
	if (setrlimit64(RLIMIT_NPROC, &limit) == -1) {
		PLOG_E("setrlimit64(0, RLIMIT_NPROC, %" PRIu64 ")", nsjconf->rl_nproc);
		return false;
	}

	/* Stack size */
	limit.rlim_max = nsjconf->rl_stack;
	limit.rlim_cur = limit.rlim_max;
	if (setrlimit64(RLIMIT_STACK, &limit) == -1) {
		PLOG_E("setrlimit64(0, RLIMIT_STACK, %" PRIu64 ")", nsjconf->rl_stack);
		return false;
	}

	return true;
}
|
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/* Returns true when fd is listed in nsjconf->openfds, false otherwise. */
static bool containPassFd(nsjconf_t* nsjconf, int fd) {
	if (std::find(nsjconf->openfds.begin(), nsjconf->openfds.end(), fd) ==
	    nsjconf->openfds.end()) {
		return false;
	}
	return true;
}
|
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/*
 * Close-on-exec pass which does not depend on /proc: probes descriptors 0..1023 with
 * fcntl(F_GETFD), skips the ones for which the probe fails, and then clears FD_CLOEXEC on the
 * descriptors accepted by containPassFd() while setting it on all the others.
 *
 * Returns false as soon as a fcntl(F_SETFD) call fails, true otherwise.
 */
static bool containMakeFdsCOENaive(nsjconf_t* nsjconf) {
	/*
	 * Don't use getrlimit(RLIMIT_NOFILE) here, as it can return an artificially small value
	 * (e.g. 32), which could be smaller than a maximum assigned number to file-descriptors
	 * in this process. Just use some reasonably sane value (e.g. 1024)
	 */
	const int max_probed_fds = 1024;

	/* Signed counter: it is printed with %d and handed to containPassFd(), which takes an int */
	for (int fd = 0; fd < max_probed_fds; fd++) {
		int flags = TEMP_FAILURE_RETRY(fcntl(fd, F_GETFD, 0));
		if (flags == -1) {
			/* Descriptor could not be queried (typically: not open), nothing to do */
			continue;
		}
		if (containPassFd(nsjconf, fd)) {
			LOG_D("FD=%d will be passed to the child process", fd);
			if (TEMP_FAILURE_RETRY(fcntl(fd, F_SETFD, flags & ~(FD_CLOEXEC))) == -1) {
				PLOG_E("Could not clear FD_CLOEXEC for FD=%d", fd);
				return false;
			}
		} else {
			LOG_D("FD=%d will be closed before execve()", fd);
			if (TEMP_FAILURE_RETRY(fcntl(fd, F_SETFD, flags | FD_CLOEXEC)) == -1) {
				PLOG_E("Could not set FD_CLOEXEC for FD=%d", fd);
				return false;
			}
		}
	}
	return true;
}
|
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/*
 * Close-on-exec pass driven by the contents of /proc/self/fd.
 *
 * Every entry of that directory is parsed as a descriptor number. Descriptors accepted by
 * containPassFd() get FD_CLOEXEC cleared, all the others get it set. Entries which are not
 * a valid non-negative decimal int are reported with a warning and skipped.
 *
 * Returns false when /proc/self/fd cannot be opened or read, or when any fcntl() call on a
 * listed descriptor fails; returns true once the whole directory has been processed.
 */
static bool containMakeFdsCOEProc(nsjconf_t* nsjconf) {
	int dirfd = open("/proc/self/fd", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
	if (dirfd == -1) {
		PLOG_D("open('/proc/self/fd', O_DIRECTORY|O_RDONLY|O_CLOEXEC)");
		return false;
	}
	DIR* dir = fdopendir(dirfd);
	if (dir == NULL) {
		PLOG_W("fdopendir(fd=%d)", dirfd);
		close(dirfd);
		return false;
	}

	/* Walk all listed fds: keep the ones which are to be passed, mark the rest close-on-exec */
	bool ok = true;
	for (;;) {
		/* readdir() returns NULL both at end-of-directory and on error; errno tells which */
		errno = 0;
		struct dirent* entry = readdir(dir);
		if (entry == NULL) {
			if (errno != 0) {
				PLOG_D("readdir('/proc/self/fd')");
				ok = false;
			}
			break;
		}
		if (strcmp(".", entry->d_name) == 0 || strcmp("..", entry->d_name) == 0) {
			continue;
		}

		/*
		 * strtoul() does not reliably report "no digits" through errno, so validate the
		 * conversion with the end pointer, and make sure the value fits in a non-negative
		 * int before using it as a descriptor.
		 */
		char* endptr = NULL;
		errno = 0;
		unsigned long raw = strtoul(entry->d_name, &endptr, 10);
		int fd = static_cast<int>(raw);
		if (errno != 0 || endptr == entry->d_name || *endptr != '\0' || fd < 0 ||
		    static_cast<unsigned long>(fd) != raw) {
			LOG_W("Cannot convert /proc/self/fd/%s to a number", entry->d_name);
			continue;
		}

		int flags = TEMP_FAILURE_RETRY(fcntl(fd, F_GETFD, 0));
		if (flags == -1) {
			PLOG_D("fcntl(fd, F_GETFD, 0)");
			ok = false;
			break;
		}
		if (containPassFd(nsjconf, fd)) {
			LOG_D("FD=%d will be passed to the child process", fd);
			if (TEMP_FAILURE_RETRY(fcntl(fd, F_SETFD, flags & ~(FD_CLOEXEC))) == -1) {
				PLOG_E("Could not clear FD_CLOEXEC for FD=%d", fd);
				ok = false;
				break;
			}
		} else {
			LOG_D("FD=%d will be closed before execve()", fd);
			if (TEMP_FAILURE_RETRY(fcntl(fd, F_SETFD, flags | FD_CLOEXEC)) == -1) {
				PLOG_E("Could not set FD_CLOEXEC for FD=%d", fd);
				ok = false;
				break;
			}
		}
	}

	/* Single exit path: closedir() also releases dirfd */
	closedir(dir);
	return ok;
}
|
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/*
 * Marks file-descriptors as close-on-exec, trying containMakeFdsCOEProc() first and
 * containMakeFdsCOENaive() only if the first method failed. Logs an error and returns false
 * when both methods fail.
 */
static bool containMakeFdsCOE(nsjconf_t* nsjconf) {
	/* Short-circuit evaluation keeps the naive method as a fallback only */
	if (containMakeFdsCOEProc(nsjconf) || containMakeFdsCOENaive(nsjconf)) {
		return true;
	}

	LOG_E("Couldn't mark relevant file-descriptors as close-on-exec with any known method");
	return false;
}
|
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/*
 * Installs fd_in/fd_out/fd_err as stdin/stdout/stderr of the calling process.
 *
 * When nsjconf->mode is not MODE_LISTEN_TCP, the function returns true without touching any
 * descriptor unless nsjconf->is_silent is set; in the silent case all three descriptors are
 * replaced with a single freshly opened /dev/null descriptor before being installed.
 *
 * Returns false (after logging) when open() or any of the dup2() calls fails.
 */
bool setupFD(nsjconf_t* nsjconf, int fd_in, int fd_out, int fd_err) {
	if (nsjconf->mode != MODE_LISTEN_TCP) {
		if (!nsjconf->is_silent) {
			return true;
		}

		int null_fd = TEMP_FAILURE_RETRY(open("/dev/null", O_RDWR));
		if (null_fd == -1) {
			PLOG_E("open('/dev/null', O_RDWR)");
			return false;
		}
		fd_in = null_fd;
		fd_out = null_fd;
		fd_err = null_fd;
	}

	/* Point stdin/stdout/stderr at the selected descriptors */
	if (TEMP_FAILURE_RETRY(dup2(fd_in, STDIN_FILENO)) == -1) {
		PLOG_E("dup2(%d, STDIN_FILENO)", fd_in);
		return false;
	}

	if (TEMP_FAILURE_RETRY(dup2(fd_out, STDOUT_FILENO)) == -1) {
		PLOG_E("dup2(%d, STDOUT_FILENO)", fd_out);
		return false;
	}

	if (TEMP_FAILURE_RETRY(dup2(fd_err, STDERR_FILENO)) == -1) {
		PLOG_E("dup2(%d, STDERR_FILENO)", fd_err);
		return false;
	}

	return true;
}
|
2016-03-08 22:57:09 +08:00
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/*
 * Runs all containment steps in a fixed order and stops at the first one which fails:
 * user ns, pid ns, mount ns, net ns, uts ns, cgroup ns, privilege drop, and then CPU setup,
 * resource limits, environment preparation and the close-on-exec pass.
 *
 * Returns true only if every step returned true.
 */
bool containProc(nsjconf_t* nsjconf) {
	/* && short-circuits, so a failing step prevents all later steps from running */
	const bool isolated = containUserNs(nsjconf) && containInitPidNs(nsjconf) &&
			      containInitMountNs(nsjconf) && containInitNetNs(nsjconf) &&
			      containInitUtsNs(nsjconf) && containInitCgroupNs() &&
			      containDropPrivs(nsjconf);
	if (!isolated) {
		return false;
	}

	/* */
	/* As non-root */
	return containCPU(nsjconf) && containSetLimits(nsjconf) && containPrepareEnv(nsjconf) &&
	       containMakeFdsCOE(nsjconf);
}
|
2018-02-10 00:09:58 +08:00
|
|
|
|
|
|
|
} // namespace contain
|