2015-05-15 05:44:48 +08:00
|
|
|
/*
|
|
|
|
|
|
|
|
nsjail - subprocess management
|
|
|
|
-----------------------------------------
|
|
|
|
|
|
|
|
Copyright 2014 Google Inc. All Rights Reserved.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "subproc.h"
|
|
|
|
|
|
|
|
#include <errno.h>
|
2015-05-15 22:02:15 +08:00
|
|
|
#include <fcntl.h>
|
2017-10-18 20:27:34 +08:00
|
|
|
#include <limits.h>
|
2016-07-21 21:48:47 +08:00
|
|
|
#include <linux/sched.h>
|
2015-05-15 05:44:48 +08:00
|
|
|
#include <sched.h>
|
2016-10-15 08:42:01 +08:00
|
|
|
#include <setjmp.h>
|
2015-05-15 05:44:48 +08:00
|
|
|
#include <signal.h>
|
2017-10-18 20:46:17 +08:00
|
|
|
#include <stdbool.h>
|
|
|
|
#include <stddef.h>
|
2015-05-15 05:44:48 +08:00
|
|
|
#include <stdint.h>
|
2017-09-14 04:03:21 +08:00
|
|
|
#include <stdio.h>
|
2015-05-15 05:44:48 +08:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2017-10-18 20:46:17 +08:00
|
|
|
#include <sys/socket.h>
|
2017-10-18 23:57:52 +08:00
|
|
|
#include <sys/syscall.h>
|
2015-05-15 05:44:48 +08:00
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/wait.h>
|
|
|
|
#include <time.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
|
2018-02-11 04:19:47 +08:00
|
|
|
#include <string>
|
2018-02-11 11:02:14 +08:00
|
|
|
#include <vector>
|
2018-02-11 04:19:47 +08:00
|
|
|
|
2018-02-10 01:13:17 +08:00
|
|
|
#include "cgroup.h"
|
2018-02-10 00:09:58 +08:00
|
|
|
#include "contain.h"
|
2018-02-11 00:49:15 +08:00
|
|
|
#include "logs.h"
|
2018-02-10 12:25:55 +08:00
|
|
|
#include "macros.h"
|
2018-02-10 00:27:28 +08:00
|
|
|
#include "net.h"
|
2018-02-10 00:16:41 +08:00
|
|
|
#include "sandbox.h"
|
2018-02-10 01:08:11 +08:00
|
|
|
#include "user.h"
|
2018-02-10 01:45:50 +08:00
|
|
|
#include "util.h"
|
2018-02-10 00:09:58 +08:00
|
|
|
|
2018-02-10 01:45:50 +08:00
|
|
|
namespace subproc {
|
2015-05-15 05:44:48 +08:00
|
|
|
|
2017-05-22 01:44:54 +08:00
|
|
|
/* Fallback definition used when the included headers do not provide CLONE_NEWCGROUP */
#ifndef CLONE_NEWCGROUP
#define CLONE_NEWCGROUP 0x02000000
#endif /* CLONE_NEWCGROUP */
|
2017-05-22 03:35:02 +08:00
|
|
|
|
2018-02-11 04:19:47 +08:00
|
|
|
static const std::string cloneFlagsToStr(uintptr_t flags) {
|
|
|
|
std::string res;
|
2017-05-22 01:44:54 +08:00
|
|
|
|
2018-05-22 20:27:18 +08:00
|
|
|
struct {
|
2017-10-09 05:03:02 +08:00
|
|
|
const uintptr_t flag;
|
|
|
|
const char* const name;
|
2018-05-22 20:27:18 +08:00
|
|
|
} static const cloneFlags[] = {
|
2017-10-26 06:26:02 +08:00
|
|
|
NS_VALSTR_STRUCT(CLONE_VM),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_FS),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_FILES),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_SIGHAND),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_PTRACE),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_VFORK),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_PARENT),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_THREAD),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_NEWNS),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_SYSVSEM),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_SETTLS),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_PARENT_SETTID),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_CHILD_CLEARTID),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_DETACHED),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_UNTRACED),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_CHILD_SETTID),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_NEWCGROUP),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_NEWUTS),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_NEWIPC),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_NEWUSER),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_NEWPID),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_NEWNET),
|
|
|
|
NS_VALSTR_STRUCT(CLONE_IO),
|
2017-10-09 05:03:02 +08:00
|
|
|
};
|
2017-05-22 01:44:54 +08:00
|
|
|
|
2018-02-11 04:19:47 +08:00
|
|
|
uintptr_t knownFlagMask = CSIGNAL;
|
2018-05-22 20:27:18 +08:00
|
|
|
for (const auto& i : cloneFlags) {
|
|
|
|
if (flags & i.flag) {
|
|
|
|
res.append(i.name).append("|");
|
2017-05-22 01:44:54 +08:00
|
|
|
}
|
2018-05-22 20:27:18 +08:00
|
|
|
knownFlagMask |= i.flag;
|
2017-05-22 01:44:54 +08:00
|
|
|
}
|
2018-02-11 04:19:47 +08:00
|
|
|
|
2017-05-22 03:35:02 +08:00
|
|
|
if (flags & ~(knownFlagMask)) {
|
2018-05-24 00:19:17 +08:00
|
|
|
util::StrAppend(&res, "%#tx|", flags & ~(knownFlagMask));
|
2017-05-22 03:35:02 +08:00
|
|
|
}
|
2018-02-11 04:19:47 +08:00
|
|
|
res.append(util::sigName(flags & CSIGNAL).c_str());
|
|
|
|
return res;
|
2017-05-22 01:44:54 +08:00
|
|
|
}
|
|
|
|
|
2017-10-18 18:33:24 +08:00
|
|
|
/* Reset the execution environment for the new process */
|
2018-02-10 00:03:02 +08:00
|
|
|
static bool resetEnv(void) {
|
2017-10-20 20:43:56 +08:00
|
|
|
/* Set all previously changed signals to their default behavior */
|
2018-04-29 07:10:09 +08:00
|
|
|
for (const auto& sig : nssigs) {
|
|
|
|
if (signal(sig, SIG_DFL) == SIG_ERR) {
|
|
|
|
PLOG_W("signal(%s, SIG_DFL)", util::sigName(sig).c_str());
|
2017-10-18 18:33:24 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2017-10-20 20:43:56 +08:00
|
|
|
/* Unblock all signals */
|
|
|
|
sigset_t sset;
|
2017-10-20 21:56:32 +08:00
|
|
|
sigemptyset(&sset);
|
|
|
|
if (sigprocmask(SIG_SETMASK, &sset, NULL) == -1) {
|
|
|
|
PLOG_W("sigprocmask(SIG_SET, empty)");
|
2017-10-20 20:43:56 +08:00
|
|
|
return false;
|
|
|
|
}
|
2017-10-18 18:33:24 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-02-10 00:03:02 +08:00
|
|
|
/* Byte that initParent() writes to the pipe and that subprocNewProc() waits for before going on */
static const char kSubprocDoneChar = 'D';
|
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
static int subprocNewProc(nsjconf_t* nsjconf, int fd_in, int fd_out, int fd_err, int pipefd) {
|
2018-02-12 22:17:33 +08:00
|
|
|
if (!contain::setupFD(nsjconf, fd_in, fd_out, fd_err)) {
|
2017-10-20 20:43:56 +08:00
|
|
|
_exit(0xff);
|
2015-08-16 02:48:48 +08:00
|
|
|
}
|
2018-02-10 00:03:02 +08:00
|
|
|
if (!resetEnv()) {
|
2017-10-20 20:43:56 +08:00
|
|
|
_exit(0xff);
|
2017-10-18 18:33:24 +08:00
|
|
|
}
|
|
|
|
|
2016-05-05 11:44:12 +08:00
|
|
|
if (pipefd == -1) {
|
2018-02-12 22:17:33 +08:00
|
|
|
if (!user::initNsFromParent(nsjconf, getpid())) {
|
2016-06-19 21:50:25 +08:00
|
|
|
LOG_E("Couldn't initialize net user namespace");
|
2017-10-20 20:43:56 +08:00
|
|
|
_exit(0xff);
|
2016-05-05 11:44:12 +08:00
|
|
|
}
|
2018-02-12 22:17:33 +08:00
|
|
|
if (!cgroup::initNsFromParent(nsjconf, getpid())) {
|
2016-06-20 01:36:56 +08:00
|
|
|
LOG_E("Couldn't initialize net user namespace");
|
2017-10-20 20:43:56 +08:00
|
|
|
_exit(0xff);
|
2016-06-20 01:36:56 +08:00
|
|
|
}
|
2016-05-05 11:44:12 +08:00
|
|
|
} else {
|
|
|
|
char doneChar;
|
2018-02-10 01:45:50 +08:00
|
|
|
if (util::readFromFd(pipefd, &doneChar, sizeof(doneChar)) != sizeof(doneChar)) {
|
2017-10-20 20:43:56 +08:00
|
|
|
_exit(0xff);
|
2016-05-05 11:44:12 +08:00
|
|
|
}
|
2018-02-01 21:19:01 +08:00
|
|
|
if (doneChar != kSubprocDoneChar) {
|
2017-10-20 20:43:56 +08:00
|
|
|
_exit(0xff);
|
2016-05-05 11:44:12 +08:00
|
|
|
}
|
2016-02-28 09:34:43 +08:00
|
|
|
}
|
2018-02-12 22:17:33 +08:00
|
|
|
if (!contain::containProc(nsjconf)) {
|
2017-10-20 20:43:56 +08:00
|
|
|
_exit(0xff);
|
2015-05-15 05:44:48 +08:00
|
|
|
}
|
2018-02-12 22:17:33 +08:00
|
|
|
if (!nsjconf->keep_env) {
|
2016-01-27 00:42:10 +08:00
|
|
|
clearenv();
|
|
|
|
}
|
2018-02-10 06:04:57 +08:00
|
|
|
for (const auto& env : nsjconf->envs) {
|
|
|
|
putenv(const_cast<char*>(env.c_str()));
|
|
|
|
}
|
2015-05-15 05:44:48 +08:00
|
|
|
|
2018-02-11 07:17:44 +08:00
|
|
|
auto connstr = net::connToText(fd_in, /* remote= */ true, NULL);
|
2018-02-12 23:52:05 +08:00
|
|
|
LOG_I("Executing '%s' for '%s'", nsjconf->exec_file.c_str(), connstr.c_str());
|
2017-02-12 03:33:54 +08:00
|
|
|
|
2018-02-12 23:52:05 +08:00
|
|
|
std::vector<const char*> argv;
|
|
|
|
for (const auto& s : nsjconf->argv) {
|
|
|
|
argv.push_back(s.c_str());
|
|
|
|
LOG_D(" Arg: '%s'", s.c_str());
|
2015-05-15 05:44:48 +08:00
|
|
|
}
|
2018-02-12 23:52:05 +08:00
|
|
|
argv.push_back(nullptr);
|
2016-03-08 22:57:09 +08:00
|
|
|
|
|
|
|
/* Should be the last one in the sequence */
|
2018-02-12 22:17:33 +08:00
|
|
|
if (!sandbox::applyPolicy(nsjconf)) {
|
2017-09-25 13:08:22 +08:00
|
|
|
exit(0xff);
|
2016-03-08 22:57:09 +08:00
|
|
|
}
|
2017-10-18 23:57:52 +08:00
|
|
|
|
|
|
|
if (nsjconf->use_execveat) {
|
|
|
|
#if defined(__NR_execveat)
|
2018-02-12 23:52:05 +08:00
|
|
|
syscall(__NR_execveat, (uintptr_t)nsjconf->exec_fd, "", (char* const*)argv.data(),
|
|
|
|
environ, (uintptr_t)AT_EMPTY_PATH);
|
2017-10-26 06:26:02 +08:00
|
|
|
#else /* defined(__NR_execveat) */
|
2017-10-18 23:57:52 +08:00
|
|
|
LOG_F("Your system doesn't support execveat() syscall");
|
|
|
|
#endif /* defined(__NR_execveat) */
|
|
|
|
} else {
|
2018-02-12 23:52:05 +08:00
|
|
|
execv(nsjconf->exec_file.c_str(), (char* const*)argv.data());
|
2017-10-18 23:57:52 +08:00
|
|
|
}
|
2015-05-15 22:02:15 +08:00
|
|
|
|
2018-02-12 23:52:05 +08:00
|
|
|
PLOG_E("execve('%s') failed", nsjconf->exec_file.c_str());
|
2015-05-15 22:02:15 +08:00
|
|
|
|
2017-09-25 13:08:22 +08:00
|
|
|
_exit(0xff);
|
2015-05-15 05:44:48 +08:00
|
|
|
}
|
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/*
 * Record a freshly started process in nsjconf->pids.
 *
 * Stores the pid, the current time as the start time, the textual/remote address obtained
 * from net::connToText() for 'sock', and a descriptor opened on /proc/<pid>/syscall (the
 * result of open() is stored as-is, without being checked).
 */
static void addProc(nsjconf_t* nsjconf, pid_t pid, int sock) {
    pids_t entry;

    entry.pid = pid;
    entry.start = time(NULL);
    entry.remote_txt = net::connToText(sock, /* remote= */ true, &entry.remote_addr);

    char syscall_path[PATH_MAX];
    snprintf(syscall_path, sizeof(syscall_path), "/proc/%d/syscall", (int)pid);
    entry.pid_syscall_fd = TEMP_FAILURE_RETRY(open(syscall_path, O_RDONLY | O_CLOEXEC));

    nsjconf->pids.push_back(entry);

    LOG_D("Added pid '%d' with start time '%u' to the queue for IP: '%s'", entry.pid,
        (unsigned int)entry.start, entry.remote_txt.c_str());
}
|
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/*
 * Drop the entry for 'pid' from nsjconf->pids, closing its /proc/<pid>/syscall descriptor.
 * Logs a warning when no entry with that pid exists.
 */
static void removeProc(nsjconf_t* nsjconf, pid_t pid) {
    auto it = nsjconf->pids.begin();
    while (it != nsjconf->pids.end() && it->pid != pid) {
        ++it;
    }

    if (it == nsjconf->pids.end()) {
        LOG_W("PID: %d not found (?)", pid);
        return;
    }

    LOG_D("Removing pid '%d' from the queue (IP:'%s', start time:'%s')", it->pid,
        it->remote_txt.c_str(), util::timeToStr(it->start).c_str());
    close(it->pid_syscall_fd);
    nsjconf->pids.erase(it);
}
|
|
|
|
|
2018-02-11 03:32:04 +08:00
|
|
|
/* Number of processes currently tracked in nsjconf->pids */
int countProc(nsjconf_t* nsjconf) {
    const auto tracked = nsjconf->pids.size();
    return static_cast<int>(tracked);
}
|
2015-05-15 05:44:48 +08:00
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/*
 * Log a summary of all tracked processes: the total count, and for each process its PID,
 * remote host text, run time so far and the time left before the time limit is hit.
 *
 * "Time left" is 0 when no time limit is configured (tlimit == 0) and goes negative once a
 * process has outlived its limit.
 */
void displayProc(nsjconf_t* nsjconf) {
    LOG_I("Total number of spawned namespaces: %d", countProc(nsjconf));

    time_t now = time(NULL);
    for (const auto& pid : nsjconf->pids) {
        time_t diff = now - pid.start;
        /*
         * Signed arithmetic: the value is printed with PRId64, and an unsigned subtraction
         * would wrap around for a process that has already exceeded the limit.
         */
        int64_t left = nsjconf->tlimit ? (int64_t)nsjconf->tlimit - (int64_t)diff : 0;
        LOG_I("PID: %d, Remote host: %s, Run time: %ld sec. (time left: %" PRId64 " sec.)",
            pid.pid, pid.remote_txt.c_str(), (long)diff, left);
    }
}
|
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/*
 * Look up the entry for 'pid' in nsjconf->pids.
 * Returns a pointer to the first matching element, or NULL when there is none.
 */
static const pids_t* getPidElem(nsjconf_t* nsjconf, pid_t pid) {
    const pids_t* found = NULL;
    for (auto it = nsjconf->pids.begin(); found == NULL && it != nsjconf->pids.end(); ++it) {
        if (it->pid == pid) {
            found = &(*it);
        }
    }
    return found;
}
|
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/*
 * Log details about a child that was killed with SIGSYS.
 *
 * Besides the siginfo fields, the function tries to read the descriptor opened on the
 * child's /proc/<pid>/syscall (see addProc()) and to parse it. Depending on how many fields
 * parse, it logs the full syscall number/arguments/SP/PC, only SP/PC, or the raw string.
 */
static void seccompViolation(nsjconf_t* nsjconf, siginfo_t* si) {
    LOG_W("PID: %d commited a syscall/seccomp violation and exited with SIGSYS", si->si_pid);

    const pids_t* entry = getPidElem(nsjconf, si->si_pid);
    if (!entry) {
        LOG_W("PID:%d SiSyscall: %d, SiCode: %d, SiErrno: %d", (int)si->si_pid,
            si->si_syscall, si->si_code, si->si_errno);
        LOG_E("Couldn't find pid element in the subproc list for PID: %d", (int)si->si_pid);
        return;
    }

    char line[4096];
    const ssize_t nread = util::readFromFd(entry->pid_syscall_fd, line, sizeof(line) - 1);
    if (nread < 1) {
        LOG_W("PID: %d, SiSyscall: %d, SiCode: %d, SiErrno: %d", (int)si->si_pid,
            si->si_syscall, si->si_code, si->si_errno);
        return;
    }
    /* Terminate the string, replacing the last byte that was read */
    line[nread - 1] = '\0';

    ptrdiff_t sc;
    uintptr_t args[6];
    uintptr_t sp;
    uintptr_t pc;
    const int parsed = sscanf(line, "%td %tx %tx %tx %tx %tx %tx %tx %tx", &sc, &args[0],
        &args[1], &args[2], &args[3], &args[4], &args[5], &sp, &pc);

    switch (parsed) {
    case 9:
        LOG_W(
            "PID: %d, Syscall number: %td, Arguments: %#tx, %#tx, %#tx, %#tx, %#tx, %#tx, "
            "SP: %#tx, PC: %#tx, si_syscall: %d, si_errno: %#x",
            (int)si->si_pid, sc, args[0], args[1], args[2], args[3], args[4], args[5], sp, pc,
            si->si_syscall, si->si_errno);
        break;
    case 3:
        /* Only three fields parsed: the two values after the first are reported as SP and PC */
        LOG_W("PID: %d, SiSyscall: %d, SiCode: %d, SiErrno: %d, SP: %#tx, PC: %#tx",
            (int)si->si_pid, si->si_syscall, si->si_code, si->si_errno, args[0], args[1]);
        break;
    default:
        LOG_W("PID: %d, SiSyscall: %d, SiCode: %d, SiErrno: %d, Syscall string '%s'",
            (int)si->si_pid, si->si_syscall, si->si_code, si->si_errno, line);
        break;
    }
}
|
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/*
 * Collect every child that has already terminated and enforce the time limit on the rest.
 *
 * For each terminated child: reports seccomp (SIGSYS) kills, lets the cgroup code clean up,
 * logs how the child ended and removes it from nsjconf->pids. Afterwards, when a time limit
 * is configured, every tracked process that has been running for at least nsjconf->tlimit
 * seconds is sent SIGCONT followed by SIGKILL.
 *
 * Returns the exit status of the last child reaped in this call, 128 + signal number if that
 * child was killed by a signal, or 0 when nothing was reaped.
 */
int reapProc(nsjconf_t* nsjconf) {
    int status;
    int rv = 0;
    siginfo_t si;

    for (;;) {
        /* si_pid stays 0 when waitid() succeeds but no child has changed state */
        si.si_pid = 0;
        /* WNOWAIT: only peek here, the child is actually reaped by wait4() below */
        if (waitid(P_ALL, 0, &si, WNOHANG | WNOWAIT | WEXITED) == -1) {
            break;
        }
        if (si.si_pid == 0) {
            break;
        }
        if (si.si_code == CLD_KILLED && si.si_status == SIGSYS) {
            seccompViolation(nsjconf, &si);
        }

        if (wait4(si.si_pid, &status, WNOHANG, NULL) == si.si_pid) {
            cgroup::finishFromParent(nsjconf, si.si_pid);

            /* Copy the text now: removeProc() below erases the element it lives in */
            std::string remote_txt = "[UNKNOWN]";
            const pids_t* elem = getPidElem(nsjconf, si.si_pid);
            if (elem) {
                remote_txt = elem->remote_txt;
            }

            if (WIFEXITED(status)) {
                LOG_I("PID: %d (%s) exited with status: %d, (PIDs left: %d)",
                    si.si_pid, remote_txt.c_str(), WEXITSTATUS(status),
                    countProc(nsjconf) - 1);
                removeProc(nsjconf, si.si_pid);
                rv = WEXITSTATUS(status);
            }
            if (WIFSIGNALED(status)) {
                LOG_I(
                    "PID: %d (%s) terminated with signal: %s (%d), (PIDs left: %d)",
                    si.si_pid, remote_txt.c_str(),
                    util::sigName(WTERMSIG(status)).c_str(), WTERMSIG(status),
                    countProc(nsjconf) - 1);
                removeProc(nsjconf, si.si_pid);
                rv = 128 + WTERMSIG(status);
            }
        }
    }

    /* No time limit configured: nothing to enforce */
    if (nsjconf->tlimit == 0) {
        return rv;
    }

    time_t now = time(NULL);
    for (const auto& p : nsjconf->pids) {
        pid_t pid = p.pid;
        time_t diff = now - p.start;
        if ((uint64_t)diff >= nsjconf->tlimit) {
            /* The limit is cast to int64_t so that the argument matches PRId64 everywhere */
            LOG_I("PID: %d run time >= time limit (%ld >= %" PRId64
                  ") (%s). Killing it",
                pid, (long)diff, (int64_t)nsjconf->tlimit, p.remote_txt.c_str());
            /*
             * Probably a kernel bug - some processes cannot be killed with KILL if
             * they're namespaced, and in a stopped state
             */
            kill(pid, SIGCONT);
            PLOG_D("Sent SIGCONT to PID: %d", pid);
            kill(pid, SIGKILL);
            PLOG_D("Sent SIGKILL to PID: %d", pid);
        }
    }
    return rv;
}
|
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/* Send SIGKILL to every process tracked in nsjconf->pids (the result of kill() is ignored) */
void killAll(nsjconf_t* nsjconf) {
    for (auto it = nsjconf->pids.begin(); it != nsjconf->pids.end(); ++it) {
        kill(it->pid, SIGKILL);
    }
}
|
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/*
 * Parent-side setup for the freshly cloned process 'pid'.
 *
 * Runs the net, cgroup and user "from parent" initializers in that order and then writes
 * kSubprocDoneChar to 'pipefd', which releases the child waiting in subprocNewProc().
 *
 * Returns true when everything succeeded, false (after logging) on a net/user/pipe failure.
 * NOTE(review): a cgroup failure terminates the whole process via exit(0xff) instead of
 * returning false - confirm whether that asymmetry is intentional.
 */
static bool initParent(nsjconf_t* nsjconf, pid_t pid, int pipefd) {
    const bool net_ok = net::initNsFromParent(nsjconf, pid);
    if (!net_ok) {
        LOG_E("Couldn't initialize net namespace for pid '%d'", pid);
        return false;
    }

    const bool cgroup_ok = cgroup::initNsFromParent(nsjconf, pid);
    if (!cgroup_ok) {
        LOG_E("Couldn't initialize cgroup user namespace for pid '%d'", pid);
        exit(0xff);
    }

    const bool user_ok = user::initNsFromParent(nsjconf, pid);
    if (!user_ok) {
        LOG_E("Couldn't initialize user namespace for pid %d", pid);
        return false;
    }

    /* Tell the child that it may continue */
    const auto written = util::writeToFd(pipefd, &kSubprocDoneChar, sizeof(kSubprocDoneChar));
    if (written != sizeof(kSubprocDoneChar)) {
        LOG_E("Couldn't signal the new process via a socketpair");
        return false;
    }

    return true;
}
|
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
/*
 * Start one jailed process.
 *
 *  nsjconf - jail configuration (selects the namespaces to create and the run mode)
 *  fd_in, fd_out, fd_err - descriptors passed on to the new process; fd_in is also the
 *                          connection checked by net::limitConns() and recorded by addProc()
 *
 * In MODE_STANDALONE_EXECVE the calling process itself unshares the namespaces and becomes
 * the jailed process (subprocNewProc() does not return). Otherwise a child is cloned, tracked
 * with addProc(), initialized from the parent side and released via a socketpair.
 */
void runChild(nsjconf_t* nsjconf, int fd_in, int fd_out, int fd_err) {
    if (!net::limitConns(nsjconf, fd_in)) {
        return;
    }

    unsigned long flags = 0UL;
    flags |= (nsjconf->clone_newnet ? CLONE_NEWNET : 0);
    flags |= (nsjconf->clone_newuser ? CLONE_NEWUSER : 0);
    flags |= (nsjconf->clone_newns ? CLONE_NEWNS : 0);
    flags |= (nsjconf->clone_newpid ? CLONE_NEWPID : 0);
    flags |= (nsjconf->clone_newipc ? CLONE_NEWIPC : 0);
    flags |= (nsjconf->clone_newuts ? CLONE_NEWUTS : 0);
    flags |= (nsjconf->clone_newcgroup ? CLONE_NEWCGROUP : 0);

    if (nsjconf->mode == MODE_STANDALONE_EXECVE) {
        LOG_D("Entering namespace with flags:%s", cloneFlagsToStr(flags).c_str());
        if (unshare(flags) == -1) {
            PLOG_E("unshare(%s)", cloneFlagsToStr(flags).c_str());
            _exit(0xff);
        }
        /* Does not return: ends in exec or _exit() */
        subprocNewProc(nsjconf, fd_in, fd_out, fd_err, -1);
    }

    flags |= SIGCHLD;
    LOG_D("Creating new process with clone flags:%s", cloneFlagsToStr(flags).c_str());

    /* Used by the parent to tell the child when the parent-side setup is finished */
    int sv[2];
    if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, sv) == -1) {
        PLOG_E("socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC) failed");
        return;
    }
    int child_fd = sv[0];
    int parent_fd = sv[1];

    pid_t pid = cloneProc(flags);
    if (pid == 0) {
        close(parent_fd);
        /* Does not return: ends in exec or _exit() */
        subprocNewProc(nsjconf, fd_in, fd_out, fd_err, child_fd);
    }
    if (pid == -1) {
        /*
         * Report the failure before touching anything else, and keep errno around so that
         * both messages describe the clone() error rather than a later call.
         */
        const int clone_errno = errno;
        if (flags & CLONE_NEWCGROUP) {
            PLOG_E(
                "nsjail tried to use the CLONE_NEWCGROUP clone flag, which is "
                "supported under kernel versions >= 4.6 only. Try disabling this flag");
        }
        errno = clone_errno;
        PLOG_E(
            "clone(flags=%s) failed. You probably need root privileges if your system "
            "doesn't support CLONE_NEWUSER. Alternatively, you might want to recompile "
            "your kernel with support for namespaces or check the setting of the "
            "kernel.unprivileged_userns_clone sysctl",
            cloneFlagsToStr(flags).c_str());
        close(child_fd);
        close(parent_fd);
        return;
    }
    close(child_fd);

    addProc(nsjconf, pid, fd_in);

    if (!initParent(nsjconf, pid, parent_fd)) {
        /* Closing our end without the "done" byte makes the waiting child give up */
        close(parent_fd);
        return;
    }

    close(parent_fd);
}
|
2016-10-12 08:01:12 +08:00
|
|
|
|
2018-02-10 00:03:02 +08:00
|
|
|
/*
|
|
|
|
* Will be used inside the child process only, so it's safe to have it in BSS.
|
|
|
|
* Some CPU archs (e.g. aarch64) must have it aligned. Size: 128 KiB (/2)
|
|
|
|
*/
|
2018-02-10 01:45:50 +08:00
|
|
|
/* cloneProc() hands the midpoint of this buffer to clone() as the child's stack */
static uint8_t cloneStack[128 * 1024] __attribute__((aligned(__BIGGEST_ALIGNMENT__)));
|
2018-02-10 00:03:02 +08:00
|
|
|
/* Cannot be on the stack, as the child's stack pointer will change after clone() */
|
|
|
|
/* Context saved by setjmp() in cloneProc() and jumped back to by longjmp() in cloneFunc() */
static __thread jmp_buf env;
|
|
|
|
|
2018-02-10 01:45:50 +08:00
|
|
|
static int cloneFunc(void* arg __attribute__((unused))) {
|
2018-02-10 00:03:02 +08:00
|
|
|
longjmp(env, 1);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Avoid problems with caching of PID/TID in glibc - when using syscall(__NR_clone) glibc doesn't
|
|
|
|
* update the internal PID/TID caches, what can lead to invalid values being returned by getpid()
|
|
|
|
* or incorrect PID/TIDs used in raise()/abort() functions
|
|
|
|
*/
|
2018-02-10 01:45:50 +08:00
|
|
|
pid_t cloneProc(uintptr_t flags) {
|
2018-02-10 00:03:02 +08:00
|
|
|
if (flags & CLONE_VM) {
|
|
|
|
LOG_E("Cannot use clone(flags & CLONE_VM)");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (setjmp(env) == 0) {
|
2018-02-11 04:19:47 +08:00
|
|
|
LOG_D("Cloning process with flags:%s", cloneFlagsToStr(flags).c_str());
|
2018-02-10 00:03:02 +08:00
|
|
|
/*
|
|
|
|
* Avoid the problem of the stack growing up/down under different CPU architectures,
|
|
|
|
* by using middle of the static stack buffer (which is temporary, and used only
|
2018-02-10 01:45:50 +08:00
|
|
|
* inside of the cloneFunc()
|
2018-02-10 00:03:02 +08:00
|
|
|
*/
|
2018-02-10 01:45:50 +08:00
|
|
|
void* stack = &cloneStack[sizeof(cloneStack) / 2];
|
2018-02-10 00:03:02 +08:00
|
|
|
/* Parent */
|
2018-02-10 01:45:50 +08:00
|
|
|
return clone(cloneFunc, stack, flags, NULL, NULL, NULL);
|
2018-02-10 00:03:02 +08:00
|
|
|
}
|
|
|
|
/* Child */
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-02-11 11:02:14 +08:00
|
|
|
int systemExe(const std::vector<std::string>& args, char** env) {
|
2016-10-12 08:01:12 +08:00
|
|
|
bool exec_failed = false;
|
|
|
|
|
2018-02-11 11:02:14 +08:00
|
|
|
std::vector<const char*> argv;
|
|
|
|
for (const auto& a : args) {
|
2018-02-11 11:02:43 +08:00
|
|
|
argv.push_back(a.c_str());
|
2018-02-11 11:02:14 +08:00
|
|
|
}
|
|
|
|
argv.push_back(nullptr);
|
|
|
|
|
2016-10-12 08:01:12 +08:00
|
|
|
int sv[2];
|
|
|
|
if (pipe2(sv, O_CLOEXEC) == -1) {
|
|
|
|
PLOG_W("pipe2(sv, O_CLOEXEC");
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
pid_t pid = fork();
|
|
|
|
if (pid == -1) {
|
|
|
|
PLOG_W("fork()");
|
|
|
|
close(sv[0]);
|
|
|
|
close(sv[1]);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pid == 0) {
|
|
|
|
close(sv[0]);
|
2018-02-11 11:02:14 +08:00
|
|
|
execve(argv[0], (char* const*)argv.data(), (char* const*)env);
|
2016-10-12 08:01:12 +08:00
|
|
|
PLOG_W("execve('%s')", argv[0]);
|
2018-02-10 01:45:50 +08:00
|
|
|
util::writeToFd(sv[1], "A", 1);
|
2016-10-12 08:01:12 +08:00
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
close(sv[1]);
|
|
|
|
char buf[1];
|
2018-02-10 01:45:50 +08:00
|
|
|
if (util::readFromFd(sv[0], buf, sizeof(buf)) > 0) {
|
2016-10-12 08:01:12 +08:00
|
|
|
exec_failed = true;
|
|
|
|
LOG_W("Couldn't execute '%s'", argv[0]);
|
|
|
|
}
|
|
|
|
close(sv[0]);
|
|
|
|
|
|
|
|
for (;;) {
|
|
|
|
int status;
|
|
|
|
int ret = wait4(pid, &status, __WALL, NULL);
|
|
|
|
if (ret == -1 && errno == EINTR) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (ret == -1) {
|
|
|
|
PLOG_W("wait4(pid=%d)", pid);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (WIFEXITED(status)) {
|
|
|
|
int exit_code = WEXITSTATUS(status);
|
|
|
|
LOG_D("PID %d exited with exit code: %d", pid, exit_code);
|
2017-10-18 21:41:16 +08:00
|
|
|
if (exec_failed) {
|
2016-10-12 08:01:12 +08:00
|
|
|
return -1;
|
|
|
|
} else if (exit_code == 0) {
|
|
|
|
return 0;
|
|
|
|
} else {
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (WIFSIGNALED(status)) {
|
|
|
|
int exit_signal = WTERMSIG(status);
|
2017-06-20 06:16:38 +08:00
|
|
|
LOG_W("PID %d killed by signal: %d (%s)", pid, exit_signal,
|
2018-02-11 04:19:47 +08:00
|
|
|
util::sigName(exit_signal).c_str());
|
2016-10-12 08:01:12 +08:00
|
|
|
return 2;
|
|
|
|
}
|
|
|
|
LOG_W("Unknown exit status: %d", status);
|
|
|
|
}
|
|
|
|
}
|
2018-02-10 01:45:50 +08:00
|
|
|
|
|
|
|
} // namespace subproc
|