Enable support for clone3() and for CLONE_NEWTIME
This commit is contained in:
parent
4be9595234
commit
d1f332b911
@ -231,7 +231,8 @@ void logParams(nsjconf_t* nsjconf) {
|
||||
"max_conns:%u, max_conns_per_ip:%u, time_limit:%" PRId64
|
||||
", personality:%#lx, daemonize:%s, clone_newnet:%s, "
|
||||
"clone_newuser:%s, clone_newns:%s, clone_newpid:%s, clone_newipc:%s, clone_newuts:%s, "
|
||||
"clone_newcgroup:%s, clone_newtime:%s, keep_caps:%s, disable_no_new_privs:%s, max_cpus:%zu",
|
||||
"clone_newcgroup:%s, clone_newtime:%s, keep_caps:%s, disable_no_new_privs:%s, "
|
||||
"max_cpus:%zu",
|
||||
nsjconf->hostname.c_str(), nsjconf->chroot.c_str(),
|
||||
nsjconf->exec_file.empty() ? nsjconf->argv[0].c_str() : nsjconf->exec_file.c_str(),
|
||||
nsjconf->bindhost.c_str(), nsjconf->port, nsjconf->max_conns, nsjconf->max_conns_per_ip,
|
||||
@ -239,8 +240,9 @@ void logParams(nsjconf_t* nsjconf) {
|
||||
logYesNo(nsjconf->clone_newnet), logYesNo(nsjconf->clone_newuser),
|
||||
logYesNo(nsjconf->clone_newns), logYesNo(nsjconf->clone_newpid),
|
||||
logYesNo(nsjconf->clone_newipc), logYesNo(nsjconf->clone_newuts),
|
||||
logYesNo(nsjconf->clone_newcgroup), logYesNo(nsjconf->clone_newtime), logYesNo(nsjconf->keep_caps),
|
||||
logYesNo(nsjconf->disable_no_new_privs), nsjconf->max_cpus);
|
||||
logYesNo(nsjconf->clone_newcgroup), logYesNo(nsjconf->clone_newtime),
|
||||
logYesNo(nsjconf->keep_caps), logYesNo(nsjconf->disable_no_new_privs),
|
||||
nsjconf->max_cpus);
|
||||
|
||||
for (const auto& p : nsjconf->mountpts) {
|
||||
LOG_I(
|
||||
|
@ -177,7 +177,7 @@ message NsJailConfig {
|
||||
optional bool clone_newuts = 52 [default = true];
|
||||
/* Disable for kernel versions < 4.6 as it's not supported there */
|
||||
optional bool clone_newcgroup = 53 [default = true];
|
||||
/* Supported with kernel versions >= 5.3 */
|
||||
/* Time namespaces need kernel versions >= 5.6; CLONE_NEWTIME is only usable via clone3() (itself >= 5.3) */
|
||||
optional bool clone_newtime = 86 [default = false];
|
||||
|
||||
/* Mappings for UIDs and GIDs. See the description for 'msg IdMap'
|
||||
|
2
mnt.cc
2
mnt.cc
@ -453,7 +453,7 @@ bool initNs(nsjconf_t* nsjconf) {
|
||||
return initNsInternal(nsjconf);
|
||||
}
|
||||
|
||||
pid_t pid = subproc::cloneProc(CLONE_FS | SIGCHLD);
|
||||
pid_t pid = subproc::cloneProc(CLONE_FS, SIGCHLD);
|
||||
if (pid == -1) {
|
||||
return false;
|
||||
}
|
||||
|
2
pid.cc
2
pid.cc
@ -48,7 +48,7 @@ bool initNs(nsjconf_t* nsjconf) {
|
||||
* first clone/fork will work, and the rest will fail with ENOMEM (see 'man pid_namespaces'
|
||||
* for details on this behavior)
|
||||
*/
|
||||
pid_t pid = subproc::cloneProc(CLONE_FS);
|
||||
pid_t pid = subproc::cloneProc(CLONE_FS, 0);
|
||||
if (pid == -1) {
|
||||
PLOG_E("Couldn't create a dummy init process");
|
||||
return false;
|
||||
|
71
subproc.cc
71
subproc.cc
@ -100,18 +100,20 @@ static const std::string cloneFlagsToStr(uintptr_t flags) {
|
||||
NS_VALSTR_STRUCT(CLONE_IO),
|
||||
};
|
||||
|
||||
uintptr_t knownFlagMask = CSIGNAL;
|
||||
uintptr_t knownFlagMask = 0;
|
||||
for (const auto& i : cloneFlags) {
|
||||
if (flags & i.flag) {
|
||||
res.append(i.name).append("|");
|
||||
if (!res.empty()) {
|
||||
res.append("|");
|
||||
}
|
||||
res.append(i.name);
|
||||
}
|
||||
knownFlagMask |= i.flag;
|
||||
}
|
||||
|
||||
if (flags & ~(knownFlagMask)) {
|
||||
util::StrAppend(&res, "%#tx|", flags & ~(knownFlagMask));
|
||||
util::StrAppend(&res, "|%#tx", flags & ~(knownFlagMask));
|
||||
}
|
||||
res.append(util::sigName(flags & CSIGNAL).c_str());
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -444,8 +446,8 @@ pid_t runChild(nsjconf_t* nsjconf, int netfd, int fd_in, int fd_out, int fd_err)
|
||||
LOG_F("Launching new process failed");
|
||||
}
|
||||
|
||||
flags |= SIGCHLD;
|
||||
LOG_D("Creating new process with clone flags:%s", cloneFlagsToStr(flags).c_str());
|
||||
LOG_D("Creating new process with clone flags:%s and exit_signal:SIGCHLD",
|
||||
cloneFlagsToStr(flags).c_str());
|
||||
|
||||
int sv[2];
|
||||
if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, sv) == -1) {
|
||||
@ -455,7 +457,7 @@ pid_t runChild(nsjconf_t* nsjconf, int netfd, int fd_in, int fd_out, int fd_err)
|
||||
int child_fd = sv[0];
|
||||
int parent_fd = sv[1];
|
||||
|
||||
pid_t pid = cloneProc(flags);
|
||||
pid_t pid = cloneProc(flags, SIGCHLD);
|
||||
if (pid == 0) {
|
||||
close(parent_fd);
|
||||
subprocNewProc(nsjconf, netfd, fd_in, fd_out, fd_err, child_fd);
|
||||
@ -464,21 +466,20 @@ pid_t runChild(nsjconf_t* nsjconf, int netfd, int fd_in, int fd_out, int fd_err)
|
||||
}
|
||||
close(child_fd);
|
||||
if (pid == -1) {
|
||||
auto saved_errno = errno;
|
||||
PLOG_W("clone(flags=%s) failed", cloneFlagsToStr(flags).c_str());
|
||||
if (flags & CLONE_NEWCGROUP) {
|
||||
auto saved_errno = errno;
|
||||
PLOG_E(
|
||||
LOG_W(
|
||||
"nsjail tried to use the CLONE_NEWCGROUP clone flag, which is "
|
||||
"supported under kernel versions >= 4.6 only. Try disabling this flag");
|
||||
errno = saved_errno;
|
||||
"supported under kernel versions >= 4.6 only");
|
||||
} else if (flags & CLONE_NEWTIME) {
|
||||
LOG_W(
|
||||
"nsjail tried to use the CLONE_NEWTIME clone flag, which is "
|
||||
"supported under kernel versions >= 5.13 only");
|
||||
}
|
||||
PLOG_E(
|
||||
"clone(flags=%s) failed. You probably need root privileges if your system "
|
||||
"doesn't support CLONE_NEWUSER. Alternatively, you might want to recompile "
|
||||
"your kernel with support for namespaces or check the current value of the "
|
||||
"kernel.unprivileged_userns_clone sysctl",
|
||||
cloneFlagsToStr(flags).c_str());
|
||||
close(parent_fd);
|
||||
return -1;
|
||||
errno = saved_errno;
|
||||
return pid;
|
||||
}
|
||||
addProc(nsjconf, pid, netfd);
|
||||
|
||||
@ -517,9 +518,39 @@ static int cloneFunc(void* arg __attribute__((unused))) {
|
||||
* update the internal PID/TID caches, what can lead to invalid values being returned by getpid()
|
||||
* or incorrect PID/TIDs used in raise()/abort() functions
|
||||
*/
|
||||
pid_t cloneProc(uintptr_t flags) {
|
||||
pid_t cloneProc(uintptr_t flags, int exit_signal) {
|
||||
exit_signal &= CSIGNAL;
|
||||
|
||||
if (flags & CLONE_VM) {
|
||||
LOG_E("Cannot use clone(flags & CLONE_VM)");
|
||||
errno = 0;
|
||||
return -1;
|
||||
}
|
||||
|
||||
#if defined(__NR_clone3)
|
||||
struct clone_args ca = {
|
||||
.flags = (uint64_t)flags,
|
||||
.pidfd = 0,
|
||||
.child_tid = 0,
|
||||
.parent_tid = 0,
|
||||
.exit_signal = (uint64_t)exit_signal,
|
||||
.stack = 0,
|
||||
.stack_size = 0,
|
||||
.tls = 0,
|
||||
.set_tid = 0,
|
||||
.set_tid_size = 0,
|
||||
.cgroup = 0,
|
||||
};
|
||||
|
||||
pid_t ret = util::syscall(__NR_clone3, (uintptr_t)&ca, sizeof(ca));
|
||||
if (ret != -1 || errno != ENOSYS) {
|
||||
return ret;
|
||||
}
|
||||
#endif /* defined(__NR_clone3) */
|
||||
|
||||
if (flags & CLONE_NEWTIME) {
|
||||
LOG_E("CLONE_NEWTIME was requested but clone3() is not supported");
|
||||
errno = 0;
|
||||
return -1;
|
||||
}
|
||||
|
||||
@ -532,7 +563,7 @@ pid_t cloneProc(uintptr_t flags) {
|
||||
*/
|
||||
void* stack = &cloneStack[sizeof(cloneStack) / 2];
|
||||
/* Parent */
|
||||
return clone(cloneFunc, stack, flags, NULL, NULL, NULL);
|
||||
return clone(cloneFunc, stack, flags | exit_signal, NULL, NULL, NULL);
|
||||
}
|
||||
/* Child */
|
||||
return 0;
|
||||
|
@ -41,7 +41,7 @@ void killAndReapAll(nsjconf_t* nsjconf);
|
||||
/* Returns the exit code of the first failing subprocess, or 0 if none fail */
|
||||
int reapProc(nsjconf_t* nsjconf);
|
||||
int systemExe(const std::vector<std::string>& args, char** env);
|
||||
pid_t cloneProc(uintptr_t flags);
|
||||
pid_t cloneProc(uintptr_t flags, int exit_signal);
|
||||
|
||||
} // namespace subproc
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user