2016-03-03 22:37:04 +08:00
|
|
|
/*
|
|
|
|
|
|
|
|
nsjail - CLONE_NEWNS routines
|
|
|
|
-----------------------------------------
|
|
|
|
|
|
|
|
Copyright 2014 Google Inc. All Rights Reserved.
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
2018-02-10 01:26:16 +08:00
|
|
|
#include "mnt.h"
|
2016-03-03 22:37:04 +08:00
|
|
|
|
|
|
|
#include <errno.h>
|
|
|
|
#include <fcntl.h>
|
2017-10-18 20:46:17 +08:00
|
|
|
#include <inttypes.h>
|
2017-10-18 20:27:34 +08:00
|
|
|
#include <limits.h>
|
2016-07-21 21:48:47 +08:00
|
|
|
#include <linux/sched.h>
|
2016-05-13 04:25:48 +08:00
|
|
|
#include <sched.h>
|
2017-10-18 20:46:17 +08:00
|
|
|
#include <signal.h>
|
2016-03-03 22:37:04 +08:00
|
|
|
#include <stdio.h>
|
2017-09-14 04:03:21 +08:00
|
|
|
#include <stdlib.h>
|
2016-03-03 22:37:04 +08:00
|
|
|
#include <string.h>
|
|
|
|
#include <sys/mount.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#include <sys/statvfs.h>
|
|
|
|
#include <sys/syscall.h>
|
|
|
|
#include <sys/types.h>
|
2016-05-13 04:25:48 +08:00
|
|
|
#include <sys/wait.h>
|
2016-03-03 22:37:04 +08:00
|
|
|
#include <unistd.h>
|
|
|
|
|
2018-04-28 05:58:53 +08:00
|
|
|
#include <memory>
|
2018-02-11 04:19:47 +08:00
|
|
|
#include <string>
|
|
|
|
|
2018-02-11 00:49:15 +08:00
|
|
|
#include "logs.h"
|
2018-02-10 12:25:55 +08:00
|
|
|
#include "macros.h"
|
2018-02-10 01:26:16 +08:00
|
|
|
#include "subproc.h"
|
2018-02-10 01:45:50 +08:00
|
|
|
#include "util.h"
|
2018-02-10 01:26:16 +08:00
|
|
|
|
|
|
|
namespace mnt {
|
2016-03-03 22:37:04 +08:00
|
|
|
|
2017-05-21 23:37:18 +08:00
|
|
|
#if !defined(MS_LAZYTIME)
|
2017-09-14 04:03:21 +08:00
|
|
|
#define MS_LAZYTIME (1 << 25)
|
2017-10-09 05:00:45 +08:00
|
|
|
#endif /* if !defined(MS_LAZYTIME) */
|
2017-05-21 23:37:18 +08:00
|
|
|
|
2022-10-24 19:12:20 +08:00
|
|
|
static const std::string flagsToStr(unsigned long flags) {
|
2018-02-11 04:19:47 +08:00
|
|
|
std::string res;
|
2017-05-21 23:37:18 +08:00
|
|
|
|
2018-05-22 20:27:18 +08:00
|
|
|
struct {
|
2022-10-24 19:12:20 +08:00
|
|
|
const unsigned long flag;
|
2017-10-09 05:03:02 +08:00
|
|
|
const char* const name;
|
2018-05-22 20:27:18 +08:00
|
|
|
} static const mountFlags[] = {
|
2023-09-19 14:45:42 +08:00
|
|
|
NS_VALSTR_STRUCT(MS_RDONLY),
|
|
|
|
NS_VALSTR_STRUCT(MS_NOSUID),
|
|
|
|
NS_VALSTR_STRUCT(MS_NODEV),
|
|
|
|
NS_VALSTR_STRUCT(MS_NOEXEC),
|
|
|
|
NS_VALSTR_STRUCT(MS_SYNCHRONOUS),
|
|
|
|
NS_VALSTR_STRUCT(MS_REMOUNT),
|
|
|
|
NS_VALSTR_STRUCT(MS_MANDLOCK),
|
|
|
|
NS_VALSTR_STRUCT(MS_DIRSYNC),
|
|
|
|
NS_VALSTR_STRUCT(MS_NOATIME),
|
|
|
|
NS_VALSTR_STRUCT(MS_NODIRATIME),
|
|
|
|
NS_VALSTR_STRUCT(MS_BIND),
|
|
|
|
NS_VALSTR_STRUCT(MS_MOVE),
|
|
|
|
NS_VALSTR_STRUCT(MS_REC),
|
|
|
|
NS_VALSTR_STRUCT(MS_SILENT),
|
|
|
|
NS_VALSTR_STRUCT(MS_POSIXACL),
|
|
|
|
NS_VALSTR_STRUCT(MS_UNBINDABLE),
|
|
|
|
NS_VALSTR_STRUCT(MS_PRIVATE),
|
|
|
|
NS_VALSTR_STRUCT(MS_SLAVE),
|
|
|
|
NS_VALSTR_STRUCT(MS_SHARED),
|
|
|
|
NS_VALSTR_STRUCT(MS_RELATIME),
|
|
|
|
NS_VALSTR_STRUCT(MS_KERNMOUNT),
|
|
|
|
NS_VALSTR_STRUCT(MS_I_VERSION),
|
|
|
|
NS_VALSTR_STRUCT(MS_STRICTATIME),
|
|
|
|
NS_VALSTR_STRUCT(MS_LAZYTIME),
|
2021-09-30 22:44:48 +08:00
|
|
|
#if defined(MS_ACTIVE)
|
2023-09-19 14:45:42 +08:00
|
|
|
NS_VALSTR_STRUCT(MS_ACTIVE),
|
2021-09-30 22:44:48 +08:00
|
|
|
#endif /* defined(MS_ACTIVE) */
|
|
|
|
#if defined(MS_NOUSER)
|
2023-09-19 14:45:42 +08:00
|
|
|
NS_VALSTR_STRUCT(MS_NOUSER),
|
2021-09-30 22:44:48 +08:00
|
|
|
#endif /* defined(MS_NOUSER) */
|
2017-10-09 05:03:02 +08:00
|
|
|
};
|
2017-05-21 23:37:18 +08:00
|
|
|
|
2022-10-24 19:12:20 +08:00
|
|
|
unsigned knownFlagMask = 0U;
|
2018-05-22 20:27:18 +08:00
|
|
|
for (const auto& i : mountFlags) {
|
|
|
|
if (flags & i.flag) {
|
2018-10-30 08:44:08 +08:00
|
|
|
if (!res.empty()) {
|
|
|
|
res.append("|");
|
|
|
|
}
|
2018-05-22 20:27:18 +08:00
|
|
|
res.append(i.name);
|
2017-05-21 23:37:18 +08:00
|
|
|
}
|
2018-05-22 20:27:18 +08:00
|
|
|
knownFlagMask |= i.flag;
|
2017-05-21 23:37:18 +08:00
|
|
|
}
|
2018-02-11 04:19:47 +08:00
|
|
|
|
2018-10-30 08:44:08 +08:00
|
|
|
if (flags & ~(knownFlagMask)) {
|
2022-10-24 19:12:20 +08:00
|
|
|
util::StrAppend(&res, "|%#lx", flags & ~(knownFlagMask));
|
2018-04-29 06:58:35 +08:00
|
|
|
}
|
2018-02-11 04:19:47 +08:00
|
|
|
|
|
|
|
return res;
|
2017-05-21 23:37:18 +08:00
|
|
|
}
|
|
|
|
|
2018-02-10 01:26:16 +08:00
|
|
|
static bool isDir(const char* path) {
|
2016-06-19 07:35:06 +08:00
|
|
|
/*
|
2016-08-17 03:12:23 +08:00
|
|
|
* If the source dir is NULL, we assume it's a dir (for /proc and tmpfs)
|
2016-06-19 07:35:06 +08:00
|
|
|
*/
|
2023-10-20 20:15:36 +08:00
|
|
|
if (path == nullptr) {
|
2016-06-19 07:35:06 +08:00
|
|
|
return true;
|
2016-03-03 22:37:04 +08:00
|
|
|
}
|
|
|
|
struct stat st;
|
|
|
|
if (stat(path, &st) == -1) {
|
2017-05-28 02:19:36 +08:00
|
|
|
PLOG_D("stat('%s')", path);
|
2016-03-03 22:37:04 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
if (S_ISDIR(st.st_mode)) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2023-05-16 20:01:45 +08:00
|
|
|
/*
 * Mounts 'src' on 'dst' using the file-system type, flags and options of 'mpt', but with
 * MS_RDONLY stripped from the flags.
 *
 * If that attempt fails with EPERM and the mount point requests MS_RDONLY, a warning is
 * logged and the mount is retried once with the unmodified flags.
 *
 * Returns the result of the last mount(2) call that was made (-1 on error, with errno set
 * by that call).
 */
static int mountRWIfPossible(mount_t* mpt, const char* src, const char* dst) {
    const char* const fstype = mpt->fs_type.c_str();
    const char* const opts = mpt->options.c_str();

    const int rw_res = mount(src, dst, fstype, mpt->flags & ~(MS_RDONLY), opts);

    /* The read-only fallback only applies to EPERM failures of read-only mount points */
    if (rw_res != -1 || errno != EPERM || !(mpt->flags & MS_RDONLY)) {
        return rw_res;
    }

    LOG_W("mount('%s') src: '%s' dstpath: '%s' could not mount read-write, falling "
          "back to mounting read-only directly",
        describeMountPt(*mpt).c_str(), src, dst);
    return mount(src, dst, fstype, mpt->flags, opts);
}
|
|
|
|
|
2018-02-10 22:50:12 +08:00
|
|
|
static bool mountPt(mount_t* mpt, const char* newroot, const char* tmpdir) {
|
2022-08-09 17:32:49 +08:00
|
|
|
LOG_D("Mounting %s", describeMountPt(*mpt).c_str());
|
2016-03-03 22:37:04 +08:00
|
|
|
|
2018-02-21 10:29:26 +08:00
|
|
|
char dstpath[PATH_MAX];
|
|
|
|
snprintf(dstpath, sizeof(dstpath), "%s/%s", newroot, mpt->dst.c_str());
|
|
|
|
|
2017-05-27 21:01:34 +08:00
|
|
|
char srcpath[PATH_MAX];
|
2018-02-10 21:38:01 +08:00
|
|
|
if (!mpt->src.empty()) {
|
|
|
|
snprintf(srcpath, sizeof(srcpath), "%s", mpt->src.c_str());
|
2017-05-27 21:01:34 +08:00
|
|
|
} else {
|
|
|
|
snprintf(srcpath, sizeof(srcpath), "none");
|
2016-08-17 01:54:50 +08:00
|
|
|
}
|
|
|
|
|
2018-02-21 10:29:26 +08:00
|
|
|
if (!util::createDirRecursively(dstpath)) {
|
|
|
|
LOG_W("Couldn't create upper directories for '%s'", dstpath);
|
|
|
|
return false;
|
2016-03-03 22:37:04 +08:00
|
|
|
}
|
|
|
|
|
2018-02-12 06:44:43 +08:00
|
|
|
if (mpt->is_symlink) {
|
2022-08-09 18:05:33 +08:00
|
|
|
LOG_D("symlink(%s, %s)", util::StrQuote(srcpath).c_str(),
|
|
|
|
util::StrQuote(dstpath).c_str());
|
2018-02-21 10:29:26 +08:00
|
|
|
if (symlink(srcpath, dstpath) == -1) {
|
2018-02-12 06:44:43 +08:00
|
|
|
if (mpt->is_mandatory) {
|
2022-08-09 18:05:33 +08:00
|
|
|
PLOG_E("symlink('%s', '%s')", util::StrQuote(srcpath).c_str(),
|
|
|
|
util::StrQuote(dstpath).c_str());
|
2017-07-02 09:39:56 +08:00
|
|
|
return false;
|
|
|
|
} else {
|
|
|
|
PLOG_W("symlink('%s', '%s'), but it's not mandatory, continuing",
|
2022-08-09 18:05:33 +08:00
|
|
|
util::StrQuote(srcpath).c_str(),
|
|
|
|
util::StrQuote(dstpath).c_str());
|
2017-07-02 09:39:56 +08:00
|
|
|
}
|
2017-06-29 06:32:20 +08:00
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-02-21 10:29:26 +08:00
|
|
|
if (mpt->is_dir) {
|
|
|
|
if (mkdir(dstpath, 0711) == -1 && errno != EEXIST) {
|
2022-08-10 21:23:53 +08:00
|
|
|
PLOG_W("mkdir(%s)", QC(dstpath));
|
2018-02-21 10:29:26 +08:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
int fd = TEMP_FAILURE_RETRY(open(dstpath, O_CREAT | O_RDONLY | O_CLOEXEC, 0644));
|
|
|
|
if (fd >= 0) {
|
|
|
|
close(fd);
|
|
|
|
} else {
|
2022-08-10 21:23:53 +08:00
|
|
|
PLOG_W("open(%s, O_CREAT|O_RDONLY|O_CLOEXEC, 0644)", QC(dstpath));
|
2018-02-21 10:29:26 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-02-10 21:38:01 +08:00
|
|
|
if (!mpt->src_content.empty()) {
|
2017-09-29 19:07:42 +08:00
|
|
|
static uint64_t df_counter = 0;
|
2017-10-17 21:22:23 +08:00
|
|
|
snprintf(
|
|
|
|
srcpath, sizeof(srcpath), "%s/dynamic_file.%" PRIu64, tmpdir, ++df_counter);
|
|
|
|
int fd = TEMP_FAILURE_RETRY(
|
|
|
|
open(srcpath, O_CREAT | O_EXCL | O_CLOEXEC | O_WRONLY, 0644));
|
2017-05-28 22:56:16 +08:00
|
|
|
if (fd < 0) {
|
2017-09-29 19:07:42 +08:00
|
|
|
PLOG_W("open(srcpath, O_CREAT|O_EXCL|O_CLOEXEC|O_WRONLY, 0644) failed");
|
2017-05-28 22:56:16 +08:00
|
|
|
return false;
|
|
|
|
}
|
2018-02-21 10:29:26 +08:00
|
|
|
if (!util::writeToFd(fd, mpt->src_content.data(), mpt->src_content.length())) {
|
2020-07-09 23:29:02 +08:00
|
|
|
LOG_W(
|
|
|
|
"Writing %zu bytes to '%s' failed", mpt->src_content.length(), srcpath);
|
2017-05-28 22:56:16 +08:00
|
|
|
close(fd);
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
close(fd);
|
2017-10-11 08:10:52 +08:00
|
|
|
mpt->flags |= (MS_BIND | MS_REC | MS_PRIVATE);
|
2017-05-28 22:56:16 +08:00
|
|
|
}
|
|
|
|
|
2016-06-19 07:35:06 +08:00
|
|
|
/*
|
|
|
|
* Initially mount it as RW, it will be remounted later on if needed
|
|
|
|
*/
|
2023-05-16 20:01:45 +08:00
|
|
|
if (mountRWIfPossible(mpt, srcpath, dstpath) == -1) {
|
2017-07-02 09:39:56 +08:00
|
|
|
if (errno == EACCES) {
|
2023-09-20 02:31:57 +08:00
|
|
|
PLOG_W("mount('%s') src:'%s' dstpath:'%s' failed. "
|
|
|
|
"Try fixing this problem by applying 'chmod o+x' to the '%s' "
|
|
|
|
"directory and its ancestors",
|
2018-02-21 10:29:26 +08:00
|
|
|
describeMountPt(*mpt).c_str(), srcpath, dstpath, srcpath);
|
2016-03-03 22:37:04 +08:00
|
|
|
} else {
|
2018-02-21 10:29:26 +08:00
|
|
|
PLOG_W("mount('%s') src:'%s' dstpath:'%s' failed",
|
|
|
|
describeMountPt(*mpt).c_str(), srcpath, dstpath);
|
2018-02-20 00:41:37 +08:00
|
|
|
if (mpt->fs_type.compare("proc") == 0) {
|
2023-10-04 05:16:55 +08:00
|
|
|
PLOG_W("procfs can only be mounted if the original /proc doesn't "
|
|
|
|
"have any other file-systems mounted on top of it (e.g. "
|
|
|
|
"/dev/null on top of /proc/kcore)");
|
2017-10-27 05:00:15 +08:00
|
|
|
}
|
2017-05-27 21:17:11 +08:00
|
|
|
}
|
2017-10-19 21:46:31 +08:00
|
|
|
return false;
|
2017-10-08 07:28:45 +08:00
|
|
|
} else {
|
|
|
|
mpt->mounted = true;
|
2016-03-03 22:37:04 +08:00
|
|
|
}
|
2017-05-29 10:50:29 +08:00
|
|
|
|
2018-02-10 21:38:01 +08:00
|
|
|
if (!mpt->src_content.empty() && unlink(srcpath) == -1) {
|
2017-05-29 10:50:29 +08:00
|
|
|
PLOG_W("unlink('%s')", srcpath);
|
|
|
|
}
|
2016-03-03 22:37:04 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-07-29 06:30:08 +08:00
|
|
|
static bool remountPt(const mount_t& mpt) {
|
2018-02-10 21:38:01 +08:00
|
|
|
if (!mpt.mounted) {
|
2017-10-08 07:28:45 +08:00
|
|
|
return true;
|
|
|
|
}
|
2018-02-12 06:44:43 +08:00
|
|
|
if (mpt.is_symlink) {
|
2017-06-29 06:32:20 +08:00
|
|
|
return true;
|
|
|
|
}
|
2017-05-22 09:34:54 +08:00
|
|
|
|
2016-03-03 22:37:04 +08:00
|
|
|
struct statvfs vfs;
|
2018-02-10 21:38:01 +08:00
|
|
|
if (TEMP_FAILURE_RETRY(statvfs(mpt.dst.c_str(), &vfs)) == -1) {
|
|
|
|
PLOG_W("statvfs('%s')", mpt.dst.c_str());
|
2017-07-02 09:39:56 +08:00
|
|
|
return false;
|
2016-03-03 22:37:04 +08:00
|
|
|
}
|
2017-10-26 08:17:52 +08:00
|
|
|
|
2018-05-22 20:27:18 +08:00
|
|
|
struct {
|
2017-10-26 08:29:15 +08:00
|
|
|
const unsigned long mount_flag;
|
|
|
|
const unsigned long vfs_flag;
|
2018-05-22 20:27:18 +08:00
|
|
|
} static const mountPairs[] = {
|
2017-10-26 08:27:18 +08:00
|
|
|
{MS_NOSUID, ST_NOSUID},
|
|
|
|
{MS_NODEV, ST_NODEV},
|
|
|
|
{MS_NOEXEC, ST_NOEXEC},
|
|
|
|
{MS_SYNCHRONOUS, ST_SYNCHRONOUS},
|
|
|
|
{MS_MANDLOCK, ST_MANDLOCK},
|
|
|
|
{MS_NOATIME, ST_NOATIME},
|
|
|
|
{MS_NODIRATIME, ST_NODIRATIME},
|
|
|
|
{MS_RELATIME, ST_RELATIME},
|
|
|
|
};
|
|
|
|
|
2018-07-27 17:27:01 +08:00
|
|
|
const unsigned long per_mountpoint_flags =
|
|
|
|
MS_LAZYTIME | MS_MANDLOCK | MS_NOATIME | MS_NODEV | MS_NODIRATIME | MS_NOEXEC |
|
|
|
|
MS_NOSUID | MS_RELATIME | MS_RDONLY | MS_SYNCHRONOUS;
|
|
|
|
unsigned long new_flags = MS_REMOUNT | MS_BIND | (mpt.flags & per_mountpoint_flags);
|
2018-05-22 20:27:18 +08:00
|
|
|
for (const auto& i : mountPairs) {
|
|
|
|
if (vfs.f_flag & i.vfs_flag) {
|
|
|
|
new_flags |= i.mount_flag;
|
2017-10-26 08:27:18 +08:00
|
|
|
}
|
2017-10-26 08:17:52 +08:00
|
|
|
}
|
2016-03-03 22:37:04 +08:00
|
|
|
|
2018-07-24 22:30:31 +08:00
|
|
|
LOG_D("Re-mounting '%s' (flags:%s)", mpt.dst.c_str(), flagsToStr(new_flags).c_str());
|
2018-02-10 21:38:01 +08:00
|
|
|
if (mount(mpt.dst.c_str(), mpt.dst.c_str(), NULL, new_flags, 0) == -1) {
|
2018-02-11 04:19:47 +08:00
|
|
|
PLOG_W("mount('%s', flags:%s)", mpt.dst.c_str(), flagsToStr(new_flags).c_str());
|
2017-07-02 09:39:56 +08:00
|
|
|
return false;
|
2016-03-03 22:37:04 +08:00
|
|
|
}
|
2017-05-22 09:34:54 +08:00
|
|
|
|
2016-03-03 22:37:04 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-04-28 05:58:53 +08:00
|
|
|
static bool mkdirAndTest(const std::string& dir) {
|
|
|
|
if (mkdir(dir.c_str(), 0755) == -1 && errno != EEXIST) {
|
|
|
|
PLOG_D("Couldn't create '%s' directory", dir.c_str());
|
2017-06-22 01:18:02 +08:00
|
|
|
return false;
|
|
|
|
}
|
2018-04-28 05:58:53 +08:00
|
|
|
if (access(dir.c_str(), R_OK) == -1) {
|
|
|
|
PLOG_W("access('%s', R_OK)", dir.c_str());
|
2017-06-22 01:18:02 +08:00
|
|
|
return false;
|
|
|
|
}
|
2018-04-28 05:58:53 +08:00
|
|
|
LOG_D("Created accessible directory in '%s'", dir.c_str());
|
2017-06-22 01:18:02 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-04-28 05:58:53 +08:00
|
|
|
/*
 * Finds (creating it if necessary) a readable scratch directory for the purpose 'name'.
 *
 * The candidates below are tried in order, each with mkdirAndTest(); the first usable one
 * is returned:
 *   1. /run/user/<uid>/nsjail/<name>
 *   2. /run/user//nsjail.<uid>.<name>
 *   3. /tmp/nsjail.<uid>.<name>
 *   4. $TMPDIR/nsjail.<uid>.<name>   (only if TMPDIR is set)
 *   5. /dev/shm/nsjail.<uid>.<name>
 *   6. /tmp/nsjail.<uid>.<name>.<random 64-bit number>
 * where <uid> is nsjconf->orig_uid.
 *
 * Returns the chosen path, or nullptr (after logging an error) if no candidate works.
 */
static std::unique_ptr<std::string> getDir(nsjconf_t* nsjconf, const char* name) {
    std::unique_ptr<std::string> dir(new std::string);

    const std::string uid = std::to_string(nsjconf->orig_uid);

    /* Candidate 1 is a two-level path: both levels have to be created and tested */
    *dir = "/run/user/" + uid + "/nsjail";
    if (mkdirAndTest(*dir)) {
        *dir += "/";
        *dir += name;
        if (mkdirAndTest(*dir)) {
            return dir;
        }
    }

    /* All remaining candidates end with (or contain) "<uid>.<name>" */
    const std::string suffix = uid + "." + name;

    /* NOTE: this candidate contains a double slash, which path resolution tolerates */
    *dir = std::string("/run/user/") + "/nsjail." + suffix;
    if (mkdirAndTest(*dir)) {
        return dir;
    }

    *dir = "/tmp/nsjail." + suffix;
    if (mkdirAndTest(*dir)) {
        return dir;
    }

    const char* tmp = getenv("TMPDIR");
    if (tmp) {
        *dir = std::string(tmp) + "/" + "nsjail." + suffix;
        if (mkdirAndTest(*dir)) {
            return dir;
        }
    }

    *dir = "/dev/shm/nsjail." + suffix;
    if (mkdirAndTest(*dir)) {
        return dir;
    }

    /* Last resort: a randomized name under /tmp */
    *dir = "/tmp/nsjail." + suffix + "." + std::to_string(util::rnd64());
    if (mkdirAndTest(*dir)) {
        return dir;
    }

    LOG_E("Couldn't create tmp directory of type '%s'", QC(name));
    return nullptr;
}
|
|
|
|
|
2019-08-25 17:16:12 +08:00
|
|
|
/*
 * File-system setup for the case in which CLONE_NEWNS is not used.
 *
 * Mounting anything here would change the global mount namespace, so the only thing done
 * is a chroot(2) into nsjconf->chroot (if one is configured), followed by chdir("/").
 *
 * Returns true on success, or if no chroot is configured; false if chroot() or chdir()
 * fails.
 */
static bool initNoCloneNs(nsjconf_t* nsjconf) {
    const std::string& new_root = nsjconf->chroot;
    if (new_root.empty()) {
        return true;
    }

    if (chroot(new_root.c_str()) == -1) {
        PLOG_E("chroot('%s')", QC(new_root));
        return false;
    }

    if (chdir("/") == -1) {
        PLOG_E("chdir('/')");
        return false;
    }

    return true;
}
|
2016-03-03 22:37:04 +08:00
|
|
|
|
2019-08-25 17:16:12 +08:00
|
|
|
static bool initCloneNs(nsjconf_t* nsjconf) {
|
2016-08-17 01:54:50 +08:00
|
|
|
if (chdir("/") == -1) {
|
2016-08-17 01:59:51 +08:00
|
|
|
PLOG_E("chdir('/')");
|
2016-08-17 01:54:50 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2018-04-28 05:58:53 +08:00
|
|
|
std::unique_ptr<std::string> destdir = getDir(nsjconf, "root");
|
|
|
|
if (!destdir) {
|
2017-06-22 06:38:49 +08:00
|
|
|
LOG_E("Couldn't obtain root mount directories");
|
2017-05-29 22:39:08 +08:00
|
|
|
return false;
|
2017-05-29 09:11:32 +08:00
|
|
|
}
|
2017-06-22 01:18:02 +08:00
|
|
|
|
2017-10-01 21:54:04 +08:00
|
|
|
/* Make changes to / (recursively) private, to avoid changing the global mount ns */
|
2017-10-01 11:16:01 +08:00
|
|
|
if (mount("/", "/", NULL, MS_REC | MS_PRIVATE, NULL) == -1) {
|
|
|
|
PLOG_E("mount('/', '/', NULL, MS_REC|MS_PRIVATE, NULL)");
|
2017-10-01 10:51:56 +08:00
|
|
|
return false;
|
|
|
|
}
|
2018-04-28 05:58:53 +08:00
|
|
|
if (mount(NULL, destdir->c_str(), "tmpfs", 0, "size=16777216") == -1) {
|
2022-08-10 21:23:53 +08:00
|
|
|
PLOG_E("mount('%s', 'tmpfs')", QC(*destdir));
|
2017-05-29 09:11:32 +08:00
|
|
|
return false;
|
2016-03-03 22:37:04 +08:00
|
|
|
}
|
2017-06-22 00:29:02 +08:00
|
|
|
|
2018-04-28 05:58:53 +08:00
|
|
|
std::unique_ptr<std::string> tmpdir = getDir(nsjconf, "tmp");
|
|
|
|
if (!tmpdir) {
|
2017-06-22 06:38:49 +08:00
|
|
|
LOG_E("Couldn't obtain temporary mount directories");
|
|
|
|
return false;
|
|
|
|
}
|
2018-04-28 05:58:53 +08:00
|
|
|
if (mount(NULL, tmpdir->c_str(), "tmpfs", 0, "size=16777216") == -1) {
|
2022-08-10 21:23:53 +08:00
|
|
|
PLOG_E("mount(%s, 'tmpfs')", QC(*tmpdir));
|
2017-05-29 22:39:08 +08:00
|
|
|
return false;
|
|
|
|
}
|
2016-03-03 22:37:04 +08:00
|
|
|
|
2018-02-10 21:38:01 +08:00
|
|
|
for (auto& p : nsjconf->mountpts) {
|
2018-04-28 05:58:53 +08:00
|
|
|
if (!mountPt(&p, destdir->c_str(), tmpdir->c_str()) && p.is_mandatory) {
|
2022-08-10 21:23:53 +08:00
|
|
|
LOG_E("Couldn't mount %s", QC(p.dst));
|
2016-03-03 22:37:04 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-04-28 05:58:53 +08:00
|
|
|
if (umount2(tmpdir->c_str(), MNT_DETACH) == -1) {
|
2022-08-10 21:23:53 +08:00
|
|
|
PLOG_E("umount2(%s, MNT_DETACH)", QC(*tmpdir));
|
2017-05-29 22:39:08 +08:00
|
|
|
return false;
|
|
|
|
}
|
2021-08-03 23:46:08 +08:00
|
|
|
|
|
|
|
if (!nsjconf->no_pivotroot) {
|
|
|
|
/*
|
|
|
|
* This requires some explanation: It's actually possible to pivot_root('/', '/').
|
|
|
|
* After this operation has been completed, the old root is mounted over the new
|
|
|
|
* root, and it's OK to simply umount('/') now, and to have new_root as '/'. This
|
|
|
|
* allows us not care about providing any special directory for old_root, which is
|
|
|
|
* sometimes not easy, given that e.g. /tmp might not always be present inside
|
|
|
|
* new_root
|
|
|
|
*/
|
|
|
|
if (util::syscall(__NR_pivot_root, (uintptr_t)destdir->c_str(),
|
|
|
|
(uintptr_t)destdir->c_str()) == -1) {
|
2022-08-10 21:23:53 +08:00
|
|
|
PLOG_E("pivot_root(%s, %s)", QC(*destdir), QC(*destdir));
|
2021-08-03 23:46:08 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (umount2("/", MNT_DETACH) == -1) {
|
|
|
|
PLOG_E("umount2('/', MNT_DETACH)");
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* pivot_root would normally un-mount the old root, however in certain cases this
|
|
|
|
* operation is forbidden. There are systems (mainly embedded) that keep their root
|
|
|
|
* file system in RAM, when initially loaded by the kernel (e.g. initramfs),
|
|
|
|
* and there is no other file system that is mounted on top of it.In such systems,
|
|
|
|
* there is no option to pivot_root!
|
|
|
|
* For more information, see
|
|
|
|
* kernel.org/doc/Documentation/filesystems/ramfs-rootfs-initramfs.txt. switch_root
|
|
|
|
* alternative: Innstead of un-mounting the old rootfs, it is over mounted by moving
|
|
|
|
* the new root to it.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* NOTE: Using mount move and chroot allows escaping back into the old root when
|
|
|
|
* proper capabilities are kept in the user namespace. It can be acheived by
|
|
|
|
* unmounting the new root and using setns to re-enter the mount namespace.
|
|
|
|
*/
|
2023-09-20 02:31:57 +08:00
|
|
|
LOG_W("Using no_pivotroot is escapable when user posseses relevant capabilities, "
|
|
|
|
"Use it with care!");
|
2021-08-03 23:46:08 +08:00
|
|
|
|
|
|
|
if (chdir(destdir->c_str()) == -1) {
|
2022-08-10 21:23:53 +08:00
|
|
|
PLOG_E("chdir(%s)", QC(*destdir));
|
2021-08-03 23:46:08 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* mount moving the new root on top of '/'. This operation is atomic and doesn't
|
|
|
|
involve un-mounting '/' at any stage */
|
|
|
|
if (mount(".", "/", NULL, MS_MOVE, NULL) == -1) {
|
2022-08-10 21:23:53 +08:00
|
|
|
PLOG_E("mount('/', %s, NULL, MS_MOVE, NULL)", QC(*destdir));
|
2021-08-03 23:46:08 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (chroot(".") == -1) {
|
2022-08-10 21:23:53 +08:00
|
|
|
PLOG_E("chroot(%s)", QC(*destdir));
|
2021-08-03 23:46:08 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (const auto& p : nsjconf->mountpts) {
|
|
|
|
if (!remountPt(p) && p.is_mandatory) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2016-03-03 22:37:04 +08:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
2016-05-13 04:25:48 +08:00
|
|
|
|
2019-08-25 17:16:12 +08:00
|
|
|
/*
 * Performs the file-system setup which matches nsjconf->clone_newns (initCloneNs() or
 * initNoCloneNs()), and then changes the working directory to nsjconf->cwd.
 *
 * Returns true if all steps succeed, false otherwise.
 */
static bool initNsInternal(nsjconf_t* nsjconf) {
    const bool fs_ready =
        nsjconf->clone_newns ? initCloneNs(nsjconf) : initNoCloneNs(nsjconf);
    if (!fs_ready) {
        return false;
    }

    if (chdir(nsjconf->cwd.c_str()) == -1) {
        PLOG_E("chdir(%s)", QC(nsjconf->cwd));
        return false;
    }

    return true;
}
|
|
|
|
|
2016-05-13 04:25:48 +08:00
|
|
|
/*
|
|
|
|
* With mode MODE_STANDALONE_EXECVE it's required to mount /proc inside a new process,
|
2016-10-18 04:53:31 +08:00
|
|
|
* as the current process is still in the original PID namespace (man pid_namespaces)
|
2016-05-13 04:25:48 +08:00
|
|
|
*/
|
2018-02-10 22:50:12 +08:00
|
|
|
bool initNs(nsjconf_t* nsjconf) {
|
2016-05-13 04:25:48 +08:00
|
|
|
if (nsjconf->mode != MODE_STANDALONE_EXECVE) {
|
2018-02-10 01:26:16 +08:00
|
|
|
return initNsInternal(nsjconf);
|
2016-05-13 04:25:48 +08:00
|
|
|
}
|
|
|
|
|
2021-05-18 20:38:01 +08:00
|
|
|
pid_t pid = subproc::cloneProc(CLONE_FS, SIGCHLD);
|
2016-05-13 04:25:48 +08:00
|
|
|
if (pid == -1) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (pid == 0) {
|
2018-02-10 01:26:16 +08:00
|
|
|
exit(initNsInternal(nsjconf) ? 0 : 0xff);
|
2016-05-13 04:25:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
int status;
|
2017-10-09 05:00:45 +08:00
|
|
|
while (wait4(pid, &status, 0, NULL) != pid)
|
|
|
|
;
|
2016-05-13 04:25:48 +08:00
|
|
|
if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
2017-05-28 06:15:53 +08:00
|
|
|
|
2018-02-12 06:44:43 +08:00
|
|
|
static bool addMountPt(mount_t* mnt, const std::string& src, const std::string& dst,
|
|
|
|
const std::string& fstype, const std::string& options, uintptr_t flags, isDir_t is_dir,
|
|
|
|
bool is_mandatory, const std::string& src_env, const std::string& dst_env,
|
|
|
|
const std::string& src_content, bool is_symlink) {
|
|
|
|
if (!src_env.empty()) {
|
|
|
|
const char* e = getenv(src_env.c_str());
|
2023-10-20 20:15:36 +08:00
|
|
|
if (e == nullptr) {
|
2022-08-10 21:23:53 +08:00
|
|
|
LOG_W("No such envar:%s", QC(src_env));
|
2017-05-28 06:15:53 +08:00
|
|
|
return false;
|
|
|
|
}
|
2018-02-10 21:38:01 +08:00
|
|
|
mnt->src = e;
|
|
|
|
}
|
2018-02-12 06:44:43 +08:00
|
|
|
mnt->src.append(src);
|
2017-05-28 06:15:53 +08:00
|
|
|
|
2018-02-12 06:44:43 +08:00
|
|
|
if (!dst_env.empty()) {
|
|
|
|
const char* e = getenv(dst_env.c_str());
|
2023-10-20 20:15:36 +08:00
|
|
|
if (e == nullptr) {
|
2022-08-10 21:23:53 +08:00
|
|
|
LOG_W("No such envar:%s", QC(dst_env));
|
2017-05-28 06:15:53 +08:00
|
|
|
return false;
|
|
|
|
}
|
2018-02-10 21:38:01 +08:00
|
|
|
mnt->dst = e;
|
|
|
|
}
|
2018-02-12 06:44:43 +08:00
|
|
|
mnt->dst.append(dst);
|
2017-05-28 06:15:53 +08:00
|
|
|
|
2018-02-12 06:44:43 +08:00
|
|
|
mnt->fs_type = fstype;
|
|
|
|
mnt->options = options;
|
2018-02-10 21:38:01 +08:00
|
|
|
mnt->flags = flags;
|
2018-02-12 06:44:43 +08:00
|
|
|
mnt->is_symlink = is_symlink;
|
|
|
|
mnt->is_mandatory = is_mandatory;
|
2018-02-10 21:38:01 +08:00
|
|
|
mnt->mounted = false;
|
2018-02-12 06:44:43 +08:00
|
|
|
mnt->src_content = src_content;
|
2017-05-28 06:15:53 +08:00
|
|
|
|
2018-02-12 06:44:43 +08:00
|
|
|
switch (is_dir) {
|
2017-10-07 06:18:21 +08:00
|
|
|
case NS_DIR_YES:
|
2018-02-12 06:44:43 +08:00
|
|
|
mnt->is_dir = true;
|
2017-10-07 06:18:21 +08:00
|
|
|
break;
|
|
|
|
case NS_DIR_NO:
|
2018-02-12 06:44:43 +08:00
|
|
|
mnt->is_dir = false;
|
2017-10-07 06:18:21 +08:00
|
|
|
break;
|
2017-10-09 05:00:45 +08:00
|
|
|
case NS_DIR_MAYBE: {
|
2018-02-12 06:44:43 +08:00
|
|
|
if (!src_content.empty()) {
|
|
|
|
mnt->is_dir = false;
|
2018-02-10 21:38:01 +08:00
|
|
|
} else if (mnt->src.empty()) {
|
2018-02-12 06:44:43 +08:00
|
|
|
mnt->is_dir = true;
|
2018-02-10 21:38:01 +08:00
|
|
|
} else if (mnt->flags & MS_BIND) {
|
2018-02-12 06:44:43 +08:00
|
|
|
mnt->is_dir = mnt::isDir(mnt->src.c_str());
|
2017-10-09 05:00:45 +08:00
|
|
|
} else {
|
2018-02-12 06:44:43 +08:00
|
|
|
mnt->is_dir = true;
|
2017-05-28 06:15:53 +08:00
|
|
|
}
|
2017-10-09 05:00:45 +08:00
|
|
|
} break;
|
2017-10-07 06:18:21 +08:00
|
|
|
default:
|
2018-02-12 06:44:43 +08:00
|
|
|
LOG_E("Unknown is_dir value: %d", is_dir);
|
|
|
|
return false;
|
2017-05-28 06:15:53 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
2017-05-29 22:52:24 +08:00
|
|
|
|
2018-02-12 06:44:43 +08:00
|
|
|
/*
 * Builds a mount point from the parameters (see addMountPt()) and inserts it at the
 * beginning of nsjconf->mountpts.
 *
 * Returns false, leaving the list untouched, if addMountPt() fails; true otherwise.
 */
bool addMountPtHead(nsjconf_t* nsjconf, const std::string& src, const std::string& dst,
    const std::string& fstype, const std::string& options, uintptr_t flags, isDir_t is_dir,
    bool is_mandatory, const std::string& src_env, const std::string& dst_env,
    const std::string& src_content, bool is_symlink) {
    mount_t entry;
    const bool ok = addMountPt(&entry, src, dst, fstype, options, flags, is_dir,
        is_mandatory, src_env, dst_env, src_content, is_symlink);
    if (ok) {
        nsjconf->mountpts.insert(nsjconf->mountpts.begin(), entry);
    }
    return ok;
}
|
|
|
|
|
2018-02-12 06:44:43 +08:00
|
|
|
/*
 * Builds a mount point from the parameters (see addMountPt()) and appends it to the end
 * of nsjconf->mountpts.
 *
 * Returns false, leaving the list untouched, if addMountPt() fails; true otherwise.
 */
bool addMountPtTail(nsjconf_t* nsjconf, const std::string& src, const std::string& dst,
    const std::string& fstype, const std::string& options, uintptr_t flags, isDir_t is_dir,
    bool is_mandatory, const std::string& src_env, const std::string& dst_env,
    const std::string& src_content, bool is_symlink) {
    mount_t entry;
    const bool ok = addMountPt(&entry, src, dst, fstype, options, flags, is_dir,
        is_mandatory, src_env, dst_env, src_content, is_symlink);
    if (ok) {
        nsjconf->mountpts.push_back(entry);
    }
    return ok;
}
|
|
|
|
|
2018-02-11 07:24:43 +08:00
|
|
|
const std::string describeMountPt(const mount_t& mpt) {
|
2018-02-20 23:03:32 +08:00
|
|
|
std::string descr;
|
|
|
|
|
2022-08-10 21:23:53 +08:00
|
|
|
descr.append(mpt.src.empty() ? "" : QC(mpt.src))
|
2022-08-09 18:05:33 +08:00
|
|
|
.append(mpt.src.empty() ? "" : " -> ")
|
2022-08-10 21:23:53 +08:00
|
|
|
.append(QC(mpt.dst))
|
2022-08-09 18:05:33 +08:00
|
|
|
.append(" flags:")
|
2018-02-20 23:03:32 +08:00
|
|
|
.append(flagsToStr(mpt.flags))
|
2022-08-09 18:05:33 +08:00
|
|
|
.append(" type:")
|
2022-08-10 21:23:53 +08:00
|
|
|
.append(QC(mpt.fs_type))
|
2022-08-09 18:05:33 +08:00
|
|
|
.append(" options:")
|
2022-08-10 21:23:53 +08:00
|
|
|
.append(QC(mpt.options));
|
2018-02-21 10:29:26 +08:00
|
|
|
|
2018-02-20 23:03:32 +08:00
|
|
|
if (mpt.is_dir) {
|
2019-09-01 04:08:02 +08:00
|
|
|
descr.append(" dir:true");
|
2018-02-20 23:03:32 +08:00
|
|
|
} else {
|
2019-09-01 04:08:02 +08:00
|
|
|
descr.append(" dir:false");
|
2018-02-20 23:03:32 +08:00
|
|
|
}
|
2018-02-12 06:44:43 +08:00
|
|
|
if (!mpt.is_mandatory) {
|
2018-02-20 23:03:32 +08:00
|
|
|
descr.append(" mandatory:false");
|
2017-05-29 22:52:24 +08:00
|
|
|
}
|
2018-02-10 21:38:01 +08:00
|
|
|
if (!mpt.src_content.empty()) {
|
2018-02-20 23:03:32 +08:00
|
|
|
descr.append(" src_content_len:").append(std::to_string(mpt.src_content.length()));
|
2017-05-29 22:52:24 +08:00
|
|
|
}
|
2018-02-12 06:44:43 +08:00
|
|
|
if (mpt.is_symlink) {
|
2018-02-20 23:03:32 +08:00
|
|
|
descr.append(" symlink:true");
|
2017-06-29 06:32:20 +08:00
|
|
|
}
|
2017-05-29 22:52:24 +08:00
|
|
|
|
2018-02-20 23:03:32 +08:00
|
|
|
return descr;
|
2017-05-29 22:52:24 +08:00
|
|
|
}
|
2018-02-10 01:26:16 +08:00
|
|
|
|
|
|
|
} // namespace mnt
|