nsjail/mount.c

337 lines
8.7 KiB
C
Raw Normal View History

2016-03-03 22:37:04 +08:00
/*
nsjail - CLONE_NEWNS routines
-----------------------------------------
Copyright 2014 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "mount.h"
#include <errno.h>
#include <fcntl.h>
2016-07-21 21:48:47 +08:00
#include <linux/sched.h>
#include <sched.h>
2016-03-03 22:37:04 +08:00
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
2016-03-03 22:37:04 +08:00
#include <unistd.h>
#include "log.h"
#include "subproc.h"
2016-08-19 00:59:06 +08:00
#include "util.h"
2016-03-03 22:37:04 +08:00
2017-05-22 01:44:54 +08:00
#define VALSTR_STRUCT(x) { x, #x }
2017-05-21 23:37:18 +08:00
/* Fallback value used only when the included headers do not define MS_LAZYTIME */
#if !defined(MS_LAZYTIME)
#define MS_LAZYTIME (1<<25)
#endif /* if !defined(MS_LAZYTIME) */
const char *mountFlagsToStr(uintptr_t flags)
2017-05-21 23:37:18 +08:00
{
static __thread char mountFlagsStr[1024];
mountFlagsStr[0] = '\0';
2017-05-21 23:37:18 +08:00
/* *INDENT-OFF* */
static struct {
const uintptr_t flag;
const char* const name;
2017-05-21 23:37:18 +08:00
} const mountFlags[] = {
2017-05-22 01:44:54 +08:00
VALSTR_STRUCT(MS_RDONLY),
VALSTR_STRUCT(MS_NOSUID),
VALSTR_STRUCT(MS_NODEV),
VALSTR_STRUCT(MS_NOEXEC),
VALSTR_STRUCT(MS_SYNCHRONOUS),
VALSTR_STRUCT(MS_REMOUNT),
VALSTR_STRUCT(MS_MANDLOCK),
VALSTR_STRUCT(MS_DIRSYNC),
VALSTR_STRUCT(MS_NOATIME),
VALSTR_STRUCT(MS_NODIRATIME),
VALSTR_STRUCT(MS_BIND),
VALSTR_STRUCT(MS_MOVE),
VALSTR_STRUCT(MS_REC),
VALSTR_STRUCT(MS_SILENT),
VALSTR_STRUCT(MS_POSIXACL),
VALSTR_STRUCT(MS_UNBINDABLE),
VALSTR_STRUCT(MS_PRIVATE),
VALSTR_STRUCT(MS_SLAVE),
VALSTR_STRUCT(MS_SHARED),
VALSTR_STRUCT(MS_RELATIME),
VALSTR_STRUCT(MS_KERNMOUNT),
VALSTR_STRUCT(MS_I_VERSION),
VALSTR_STRUCT(MS_STRICTATIME),
VALSTR_STRUCT(MS_LAZYTIME),
2017-05-21 23:37:18 +08:00
};
/* *INDENT-ON* */
for (size_t i = 0; i < ARRAYSIZE(mountFlags); i++) {
if (flags & mountFlags[i].flag) {
utilSSnPrintf(mountFlagsStr, sizeof(mountFlagsStr), "%s|",
mountFlags[i].name);
2017-05-21 23:37:18 +08:00
}
}
uintptr_t knownFlagMask = 0U;
for (size_t i = 0; i < ARRAYSIZE(mountFlags); i++) {
knownFlagMask |= mountFlags[i].flag;
}
utilSSnPrintf(mountFlagsStr, sizeof(mountFlagsStr), "%#tx", flags & ~(knownFlagMask));
return mountFlagsStr;
2017-05-21 23:37:18 +08:00
}
/*
 * Tell whether 'path' refers to a directory.
 *
 * Returns true for a NULL path and for a path which stat() reports as a
 * directory. Returns false for any other file type, and also (after logging)
 * when stat() itself fails.
 */
bool mountIsDir(const char *path)
{
	/* A NULL source is assumed to be a dir (for /proc and tmpfs) */
	if (path == NULL) {
		return true;
	}

	struct stat sb;
	if (stat(path, &sb) == -1) {
		PLOG_E("stat('%s')", path);
		return false;
	}

	return S_ISDIR(sb.st_mode) ? true : false;
}
static bool mountMount(struct mounts_t *mpt, const char *oldroot, const char *dst)
2016-03-03 22:37:04 +08:00
{
LOG_D("Mounting '%s' on '%s' (fstype:'%s', flags:%s, options:'%s', is_dir:%s)",
mpt->src ? mpt->src : "[NULL]", dst, mpt->fs_type ? mpt->fs_type : "[NULL]",
mountFlagsToStr(mpt->flags), mpt->options ? mpt->options : "[NULL]",
mpt->isDir ? "True" : "False");
2016-03-03 22:37:04 +08:00
char srcpath[PATH_MAX];
if (mpt->src != NULL && strlen(mpt->src) > 0) {
2016-08-17 03:12:23 +08:00
snprintf(srcpath, sizeof(srcpath), "%s/%s", oldroot, mpt->src);
} else {
snprintf(srcpath, sizeof(srcpath), "none");
2016-08-17 01:54:50 +08:00
}
if (mpt->isDir == true) {
2016-08-19 00:59:06 +08:00
if (utilCreateDirRecursively(dst) == false) {
LOG_W("Couldn't create upper directories for '%s'", dst);
return false;
}
2016-03-03 22:37:04 +08:00
if (mkdir(dst, 0711) == -1 && errno != EEXIST) {
PLOG_W("mkdir('%s')", dst);
}
} else {
2016-08-19 00:59:06 +08:00
if (utilCreateDirRecursively(dst) == false) {
LOG_W("Couldn't create upper directories for '%s'", dst);
return false;
}
int fd = TEMP_FAILURE_RETRY(open(dst, O_CREAT | O_RDONLY | O_CLOEXEC, 0644));
2016-03-03 22:37:04 +08:00
if (fd >= 0) {
2016-07-29 21:38:22 +08:00
close(fd);
2016-03-03 22:37:04 +08:00
} else {
PLOG_W("open('%s', O_CREAT|O_RDONLY|O_CLOEXEC, 0700)", dst);
2016-03-03 22:37:04 +08:00
}
}
/*
* Initially mount it as RW, it will be remounted later on if needed
*/
unsigned long flags = mpt->flags & ~(MS_RDONLY);
if (mount(srcpath, dst, mpt->fs_type, flags, mpt->options) == -1) {
if (mpt->mandatory == false) {
PLOG_D
("mount(src:'%s', dst:'%s', fstype:'%s', flags:'%s', mandatory:%s) failed. "
"Skipping this mount as it's non-mandatory", srcpath, dst,
mpt->fs_type ? mpt->fs_type : "[NULL]", mountFlagsToStr(mpt->flags),
mpt->mandatory ? "true" : "false");
} else if (errno == EACCES) {
PLOG_E
("mount(src:'%s', dst:'%s', fstype:'%s', flags:'%s', mandatory:%s) failed. "
"Try fixing this problem by applying 'chmod o+x' to the '%s' directory and "
"its ancestors", srcpath, dst, mpt->fs_type ? mpt->fs_type : "[NULL]",
mountFlagsToStr(mpt->flags), mpt->src,
mpt->mandatory ? "true" : "false");
2016-03-03 22:37:04 +08:00
} else {
PLOG_E
("mount(src:'%s', dst:'%s', fstype:'%s', flags:'%s' mandatory:%s) failed",
srcpath, dst, mpt->fs_type ? mpt->fs_type : "[NULL]",
mountFlagsToStr(mpt->flags), mpt->mandatory ? "true" : "false");
2017-05-27 21:17:11 +08:00
}
if (mpt->mandatory) {
return false;
2016-03-03 22:37:04 +08:00
}
}
return true;
}
/*
 * Remount mpt->dst read-only, if the mount point asks for MS_RDONLY.
 *
 * Mount points without MS_RDONLY are left alone. For the others the flags
 * currently reported by statvfs() are kept and MS_REMOUNT | MS_RDONLY is added.
 *
 * Returns false if statvfs() fails, or if the remount of a mandatory mount
 * point fails. A failed remount of a non-mandatory mount point is only logged.
 */
static bool mountRemountRO(struct mounts_t *mpt)
{
	if (!(mpt->flags & MS_RDONLY)) {
		return true;
	}

	struct statvfs vfs;
	if (TEMP_FAILURE_RETRY(statvfs(mpt->dst, &vfs)) == -1) {
		PLOG_E("statvfs('%s')", mpt->dst);
		return false;
	}

	/*
	 * It's fine to use 'flags | vfs.f_flag' here as per
	 * /usr/include/x86_64-linux-gnu/bits/statvfs.h: 'Definitions for
	 * the flag in `f_flag'.  These definitions should be
	 * kept in sync with the definitions in <sys/mount.h>'
	 */
	unsigned long new_flags = MS_REMOUNT | MS_RDONLY | vfs.f_flag;

	/*
	 * mountFlagsToStr() returns a pointer to one static buffer, so two calls
	 * within a single argument list would both print the same text. Keep a
	 * private copy of the first result.
	 */
	char old_flags_str[1024];
	snprintf(old_flags_str, sizeof(old_flags_str), "%s", mountFlagsToStr(vfs.f_flag));
	LOG_D("Re-mounting R/O '%s' (old_flags:%s, new_flags:%s)", mpt->dst,
	      old_flags_str, mountFlagsToStr(new_flags));

	if (mount(mpt->dst, mpt->dst, NULL, new_flags, NULL) == -1) {
		PLOG_W("mount('%s', flags:%s)", mpt->dst, mountFlagsToStr(new_flags));
		if (mpt->mandatory) {
			return false;
		}
	}

	return true;
}
static bool mountInitNsInternal(struct nsjconf_t *nsjconf)
2016-03-03 22:37:04 +08:00
{
if (nsjconf->clone_newns == false) {
if (chroot(nsjconf->chroot) == -1) {
PLOG_E("chroot('%s')", nsjconf->chroot) {
return false;
}
}
if (chdir("/") == -1) {
PLOG_E("chdir('/')");
return false;
}
return true;
}
const char *const destdir = "/tmp";
if (mount(NULL, destdir, "tmpfs", 0, NULL) == -1) {
2017-02-10 09:25:50 +08:00
PLOG_E("mount('%s', 'tmpfs')", destdir);
2016-03-03 22:37:04 +08:00
return false;
}
2016-08-17 01:54:50 +08:00
char oldrootdir[PATH_MAX];
2016-08-17 01:59:51 +08:00
snprintf(oldrootdir, sizeof(oldrootdir), "%s/old_root", destdir);
2016-08-17 01:54:50 +08:00
if (mkdir(oldrootdir, 0755) == -1) {
PLOG_E("mkdir('%s')", oldrootdir);
return false;
}
if (syscall(__NR_pivot_root, destdir, oldrootdir) == -1) {
PLOG_E("pivot_root('%s', '%s')", destdir, oldrootdir);
return false;
}
if (chdir("/") == -1) {
2016-08-17 01:59:51 +08:00
PLOG_E("chdir('/')");
2016-08-17 01:54:50 +08:00
return false;
}
const char *newrootdir;
if (nsjconf->pivot_root_only == false) {
newrootdir = "/new_root";
if (mkdir(newrootdir, 0755) == -1) {
PLOG_E("mkdir('%s')", newrootdir);
return false;
}
} else {
newrootdir = "/";
2016-03-03 22:37:04 +08:00
}
struct mounts_t *p;
TAILQ_FOREACH(p, &nsjconf->mountpts, pointers) {
2017-05-22 09:34:54 +08:00
/*
* The intention behind pivot_root_only is to allow creating
* nested usernamespaces. If we bind mount over /, the kernel
* will see the process as chrooted and deny CLONE_NEWUSER.
*/
if (nsjconf->pivot_root_only && strcmp(p->dst, "/") == 0) {
continue;
}
2016-03-03 22:37:04 +08:00
char dst[PATH_MAX];
snprintf(dst, sizeof(dst), "%s/%s", newrootdir, p->dst);
if (mountMount(p, "/old_root", dst) == false) {
2016-03-03 22:37:04 +08:00
return false;
}
}
2016-08-17 01:59:51 +08:00
if (umount2("/old_root", MNT_DETACH) == -1) {
PLOG_E("umount2('/old_root', MNT_DETACH)");
2016-03-03 22:37:04 +08:00
return false;
}
if (nsjconf->pivot_root_only == false) {
if (chroot(newrootdir) == -1) {
PLOG_E("chroot('%s')", newrootdir);
return false;
}
2016-09-25 20:48:39 +08:00
} else {
if (rmdir("/old_root") == -1) {
PLOG_E("rmdir('/old_root')");
return false;
}
2016-03-03 22:37:04 +08:00
}
if (chdir(nsjconf->cwd) == -1) {
PLOG_E("chdir('%s')", nsjconf->cwd);
return false;
}
TAILQ_FOREACH(p, &nsjconf->mountpts, pointers) {
if (mountRemountRO(p) == false) {
return false;
}
}
return true;
}
/*
 * With mode MODE_STANDALONE_EXECVE it's required to mount /proc inside a new process,
 * as the current process is still in the original PID namespace (man pid_namespaces)
 *
 * In every other mode the mount setup is done directly in the calling process.
 * In MODE_STANDALONE_EXECVE it is done in a child created via
 * subprocClone(CLONE_FS | SIGCHLD), and the child's exit status (0 on success)
 * is turned into the return value. Presumably CLONE_FS makes the child's
 * chroot/chdir visible to this process as well - confirm against subprocClone().
 *
 * Returns true if the mount setup succeeded, false otherwise.
 */
bool mountInitNs(struct nsjconf_t * nsjconf)
{
	if (nsjconf->mode != MODE_STANDALONE_EXECVE) {
		return mountInitNsInternal(nsjconf);
	}

	pid_t pid = subprocClone(CLONE_FS | SIGCHLD);
	if (pid == -1) {
		return false;
	}
	if (pid == 0) {
		exit(mountInitNsInternal(nsjconf) ? 0 : 1);
	}

	/*
	 * Retry only when interrupted by a signal; any other wait4() error
	 * (e.g. ECHILD) would never go away, so give up instead of spinning.
	 */
	int status;
	for (;;) {
		pid_t ret = wait4(pid, &status, 0, NULL);
		if (ret == pid) {
			break;
		}
		if (ret == -1 && errno != EINTR) {
			PLOG_E("wait4(pid=%d)", (int)pid);
			return false;
		}
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
		return true;
	}
	return false;
}