diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..4c3ff73 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "kafel"] + path = kafel + url = https://github.com/google/kafel.git diff --git a/Makefile b/Makefile index ccde2a6..fcfcd5b 100644 --- a/Makefile +++ b/Makefile @@ -34,6 +34,11 @@ ifdef DEBUG CFLAGS += -g -ggdb -gdwarf-4 endif +ifneq ("$(wildcard kafel/include/kafel.h)","") + CFLAGS += -I./kafel/include/ -DUSE_KAFEL + LIBS += kafel/libkafel.a +endif + ifeq ("$(wildcard /usr/include/libnl3/netlink/route/link/macvlan.h)","/usr/include/libnl3/netlink/route/link/macvlan.h") CFLAGS += -DNSJAIL_NL3_WITH_MACVLAN -I/usr/include/libnl3 LDFLAGS += -lnl-3 -lnl-route-3 @@ -44,11 +49,19 @@ endif all: $(BIN) -$(BIN): $(OBJS) - $(CC) -o $(BIN) $(OBJS) $(LDFLAGS) +$(BIN): $(OBJS) $(LIBS) + $(CC) -o $(BIN) $(OBJS) $(LIBS) $(LDFLAGS) + +ifneq ("$(wildcard kafel/Makefile)","") +kafel/libkafel.a: + $(MAKE) -C kafel +endif clean: $(RM) core Makefile.bak $(OBJS) $(BIN) +ifneq ("$(wildcard kafel/Makefile)","") + $(MAKE) -C kafel clean +endif depend: makedepend -Y. -- -- $(SRCS) diff --git a/cmdline.c b/cmdline.c index e59dbe3..a1b2931 100644 --- a/cmdline.c +++ b/cmdline.c @@ -41,6 +41,10 @@ #include #include +#if USE_KAFEL +#include +#endif + #include "log.h" #include "util.h" @@ -294,6 +298,7 @@ bool cmdlineParse(int argc, char *argv[], struct nsjconf_t * nsjconf) .cgroup_mem_mount = "/sys/fs/cgroup/memory", .cgroup_mem_parent = "NSJAIL", .cgroup_mem_max = (size_t)0, + .seccomp_fprog = {0, NULL}, .iface_no_lo = false, .iface = NULL, .iface_vs_ip = "0.0.0.0", @@ -375,6 +380,9 @@ bool cmdlineParse(int argc, char *argv[], struct nsjconf_t * nsjconf) {{"tmpfsmount", required_argument, NULL, 'T'}, "List of mountpoints to be mounted as RW/tmpfs inside the container. Can be specified multiple times. Supports 'dest' syntax"}, {{"tmpfs_size", required_argument, NULL, 0x0602}, "Number of bytes to allocate for tmpfsmounts (default: 4194304)"}, {{"disable_proc", no_argument, NULL, 0x0603}, "Disable mounting /proc in the jail"}, +#if USE_KAFEL + {{"seccomp_policy", required_argument, NULL, 0x0901}, "Seccomp policy filename"}, +#endif {{"cgroup_mem_max", required_argument, NULL, 0x0801}, "Maximum number of bytes to use in the group (default: '0' - disabled)"}, {{"cgroup_mem_mount", required_argument, NULL, 0x0802}, "Location of memory cgroup FS (default: '/sys/fs/cgroup/memory')"}, {{"cgroup_mem_parent", required_argument, NULL, 0x0803}, "Which pre-existing memory cgroup to use as a parent (default: 'NSJAIL')"}, @@ -620,6 +628,28 @@ bool cmdlineParse(int argc, char *argv[], struct nsjconf_t * nsjconf) case 0x803: nsjconf->cgroup_mem_parent = optarg; break; +#if USE_KAFEL + case 0x901: + { + FILE *f = fopen(optarg, "r"); + if (f == NULL) { + LOG_E("Could not open policy file `%s'", optarg); + return false; + } + kafel_ctxt_t ctxt = kafel_ctxt_create(); + kafel_set_input_file(ctxt, f); + if (kafel_compile(ctxt, &nsjconf->seccomp_fprog) != 0) { + fclose(f); + LOG_E("Could not compile policy: %s", + kafel_error_msg(ctxt)); + kafel_ctxt_destroy(&ctxt); + return false; + } + fclose(f); + kafel_ctxt_destroy(&ctxt); + } + break; +#endif default: cmdlineUsage(argv[0], custom_opts); return false; diff --git a/common.h b/common.h index 46cf9b5..3f188d9 100644 --- a/common.h +++ b/common.h @@ -23,6 +23,7 @@ #define NS_COMMON_H #include +#include #include #include #include @@ -135,6 +136,7 @@ struct nsjconf_t { const char *cgroup_mem_mount; const char *cgroup_mem_parent; size_t cgroup_mem_max; + struct sock_fprog seccomp_fprog; TAILQ_HEAD(envlist, charptr_t) envs; TAILQ_HEAD(pidslist, pids_t) pids; TAILQ_HEAD(mountptslist, mounts_t) mountpts; diff --git a/kafel b/kafel new file mode 160000 index 0000000..f7b4868 --- /dev/null +++ b/kafel @@ -0,0 +1 @@ +Subproject commit f7b486817e9a738c0705ecabc5ba6f8204a32685 diff --git a/sandbox.c b/sandbox.c index 998dfec..16b27f6 100644 --- a/sandbox.c +++ b/sandbox.c @@ -37,54 +37,59 @@ * A demo policy, it disallows syslog and ptrace syscalls, both in 32 and 64 * modes */ -static bool sandboxPrepareAndCommit(void) +static bool sandboxPrepareAndCommit(struct nsjconf_t *nsjconf) { #if defined(__x86_64__) || defined(__i386__) - struct bpf_labels l = {.count = 0 }; - struct sock_filter filter[] = { - LOAD_ARCH, - JEQ32(AUDIT_ARCH_I386, JUMP(&l, label_i386)), - JEQ32(AUDIT_ARCH_X86_64, JUMP(&l, label_x86_64)), + if (nsjconf->seccomp_fprog.filter == NULL) { + struct bpf_labels l = {.count = 0 }; + struct sock_filter filter[] = { + LOAD_ARCH, + JEQ32(AUDIT_ARCH_I386, JUMP(&l, label_i386)), + JEQ32(AUDIT_ARCH_X86_64, JUMP(&l, label_x86_64)), - /* I386 */ - LABEL(&l, label_i386), - LOAD_SYSCALL_NR, + /* I386 */ + LABEL(&l, label_i386), + LOAD_SYSCALL_NR, #define __NR_syslog_32 103 #define __NR_uselib_32 86 - JEQ32(__NR_syslog_32, ERRNO(ENOENT)), - JEQ32(__NR_uselib_32, KILL), - ALLOW, + JEQ32(__NR_syslog_32, ERRNO(ENOENT)), + JEQ32(__NR_uselib_32, KILL), + ALLOW, - /* X86_64 */ - LABEL(&l, label_x86_64), - LOAD_SYSCALL_NR, + /* X86_64 */ + LABEL(&l, label_x86_64), + LOAD_SYSCALL_NR, #define __NR_syslog_64 103 #define __NR_uselib_64 134 - JEQ32(__NR_syslog_64, ERRNO(ENOENT)), - JEQ32(__NR_uselib_64, KILL), - ALLOW, - }; - - struct sock_fprog prog = { - .filter = filter, - .len = (unsigned short)(sizeof(filter) / sizeof(filter[0])), - }; - if (bpf_resolve_jumps(&l, filter, sizeof(filter) / sizeof(*filter)) != 0) { - LOG_W("bpf_resolve_jumps() failed"); - return false; + JEQ32(__NR_syslog_64, ERRNO(ENOENT)), + JEQ32(__NR_uselib_64, KILL), + ALLOW, + }; + /* *INDENT-OFF* */ + nsjconf->seccomp_fprog = (struct sock_fprog) { + .filter = filter, + .len = (unsigned short)(sizeof(filter) / sizeof(filter[0])), + }; + /* *INDENT-ON* */ + if (bpf_resolve_jumps(&l, filter, sizeof(filter) / sizeof(*filter)) != 0) { + LOG_W("bpf_resolve_jumps() failed"); + return false; + } } +#endif /* defined(__x86_64__) || defined(__i386__) */ + if (nsjconf->seccomp_fprog.filter != NULL) { #ifndef PR_SET_NO_NEW_PRIVS #define PR_SET_NO_NEW_PRIVS 38 #endif /* PR_SET_NO_NEW_PRIVS */ - if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { - PLOG_W("prctl(PR_SET_NO_NEW_PRIVS, 1) failed"); - return false; + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + PLOG_W("prctl(PR_SET_NO_NEW_PRIVS, 1) failed"); + return false; + } + if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &nsjconf->seccomp_fprog, 0, 0)) { + PLOG_W("prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER) failed"); + return false; + } } - if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0)) { - PLOG_W("prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER) failed"); - return false; - } -#endif /* defined(__x86_64__) || defined(__i386__) */ return true; } @@ -93,7 +98,7 @@ bool sandboxApply(struct nsjconf_t * nsjconf) if (nsjconf->apply_sandbox == false) { return true; } - if (sandboxPrepareAndCommit() == false) { + if (sandboxPrepareAndCommit(nsjconf) == false) { return false; } return true;