[5/7] seccomp: Add engine to restore per-thread seccomp chains

Submitted by Cyrill Gorcunov on April 6, 2018, 7:17 p.m.

Details

Message ID 20180406191708.11325-6-gorcunov@gmail.com
State Rejected
Series "seccomp, v2: Add support for per-thread c/r"
Headers show

Commit Message

Cyrill Gorcunov April 6, 2018, 7:17 p.m.
Currently we pretend that all threads share seccomp chains,
and at checkpoint time we test the seccomp modes to make sure
this assumption is valid, refusing to dump otherwise.

Still, the kernel tracks seccomp filter chains per thread,
and we've now faced applications (such as java) where per-thread
chains are actively used. Thus we need to support handling
filters on a per-thread basis.

In this somewhat intrusive patch the restore engine is reworked
to treat each thread separately. Here is what is done:

 - Image core file is modified to keep seccomp filters
   inside thread_core_entry. For backward compatibility
   former seccomp_mode and seccomp_filter members in
   task_core_entry are renamed to have old_ prefix and
   on restore we test if we're dealing with old images.

   Since per-thread dump is not yet implemented, the
   dumping procedure continues operating with old_ members.

 - In pie restorer code the memory containing filters is addressed
   from inside the thread_restore_args structure, which now
   contains the seccomp mode itself and chain attributes
   (number of filters, etc).

   Reading of per-thread data is done in the seccomp_prepare_threads
   helper -- we take one pstree_item and walk over every thread
   inside it to allocate pie memory and pin data there.

   Because of PIE specifics, before jumping into pie code
   we have to relocate this memory to its new place;
   seccomp_rst_reloc serves this purpose.

   In the restorer itself we check whether thread_restore_args
   carries an enabled seccomp mode (strict or filter) and call
   restore_seccomp_filter if needed.

 - To unify names we start using the seccomp_ prefix for all related
   stuff involved in this change (prepare_seccomp_filters is renamed
   to seccomp_read_image because it only reads the image and nothing
   more; the image handler is renamed to seccomp_img_entry instead
   of the too short 'se').

With this change we're now able to start collecting and
dumping seccomp filters per thread, which will be
done in the next patch.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
---
 criu/cr-dump.c          |   8 +--
 criu/cr-restore.c       |  33 ++++++++---
 criu/include/restorer.h |  16 +++++-
 criu/include/rst_info.h |   5 ++
 criu/include/seccomp.h  |  10 +++-
 criu/pie/restorer.c     | 119 +++++++++++++++++++++++++-------------
 criu/seccomp.c          | 149 ++++++++++++++++++++++++++++++++++--------------
 images/core.proto       |   8 ++-
 8 files changed, 245 insertions(+), 103 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/cr-dump.c b/criu/cr-dump.c
index 094d016489ea..4960a7b279d2 100644
--- a/criu/cr-dump.c
+++ b/criu/cr-dump.c
@@ -762,12 +762,12 @@  static int dump_task_core_all(struct parasite_ctl *ctl,
 	creds = dmpi(item)->pi_creds;
 	if (creds->s.seccomp_mode != SECCOMP_MODE_DISABLED) {
 		pr_info("got seccomp mode %d for %d\n", creds->s.seccomp_mode, vpid(item));
-		core->tc->has_seccomp_mode = true;
-		core->tc->seccomp_mode = creds->s.seccomp_mode;
+		core->tc->has_old_seccomp_mode = true;
+		core->tc->old_seccomp_mode = creds->s.seccomp_mode;
 
 		if (creds->s.seccomp_mode == SECCOMP_MODE_FILTER) {
-			core->tc->has_seccomp_filter = true;
-			core->tc->seccomp_filter = creds->last_filter;
+			core->tc->has_old_seccomp_filter = true;
+			core->tc->old_seccomp_filter = creds->last_filter;
 		}
 	}
 
diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index db913b2dae2e..1404a3ee87c5 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -331,7 +331,7 @@  static int root_prepare_shared(void)
 	if (prepare_remaps())
 		return -1;
 
-	if (prepare_seccomp_filters())
+	if (seccomp_read_image())
 		return -1;
 
 	if (collect_images(cinfos, ARRAY_SIZE(cinfos)))
@@ -1031,7 +1031,7 @@  static int restore_one_alive_task(int pid, CoreEntry *core)
 	if (prepare_timerfds(ta))
 		return -1;
 
-	if (seccomp_filters_get_rst_pos(core, ta) < 0)
+	if (seccomp_prepare_threads(current, ta) < 0)
 		return -1;
 
 	if (prepare_itimers(pid, ta, core) < 0)
@@ -1236,6 +1236,21 @@  static int check_core(CoreEntry *core, struct pstree_item *me)
 			pr_err("Core info data missed for non-zombie\n");
 			goto out;
 		}
+
+		/*
+		 * Seccomp are moved to per-thread origin,
+		 * so for old images we need to move per-task
+		 * data into proper place.
+		 */
+		if (core->tc->has_old_seccomp_mode) {
+			core->thread_core->has_seccomp_mode = core->tc->has_old_seccomp_mode;
+			core->thread_core->seccomp_mode = core->tc->old_seccomp_mode;
+		}
+		if (core->tc->has_old_seccomp_filter) {
+			core->thread_core->has_seccomp_filter = core->tc->has_old_seccomp_filter;
+			core->thread_core->seccomp_filter = core->tc->old_seccomp_filter;
+			rsti(me)->has_old_seccomp_filter = true;
+		}
 	}
 
 	ret = 0;
@@ -1505,13 +1520,16 @@  static inline int fork_with_pid(struct pstree_item *item)
 		item->pid->state = ca.core->tc->task_state;
 		rsti(item)->cg_set = ca.core->tc->cg_set;
 
-		rsti(item)->has_seccomp = ca.core->tc->seccomp_mode != SECCOMP_MODE_DISABLED;
-
 		if (item->pid->state != TASK_DEAD && !task_alive(item)) {
 			pr_err("Unknown task state %d\n", item->pid->state);
 			return -1;
 		}
 
+		if (item->pid->state != TASK_DEAD)
+			rsti(item)->has_seccomp = ca.core->thread_core->seccomp_mode != SECCOMP_MODE_DISABLED;
+		else
+			rsti(item)->has_seccomp = false;
+
 		if (unlikely(item == root_item))
 			maybe_clone_parent(item, &ca);
 	} else {
@@ -3654,12 +3672,8 @@  static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 	RST_MEM_FIXUP_PPTR(task_args->rlims);
 	RST_MEM_FIXUP_PPTR(task_args->helpers);
 	RST_MEM_FIXUP_PPTR(task_args->zombies);
-	RST_MEM_FIXUP_PPTR(task_args->seccomp_filters);
 	RST_MEM_FIXUP_PPTR(task_args->vma_ios);
 
-	if (core->tc->has_seccomp_mode)
-		task_args->seccomp_mode = core->tc->seccomp_mode;
-
 	task_args->compatible_mode = core_is_compat(core);
 
 	if (opts.check_only)
@@ -3749,6 +3763,9 @@  static int sigreturn_restore(pid_t pid, struct task_restore_args *task_args, uns
 		if (ret)
 			goto err;
 
+		seccomp_rst_reloc(&thread_args[i]);
+		thread_args[i].seccomp_force_tsync = rsti(current)->has_old_seccomp_filter;
+
 		thread_args[i].mz = mz + i;
 		sigframe = (struct rt_sigframe *)&mz[i].rt_sigframe;
 
diff --git a/criu/include/restorer.h b/criu/include/restorer.h
index 15307d9c0701..b75d6687ba2d 100644
--- a/criu/include/restorer.h
+++ b/criu/include/restorer.h
@@ -4,6 +4,7 @@ 
 #include <signal.h>
 #include <limits.h>
 #include <sys/resource.h>
+#include <linux/filter.h>
 
 #include "common/config.h"
 #include "types.h"
@@ -76,6 +77,11 @@  struct thread_creds_args {
 	unsigned long			mem_pos_next;
 };
 
+struct thread_seccomp_filter {
+	struct sock_fprog		sock_fprog;
+	unsigned int			flags;
+};
+
 struct thread_restore_args {
 	struct restore_mem_zone		*mz;
 
@@ -100,6 +106,13 @@  struct thread_restore_args {
 
 	bool				check_only;
 	struct thread_creds_args	*creds_args;
+
+	int				seccomp_mode;
+	unsigned long			seccomp_filters_pos;
+	struct thread_seccomp_filter	*seccomp_filters;
+	void				*seccomp_filters_data;
+	unsigned int			seccomp_filters_n;
+	bool				seccomp_force_tsync;
 } __aligned(64);
 
 typedef long (*thread_restore_fcall_t) (struct thread_restore_args *args);
@@ -163,9 +176,6 @@  struct task_restore_args {
 	pid_t				*zombies;
 	unsigned int			zombies_n;
 
-	struct sock_fprog		*seccomp_filters;
-	unsigned int			seccomp_filters_n;
-
 	/* * * * * * * * * * * * * * * * * * * * */
 
 	unsigned long			task_size;
diff --git a/criu/include/rst_info.h b/criu/include/rst_info.h
index f9840d1681ff..07c634f4adfd 100644
--- a/criu/include/rst_info.h
+++ b/criu/include/rst_info.h
@@ -61,6 +61,11 @@  struct rst_info {
 	 * restorer blob.
 	 */
 	bool			has_seccomp;
+	/*
+	 * To be compatible with old images where filters
+	 * are bound to group leader and we need to use tsync flag.
+	 */
+	bool			has_old_seccomp_filter;
 
 	bool			has_thp_enabled;
 
diff --git a/criu/include/seccomp.h b/criu/include/seccomp.h
index b50ea34e20bb..0791597fefd6 100644
--- a/criu/include/seccomp.h
+++ b/criu/include/seccomp.h
@@ -27,6 +27,9 @@ 
 #define SECCOMP_FILTER_FLAG_TSYNC 1
 #endif
 
+struct thread_restore_args;
+struct task_restore_args;
+
 struct seccomp_info {
 	struct seccomp_info	*prev;
 	int			id;
@@ -35,6 +38,9 @@  struct seccomp_info {
 
 extern int collect_seccomp_filters(void);
 extern int prepare_seccomp_filters(void);
-struct task_restore_args;
-extern int seccomp_filters_get_rst_pos(CoreEntry *item, struct task_restore_args *);
+
+extern int seccomp_read_image(void);
+extern int seccomp_prepare_threads(struct pstree_item *item, struct task_restore_args *ta);
+extern void seccomp_rst_reloc(struct thread_restore_args *thread_arg);
+
 #endif
diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c
index 091026103805..79a06d880f59 100644
--- a/criu/pie/restorer.c
+++ b/criu/pie/restorer.c
@@ -395,54 +395,87 @@  static int restore_signals(siginfo_t *ptr, int nr, bool group)
 	return 0;
 }
 
-static int restore_seccomp(struct task_restore_args *args)
+static int restore_seccomp_filter(pid_t tid, struct thread_restore_args *args)
 {
+	unsigned int flags = args->seccomp_force_tsync ? SECCOMP_FILTER_FLAG_TSYNC : 0;
+	size_t i;
 	int ret;
 
-	switch (args->seccomp_mode) {
-	case SECCOMP_MODE_DISABLED:
-		return 0;
-	case SECCOMP_MODE_STRICT:
-		ret = sys_prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
-		if (ret < 0) {
-			pr_err("prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT) returned %d\n", ret);
-			goto die;
-		}
-		return 0;
-	case SECCOMP_MODE_FILTER: {
-		int i;
-		void *filter_data;
+	for (i = 0; i < args->seccomp_filters_n; i++) {
+		struct thread_seccomp_filter *filter = &args->seccomp_filters[i];
 
-		filter_data = &args->seccomp_filters[args->seccomp_filters_n];
+		pr_debug("seccomp: Restoring mode %d flags %x on tid %d filter %d\n",
+			 SECCOMP_SET_MODE_FILTER, (filter->flags | flags), tid, (int)i);
 
-		for (i = 0; i < args->seccomp_filters_n; i++) {
-			struct sock_fprog *fprog = &args->seccomp_filters[i];
+		ret = sys_seccomp(SECCOMP_SET_MODE_FILTER, filter->flags | flags, (void *)&filter->sock_fprog);
+		if (ret < 0) {
+			if (ret == -ENOSYS) {
+				pr_debug("seccomp: sys_seccomp is not supported in kernel, "
+					 "switching to prctl interface\n");
+				ret = sys_prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
+						(long)(void *)&filter->sock_fprog, 0, 0);
+				if (ret) {
+					pr_err("seccomp: PR_SET_SECCOMP returned %d on tid %d\n",
+					       ret, tid);
+					return -1;
+				}
+			} else {
+				pr_err("seccomp: SECCOMP_SET_MODE_FILTER returned %d on tid %d\n",
+				       ret, tid);
+				return -1;
+			}
+		}
+	}
 
-			fprog->filter = filter_data;
+	return 0;
+}
 
-			/* We always TSYNC here, since we require that the
-			 * creds for all threads be the same; this means we
-			 * don't have to restore_seccomp() in threads, and that
-			 * future TSYNC behavior will be correct.
-			 */
-			ret = sys_seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, (char *) fprog);
-			if (ret < 0) {
-				pr_err("sys_seccomp() returned %d\n", ret);
-				goto die;
-			}
+static int restore_seccomp(struct thread_restore_args *args)
+{
+	pid_t tid = 0;
+	int ret, i;
 
-			filter_data += fprog->len * sizeof(struct sock_filter);
+	for (i = 0; i < MAX_NS_NESTING; i++) {
+		if (args->pid[i] == 0) {
+			tid = args->pid[i - 1];
+			break;
 		}
+	}
 
-		return 0;
+	if (tid != sys_gettid()) {
+		pr_err("seccomp: Unexpected tid %d != %d\n",
+		       tid, (pid_t)sys_gettid());
+		return -1;
 	}
+
+	switch (args->seccomp_mode) {
+	case SECCOMP_MODE_DISABLED:
+		pr_debug("seccomp: mode %d on tid %d\n", SECCOMP_MODE_DISABLED, tid);
+		return 0;
+		break;
+	case SECCOMP_MODE_STRICT:
+		ret = sys_prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
+		if (ret < 0) {
+			pr_err("seccomp: SECCOMP_MODE_STRICT returned %d on tid %d\n",
+			       ret, tid);
+		}
+		break;
+	case SECCOMP_MODE_FILTER:
+		ret = restore_seccomp_filter(tid, args);
+		break;
 	default:
-		goto die;
+		pr_err("seccomp: Unknown seccomp mode %d on tid %d\n",
+		       args->seccomp_mode, tid);
+		ret = -1;
+		break;
 	}
 
-	return 0;
-die:
-	return -1;
+	if (!ret) {
+		pr_debug("seccomp: Restored mode %d on tid %d\n",
+			 args->seccomp_mode, tid);
+	}
+
+	return ret;
 }
 
 static int restore_robust_futex(struct thread_restore_args *args)
@@ -541,6 +574,13 @@  long __export_restore_thread(struct thread_restore_args *args)
 		sys_close(fd);
 	}
 
+	/*
+	 * Make sure it's before creds, since it's privileged
+	 * operation bound to uid 0 in current user ns.
+	 */
+	if (restore_seccomp(args))
+		goto core_restore_end;
+
 	ret = restore_creds(args->creds_args, args->ta->proc_fd);
 	if (ret)
 		goto core_restore_end;
@@ -559,9 +599,6 @@  long __export_restore_thread(struct thread_restore_args *args)
 	restore_finish_stage(task_entries_local, CR_STATE_RESTORE_SIGCHLD);
 	restore_pdeath_sig(args);
 
-	if (args->ta->seccomp_mode != SECCOMP_MODE_DISABLED)
-		pr_info("Restoring seccomp mode %d for %ld\n", args->ta->seccomp_mode, sys_getpid());
-
 	restore_finish_stage(task_entries_local, CR_STATE_RESTORE_CREDS);
 
 	futex_dec_and_wake(&thread_inprogress);
@@ -1677,11 +1714,11 @@  long __export_restore_task(struct task_restore_args *args)
 		sys_close(fd);
 	}
 
-	/* The kernel restricts setting seccomp to uid 0 in the current user
-	 * ns, so we must do this before restore_creds.
+	/*
+	 * Make sure it's before creds, since it's privileged
+	 * operation bound to uid 0 in current user ns.
 	 */
-	pr_info("restoring seccomp mode %d for %ld\n", args->seccomp_mode, sys_getpid());
-	if (restore_seccomp(args))
+	if (restore_seccomp(args->t))
 		goto core_restore_end;
 
 	/*
diff --git a/criu/seccomp.c b/criu/seccomp.c
index c8cd35f9ae46..8da5a2932e83 100644
--- a/criu/seccomp.c
+++ b/criu/seccomp.c
@@ -21,6 +21,8 @@ 
 #undef	LOG_PREFIX
 #define LOG_PREFIX "seccomp: "
 
+static SeccompEntry *seccomp_img_entry;
+
 /* populated on dump during collect_seccomp_filters() */
 static int next_filter_id = 0;
 static struct seccomp_info **filters = NULL;
@@ -233,10 +235,8 @@  int collect_seccomp_filters(void)
 	return 0;
 }
 
-/* Populated on restore by prepare_seccomp_filters */
-static SeccompEntry *se;
-
-int prepare_seccomp_filters(void)
+/* The seccomp_img_entry will be shared between all children */
+int seccomp_read_image(void)
 {
 	struct cr_img *img;
 	int ret;
@@ -245,66 +245,129 @@  int prepare_seccomp_filters(void)
 	if (!img)
 		return -1;
 
-	ret = pb_read_one_eof(img, &se, PB_SECCOMP);
+	ret = pb_read_one_eof(img, &seccomp_img_entry, PB_SECCOMP);
 	close_image(img);
 	if (ret <= 0)
 		return 0; /* there were no filters */
 
-	BUG_ON(!se);
+	BUG_ON(!seccomp_img_entry);
 
 	return 0;
 }
 
-int seccomp_filters_get_rst_pos(CoreEntry *core, struct task_restore_args *ta)
+/* seccomp_img_entry will be freed per-children after forking */
+static void free_seccomp_filters(void)
 {
-	SeccompFilter *sf = NULL;
-	struct sock_fprog *arr = NULL;
-	void *filter_data = NULL;
-	int ret = -1, i, n_filters;
-	size_t filter_size = 0;
+	if (seccomp_img_entry) {
+		seccomp_entry__free_unpacked(seccomp_img_entry, NULL);
+		seccomp_img_entry = NULL;
+	}
+}
 
-	ta->seccomp_filters_n = 0;
+void seccomp_rst_reloc(struct thread_restore_args *args)
+{
+	size_t j, off;
 
-	if (!core->tc->has_seccomp_filter)
-		return 0;
+	if (!args->seccomp_filters_n)
+		return;
 
-	ta->seccomp_filters = (struct sock_fprog *)rst_mem_align_cpos(RM_PRIVATE);
+	args->seccomp_filters = rst_mem_remap_ptr(args->seccomp_filters_pos, RM_PRIVATE);
+	args->seccomp_filters_data = (void *)args->seccomp_filters +
+			args->seccomp_filters_n * sizeof(struct thread_seccomp_filter);
 
-	BUG_ON(core->tc->seccomp_filter > se->n_seccomp_filters);
-	sf = se->seccomp_filters[core->tc->seccomp_filter];
+	for (j = off = 0; j < args->seccomp_filters_n; j++) {
+		struct thread_seccomp_filter *f = &args->seccomp_filters[j];
 
-	while (1) {
-		ta->seccomp_filters_n++;
-		filter_size += sf->filter.len;
+		f->sock_fprog.filter = args->seccomp_filters_data + off;
+		off += f->sock_fprog.len * sizeof(struct sock_filter);
+	}
+}
 
-		if (!sf->has_prev)
-			break;
+int seccomp_prepare_threads(struct pstree_item *item, struct task_restore_args *ta)
+{
+	struct thread_restore_args *args_array = (struct thread_restore_args *)(&ta[1]);
+	size_t i, j, nr_filters, filters_size, rst_size, off;
 
-		sf = se->seccomp_filters[sf->prev];
-	}
+	for (i = 0; i < item->nr_threads; i++) {
+		ThreadCoreEntry *thread_core = item->core[i]->thread_core;
+		struct thread_restore_args *args = &args_array[i];
+		SeccompFilter *sf;
 
-	n_filters = ta->seccomp_filters_n;
-	arr = rst_mem_alloc(sizeof(struct sock_fprog) * n_filters + filter_size, RM_PRIVATE);
-	if (!arr)
-		goto out;
+		args->seccomp_mode		= SECCOMP_MODE_DISABLED;
+		args->seccomp_filters_pos	= 0;
+		args->seccomp_filters_n		= 0;
+		args->seccomp_filters		= NULL;
+		args->seccomp_filters_data	= NULL;
 
-	filter_data = &arr[n_filters];
-	sf = se->seccomp_filters[core->tc->seccomp_filter];
-	for (i = 0; i < n_filters; i++) {
-		struct sock_fprog *fprog = &arr[i];
+		if (thread_core->has_seccomp_mode)
+			args->seccomp_mode = thread_core->seccomp_mode;
 
-		BUG_ON(sf->filter.len % sizeof(struct sock_filter));
-		fprog->len = sf->filter.len / sizeof(struct sock_filter);
+		if (args->seccomp_mode != SECCOMP_MODE_FILTER)
+			continue;
 
-		memcpy(filter_data, sf->filter.data, sf->filter.len);
+		if (thread_core->seccomp_filter >= seccomp_img_entry->n_seccomp_filters) {
+			pr_err("Corrupted filter index on tid %d (%u > %zu)\n",
+			       item->threads[i]->ns[0].virt, thread_core->seccomp_filter,
+			       seccomp_img_entry->n_seccomp_filters);
+			return -1;
+		}
 
-		filter_data += sf->filter.len;
-		sf = se->seccomp_filters[sf->prev];
-	}
+		sf = seccomp_img_entry->seccomp_filters[thread_core->seccomp_filter];
+		if (sf->filter.len % (sizeof(struct sock_filter))) {
+			pr_err("Corrupted filter len on tid %d (index %u)\n",
+			       item->threads[i]->ns[0].virt,
+			       thread_core->seccomp_filter);
+			return -1;
+		}
+		filters_size = sf->filter.len;
+		nr_filters = 1;
+
+		while (sf->has_prev) {
+			if (sf->prev >= seccomp_img_entry->n_seccomp_filters) {
+				pr_err("Corrupted filter index on tid %d (%u > %zu)\n",
+				       item->threads[i]->ns[0].virt, sf->prev,
+				       seccomp_img_entry->n_seccomp_filters);
+				return -1;
+			}
 
-	ret = 0;
+			sf = seccomp_img_entry->seccomp_filters[sf->prev];
+			if (sf->filter.len % (sizeof(struct sock_filter))) {
+				pr_err("Corrupted filter len on tid %d (index %u)\n",
+				       item->threads[i]->ns[0].virt, sf->prev);
+				return -1;
+			}
+			filters_size += sf->filter.len;
+			nr_filters++;
+		}
 
-out:
-	seccomp_entry__free_unpacked(se, NULL);
-	return ret;
+		args->seccomp_filters_n = nr_filters;
+
+		rst_size = filters_size + nr_filters * sizeof(struct thread_seccomp_filter);
+		args->seccomp_filters_pos = rst_mem_align_cpos(RM_PRIVATE);
+		args->seccomp_filters = rst_mem_alloc(rst_size, RM_PRIVATE);
+		if (!args->seccomp_filters) {
+			pr_err("Can't allocate %zu bytes for filters on tid %d\n",
+			       rst_size, item->threads[i]->ns[0].virt);
+			return -ENOMEM;
+		}
+		args->seccomp_filters_data = (void *)args->seccomp_filters +
+			nr_filters * sizeof(struct thread_seccomp_filter);
+
+		sf = seccomp_img_entry->seccomp_filters[thread_core->seccomp_filter];
+		for (j = off = 0; j < nr_filters; j++) {
+			struct thread_seccomp_filter *f = &args->seccomp_filters[j];
+
+			f->sock_fprog.len	= sf->filter.len / sizeof(struct sock_filter);
+			f->sock_fprog.filter	= args->seccomp_filters_data + off;
+			f->flags		= sf->flags;
+
+			memcpy(f->sock_fprog.filter, sf->filter.data, sf->filter.len);
+
+			off += sf->filter.len;
+			sf = seccomp_img_entry->seccomp_filters[sf->prev];
+		}
+	}
+
+	free_seccomp_filters();
+	return 0;
 }
diff --git a/images/core.proto b/images/core.proto
index 0291fae68ea8..726803646444 100644
--- a/images/core.proto
+++ b/images/core.proto
@@ -40,8 +40,9 @@  message task_core_entry {
 
 	optional signal_queue_entry	signals_s	= 10;
 
-	optional seccomp_mode		seccomp_mode	= 11;
-	optional uint32			seccomp_filter	= 12;
+	/* These two are deprecated, should be per-thread */
+	optional seccomp_mode	old_seccomp_mode	= 11;
+	optional uint32		old_seccomp_filter	= 12;
 
 	optional uint32			loginuid	= 13;
 
@@ -87,6 +88,9 @@  message thread_core_entry {
 
 	optional signal_queue_entry	signals_p	= 9;
 	optional creds_entry		creds		= 10;
+
+	optional seccomp_mode		seccomp_mode	= 11;
+	optional uint32			seccomp_filter	= 12;
 }
 
 message task_rlimits_entry {