[Devel,rh7] seccomp, ptrace: Save original BPF program when setting the filer

Submitted by Kirill Gorkunov on Dec. 7, 2016, 5:29 p.m.

Details

Message ID 20161207172922.GC3015@uranus
State New
Series "seccomp, ptrace: Save original BPF program when setting the filer"
Headers show

Commit Message

Kirill Gorkunov Dec. 7, 2016, 5:29 p.m.
The vanilla kernel is quite reworked in filter management, in particular
the filters passed into sockets or seccomp are saved in the userspace form
as struct bpf_prog::orig_prog. We can't port all the patches right now,
lets rather do a trick for seccomp sake and simply carry a copy inside
struct seccomp_filter. The socket filters are decoded into userspace form
anyway so this area is safe.

https://jira.sw.ru/browse/PSBM-55593

CC: Andrey Vagin <avagin@openvz.org>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
---
The patch is on top of [PATCH rh7] seccomp, ptrace: Fix typo in filter fetching

 kernel/seccomp.c |   25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

Patch hide | download patch | download mbox

Index: linux-pcs7.git/kernel/seccomp.c
===================================================================
--- linux-pcs7.git.orig/kernel/seccomp.c
+++ linux-pcs7.git/kernel/seccomp.c
@@ -54,6 +54,9 @@ 
 struct seccomp_filter {
 	atomic_t usage;
 	struct seccomp_filter *prev;
+#if CONFIG_VE
+	struct sock_fprog orig_prog;
+#endif
 	unsigned short len;  /* Instruction count */
 	struct sock_filter insns[];
 };
@@ -265,6 +268,16 @@  static long seccomp_attach_filter(struct
 	if (copy_from_user(filter->insns, fprog->filter, fp_size))
 		goto fail;
 
+#if CONFIG_VE
+	filter->orig_prog.len = fprog->len;
+	filter->orig_prog.filter = kmemdup(filter->insns, fp_size,
+					   GFP_KERNEL|__GFP_NOWARN);
+	if (!filter->orig_prog.filter) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+#endif
+
 	/* Check and rewrite the fprog via the skb checker */
 	ret = sk_chk_filter(filter->insns, filter->len);
 	if (ret)
@@ -283,6 +296,9 @@  static long seccomp_attach_filter(struct
 	current->seccomp.filter = filter;
 	return 0;
 fail:
+#if CONFIG_VE
+	kfree(filter->orig_prog.filter);
+#endif
 	kfree(filter);
 	return ret;
 }
@@ -332,6 +348,9 @@  void put_seccomp_filter(struct task_stru
 	while (orig && atomic_dec_and_test(&orig->usage)) {
 		struct seccomp_filter *freeme = orig;
 		orig = orig->prev;
+#if CONFIG_VE
+		kfree(freeme->orig_prog.filter);
+#endif
 		kfree(freeme);
 	}
 }
@@ -566,8 +585,14 @@  long seccomp_get_filter(struct task_stru
 	get_seccomp_filter(task);
 	spin_unlock_irq(&task->sighand->siglock);
 
+#if CONFIG_VE
+	if (copy_to_user(data, filter->orig_prog.filter,
+			 filter->orig_prog.len * sizeof(filter->orig_prog.filter[0])))
+		ret = -EFAULT;
+#else
 	if (copy_to_user(data, filter->insns, filter->len * sizeof(filter->insns[0])))
 		ret = -EFAULT;
+#endif
 
 	put_seccomp_filter(task);
 	return ret;

Comments

Konstantin Khorenko Dec. 8, 2016, 1:28 p.m.
Andrey, i'm committing this, but please, review anyway.

--
Best regards,

Konstantin Khorenko,
Virtuozzo Linux Kernel Team

On 12/07/2016 08:29 PM, Cyrill Gorcunov wrote:
> The vanilla kernel is quite reworked in filter management, in particular
> the filters passed into sockets or seccomp are saved in the userspace form
> as struct bpf_prog::orig_prog. We can't port all the patches right now,
> lets rather do a trick for seccomp sake and simply carry a copy inside
> struct seccomp_filter. The socket filters are decoded into userspace form
> anyway so this area is safe.
>
> https://jira.sw.ru/browse/PSBM-55593
>
> CC: Andrey Vagin <avagin@openvz.org>
> Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
> ---
> The patch is on top of [PATCH rh7] seccomp, ptrace: Fix typo in filter fetching
>
>  kernel/seccomp.c |   25 +++++++++++++++++++++++++
>  1 file changed, 25 insertions(+)
>
> Index: linux-pcs7.git/kernel/seccomp.c
> ===================================================================
> --- linux-pcs7.git.orig/kernel/seccomp.c
> +++ linux-pcs7.git/kernel/seccomp.c
> @@ -54,6 +54,9 @@
>  struct seccomp_filter {
>  	atomic_t usage;
>  	struct seccomp_filter *prev;
> +#if CONFIG_VE
> +	struct sock_fprog orig_prog;
> +#endif
>  	unsigned short len;  /* Instruction count */
>  	struct sock_filter insns[];
>  };
> @@ -265,6 +268,16 @@ static long seccomp_attach_filter(struct
>  	if (copy_from_user(filter->insns, fprog->filter, fp_size))
>  		goto fail;
>
> +#if CONFIG_VE
> +	filter->orig_prog.len = fprog->len;
> +	filter->orig_prog.filter = kmemdup(filter->insns, fp_size,
> +					   GFP_KERNEL|__GFP_NOWARN);
> +	if (!filter->orig_prog.filter) {
> +		ret = -ENOMEM;
> +		goto fail;
> +	}
> +#endif
> +
>  	/* Check and rewrite the fprog via the skb checker */
>  	ret = sk_chk_filter(filter->insns, filter->len);
>  	if (ret)
> @@ -283,6 +296,9 @@ static long seccomp_attach_filter(struct
>  	current->seccomp.filter = filter;
>  	return 0;
>  fail:
> +#if CONFIG_VE
> +	kfree(filter->orig_prog.filter);
> +#endif
>  	kfree(filter);
>  	return ret;
>  }
> @@ -332,6 +348,9 @@ void put_seccomp_filter(struct task_stru
>  	while (orig && atomic_dec_and_test(&orig->usage)) {
>  		struct seccomp_filter *freeme = orig;
>  		orig = orig->prev;
> +#if CONFIG_VE
> +		kfree(freeme->orig_prog.filter);
> +#endif
>  		kfree(freeme);
>  	}
>  }
> @@ -566,8 +585,14 @@ long seccomp_get_filter(struct task_stru
>  	get_seccomp_filter(task);
>  	spin_unlock_irq(&task->sighand->siglock);
>
> +#if CONFIG_VE
> +	if (copy_to_user(data, filter->orig_prog.filter,
> +			 filter->orig_prog.len * sizeof(filter->orig_prog.filter[0])))
> +		ret = -EFAULT;
> +#else
>  	if (copy_to_user(data, filter->insns, filter->len * sizeof(filter->insns[0])))
>  		ret = -EFAULT;
> +#endif
>
>  	put_seccomp_filter(task);
>  	return ret;
> .
>
Andrey Vagin Dec. 8, 2016, 10:04 p.m.
On Wed, Dec 07, 2016 at 08:29:22PM +0300, Cyrill Gorcunov wrote:
> The vanilla kernel is quite reworked in filter management, in particular
> the filters passed into sockets or seccomp are saved in the userspace form
> as struct bpf_prog::orig_prog. We can't port all the patches right now,
> lets rather do a trick for seccomp sake and simply carry a copy inside
> struct seccomp_filter. The socket filters are decoded into userspace form
> anyway so this area is safe.
> 
> https://jira.sw.ru/browse/PSBM-55593
> 
> CC: Andrey Vagin <avagin@openvz.org>
> Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
> ---
> The patch is on top of [PATCH rh7] seccomp, ptrace: Fix typo in filter fetching
> 
>  kernel/seccomp.c |   25 +++++++++++++++++++++++++
>  1 file changed, 25 insertions(+)
> 
> Index: linux-pcs7.git/kernel/seccomp.c
> ===================================================================
> --- linux-pcs7.git.orig/kernel/seccomp.c
> +++ linux-pcs7.git/kernel/seccomp.c
> @@ -54,6 +54,9 @@
>  struct seccomp_filter {
>  	atomic_t usage;
>  	struct seccomp_filter *prev;
> +#if CONFIG_VE

Why do you not use CONFIG_CHECKPOINT_RESTORE here?

> +	struct sock_fprog orig_prog;
> +#endif
>  	unsigned short len;  /* Instruction count */
>  	struct sock_filter insns[];
>  };
> @@ -265,6 +268,16 @@ static long seccomp_attach_filter(struct
>  	if (copy_from_user(filter->insns, fprog->filter, fp_size))
>  		goto fail;
>  
> +#if CONFIG_VE
> +	filter->orig_prog.len = fprog->len;
> +	filter->orig_prog.filter = kmemdup(filter->insns, fp_size,
> +					   GFP_KERNEL|__GFP_NOWARN);
> +	if (!filter->orig_prog.filter) {
> +		ret = -ENOMEM;
> +		goto fail;
> +	}
> +#endif
> +
>  	/* Check and rewrite the fprog via the skb checker */
>  	ret = sk_chk_filter(filter->insns, filter->len);
>  	if (ret)
> @@ -283,6 +296,9 @@ static long seccomp_attach_filter(struct
>  	current->seccomp.filter = filter;
>  	return 0;
>  fail:
> +#if CONFIG_VE
> +	kfree(filter->orig_prog.filter);
> +#endif
>  	kfree(filter);
>  	return ret;
>  }
> @@ -332,6 +348,9 @@ void put_seccomp_filter(struct task_stru
>  	while (orig && atomic_dec_and_test(&orig->usage)) {
>  		struct seccomp_filter *freeme = orig;
>  		orig = orig->prev;
> +#if CONFIG_VE
> +		kfree(freeme->orig_prog.filter);
> +#endif
>  		kfree(freeme);
>  	}
>  }
> @@ -566,8 +585,14 @@ long seccomp_get_filter(struct task_stru
>  	get_seccomp_filter(task);
>  	spin_unlock_irq(&task->sighand->siglock);
>  
> +#if CONFIG_VE
> +	if (copy_to_user(data, filter->orig_prog.filter,
> +			 filter->orig_prog.len * sizeof(filter->orig_prog.filter[0])))
> +		ret = -EFAULT;
> +#else
>  	if (copy_to_user(data, filter->insns, filter->len * sizeof(filter->insns[0])))
>  		ret = -EFAULT;
> +#endif
>  
>  	put_seccomp_filter(task);
>  	return ret;
Kirill Gorkunov Dec. 8, 2016, 10:11 p.m.
On Thu, Dec 08, 2016 at 02:04:19PM -0800, Andrey Vagin wrote:
> > 
> > Index: linux-pcs7.git/kernel/seccomp.c
> > ===================================================================
> > --- linux-pcs7.git.orig/kernel/seccomp.c
> > +++ linux-pcs7.git/kernel/seccomp.c
> > @@ -54,6 +54,9 @@
> >  struct seccomp_filter {
> >  	atomic_t usage;
> >  	struct seccomp_filter *prev;
> > +#if CONFIG_VE
> 
> Why do you not use CONFIG_CHECKPOINT_RESTORE here?

Because it's custom patch, unrelated to vanilla, we have
CONFIG_CHECKPOINT_RESTORE always set in our config. I could
use both CONFIG_ here but I think it is overhead.