[v2] net: Create child with CLONE_VM in prepare_net_namespaces()

Submitted by Kirill Tkhai on March 15, 2017, 4:56 p.m.

Details

Message ID 148959697944.376.17417225370238148825.stgit@localhost.localdomain
State New
Series "net: Create child with CLONE_VM in prepare_net_namespaces()"
Headers show

Commit Message

Kirill Tkhai March 15, 2017, 4:56 p.m.
Some functions in prepare_net_ns() use vmalloc(), and this
memory should be visible to our children.

v2: No functional changes, just killed "continue".

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 criu/net.c |   21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/net.c b/criu/net.c
index 309e2bd66..d40ca7f11 100644
--- a/criu/net.c
+++ b/criu/net.c
@@ -1756,20 +1756,27 @@  static int create_net_ns(void *arg)
 
 int prepare_net_namespaces()
 {
-	char stack[128] __stack_aligned__;
+	int status, stack_size;
 	struct ns_id *nsid;
-	int status;
+	char *stack;
 	pid_t pid;
 
 	if (!(root_ns_mask & CLONE_NEWNET))
 		return 0;
 
+	stack_size = 2 * 1024 * 1024;
+	stack = mmap(NULL, stack_size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (stack == MAP_FAILED) {
+		pr_perror("Can't allocate stack");
+		return -1;
+	}
+
 	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
 		if (nsid->nd != &net_ns_desc)
 			continue;
 
 		if (root_user_ns && nsid->user_ns != root_user_ns) {
-			pid = clone(create_net_ns, stack + 128, SIGCHLD, nsid);
+			pid = clone(create_net_ns, stack + stack_size, CLONE_VM | CLONE_FILES | SIGCHLD, nsid);
 			if (pid < 0) {
 				pr_perror("Can't clone");
 				goto err;
@@ -1778,13 +1785,15 @@  int prepare_net_namespaces()
 				pr_perror("Child process waiting %d", status);
 				goto err;
 			}
-			continue;
+		} else {
+			if (do_create_net_ns(nsid))
+				goto err;
+
 		}
 
-		if (do_create_net_ns(nsid))
-			goto err;
 	}
 
+	munmap(stack, stack_size);
 	close_service_fd(NS_FD_OFF);
 
 	return 0;

Comments

Kirill Tkhai March 16, 2017, 9:04 a.m.
Unrelated alpine build environment problem.

On 15.03.2017 21:57, Patchwork wrote:
> == Series Details ==
> 
> Series: net: Create child with CLONE_VM in prepare_net_namespaces() (rev2)
> URL   : https://patchwork.criu.org/series/1329/
> State : failure
> 
> == Logs ==
> 
> For more details see: https://travis-ci.org/criupatchwork/criu/builds/211457100
>
Andrey Vagin March 17, 2017, 5:52 p.m.
On Wed, Mar 15, 2017 at 07:56:31PM +0300, Kirill Tkhai wrote:
> Some functions in prepare_net_ns() use vmalloc(), and this
> memory should be visible to our children.

Why do our tests not fail? Can we fix them to not skip this issue?

> 
> v2: No functional changes, just killed "continue".
> 
> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> ---
>  criu/net.c |   21 +++++++++++++++------
>  1 file changed, 15 insertions(+), 6 deletions(-)
> 
> diff --git a/criu/net.c b/criu/net.c
> index 309e2bd66..d40ca7f11 100644
> --- a/criu/net.c
> +++ b/criu/net.c
> @@ -1756,20 +1756,27 @@ static int create_net_ns(void *arg)
>  
>  int prepare_net_namespaces()
>  {
> -	char stack[128] __stack_aligned__;
> +	int status, stack_size;
>  	struct ns_id *nsid;
> -	int status;
> +	char *stack;
>  	pid_t pid;
>  
>  	if (!(root_ns_mask & CLONE_NEWNET))
>  		return 0;
>  
> +	stack_size = 2 * 1024 * 1024;
> +	stack = mmap(NULL, stack_size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +	if (stack == MAP_FAILED) {
> +		pr_perror("Can't allocate stack");
> +		return -1;
> +	}
> +
>  	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
>  		if (nsid->nd != &net_ns_desc)
>  			continue;
>  
>  		if (root_user_ns && nsid->user_ns != root_user_ns) {
> -			pid = clone(create_net_ns, stack + 128, SIGCHLD, nsid);
> +			pid = clone(create_net_ns, stack + stack_size, CLONE_VM | CLONE_FILES | SIGCHLD, nsid);
>  			if (pid < 0) {
>  				pr_perror("Can't clone");
>  				goto err;
> @@ -1778,13 +1785,15 @@ int prepare_net_namespaces()
>  				pr_perror("Child process waiting %d", status);
>  				goto err;
>  			}
> -			continue;
> +		} else {
> +			if (do_create_net_ns(nsid))
> +				goto err;
> +
>  		}
>  
> -		if (do_create_net_ns(nsid))
> -			goto err;
>  	}
>  
> +	munmap(stack, stack_size);
>  	close_service_fd(NS_FD_OFF);
>  
>  	return 0;
>
Kirill Tkhai March 17, 2017, 6:19 p.m.
On 17.03.2017 20:52, Andrei Vagin wrote:
> On Wed, Mar 15, 2017 at 07:56:31PM +0300, Kirill Tkhai wrote:
>> Some functions in prepare_net_ns() use vmalloc(), and this
>> memory should be visible to our children.
> 
> Why do our tests not fail? Can we fix them to not skip this issue?

I'm not in close relationship with network tests, but I suppose it's possible (by fixing, not via writing a new one).

But sadly at the moment I have 40+ patches for pid_ns and not in a time to dive into this one.
We may keep in mind a test for this for the future. I suppose to do not implement partial cases,
but just to do a feature (test option), which will unshare all namespaces in test_init().
This should cover problems like this.

***
There is one more problem in this function, I wrote. We do not restore net ns after unshare().
Please, look at that one

>>
>> v2: No functional changes, just killed "continue".
>>
>> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
>> ---
>>  criu/net.c |   21 +++++++++++++++------
>>  1 file changed, 15 insertions(+), 6 deletions(-)
>>
>> diff --git a/criu/net.c b/criu/net.c
>> index 309e2bd66..d40ca7f11 100644
>> --- a/criu/net.c
>> +++ b/criu/net.c
>> @@ -1756,20 +1756,27 @@ static int create_net_ns(void *arg)
>>  
>>  int prepare_net_namespaces()
>>  {
>> -	char stack[128] __stack_aligned__;
>> +	int status, stack_size;
>>  	struct ns_id *nsid;
>> -	int status;
>> +	char *stack;
>>  	pid_t pid;
>>  
>>  	if (!(root_ns_mask & CLONE_NEWNET))
>>  		return 0;
>>  
>> +	stack_size = 2 * 1024 * 1024;
>> +	stack = mmap(NULL, stack_size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
>> +	if (stack == MAP_FAILED) {
>> +		pr_perror("Can't allocate stack");
>> +		return -1;
>> +	}
>> +
>>  	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
>>  		if (nsid->nd != &net_ns_desc)
>>  			continue;
>>  
>>  		if (root_user_ns && nsid->user_ns != root_user_ns) {
>> -			pid = clone(create_net_ns, stack + 128, SIGCHLD, nsid);
>> +			pid = clone(create_net_ns, stack + stack_size, CLONE_VM | CLONE_FILES | SIGCHLD, nsid);
>>  			if (pid < 0) {
>>  				pr_perror("Can't clone");
>>  				goto err;
>> @@ -1778,13 +1785,15 @@ int prepare_net_namespaces()
>>  				pr_perror("Child process waiting %d", status);
>>  				goto err;
>>  			}
>> -			continue;
>> +		} else {
>> +			if (do_create_net_ns(nsid))
>> +				goto err;
>> +
>>  		}
>>  
>> -		if (do_create_net_ns(nsid))
>> -			goto err;
>>  	}
>>  
>> +	munmap(stack, stack_size);
>>  	close_service_fd(NS_FD_OFF);
>>  
>>  	return 0;
>>
Andrey Vagin March 17, 2017, 6:35 p.m.
On Fri, Mar 17, 2017 at 09:19:26PM +0300, Kirill Tkhai wrote:
> On 17.03.2017 20:52, Andrei Vagin wrote:
> > On Wed, Mar 15, 2017 at 07:56:31PM +0300, Kirill Tkhai wrote:
> >> Some functions in prepare_net_ns() use vmalloc(), and this
> >> memory should be visible to our children.
> > 
> > Why do our tests not fail? Can we fix them to not skip this issue?
> 
> I'm not in close relationship with network tests, but I suppose it's possible (by fixing, not via writing a new one).
> 
> But sadly at the moment I have 40+ patches for pid_ns and not in a time to dive into this one.
> We may keep in mind a test for this for the future. I suppose to do not implement partial cases,
> but just to do a feature (test option), which will unshare all namespaces in test_init().
> This should cover problems like this.

I don't like this sort of comments. All this logic is a part of
nested-userns support. And I think you need only half an our to make
this test. I know that nobody wants to spend time for tests, but we have
to do that.

> 
> ***
> There is one more problem in this function, I wrote. We do not restore net ns after unshare().
> Please, look at that one

Why do we need to restore netns after unshare()?

> 
> >>
> >> v2: No functional changes, just killed "continue".
> >>
> >> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> >> ---
> >>  criu/net.c |   21 +++++++++++++++------
> >>  1 file changed, 15 insertions(+), 6 deletions(-)
> >>
> >> diff --git a/criu/net.c b/criu/net.c
> >> index 309e2bd66..d40ca7f11 100644
> >> --- a/criu/net.c
> >> +++ b/criu/net.c
> >> @@ -1756,20 +1756,27 @@ static int create_net_ns(void *arg)
> >>  
> >>  int prepare_net_namespaces()
> >>  {
> >> -	char stack[128] __stack_aligned__;
> >> +	int status, stack_size;
> >>  	struct ns_id *nsid;
> >> -	int status;
> >> +	char *stack;
> >>  	pid_t pid;
> >>  
> >>  	if (!(root_ns_mask & CLONE_NEWNET))
> >>  		return 0;
> >>  
> >> +	stack_size = 2 * 1024 * 1024;
> >> +	stack = mmap(NULL, stack_size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> >> +	if (stack == MAP_FAILED) {
> >> +		pr_perror("Can't allocate stack");
> >> +		return -1;
> >> +	}
> >> +
> >>  	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
> >>  		if (nsid->nd != &net_ns_desc)
> >>  			continue;
> >>  
> >>  		if (root_user_ns && nsid->user_ns != root_user_ns) {
> >> -			pid = clone(create_net_ns, stack + 128, SIGCHLD, nsid);
> >> +			pid = clone(create_net_ns, stack + stack_size, CLONE_VM | CLONE_FILES | SIGCHLD, nsid);
> >>  			if (pid < 0) {
> >>  				pr_perror("Can't clone");
> >>  				goto err;
> >> @@ -1778,13 +1785,15 @@ int prepare_net_namespaces()
> >>  				pr_perror("Child process waiting %d", status);
> >>  				goto err;
> >>  			}
> >> -			continue;
> >> +		} else {
> >> +			if (do_create_net_ns(nsid))
> >> +				goto err;
> >> +
> >>  		}
> >>  
> >> -		if (do_create_net_ns(nsid))
> >> -			goto err;
> >>  	}
> >>  
> >> +	munmap(stack, stack_size);
> >>  	close_service_fd(NS_FD_OFF);
> >>  
> >>  	return 0;
> >>
Andrey Vagin March 17, 2017, 7:41 p.m.
On Wed, Mar 15, 2017 at 07:56:31PM +0300, Kirill Tkhai wrote:
> Some functions in prepare_net_ns() use vmalloc(), and this
> memory should be visible to our children.
> 
> v2: No functional changes, just killed "continue".
> 
> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> ---
>  criu/net.c |   21 +++++++++++++++------
>  1 file changed, 15 insertions(+), 6 deletions(-)
> 
> diff --git a/criu/net.c b/criu/net.c
> index 309e2bd66..d40ca7f11 100644
> --- a/criu/net.c
> +++ b/criu/net.c
> @@ -1756,20 +1756,27 @@ static int create_net_ns(void *arg)
>  
>  int prepare_net_namespaces()
>  {
> -	char stack[128] __stack_aligned__;
> +	int status, stack_size;
>  	struct ns_id *nsid;
> -	int status;
> +	char *stack;
>  	pid_t pid;
>  
>  	if (!(root_ns_mask & CLONE_NEWNET))
>  		return 0;
>  
> +	stack_size = 2 * 1024 * 1024;
> +	stack = mmap(NULL, stack_size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +	if (stack == MAP_FAILED) {
> +		pr_perror("Can't allocate stack");
> +		return -1;
> +	}
> +
>  	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
>  		if (nsid->nd != &net_ns_desc)
>  			continue;
>  
>  		if (root_user_ns && nsid->user_ns != root_user_ns) {
> -			pid = clone(create_net_ns, stack + 128, SIGCHLD, nsid);
> +			pid = clone(create_net_ns, stack + stack_size, CLONE_VM | CLONE_FILES | SIGCHLD, nsid);

Can we add CLONE_VFORK and use the current stack?

>  			if (pid < 0) {
>  				pr_perror("Can't clone");
>  				goto err;
> @@ -1778,13 +1785,15 @@ int prepare_net_namespaces()
>  				pr_perror("Child process waiting %d", status);
>  				goto err;
>  			}
> -			continue;
> +		} else {
> +			if (do_create_net_ns(nsid))
> +				goto err;
> +
>  		}
>  
> -		if (do_create_net_ns(nsid))
> -			goto err;
>  	}
>  
> +	munmap(stack, stack_size);
>  	close_service_fd(NS_FD_OFF);
>  
>  	return 0;
>