[v2] ns: Do not reuse PROC_SELF after CLONE_VM child

Submitted by Kirill Tkhai on March 23, 2017, 10:25 a.m.

Details

Message ID 149026470186.19713.7358127591472867674.stgit@localhost.localdomain
State Accepted
Series "ns: Do not reuse PROC_SELF after CLONE_VM child"
Headers show

Commit Message

Kirill Tkhai March 23, 2017, 10:25 a.m.
Child opens PROC_SELF, populates open_proc_self_pid and exits. If parent creates
one more child with the same pid later, the new child will try to reuse PROC_SELF,
set by exited child. So, we need to close PROC_SELF after the first child has finished.

We have this issue in two places, which have the same code. Let's move the code
into new function call_in_child_process() and fix the issue there using close_pid_proc().

https://travis-ci.org/tkhai/criu/builds/214182862

v2: Introduce the helper call_in_child_process() and fix issue there.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 criu/include/util.h |    2 ++
 criu/namespaces.c   |   30 ++----------------------------
 criu/net.c          |   25 ++++---------------------
 criu/util.c         |   34 ++++++++++++++++++++++++++++++++++
 4 files changed, 42 insertions(+), 49 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/include/util.h b/criu/include/util.h
index c1d38c93..0428490b 100644
--- a/criu/include/util.h
+++ b/criu/include/util.h
@@ -309,4 +309,6 @@  extern int epoll_prepare(int nr_events, struct epoll_event **evs);
 
 extern int open_fd_of_real_pid(pid_t pid, int fd, int flags);
 
+extern int call_in_child_process(int (*fn)(void *), void *arg);
+
 #endif /* __CR_UTIL_H__ */
diff --git a/criu/namespaces.c b/criu/namespaces.c
index 77308ccb..7637c34e 100644
--- a/criu/namespaces.c
+++ b/criu/namespaces.c
@@ -993,11 +993,8 @@  static int dump_user_ns(void *arg)
 
 int collect_user_ns(struct ns_id *ns, void *oarg)
 {
-	int status, stack_size;
 	struct ns_id *p_ns;
-	pid_t pid = -1;
 	UsernsEntry *e;
-	char *stack;
 
 	p_ns = ns->parent;
 
@@ -1021,32 +1018,9 @@  int collect_user_ns(struct ns_id *ns, void *oarg)
 		 * we need to enter its parent ns. As entered to user_ns
 		 * task has no a way back, we create a child for that.
 		 * NS_ROOT is dumped w/o clone(), it's xids maps is relatively
-		 * to NS_CRIU. We use CLONE_VM to make child share our memory,
-		 * and to allow us see allocated maps, he do. Child's open_proc()
-		 * may do changes about CRIU's internal files states in memory,
-		 * so pass CLONE_FILES to reflect that.
+		 * to NS_CRIU.
 		 */
-		stack_size = 2 * 1024 * 1024;
-		stack = mmap(NULL, stack_size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-		if (stack == MAP_FAILED) {
-			pr_perror("Can't allocate stack");
-			return -1;
-		}
-		pid = clone(dump_user_ns, stack + stack_size, CLONE_VM | CLONE_FILES | SIGCHLD, ns);
-		if (pid == -1) {
-			pr_perror("Can't clone");
-			return -1;
-		}
-		if (waitpid(pid, &status, 0) != pid) {
-			pr_perror("Unable to wait the %d process", pid);
-			return -1;
-		}
-		if (!WIFEXITED(status) || WEXITSTATUS(status)) {
-			pr_err("Can't dump nested user_ns: %x\n", status);
-			return -1;
-		}
-		munmap(stack, stack_size);
-		return 0;
+		return call_in_child_process(dump_user_ns, ns);
 	} else {
 		if (__dump_user_ns(ns))
 			return -1;
diff --git a/criu/net.c b/criu/net.c
index 45f6fcee..0737a61f 100644
--- a/criu/net.c
+++ b/criu/net.c
@@ -1760,47 +1760,30 @@  static int create_net_ns(void *arg)
 
 int prepare_net_namespaces()
 {
-	int status, stack_size, ret = -1;
 	struct ns_id *nsid;
-	char *stack;
-	pid_t pid;
+	int ret = -1;
 
 	if (!(root_ns_mask & CLONE_NEWNET))
 		return 0;
 
-	stack_size = 2 * 1024 * 1024;
-	stack = mmap(NULL, stack_size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-	if (stack == MAP_FAILED) {
-		pr_perror("Can't allocate stack");
-		return -1;
-	}
-
 	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
 		if (nsid->nd != &net_ns_desc)
 			continue;
 
 		if (root_user_ns && nsid->user_ns != root_user_ns) {
-			pid = clone(create_net_ns, stack + stack_size, CLONE_VM | CLONE_FILES | SIGCHLD, nsid);
-			if (pid < 0) {
-				pr_perror("Can't clone");
-				goto err;
-			}
-			if (waitpid(pid, &status, 0) != pid || !WIFEXITED(status) || WEXITSTATUS(status)) {
-				pr_perror("Child process waiting %d", status);
+			if (call_in_child_process(create_net_ns, nsid) < 0)
 				goto err;
-			}
 		} else {
 			if (do_create_net_ns(nsid))
 				goto err;
-
 		}
-
 	}
 
 	close_service_fd(NS_FD_OFF);
 	ret = 0;
 err:
-	munmap(stack, stack_size);
+	if (ret)
+		pr_err("Can't create net_ns\n");
 
 	return ret;
 }
diff --git a/criu/util.c b/criu/util.c
index bbe8f200..cbbafeae 100644
--- a/criu/util.c
+++ b/criu/util.c
@@ -1367,3 +1367,37 @@  int open_fd_of_real_pid(pid_t pid, int fd, int flags)
 		BUG();
 	return ret;
 }
+
+int call_in_child_process(int (*fn)(void *), void *arg)
+{
+	int size, status, ret = -1;
+	char *stack;
+	pid_t pid;
+
+	size = 2 * 1024 * 1024; /* 2Mb */
+	stack = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	if (stack == MAP_FAILED) {
+		pr_perror("Can't allocate stack");
+		return -1;
+	}
+	pid = clone(fn, stack + size, CLONE_VM | CLONE_FILES | SIGCHLD, arg);
+	if (pid == -1) {
+		pr_perror("Can't clone");
+		goto out_munmap;
+	}
+	errno = 0;
+	if (waitpid(pid, &status, 0) != pid || !WIFEXITED(status) || WEXITSTATUS(status)) {
+		pr_err("Can't wait or bad status: errno=%d, status=%d", errno, status);
+		goto out_close;
+	}
+	ret = 0;
+out_close:
+	/*
+	 * Child opened PROC_SELF for pid. If we create one more child
+	 * with the same pid later, it will try to reuse this /proc/self.
+	 */
+	close_pid_proc();
+out_munmap:
+	munmap(stack, size);
+	return ret;
+}

Comments

Andrey Vagin March 23, 2017, 10:50 p.m.
Applied

On Thu, Mar 23, 2017 at 01:25:23PM +0300, Kirill Tkhai wrote:
> Child opens PROC_SELF, populates open_proc_self_pid and exits. If parent creates
> one more child with the same pid later, the new child will try to reuse PROC_SELF,
> set by exited child. So, we need to close PROC_SELF after the first child has finished.
> 
> We have this issue in two places, which have the same code. Let's move the code
> into new function call_in_child_process() and fix the issue there using close_pid_proc().
> 
> https://travis-ci.org/tkhai/criu/builds/214182862
> 
> v2: Introduce the helper call_in_child_process() and fix issue there.
> 
> Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
> ---
>  criu/include/util.h |    2 ++
>  criu/namespaces.c   |   30 ++----------------------------
>  criu/net.c          |   25 ++++---------------------
>  criu/util.c         |   34 ++++++++++++++++++++++++++++++++++
>  4 files changed, 42 insertions(+), 49 deletions(-)
> 
> diff --git a/criu/include/util.h b/criu/include/util.h
> index c1d38c93..0428490b 100644
> --- a/criu/include/util.h
> +++ b/criu/include/util.h
> @@ -309,4 +309,6 @@ extern int epoll_prepare(int nr_events, struct epoll_event **evs);
>  
>  extern int open_fd_of_real_pid(pid_t pid, int fd, int flags);
>  
> +extern int call_in_child_process(int (*fn)(void *), void *arg);
> +
>  #endif /* __CR_UTIL_H__ */
> diff --git a/criu/namespaces.c b/criu/namespaces.c
> index 77308ccb..7637c34e 100644
> --- a/criu/namespaces.c
> +++ b/criu/namespaces.c
> @@ -993,11 +993,8 @@ static int dump_user_ns(void *arg)
>  
>  int collect_user_ns(struct ns_id *ns, void *oarg)
>  {
> -	int status, stack_size;
>  	struct ns_id *p_ns;
> -	pid_t pid = -1;
>  	UsernsEntry *e;
> -	char *stack;
>  
>  	p_ns = ns->parent;
>  
> @@ -1021,32 +1018,9 @@ int collect_user_ns(struct ns_id *ns, void *oarg)
>  		 * we need to enter its parent ns. As entered to user_ns
>  		 * task has no a way back, we create a child for that.
>  		 * NS_ROOT is dumped w/o clone(), it's xids maps is relatively
> -		 * to NS_CRIU. We use CLONE_VM to make child share our memory,
> -		 * and to allow us see allocated maps, he do. Child's open_proc()
> -		 * may do changes about CRIU's internal files states in memory,
> -		 * so pass CLONE_FILES to reflect that.
> +		 * to NS_CRIU.
>  		 */
> -		stack_size = 2 * 1024 * 1024;
> -		stack = mmap(NULL, stack_size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> -		if (stack == MAP_FAILED) {
> -			pr_perror("Can't allocate stack");
> -			return -1;
> -		}
> -		pid = clone(dump_user_ns, stack + stack_size, CLONE_VM | CLONE_FILES | SIGCHLD, ns);
> -		if (pid == -1) {
> -			pr_perror("Can't clone");
> -			return -1;
> -		}
> -		if (waitpid(pid, &status, 0) != pid) {
> -			pr_perror("Unable to wait the %d process", pid);
> -			return -1;
> -		}
> -		if (!WIFEXITED(status) || WEXITSTATUS(status)) {
> -			pr_err("Can't dump nested user_ns: %x\n", status);
> -			return -1;
> -		}
> -		munmap(stack, stack_size);
> -		return 0;
> +		return call_in_child_process(dump_user_ns, ns);
>  	} else {
>  		if (__dump_user_ns(ns))
>  			return -1;
> diff --git a/criu/net.c b/criu/net.c
> index 45f6fcee..0737a61f 100644
> --- a/criu/net.c
> +++ b/criu/net.c
> @@ -1760,47 +1760,30 @@ static int create_net_ns(void *arg)
>  
>  int prepare_net_namespaces()
>  {
> -	int status, stack_size, ret = -1;
>  	struct ns_id *nsid;
> -	char *stack;
> -	pid_t pid;
> +	int ret = -1;
>  
>  	if (!(root_ns_mask & CLONE_NEWNET))
>  		return 0;
>  
> -	stack_size = 2 * 1024 * 1024;
> -	stack = mmap(NULL, stack_size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> -	if (stack == MAP_FAILED) {
> -		pr_perror("Can't allocate stack");
> -		return -1;
> -	}
> -
>  	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
>  		if (nsid->nd != &net_ns_desc)
>  			continue;
>  
>  		if (root_user_ns && nsid->user_ns != root_user_ns) {
> -			pid = clone(create_net_ns, stack + stack_size, CLONE_VM | CLONE_FILES | SIGCHLD, nsid);
> -			if (pid < 0) {
> -				pr_perror("Can't clone");
> -				goto err;
> -			}
> -			if (waitpid(pid, &status, 0) != pid || !WIFEXITED(status) || WEXITSTATUS(status)) {
> -				pr_perror("Child process waiting %d", status);
> +			if (call_in_child_process(create_net_ns, nsid) < 0)
>  				goto err;
> -			}
>  		} else {
>  			if (do_create_net_ns(nsid))
>  				goto err;
> -
>  		}
> -
>  	}
>  
>  	close_service_fd(NS_FD_OFF);
>  	ret = 0;
>  err:
> -	munmap(stack, stack_size);
> +	if (ret)
> +		pr_err("Can't create net_ns\n");
>  
>  	return ret;
>  }
> diff --git a/criu/util.c b/criu/util.c
> index bbe8f200..cbbafeae 100644
> --- a/criu/util.c
> +++ b/criu/util.c
> @@ -1367,3 +1367,37 @@ int open_fd_of_real_pid(pid_t pid, int fd, int flags)
>  		BUG();
>  	return ret;
>  }
> +
> +int call_in_child_process(int (*fn)(void *), void *arg)
> +{
> +	int size, status, ret = -1;
> +	char *stack;
> +	pid_t pid;
> +
> +	size = 2 * 1024 * 1024; /* 2Mb */
> +	stack = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +	if (stack == MAP_FAILED) {
> +		pr_perror("Can't allocate stack");
> +		return -1;
> +	}
> +	pid = clone(fn, stack + size, CLONE_VM | CLONE_FILES | SIGCHLD, arg);
> +	if (pid == -1) {
> +		pr_perror("Can't clone");
> +		goto out_munmap;
> +	}
> +	errno = 0;
> +	if (waitpid(pid, &status, 0) != pid || !WIFEXITED(status) || WEXITSTATUS(status)) {
> +		pr_err("Can't wait or bad status: errno=%d, status=%d", errno, status);
> +		goto out_close;
> +	}
> +	ret = 0;
> +out_close:
> +	/*
> +	 * Child opened PROC_SELF for pid. If we create one more child
> +	 * with the same pid later, it will try to reuse this /proc/self.
> +	 */
> +	close_pid_proc();
> +out_munmap:
> +	munmap(stack, size);
> +	return ret;
> +}
>