[v2] namespace: mark mount namespaces as populated after the forking stage

Submitted by Andrei Vagin on July 5, 2016, 8:53 p.m.

Details

Message ID 1467752009-15220-1-git-send-email-avagin@openvz.org
State Rejected
Series "namespace: mark mount namespaces as populated after the forking stage"
Headers show

Commit Message

Andrei Vagin July 5, 2016, 8:53 p.m.
From: Andrew Vagin <avagin@virtuozzo.com>

Currently we mark a mount namespaces as populated when a target process
(ns_pid) switches into it. But if a process inherited the right
namespace from a parent, it doesn't call do_restore_task_mnt_ns()
and a namespace can remain unmarked.

af55c059fb6 ("mount: fix a race between restoring namespaces and file mappings")
After this patch we could simplify logic around ns_populated. Currently
it's a futex, but nodoby waits on it. We can set ns_populated when we
are going to close namespace descriptors. To avoid additional locks, we
can do this when all task pass the forking stage and don't start the
next stage.

https://jira.sw.ru/browse/PSBM-48222

v2: add a comment why we wait a CR_STATE_FORKING stage

Signed-off-by: Andrew Vagin <avagin@virtuozzo.com>
---
 criu/cr-restore.c         |  7 ++++---
 criu/include/namespaces.h |  2 +-
 criu/mount.c              | 14 ++++++--------
 criu/namespaces.c         |  4 ++--
 4 files changed, 13 insertions(+), 14 deletions(-)

Patch hide | download patch | download mbox

diff --git a/criu/cr-restore.c b/criu/cr-restore.c
index 284faaf..4176517 100644
--- a/criu/cr-restore.c
+++ b/criu/cr-restore.c
@@ -1431,16 +1431,17 @@  static int restore_task_with_children(void *_arg)
 
 	restore_pgid();
 
-	if (restore_finish_stage(CR_STATE_FORKING) < 0)
-		goto err;
-
 	if (current->parent == NULL) {
+		futex_wait_while_gt(&task_entries->nr_in_progress, 1);
 		if (depopulate_roots_yard())
 			goto err;
 
 		fini_restore_mntns();
 	}
 
+	if (restore_finish_stage(CR_STATE_FORKING) < 0)
+		goto err;
+
 	if (restore_one_task(current->pid.virt, ca->core))
 		goto err;
 
diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
index 4cd2c85..3a3f7d4 100644
--- a/criu/include/namespaces.h
+++ b/criu/include/namespaces.h
@@ -93,7 +93,7 @@  struct ns_id {
 	 * are mounted) and other tasks may do setns on it
 	 * and proceed.
 	 */
-	futex_t ns_populated;
+	bool ns_populated;
 
 	union {
 		struct {
diff --git a/criu/mount.c b/criu/mount.c
index 911edad..22cb822 100644
--- a/criu/mount.c
+++ b/criu/mount.c
@@ -2856,7 +2856,7 @@  static int rst_collect_local_mntns(enum ns_type typ)
 	if (!mntinfo)
 		return -1;
 
-	futex_set(&nsid->ns_populated, 1);
+	nsid->ns_populated = true;
 	return 0;
 }
 
@@ -3112,9 +3112,6 @@  static int do_restore_task_mnt_ns(struct ns_id *nsid, struct pstree_item *curren
 	}
 	close(fd);
 
-	if (nsid->ns_pid == current->pid.virt)
-		futex_set_and_wake(&nsid->ns_populated, 1);
-
 	return 0;
 }
 
@@ -3161,9 +3158,10 @@  void fini_restore_mntns(void)
 	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
 		if (nsid->nd != &mnt_ns_desc)
 			continue;
-		close(nsid->mnt.ns_fd);
+		close_safe(&nsid->mnt.ns_fd);
 		if (nsid->type != NS_ROOT)
-			close(nsid->mnt.root_fd);
+			close_safe(&nsid->mnt.root_fd);
+		nsid->ns_populated = true;
 	}
 }
 
@@ -3431,7 +3429,7 @@  ns_created:
 			if (nsid->mnt.ns_fd < 0)
 				goto err;
 			/* we set ns_populated so we don't need to open root_fd */
-			futex_set(&nsid->ns_populated, 1);
+			nsid->ns_populated = true;
 			continue;
 		}
 
@@ -3565,7 +3563,7 @@  int mntns_get_root_fd(struct ns_id *mntns)
 	 * root from the root task.
 	 */
 
-	if (!futex_get(&mntns->ns_populated)) {
+	if (!mntns->ns_populated) {
 		int fd;
 
 		fd = open_proc(root_item->pid.virt, "fd/%d", mntns->mnt.root_fd);
diff --git a/criu/namespaces.c b/criu/namespaces.c
index 04a1b11..f9a7779 100644
--- a/criu/namespaces.c
+++ b/criu/namespaces.c
@@ -298,7 +298,7 @@  struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid,
 	if (nsid) {
 		nsid->type = type;
 		nsid_add(nsid, nd, id, pid);
-		futex_set(&nsid->ns_populated, 0);
+		nsid->ns_populated = false;
 	}
 
 	return nsid;
@@ -417,7 +417,7 @@  static unsigned int generate_ns_id(int pid, unsigned int kid, struct ns_desc *nd
 
 	nsid->type = type;
 	nsid->kid = kid;
-	futex_set(&nsid->ns_populated, 1);
+	nsid->ns_populated = true;
 	nsid_add(nsid, nd, ns_next_id++, pid);
 
 found:

Comments

Andrey Vagin July 5, 2016, 9 p.m.
On Tue, Jul 05, 2016 at 11:53:29PM +0300, Andrey Vagin wrote:
> From: Andrew Vagin <avagin@virtuozzo.com>
> 
> Currently we mark a mount namespaces as populated when a target process
> (ns_pid) switches into it. But if a process inherited the right
> namespace from a parent, it doesn't call do_restore_task_mnt_ns()
> and a namespace can remain unmarked.
> 
> af55c059fb6 ("mount: fix a race between restoring namespaces and file mappings")
> After this patch we could simplify logic around ns_populated. Currently
> it's a futex, but nodoby waits on it. We can set ns_populated when we
> are going to close namespace descriptors. To avoid additional locks, we
> can do this when all task pass the forking stage and don't start the
> next stage.
> 
> https://jira.sw.ru/browse/PSBM-48222
> 
> v2: add a comment why we wait a CR_STATE_FORKING stage

Pls, ignore this patch. I forgot to commit changes

> 
> Signed-off-by: Andrew Vagin <avagin@virtuozzo.com>
> ---
>  criu/cr-restore.c         |  7 ++++---
>  criu/include/namespaces.h |  2 +-
>  criu/mount.c              | 14 ++++++--------
>  criu/namespaces.c         |  4 ++--
>  4 files changed, 13 insertions(+), 14 deletions(-)
> 
> diff --git a/criu/cr-restore.c b/criu/cr-restore.c
> index 284faaf..4176517 100644
> --- a/criu/cr-restore.c
> +++ b/criu/cr-restore.c
> @@ -1431,16 +1431,17 @@ static int restore_task_with_children(void *_arg)
>  
>  	restore_pgid();
>  
> -	if (restore_finish_stage(CR_STATE_FORKING) < 0)
> -		goto err;
> -
>  	if (current->parent == NULL) {
> +		futex_wait_while_gt(&task_entries->nr_in_progress, 1);
>  		if (depopulate_roots_yard())
>  			goto err;
>  
>  		fini_restore_mntns();
>  	}
>  
> +	if (restore_finish_stage(CR_STATE_FORKING) < 0)
> +		goto err;
> +
>  	if (restore_one_task(current->pid.virt, ca->core))
>  		goto err;
>  
> diff --git a/criu/include/namespaces.h b/criu/include/namespaces.h
> index 4cd2c85..3a3f7d4 100644
> --- a/criu/include/namespaces.h
> +++ b/criu/include/namespaces.h
> @@ -93,7 +93,7 @@ struct ns_id {
>  	 * are mounted) and other tasks may do setns on it
>  	 * and proceed.
>  	 */
> -	futex_t ns_populated;
> +	bool ns_populated;
>  
>  	union {
>  		struct {
> diff --git a/criu/mount.c b/criu/mount.c
> index 911edad..22cb822 100644
> --- a/criu/mount.c
> +++ b/criu/mount.c
> @@ -2856,7 +2856,7 @@ static int rst_collect_local_mntns(enum ns_type typ)
>  	if (!mntinfo)
>  		return -1;
>  
> -	futex_set(&nsid->ns_populated, 1);
> +	nsid->ns_populated = true;
>  	return 0;
>  }
>  
> @@ -3112,9 +3112,6 @@ static int do_restore_task_mnt_ns(struct ns_id *nsid, struct pstree_item *curren
>  	}
>  	close(fd);
>  
> -	if (nsid->ns_pid == current->pid.virt)
> -		futex_set_and_wake(&nsid->ns_populated, 1);
> -
>  	return 0;
>  }
>  
> @@ -3161,9 +3158,10 @@ void fini_restore_mntns(void)
>  	for (nsid = ns_ids; nsid != NULL; nsid = nsid->next) {
>  		if (nsid->nd != &mnt_ns_desc)
>  			continue;
> -		close(nsid->mnt.ns_fd);
> +		close_safe(&nsid->mnt.ns_fd);
>  		if (nsid->type != NS_ROOT)
> -			close(nsid->mnt.root_fd);
> +			close_safe(&nsid->mnt.root_fd);
> +		nsid->ns_populated = true;
>  	}
>  }
>  
> @@ -3431,7 +3429,7 @@ ns_created:
>  			if (nsid->mnt.ns_fd < 0)
>  				goto err;
>  			/* we set ns_populated so we don't need to open root_fd */
> -			futex_set(&nsid->ns_populated, 1);
> +			nsid->ns_populated = true;
>  			continue;
>  		}
>  
> @@ -3565,7 +3563,7 @@ int mntns_get_root_fd(struct ns_id *mntns)
>  	 * root from the root task.
>  	 */
>  
> -	if (!futex_get(&mntns->ns_populated)) {
> +	if (!mntns->ns_populated) {
>  		int fd;
>  
>  		fd = open_proc(root_item->pid.virt, "fd/%d", mntns->mnt.root_fd);
> diff --git a/criu/namespaces.c b/criu/namespaces.c
> index 04a1b11..f9a7779 100644
> --- a/criu/namespaces.c
> +++ b/criu/namespaces.c
> @@ -298,7 +298,7 @@ struct ns_id *rst_new_ns_id(unsigned int id, pid_t pid,
>  	if (nsid) {
>  		nsid->type = type;
>  		nsid_add(nsid, nd, id, pid);
> -		futex_set(&nsid->ns_populated, 0);
> +		nsid->ns_populated = false;
>  	}
>  
>  	return nsid;
> @@ -417,7 +417,7 @@ static unsigned int generate_ns_id(int pid, unsigned int kid, struct ns_desc *nd
>  
>  	nsid->type = type;
>  	nsid->kid = kid;
> -	futex_set(&nsid->ns_populated, 1);
> +	nsid->ns_populated = true;
>  	nsid_add(nsid, nd, ns_next_id++, pid);
>  
>  found:
> -- 
> 2.7.4
>